# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible to run the analyzer commands. """

import re
import os
import os.path
import tempfile
import functools
import subprocess
import logging
from libscanbuild.compilation import classify_source, compiler_language
from libscanbuild.clang import get_version, get_arguments
from libscanbuild.shell import decode

__all__ = ['run']

# To have good results from static analyzer certain compiler options shall be
# omitted. The compiler flag filtering only affects the static analyzer run.
#
# Keys are the option name, value number of options to skip
IGNORED_FLAGS = {
    '-c': 0,  # compile option will be overwritten
    '-fsyntax-only': 0,  # static analyzer option will be overwritten
    '-o': 1,  # will set up own output file
    # flags below are inherited from the perl implementation.
    '-g': 0,
    '-save-temps': 0,
    '-install_name': 1,
    '-exported_symbols_list': 1,
    '-current_version': 1,
    '-compatibility_version': 1,
    '-init': 1,
    '-e': 1,
    '-seg1addr': 1,
    '-bundle_loader': 1,
    '-multiply_defined': 1,
    '-sectorder': 3,
    '--param': 1,
    '--serialize-diagnostics': 1
}


def require(required):
    """ Decorator for checking the required values in state.

    It checks that every key listed in 'required' is present in the first
    positional argument of the decorated function and stops (by raising
    KeyError) when any of those is missing. Otherwise the decorated
    function is called with the unchanged arguments. """

    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            # the state is always the first positional argument
            for key in required:
                if key not in args[0]:
                    raise KeyError('{0} not passed to {1}'.format(
                        key, function.__name__))

            return function(*args, **kwargs)

        return wrapper

    return decorator


@require(['command',  # entry from compilation database
          'directory',  # entry from compilation database
          'file',  # entry from compilation database
          'clang',  # clang executable name (and path)
          'direct_args',  # arguments from command line
          'force_debug',  # kill non debug macros
          'output_dir',  # where generated report files shall go
          'output_format',  # it's 'plist' or 'html' or both
          'output_failures'])  # generate crash reports or not
def run(opts):
    """ Entry point to run (or not) static analyzer against a single entry
    of the compilation database.

    This complex task is decomposed into smaller methods which are calling
    each other in chain. If the analysis is not possible the given method
    just return and break the chain.

    The passed parameter is a python dictionary. Each method first check
    that the needed parameters received. (This is done by the 'require'
    decorator. It's like an 'assert' to check the contract between the
    caller and the called method.)

    The dictionary is modified in place: 'command' is removed from it and
    the values computed by 'classify_parameters' are merged into it.

    Returns whatever the chain returns, or None when any exception was
    raised on the way (the exception is logged, not propagated). """

    try:
        command = opts.pop('command')
        # the command is either an argument list or a single string which
        # needs to be decoded first
        command = command if isinstance(command, list) else decode(command)
        logging.debug("Run analyzer against '%s'", command)
        opts.update(classify_parameters(command))

        return arch_check(opts)
    except Exception:
        # top level boundary of a single analyzer run: log and carry on
        logging.error("Problem occured during analyzis.", exc_info=1)
        return None


@require(['clang', 'directory', 'flags', 'file', 'output_dir', 'language',
          'error_type', 'error_output', 'exit_code'])
def report_failure(opts):
    """ Create report when analyzer failed.

    The major report is the preprocessor output. The output filename generated
    randomly. The compiler output also captured into '.stderr.txt' file.
    And some more execution context also saved into '.info.txt' file.

    Returns a dictionary with the 'error_output' and 'exit_code' values
    taken unchanged from the passed state. """

    def extension(opts):
        """ Generate preprocessor file extension. """

        mapping = {'objective-c++': '.mii', 'objective-c': '.mi', 'c++': '.ii'}
        return mapping.get(opts['language'], '.i')

    def destination(opts):
        """ Creates failures directory if not exists yet. """

        name = os.path.join(opts['output_dir'], 'failures')
        if not os.path.isdir(name):
            try:
                os.makedirs(name)
            except OSError:
                # the directory may have appeared between the check and the
                # creation; that is fine as long as it exists now
                if not os.path.isdir(name):
                    raise
        return name

    error = opts['error_type']
    (descriptor, name) = tempfile.mkstemp(suffix=extension(opts),
                                          prefix='clang_' + error + '_',
                                          dir=destination(opts))
    # only the unique file name is needed, clang writes the content
    os.close(descriptor)
    cwd = opts['directory']
    cmd = get_arguments([opts['clang'], '-fsyntax-only', '-E'] +
                        opts['flags'] + [opts['file'], '-o', name], cwd)
    logging.debug('exec command in %s: %s', cwd, ' '.join(cmd))
    subprocess.call(cmd, cwd=cwd)
    # write general information about the crash
    with open(name + '.info.txt', 'w') as info_file:
        info_file.write(opts['file'] + os.linesep)
        info_file.write(error.title().replace('_', ' ') + os.linesep)
        info_file.write(' '.join(cmd) + os.linesep)
        info_file.write(' '.join(os.uname()) + os.linesep)
        info_file.write(get_version(opts['clang']))
    # write the captured output too
    with open(name + '.stderr.txt', 'w') as stderr_file:
        stderr_file.writelines(opts['error_output'])
    # return with the previous step exit code and output
    return {
        'error_output': opts['error_output'],
        'exit_code': opts['exit_code']
    }


@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'output_dir',
          'output_format'])
def run_analyzer(opts, continuation=report_failure):
    """ It assembles the analysis command line and executes it. Capture the
    output of the analysis and returns with it. If failure reports are
    requested, it calls the continuation to generate it.

    Without a failure report the result is a dictionary with 'error_output'
    (the lines the child wrote to stdout and stderr) and 'exit_code' (the
    child return code). Otherwise it is the return value of the
    continuation. """

    def target():
        """ Creates output file name for reports. """
        if opts['output_format'] in {'plist', 'plist-html'}:
            (descriptor, name) = tempfile.mkstemp(prefix='report-',
                                                  suffix='.plist',
                                                  dir=opts['output_dir'])
            os.close(descriptor)
            return name
        return opts['output_dir']

    cwd = opts['directory']
    cmd = get_arguments([opts['clang'], '--analyze'] + opts['direct_args'] +
                        opts['flags'] + [opts['file'], '-o', target()],
                        cwd)
    logging.debug('exec command in %s: %s', cwd, ' '.join(cmd))
    child = subprocess.Popen(cmd,
                             cwd=cwd,
                             universal_newlines=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
    output = child.stdout.readlines()
    child.stdout.close()
    # do report details if it were asked
    child.wait()
    if opts.get('output_failures', False) and child.returncode:
        # subprocess reports a child killed by signal N as return code -N,
        # every positive value is a regular (non crashing) exit code
        error_type = 'crash' if child.returncode < 0 else 'other_error'
        opts.update({
            'error_type': error_type,
            'error_output': output,
            'exit_code': child.returncode
        })
        return continuation(opts)
    # return the output for logging and exit code for testing
    return {'error_output': output, 'exit_code': child.returncode}


@require(['flags', 'force_debug'])
def filter_debug_flags(opts, continuation=run_analyzer):
    """ Filter out nondebug macros when requested.

    The 'force_debug' key is removed from the state; when its value was
    true, '-UNDEBUG' is appended to the flags. Returns the result of the
    continuation. """

    if opts.pop('force_debug'):
        # lazy implementation just append an undefine macro at the end
        opts.update({'flags': opts['flags'] + ['-UNDEBUG']})

    return continuation(opts)


@require(['file', 'directory'])
def set_file_path_relative(opts, continuation=filter_debug_flags):
    """ Set source file path to relative to the working directory.

    The only purpose of this function is to pass the SATestBuild.py tests.

    Returns the result of the continuation. """

    opts.update({'file': os.path.relpath(opts['file'], opts['directory'])})

    return continuation(opts)


@require(['language', 'compiler', 'file', 'flags'])
def language_check(opts, continuation=set_file_path_relative):
    """ Find out the language from command line parameters or file name
    extension. The decision also influenced by the compiler invocation.

    The 'language' and 'compiler' keys are removed from the state. When
    the language is known and accepted, it is stored back as 'language',
    '-x <language>' is prepended to the flags and the result of the
    continuation is returned. Otherwise the chain is broken and None is
    returned. """

    accepted = frozenset({
        'c', 'c++', 'objective-c', 'objective-c++', 'c-cpp-output',
        'c++-cpp-output', 'objective-c-cpp-output'
    })

    # language can be given as a parameter...
    language = opts.pop('language')
    compiler = opts.pop('compiler')
    # ... or find out from source file extension
    if language is None and compiler is not None:
        language = classify_source(opts['file'], compiler == 'c')

    if language is None:
        logging.debug('skip analysis, language not known')
        return None
    elif language not in accepted:
        logging.debug('skip analysis, language not supported')
        return None
    else:
        logging.debug('analysis, language: %s', language)
        opts.update({'language': language,
                     'flags': ['-x', language] + opts['flags']})
        return continuation(opts)


@require(['arch_list', 'flags'])
def arch_check(opts, continuation=language_check):
    """ Do run analyzer through one of the given architectures.

    The 'arch_list' key is removed from the state. With an empty list the
    continuation is called with untouched flags. With a non empty list the
    disabled architectures are dropped: if anything remains, the last one
    is prepended to the flags as '-arch <name>' and the continuation is
    called, otherwise the chain is broken and None is returned. """

    disabled = frozenset({'ppc', 'ppc64'})

    received_list = opts.pop('arch_list')
    if received_list:
        # filter out disabled architectures and -arch switches
        filtered_list = [a for a in received_list if a not in disabled]
        if filtered_list:
            # There should be only one arch given (or the same multiple
            # times). If there are multiple arch are given and are not
            # the same, those should not change the pre-processing step.
            # But that's the only pass we have before run the analyzer.
            current = filtered_list.pop()
            logging.debug('analysis, on arch: %s', current)

            opts.update({'flags': ['-arch', current] + opts['flags']})
            return continuation(opts)
        else:
            logging.debug('skip analysis, found not supported arch')
            return None
    else:
        logging.debug('analysis, on default arch')
        return continuation(opts)


def classify_parameters(command):
    """ Prepare compiler flags (filters some and add others) and take out
    language (-x) and architecture (-arch) flags for future processing.

    The first element of 'command' (the compiler itself) is not treated as
    an argument. Returns a dictionary with the keys 'flags', 'arch_list',
    'language' and 'compiler'.

    A flag which expects a value but is the last element of the command
    makes 'next' raise StopIteration; that is not handled here. """

    result = {
        'flags': [],  # the filtered compiler flags
        'arch_list': [],  # list of architecture flags
        'language': None,  # compilation language, None, if not specified
        'compiler': compiler_language(command)  # 'c' or 'c++'
    }

    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # take arch flags into a separate basket
        if arg == '-arch':
            result['arch_list'].append(next(args))
        # take language
        elif arg == '-x':
            result['language'] = next(args)
        # parameters which looks source file are not flags
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            pass
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # the table tells how many following values belong to the flag
            count = IGNORED_FLAGS[arg]
            for _ in range(count):
                next(args)
        # we don't care about extra warnings, but we should suppress ones
        # that we don't want to see.
        elif re.match(r'^-W.+', arg) and not re.match(r'^-Wno-.+', arg):
            pass
        # and consider everything else as compilation flag.
        else:
            result['flags'].append(arg)

    return result