#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import argparse
import bisect
import getopt
import os
import re
import subprocess
import sys

# Module-wide state.  The command-line handling at the bottom of the file
# overwrites most of these before the symbolization loop starts.

# Cache: binary path -> ChainSymbolizer used for that binary.
symbolizers = {}
# When True, the external commands and their input are echoed to stdout.
DEBUG = False
# When True, the external symbolizers are asked to demangle function names.
demangle = False
# Optional prefix prepended to 'addr2line' (cross-compilation toolchains).
binutils_prefix = None
# Optional directory prepended to binary names by sysroot_path_filter().
sysroot_path = None
# Optional callable mapping a binary name from the log to the path to open.
binary_name_filter = None
# Optional list of regex patterns that fix_filename() cuts from file names.
fix_filename_patterns = None
# File object the log is read from.
logfile = sys.stdin
# When False, a failure of the Breakpad/LLVM symbolizers raises instead of
# falling back to the system symbolizer (addr2line/atos).
allow_system_symbolizer = True

# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a source location reported by a symbolizer.

    Each entry of the module-level fix_filename_patterns (when set) is used
    as a regular expression: everything up to and including its match is
    removed.  Locations inside the ASan runtime sources are then collapsed
    to '_asan_rtl_' and 'crtstuff.c:0' is replaced by '???:0'.

    Args:
      file_name: location string as printed by a symbolizer.
    Returns:
      the rewritten location string.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    return file_name


def sysroot_path_filter(binary_name):
    """Return binary_name prefixed with the module-level sysroot_path."""
    return sysroot_path + binary_name


def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    Args:
      addr: address string including the '0x' prefix.
    Returns:
      'x86_64' if the address has more than 8 hex digits, else 'i386'.
    """
    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    if len(addr) > 10:
        return 'x86_64'
    return 'i386'


class Symbolizer(object):
    """Base class of all symbolizers; symbolizes nothing by itself."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
              instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by a long-running llvm-symbolizer child process."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Start the llvm-symbolizer process.

        Args:
          symbolizer_path: executable to run.
          default_arch: value passed as --default-arch.
          system: OS name; .dSYM hints are only passed on 'Darwin'.
          dsym_hints: optional iterable of --dsym-hint values.  The object
            is stored as given (not copied); None means "no hints".
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # A fresh list per instance: a literal [] default would be one
        # object shared by every instance created without hints.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Spawn llvm-symbolizer.

        Returns:
          the subprocess.Popen object, or None if the process could not be
          started (OSError).
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=linkage',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            for hint in self.dsym_hints:
                cmd.append('--dsym-hint=%s' % hint)
        if DEBUG:
            print(' '.join(cmd))
        try:
            # bufsize=0 and universal_newlines=True are spelled out so that
            # the pipes are unbuffered text streams on Python 3 as well
            # (on Python 2 unbuffered is already the default).
            return subprocess.Popen(cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    bufsize=0,
                                    universal_newlines=True)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          list of '<addr> in <function> <file>' strings, one per (inlined)
          frame, or None if the process is not running, the exchange failed
          or only unknown ('??') frames were reported.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write('%s\n' % symbolizer_input)
            # The reply is a sequence of (function, location) line pairs
            # terminated by an empty line.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberately best-effort: any failure means "no answer" so
            # that the caller can fall back to the next symbolizer.
            result = []
        return result or None


def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    LLVM_SYMBOLIZER_PATH is consulted first, then ASAN_SYMBOLIZER_PATH;
    if neither is set, 'llvm-symbolizer' is assumed to be in PATH.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
        if not symbolizer_path:
            # Assume llvm-symbolizer is in PATH.
            symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an addr2line process bound to one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Bogus address sent after every query; its '??' reply marks the
        # end of the (variable-length) list of inlined frames.
        self.output_terminator = -1

    def open_addr2line(self):
        """Spawn '<binutils_prefix>addr2line -fi [--demangle] -e <binary>'."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-fi']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # See LLVMSymbolizer.open_llvm_symbolizer for bufsize and
        # universal_newlines.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None if this instance serves a different binary; otherwise a
          non-empty list of '<addr> in <function> <file>' strings.  If the
          exchange with addr2line fails, a '?? ??:0' frame is appended.
        """
        if self.binary != binary:
            return None
        frames = []
        try:
            self.pipe.stdin.write('%s\n' % offset)
            self.pipe.stdin.write('%s\n' % self.output_terminator)
            is_first_frame = True
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                file_name = self.pipe.stdout.readline().rstrip()
                if is_first_frame:
                    # The first frame is always kept, even if unknown.
                    is_first_frame = False
                elif function_name in ['', '??']:
                    # Reply to the terminator: end of this query.
                    assert file_name == function_name
                    break
                frames.append((function_name, file_name))
        except Exception:
            frames.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function_name, fix_filename(location))
                for (function_name, location) in frames]


class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a child on a new pty and exec args in it.

        Args:
          args: command line (list); args[0] is looked up in PATH.
          close_stderr: if True, the child's stderr goes to /dev/null.
        """
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            try:
                if close_stderr:
                    # Opened for writing so that the child's writes to
                    # stderr are discarded rather than failing.
                    dev_null = os.open('/dev/null', os.O_WRONLY)
                    os.dup2(dev_null, 2)
                os.execvp(args[0], args)
            finally:
                # Only reached if exec (or the redirection) failed: leave
                # immediately instead of continuing to run a forked copy
                # of this script.
                os._exit(127)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its one-line reply."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child, without trailing whitespace."""
        return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by an 'atos' process bound to one binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Start 'atos -o <binary> -arch <arch>' behind a pty."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None if this instance serves a different binary; otherwise a
          one-element list.  If the atos reply cannot be parsed it is
          returned verbatim after '<addr> in '.
        """
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if match:
            # Drop the argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', match.group(1))
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try a list of symbolizers in order; the first non-empty answer wins."""

    def __init__(self, symbolizer_list):
        """Args: symbolizer_list: list of symbolizers; None entries are skipped."""
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add a symbolizer to the end of the chain (lowest priority)."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for binary, or None.

    A symbolizer is only created when the BREAKPAD_SUFFIX environment
    variable is set and the file '<binary><suffix>' exists.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform symbolizer for binary (atos or addr2line).

    Returns None for systems other than Darwin, Linux and FreeBSD.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux' or system == 'FreeBSD':
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that looks addresses up in a Breakpad symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file.

        Args:
          filename: path of the Breakpad symbol file; its first line must
            be the MODULE record.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # 'with' closes the file again; the whole file is needed at once.
        with open(filename) as sym_file:
            lines = sym_file.readlines()
        self.files = []          # file number -> file name
        self.symbols = {}        # symbol address -> symbol name
        self.address_list = []   # sorted start addresses of line records
        self.addresses = {}      # start address -> (symbol addr, size,
                                 #                   line, file number)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the FILE, PUBLIC, FUNC and line records in lines.

        CFI and STACK records are ignored, blank lines are skipped.  Any
        other line is taken to be a line record starting with an address.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            record = fragments[0]
            if record == 'FILE':
                # File numbers must be dense and in order, because they
                # are used as indices into self.files.
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ['CFI', 'STACK']:
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Find the line record covering addr.

        Args:
          addr: offset in the binary (int).
        Returns:
          (symbol name, file name, line number), or None if no line record
          covers addr.
        """
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record starting below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          a one-element list '<addr> in <function> <file>:<line>', or None
          if the symbol file describes another binary or has no record for
          offset.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        # Diagnostic output only: printing unconditionally would inject the
        # list repr into the symbolized log.
        if DEBUG:
            print(result)
        return result


class SymbolizationLoop(object):
    """Read a log line by line and replace stack frames by symbolized ones."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Set up the per-platform line processor.

        Args:
          binary_name_filter: optional callable mapping the binary name
            found in the log to the path that should be opened.
          dsym_hint_producer: optional callable returning the .dSYM hints
            for a binary (only used on Darwin).
        Raises:
          Exception: on an unsupported (non-Windows) system.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame using the chain of symbolizers for binary.

        Returns:
          non-empty list of symbolized frame strings.
        Raises:
          Exception: if the Breakpad/LLVM symbolizers produce nothing and
            the module-level allow_system_symbolizer is False.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if self.last_llvm_symbolizer and not use_new_symbolizer:
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            else:
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format symbolized frames as numbered stack-trace lines.

        Returns the current input line unchanged if there is nothing to
        format; otherwise one '    #N <frame>' line per frame, numbering
        frames consecutively via self.frame_no.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            result.append('    #%s %s' % (str(self.frame_no),
                                          symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Symbolize every line of the module-level logfile to stdout."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged (minus trailing whitespace)."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize line if it is a stack frame, else return it unchanged.

        Returns:
          list of output lines (several if the frame has inlined callers).
        """
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line:
            if original_binary != binary:
                # The filtered name did not work: retry with the name that
                # actually appeared in the log (retrying with the filtered
                # name again could never give a different answer).
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='Example of use:\n'
               'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
               '-s "$HOME/SymbolFiles" < asan.log')
    parser.add_argument('path_to_cut', nargs='*',
                        help='pattern to be cut from the result file path ')
    parser.add_argument('-d', '--demangle', action='store_true',
                        help='demangle function names')
    parser.add_argument('-s', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')
    parser.add_argument('-c', metavar='CROSS_COMPILE',
                        help='set prefix for binutils')
    parser.add_argument('-l', '--logfile', default=sys.stdin,
                        type=argparse.FileType('r'),
                        help='set log file name to parse, default is stdin')
    args = parser.parse_args()
    # The assignments below rebind the module-level settings that the
    # symbolizers read.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    if args.c:
        binutils_prefix = args.c
    if args.logfile:
        logfile = args.logfile
    else:
        logfile = sys.stdin
    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()