xref: /aosp_15_r20/external/compiler-rt/lib/asan/scripts/asan_symbolize.py (revision 7c3d14c8b49c529e04be81a3ce6f5cc23712e4c6)
1*7c3d14c8STreehugger Robot#!/usr/bin/env python
2*7c3d14c8STreehugger Robot#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3*7c3d14c8STreehugger Robot#
4*7c3d14c8STreehugger Robot#                     The LLVM Compiler Infrastructure
5*7c3d14c8STreehugger Robot#
6*7c3d14c8STreehugger Robot# This file is distributed under the University of Illinois Open Source
7*7c3d14c8STreehugger Robot# License. See LICENSE.TXT for details.
8*7c3d14c8STreehugger Robot#
9*7c3d14c8STreehugger Robot#===------------------------------------------------------------------------===#
10*7c3d14c8STreehugger Robotimport argparse
11*7c3d14c8STreehugger Robotimport bisect
12*7c3d14c8STreehugger Robotimport getopt
13*7c3d14c8STreehugger Robotimport os
14*7c3d14c8STreehugger Robotimport re
15*7c3d14c8STreehugger Robotimport subprocess
16*7c3d14c8STreehugger Robotimport sys
17*7c3d14c8STreehugger Robot
# Module-wide settings. The command-line handling at the bottom of the file
# overwrites several of them before the symbolization loop starts.

# Cache of ChainSymbolizer objects, keyed by binary path.
symbolizers = {}
# When True, symbolizer command lines and their inputs are echoed to stdout.
DEBUG = False
# Whether the external symbolizers are asked to demangle function names.
demangle = False
# Prefix prepended to the 'addr2line' tool name, or None for no prefix.
binutils_prefix = None
# Prefix that sysroot_path_filter() prepends to binary paths, or None.
sysroot_path = None
# Callable handed to SymbolizationLoop to rewrite binary names, or None.
binary_name_filter = None
# Regular expressions that fix_filename() cuts from file names, or None.
fix_filename_patterns = None
# File-like object the log is read from.
logfile = sys.stdin
# When False, symbolize_address() raises instead of falling back to the
# system symbolizer (addr2line / atos).
allow_system_symbolizer = True
27*7c3d14c8STreehugger Robot
28*7c3d14c8STreehugger Robot# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Shorten a source location reported by a symbolizer.

  Each regular expression in fix_filename_patterns is removed from the name
  together with everything that precedes it.  Afterwards ASan runtime
  sources are collapsed to '_asan_rtl_' and 'crtstuff.c:0' locations are
  replaced with '???:0'.

  Args:
      file_name: location string to clean up.
  Returns:
      the rewritten location string.
  """
  for prefix_pattern in fix_filename_patterns or ():
    file_name = re.sub('.*' + prefix_pattern, '', file_name)
  # Built-in rewrites, applied in this order after the user patterns.
  builtin_rewrites = (
      ('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'),
      ('.*crtstuff.c:0', '???:0'),
  )
  for pattern, replacement in builtin_rewrites:
    file_name = re.sub(pattern, replacement, file_name)
  return file_name
36*7c3d14c8STreehugger Robot
def sysroot_path_filter(binary_name):
  """Return binary_name with the module-level sysroot_path prepended."""
  rebased_name = sysroot_path + binary_name
  return rebased_name
39*7c3d14c8STreehugger Robot
def guess_arch(addr):
  """Guess the architecture from the textual width of an address.

  Args:
      addr: address string such as '0x7f6e35cf2e45'.
  Returns:
      'x86_64' when the string is longer than '0x' plus 8 hex digits,
      'i386' otherwise.
  """
  # 10 = len('0x') + 8 hex digits.
  return 'x86_64' if len(addr) > 10 else 'i386'
46*7c3d14c8STreehugger Robot
class Symbolizer(object):
  """Interface shared by all symbolizer backends in this file."""

  def __init__(self):
    """The base class keeps no state."""

  def symbolize(self, addr, binary, offset):
    """Symbolize the given address (pair of binary and offset).

    Overridden in subclasses; this base implementation resolves nothing.

    Args:
        addr: virtual address of an instruction.
        binary: path to executable/shared object containing this instruction.
        offset: instruction offset in the @binary.
    Returns:
        list of strings (one string for each inlined frame) describing
        the code locations for this instruction (that is, function name, file
        name, line and column numbers).  The base class always returns None.
    """
    return None
65*7c3d14c8STreehugger Robot
66*7c3d14c8STreehugger Robot
class LLVMSymbolizer(Symbolizer):
  """Symbolizer that talks to an llvm-symbolizer child process over pipes."""

  def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
    """Store the configuration and try to launch the child process.

    Args:
        symbolizer_path: executable to run.
        default_arch: value passed as --default-arch.
        system: system name; .dSYM hints are only forwarded for 'Darwin'.
        dsym_hints: optional iterable of .dSYM hints.  Defaults to a new
            empty list for every instance.
    """
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    self.default_arch = default_arch
    self.system = system
    # A literal [] default would be one list shared by every instance.
    self.dsym_hints = [] if dsym_hints is None else dsym_hints
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Start the symbolizer process.

    Returns:
        the subprocess.Popen object, or None if launching raised OSError.
    """
    cmd = [self.symbolizer_path,
           '--use-symbol-table=true',
           '--demangle=%s' % demangle,
           '--functions=linkage',
           '--inlining=true',
           '--default-arch=%s' % self.default_arch]
    if self.system == 'Darwin':
      for hint in self.dsym_hints:
        cmd.append('--dsym-hint=%s' % hint)
    if DEBUG:
      print(' '.join(cmd))
    try:
      result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    except OSError:
      result = None
    return result

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
        list of '<addr> in <function> <file>' strings, one per reported
        frame, or None when the child is unavailable, the exchange fails or
        only '??' frames were reported.
    """
    if not self.pipe:
      return None
    result = []
    try:
      symbolizer_input = '"%s" %s' % (binary, offset)
      if DEBUG:
        print(symbolizer_input)
      self.pipe.stdin.write(symbolizer_input + '\n')
      # Frames arrive as function-name/file-name line pairs; an empty line
      # (or end of stream) ends the answer.
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = self.pipe.stdout.readline().rstrip()
        file_name = fix_filename(file_name)
        if (not function_name.startswith('??') or
            not file_name.startswith('??')):
          # Append only non-trivial frames.
          result.append('%s in %s %s' % (addr, function_name,
                                         file_name))
    except Exception:
      # Best effort: report "nothing found" so that the caller can fall
      # back to another symbolizer.
      result = []
    if not result:
      result = None
    return result
121*7c3d14c8STreehugger Robot
122*7c3d14c8STreehugger Robot
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
  """Create an LLVMSymbolizer for the configured llvm-symbolizer binary.

  The executable is taken from LLVM_SYMBOLIZER_PATH, then from
  ASAN_SYMBOLIZER_PATH; if neither is set, 'llvm-symbolizer' is assumed to
  be in PATH.

  Args:
      system: system name forwarded to LLVMSymbolizer.
      default_arch: architecture forwarded to LLVMSymbolizer.
      dsym_hints: optional iterable of .dSYM hints; a new empty list is used
          when omitted.
  Returns:
      a new LLVMSymbolizer.
  """
  if dsym_hints is None:
    # Avoid a mutable default argument shared between calls.
    dsym_hints = []
  symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                     os.getenv('ASAN_SYMBOLIZER_PATH') or
                     'llvm-symbolizer')
  return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
131*7c3d14c8STreehugger Robot
132*7c3d14c8STreehugger Robot
class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer that drives one addr2line process for a single binary."""

  def __init__(self, binary):
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()
    # Extra address sent after every query so that the end of the answer
    # can be recognised in the output stream.
    self.output_terminator = -1

  def open_addr2line(self):
    """Launch addr2line (with the binutils prefix, if any) on self.binary."""
    tool = 'addr2line'
    if binutils_prefix:
      tool = binutils_prefix + tool
    cmd = [tool, '-fi']
    if demangle:
      cmd.append('--demangle')
    cmd.extend(['-e', self.binary])
    if DEBUG:
      print(' '.join(cmd))
    return subprocess.Popen(cmd,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for a binary other than the one this object was created
    for; otherwise a list of '<addr> in <function> <file>' strings.
    """
    if self.binary != binary:
      return None
    frames = []
    try:
      self.pipe.stdin.write('%s\n' % offset)
      self.pipe.stdin.write('%s\n' % self.output_terminator)
      seen_first_frame = False
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        file_name = self.pipe.stdout.readline().rstrip()
        # The first pair is always kept; a later empty or '??' function
        # name ends the answer.
        if seen_first_frame and function_name in ['', '??']:
          assert file_name == function_name
          break
        seen_first_frame = True
        frames.append((function_name, file_name))
    except Exception:
      frames.append(('??', '??:0'))
    return ['%s in %s %s' % (addr, function, fix_filename(source))
            for (function, source) in frames]
174*7c3d14c8STreehugger Robot
class UnbufferedLineConverter(object):
  """
  Wrap a child process that responds to each line of input with one line of
  output.  Uses pty to trick the child into providing unbuffered output.
  """
  def __init__(self, args, close_stderr=False):
    """Fork a child on a pseudo-terminal and exec the given command in it.

    Args:
        args: command line as a list; args[0] is looked up with execvp.
        close_stderr: if True, the child's stderr is redirected to /dev/null.
    """
    # Local imports so that the script can start on Windows.
    import pty
    import termios
    pid, fd = pty.fork()
    if pid == 0:
      # We're the child. Transfer control to command.
      try:
        if close_stderr:
          # stderr is written to, so the descriptor must be writable.
          dev_null = os.open('/dev/null', os.O_WRONLY)
          os.dup2(dev_null, 2)
        os.execvp(args[0], args)
      finally:
        # Only reached when the exec (or the redirection) failed.  Leave
        # immediately so that the child never continues running the
        # parent's Python code.
        os._exit(127)
    else:
      # Disable echoing.
      attr = termios.tcgetattr(fd)
      attr[3] = attr[3] & ~termios.ECHO
      termios.tcsetattr(fd, termios.TCSANOW, attr)
      # Set up a file()-like interface to the child process
      self.r = os.fdopen(fd, "r", 1)
      self.w = os.fdopen(os.dup(fd), "w", 1)

  def convert(self, line):
    """Send one line to the child and return its next line of output."""
    self.w.write(line + "\n")
    return self.readline()

  def readline(self):
    """Read one line from the child with trailing whitespace removed."""
    return self.r.readline().rstrip()
206*7c3d14c8STreehugger Robot
207*7c3d14c8STreehugger Robot
class DarwinSymbolizer(Symbolizer):
  """Symbolizer that queries an 'atos' process for a single binary."""

  def __init__(self, addr, binary):
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    self.arch = guess_arch(addr)
    self.open_atos()

  def open_atos(self):
    """Start atos for self.binary / self.arch behind a line converter."""
    cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
    if DEBUG:
      print('atos -o %s -arch %s' % (self.binary, self.arch))
    self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None for a binary other than the one this object was created
    for; otherwise a one-element list describing the frame.
    """
    if self.binary != binary:
      return None
    query = '0x%x' % int(offset, 16)
    atos_line = self.atos.convert(query)
    # Skip over "got symbolicator for" lines until the real answer shows up.
    while "got symbolicator for" in atos_line:
      atos_line = self.atos.readline()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if DEBUG:
      print('atos_line:  %s' % atos_line)
    if not match:
      # Unrecognised answer: pass the raw atos text through.
      return ['%s in %s' % (addr, atos_line)]
    # Drop parenthesised parts (e.g. argument lists) from the function name.
    function_name = re.sub('\(.*?\)', '', match.group(1))
    file_name = fix_filename(match.group(3))
    return ['%s in %s %s' % (addr, function_name, file_name)]
240*7c3d14c8STreehugger Robot
241*7c3d14c8STreehugger Robot
242*7c3d14c8STreehugger Robot# Chain several symbolizers so that if one symbolizer fails, we fall back
243*7c3d14c8STreehugger Robot# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries a list of symbolizers in order and returns the first answer."""

  def __init__(self, symbolizer_list):
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Falsy entries of the list are skipped.  Returns the first non-empty
    result, or None if no symbolizer produced one.
    """
    for candidate in self.symbolizer_list:
      if not candidate:
        continue
      frames = candidate.symbolize(addr, binary, offset)
      if frames:
        return frames
    return None

  def append_symbolizer(self, symbolizer):
    """Add a symbolizer to the end of the chain."""
    self.symbolizer_list.append(symbolizer)
260*7c3d14c8STreehugger Robot
261*7c3d14c8STreehugger Robot
def BreakpadSymbolizerFactory(binary):
  """Create a BreakpadSymbolizer for binary if a symbol file exists.

  The symbol file name is the binary path followed by the value of the
  BREAKPAD_SUFFIX environment variable.

  Returns:
      a BreakpadSymbolizer, or None when the variable is unset/empty or the
      symbol file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if not suffix:
    return None
  symbol_file = binary + suffix
  if not os.access(symbol_file, os.F_OK):
    return None
  return BreakpadSymbolizer(symbol_file)
268*7c3d14c8STreehugger Robot  return None
269*7c3d14c8STreehugger Robot
270*7c3d14c8STreehugger Robot
def SystemSymbolizerFactory(system, addr, binary):
  """Create the platform's own symbolizer for binary.

  Returns:
      a DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux' or
      'FreeBSD', and None for any other system name.
  """
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  if system in ('Linux', 'FreeBSD'):
    return Addr2LineSymbolizer(binary)
  return None
276*7c3d14c8STreehugger Robot
277*7c3d14c8STreehugger Robot
class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that answers queries from a parsed Breakpad symbol file."""

  def __init__(self, filename):
    """Read and index the symbol file.

    Args:
        filename: path to the symbol file; its first line is the MODULE
            record.
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Close the file as soon as it has been read.
    with open(filename) as sym_file:
      lines = sym_file.readlines()
    self.files = []
    self.symbols = {}
    self.address_list = []
    self.addresses = {}
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Fill the file, symbol and address tables from the record lines."""
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to index.
        continue
      if fragments[0] == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif fragments[0] == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif fragments[0] in ['CFI', 'STACK']:
        pass
      elif fragments[0] == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # A name already recorded for this address is kept.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(fragments[0], 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Look up the line record covering addr.

    Returns:
        (symbol, filename, line_no), or None when no record starts at or
        before addr or the closest one does not extend as far as addr.
    """
    # Index just past the last record starting at or before addr.
    index = bisect.bisect_right(self.address_list, addr)
    if index == 0:
      return None
    key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns None when binary differs from the module name read from the
    symbol file or when the offset is not covered by any line record.
    """
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if not res:
      return None
    function_name, file_name, line_no = res
    result = ['%s in %s %s:%d' % (
        addr, function_name, file_name, line_no)]
    if DEBUG:
      # Diagnostic only; the caller prints the returned frames itself.
      print(result)
    return result
350*7c3d14c8STreehugger Robot
351*7c3d14c8STreehugger Robot
class SymbolizationLoop(object):
  """Reads a log line by line and replaces raw stack frames with symbols."""

  def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
    """Choose the line processor for the current platform.

    Args:
        binary_name_filter: optional callable mapping the binary path found
            in the log to the path that should be symbolized.
        dsym_hint_producer: optional callable returning the .dSYM hints for
            a binary; only consulted on Darwin.
    Raises:
        Exception: on a non-Windows system other than Linux, Darwin or
            FreeBSD.
    """
    if sys.platform == 'win32':
      # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
      # even in sandboxed processes.  Nothing needs to be done here.
      self.process_line = self.process_line_echo
    else:
      # Used by clients who may want to supply a different binary name.
      # E.g. in Chrome several binaries may share a single .dSYM.
      self.binary_name_filter = binary_name_filter
      self.dsym_hint_producer = dsym_hint_producer
      self.system = os.uname()[0]
      if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
        raise Exception('Unknown system')
      self.llvm_symbolizers = {}
      self.last_llvm_symbolizer = None
      self.dsym_hints = set([])
      self.frame_no = 0
      self.process_line = self.process_line_posix

  def symbolize_address(self, addr, binary, offset):
    """Symbolize one frame, creating and caching symbolizers on demand.

    Returns:
        non-empty list of frame description strings.
    Raises:
        Exception: if nothing was found and allow_system_symbolizer is
            False.
    """
    # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
    # a single symbolizer binary.
    # On Darwin, if the dsym hint producer is present:
    #  1. check whether we've seen this binary already; if so,
    #     use |llvm_symbolizers[binary]|, which has already loaded the debug
    #     info for this binary (might not be the case for
    #     |last_llvm_symbolizer|);
    #  2. otherwise check if we've seen all the hints for this binary already;
    #     if so, reuse |last_llvm_symbolizer| which has the full set of hints;
    #  3. otherwise create a new symbolizer and pass all currently known
    #     .dSYM hints to it.
    if binary not in self.llvm_symbolizers:
      use_new_symbolizer = True
      if self.system == 'Darwin' and self.dsym_hint_producer:
        dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
        use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
        self.dsym_hints |= dsym_hints_for_binary
      if self.last_llvm_symbolizer and not use_new_symbolizer:
        self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
      else:
        self.last_llvm_symbolizer = LLVMSymbolizerFactory(
            self.system, guess_arch(addr), self.dsym_hints)
        self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
    # Use the chain of symbolizers:
    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    # (fall back to next symbolizer if the previous one fails).
    if binary not in symbolizers:
      symbolizers[binary] = ChainSymbolizer(
          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
    result = symbolizers[binary].symbolize(addr, binary, offset)
    if result is None:
      if not allow_system_symbolizer:
        raise Exception('Failed to launch or use llvm-symbolizer.')
      # Initialize system symbolizer only if other symbolizers failed.
      symbolizers[binary].append_symbolizer(
          SystemSymbolizerFactory(self.system, addr, binary))
      result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

  def get_symbolized_lines(self, symbolized_lines):
    """Format symbolized frames as numbered stack-trace lines.

    Returns:
        [self.current_line] when nothing was symbolized; otherwise one
        '    #N <frame>' string per frame, advancing self.frame_no.
    """
    if not symbolized_lines:
      return [self.current_line]
    result = []
    for symbolized_frame in symbolized_lines:
      result.append(
          '    #%s %s' % (str(self.frame_no), symbolized_frame.rstrip()))
      self.frame_no += 1
    return result

  def process_logfile(self):
    """Process every line of the module-level logfile and print the result."""
    self.frame_no = 0
    for line in logfile:
      processed = self.process_line(line)
      print('\n'.join(processed))

  def process_line_echo(self, line):
    """Return the line unchanged apart from trailing whitespace."""
    return [line.rstrip()]

  def process_line_posix(self, line):
    """Symbolize the line if it is a stack frame, otherwise pass it through.

    Returns:
        list of output lines for this input line.
    """
    self.current_line = line.rstrip()
    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
    stack_trace_line_format = (
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
    match = re.match(stack_trace_line_format, line)
    if not match:
      return [self.current_line]
    if DEBUG:
      print(line)
    _, frameno_str, addr, binary, offset = match.groups()
    if frameno_str == '0':
      # Assume that frame #0 is the first frame of new stack trace.
      self.frame_no = 0
    original_binary = binary
    if self.binary_name_filter:
      binary = self.binary_name_filter(binary)
    symbolized_line = self.symbolize_address(addr, binary, offset)
    if not symbolized_line and original_binary != binary:
      # The filtered name gave nothing: retry with the name from the log
      # (repeating the call with the filtered name could not help).
      symbolized_line = self.symbolize_address(addr, original_binary, offset)
    return self.get_symbolized_lines(symbolized_line)
455*7c3d14c8STreehugger Robot
456*7c3d14c8STreehugger Robot
if __name__ == '__main__':
  # Command-line entry point: parse the options, store them in the
  # module-level settings and symbolize the chosen log.
  usage_example = ('Example of use:\n'
                   'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
                   '-s "$HOME/SymbolFiles" < asan.log')
  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description='ASan symbolization script',
      epilog=usage_example)
  parser.add_argument('path_to_cut', nargs='*',
                      help='pattern to be cut from the result file path ')
  parser.add_argument('-d','--demangle', action='store_true',
                      help='demangle function names')
  parser.add_argument('-s', metavar='SYSROOT',
                      help='set path to sysroot for sanitized binaries')
  parser.add_argument('-c', metavar='CROSS_COMPILE',
                      help='set prefix for binutils')
  parser.add_argument('-l','--logfile', default=sys.stdin,
                      type=argparse.FileType('r'),
                      help='set log file name to parse, default is stdin')
  args = parser.parse_args()

  # Options that were not given leave the module defaults untouched.
  if args.path_to_cut:
    fix_filename_patterns = args.path_to_cut
  if args.demangle:
    demangle = True
  if args.s:
    # A sysroot implies rewriting every binary path found in the log.
    sysroot_path = args.s
    binary_name_filter = sysroot_path_filter
  if args.c:
    binutils_prefix = args.c
  logfile = args.logfile if args.logfile else sys.stdin

  loop = SymbolizationLoop(binary_name_filter)
  loop.process_logfile()
491