# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for interacting with llvm-profdata."""

import logging
import multiprocessing
import os
import re
import shutil
import subprocess
import sys

_DIR_SOURCE_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..'))

_JAVA_PATH = os.path.join(_DIR_SOURCE_ROOT, 'third_party', 'jdk', 'current',
                          'bin', 'java')

logging.basicConfig(
    format='[%(asctime)s %(levelname)s] %(message)s', level=logging.DEBUG)


def _call_profdata_tool(profile_input_file_paths,
                        profile_output_file_path,
                        profdata_tool_path,
                        sparse=False,
                        timeout=3600):
  """Calls the llvm-profdata tool.

  Args:
    profile_input_file_paths: A list of relative paths to the files that
        are to be merged.
    profile_output_file_path: The path to the merged file to write.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): Flag indicating whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    timeout (int): Timeout (sec) for the call to merge profiles. This should
      not take more than 1 hr, hence the default of 3600 seconds.

  Raises:
    CalledProcessError: An error occurred merging profiles.
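
  Example (a sketch; the paths below are illustrative only):
    _call_profdata_tool(
        profile_input_file_paths=['/tmp/shard0.profdata',
                                  '/tmp/shard1.profdata'],
        profile_output_file_path='/tmp/merged.profdata',
        profdata_tool_path='/path/to/llvm-profdata')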
43  """
44  # There might be too many files in input and argument limit might be
45  # violated, so make the tool read a list of paths from a file.
46  output_dir = os.path.dirname(profile_output_file_path)
47  # Normalize to POSIX style paths for consistent results.
48  input_file = os.path.join(output_dir,
49                            'input-profdata-files.txt').replace('\\', '/')
50  with open(input_file, 'w') as fd:
51    logging.info("List of .profdata files...")
52    for file_path in profile_input_file_paths:
53      logging.info(file_path)
54      fd.write('%s\n' % file_path)
55  try:
56    subprocess_cmd = [
57        profdata_tool_path, 'merge', '-o', profile_output_file_path,
58    ]
59    if sparse:
60      subprocess_cmd += ['-sparse=true',]
61    subprocess_cmd.extend(['-f', input_file])
62    logging.info('profdata command: %r', subprocess_cmd)
63
64    # Redirecting stderr is required because when error happens, llvm-profdata
65    # writes the error output to stderr and our error handling logic relies on
66    # that output. stdout=None should print to console.
67    # Timeout in seconds, set to 1 hr (60*60)
68    p = subprocess.run(subprocess_cmd,
69                        capture_output=True,
70                        text=True,
71                        timeout=timeout,
72                        check=True)
73    logging.info(p.stdout)
74  except subprocess.CalledProcessError as error:
75    logging.info('stdout: %s' % error.output)
76    logging.error('Failed to merge profiles, return code (%d), error: %r' %
77                  (error.returncode, error.stderr))
78    raise error
79  except subprocess.TimeoutExpired as e:
80    logging.info('stdout: %s' % e.output)
81    raise e
82
83  logging.info('Profile data is created as: "%r".', profile_output_file_path)
84
85
def _get_profile_paths(input_dir,
                       input_extension,
                       input_filename_pattern='.*'):
  """Finds all the profiles in the given directory (recursively)."""
  paths = []
  for dir_path, _sub_dirs, file_names in os.walk(input_dir):
    paths.extend([
        # Normalize to POSIX style paths for consistent results.
        os.path.join(dir_path, fn).replace('\\', '/')
        for fn in file_names
        if fn.endswith(input_extension) and re.search(input_filename_pattern, fn)
    ])
  return paths


def _validate_and_convert_profraws(profraw_files,
                                   profdata_tool_path,
                                   sparse=False):
  """Validates and converts profraws to profdatas.

  For each given .profraw file in the input, this method first validates it by
  trying to convert it to an indexed .profdata file. If the validation and
  conversion succeed, the generated .profdata file is included in the output;
  otherwise it is not.

  This method is mainly used to filter out invalid profraw files.

  Args:
    profraw_files: A list of .profraw paths.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): Flag indicating whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A tuple:
      A list of converted .profdata files of *valid* profraw files.
      A list of *invalid* profraw files.
      A list of profraw files that have counter overflows.
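
  Example (a sketch; the paths below are illustrative only):
    profdata_files, invalid, overflows = _validate_and_convert_profraws(
        ['/tmp/default-1.profraw', '/tmp/default-2.profraw'],
        '/path/to/llvm-profdata')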
124  """
125  for profraw_file in profraw_files:
126    if not profraw_file.endswith('.profraw'):
127      raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)
128
129  cpu_count = multiprocessing.cpu_count()
130  counts = max(10, cpu_count - 5)  # Use 10+ processes, but leave 5 cpu cores.
131  if sys.platform == 'win32':
132    # TODO(crbug.com/1190269) - we can't use more than 56 child processes on
133    # Windows or Python3 may hang.
134    counts = min(counts, 56)
135  pool = multiprocessing.Pool(counts)
136  output_profdata_files = multiprocessing.Manager().list()
137  invalid_profraw_files = multiprocessing.Manager().list()
138  counter_overflows = multiprocessing.Manager().list()
139
140  results = []
141  for profraw_file in profraw_files:
142    results.append(pool.apply_async(
143      _validate_and_convert_profraw,
144      (profraw_file, output_profdata_files, invalid_profraw_files,
145        counter_overflows, profdata_tool_path, sparse)))
146
147  pool.close()
148  pool.join()
149
150  for x in results:
151    x.get()
152
153  # Remove inputs, as they won't be needed and they can be pretty large.
154  for input_file in profraw_files:
155    os.remove(input_file)
156
157  return list(output_profdata_files), list(invalid_profraw_files), list(
158      counter_overflows)
159
160
def _validate_and_convert_profraw(profraw_file, output_profdata_files,
                                  invalid_profraw_files, counter_overflows,
                                  profdata_tool_path, sparse=False):
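  """Validates and converts a single .profraw file (worker process helper).

  Appends the generated .profdata path to output_profdata_files on success;
  otherwise records the input in invalid_profraw_files (and, for counter
  overflows, in counter_overflows as well).
  """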
  output_profdata_file = profraw_file.replace('.profraw', '.profdata')
  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      output_profdata_file,
  ]
  if sparse:
    subprocess_cmd.append('--sparse')

  subprocess_cmd.append(profraw_file)
  logging.info('profdata command: %r', subprocess_cmd)

  profile_valid = False
  counter_overflow = False
  validation_output = None

  # 1. Determine if the profile is valid.
  try:
    # Redirecting stderr is required because when an error happens,
    # llvm-profdata writes the error output to stderr and our error handling
    # logic relies on that output.
    validation_output = subprocess.check_output(
        subprocess_cmd, stderr=subprocess.STDOUT, encoding='UTF-8')
    if 'Counter overflow' in validation_output:
      counter_overflow = True
    else:
      profile_valid = True
  except subprocess.CalledProcessError as error:
    logging.warning('Validating and converting %r to %r failed with output: %r',
                    profraw_file, output_profdata_file, error.output)
    validation_output = error.output

  # 2. Add the profile to the appropriate list(s).
  if profile_valid:
    output_profdata_files.append(output_profdata_file)
  else:
    invalid_profraw_files.append(profraw_file)
    if counter_overflow:
      counter_overflows.append(profraw_file)

  # 3. Log an appropriate message.
  if not profile_valid:
    template = 'Bad profile: %r, output: %r'
    if counter_overflow:
      template = 'Counter overflow: %r, output: %r'
    logging.warning(template, profraw_file, validation_output)

    # 4. Delete profdata for invalid profiles if present.
    if os.path.exists(output_profdata_file):
      # The output file may be created before llvm-profdata determines the
      # input is invalid. Delete it so that it does not leak and affect other
      # merge scripts.
      os.remove(output_profdata_file)


def merge_java_exec_files(input_dir, output_path, jacococli_path):
  """Merges generated .exec files to output_path.

  Args:
    input_dir (str): The path to traverse to find input files.
    output_path (str): Where to write the merged .exec file.
    jacococli_path: The path to jacococli.jar.

  Raises:
    CalledProcessError: merge command failed.
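
  Example (a sketch; the paths below are illustrative only):
    merge_java_exec_files('/tmp/coverage', '/tmp/coverage/merged.exec',
                          '/path/to/jacococli.jar')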
229  """
230  exec_input_file_paths = _get_profile_paths(input_dir, '.exec')
231  if not exec_input_file_paths:
232    logging.info('No exec file found under %s', input_dir)
233    return
234
235  cmd = [_JAVA_PATH, '-jar', jacococli_path, 'merge']
236  cmd.extend(exec_input_file_paths)
237  cmd.extend(['--destfile', output_path])
238  subprocess.check_call(cmd, stderr=subprocess.STDOUT)
239
240
def merge_profiles(input_dir,
                   output_file,
                   input_extension,
                   profdata_tool_path,
                   input_filename_pattern='.*',
                   sparse=False,
                   skip_validation=False,
                   merge_timeout=3600):
  """Merges the profiles produced by the shards using llvm-profdata.

  Args:
    input_dir (str): The path to traverse to find input profiles.
    output_file (str): Where to write the merged profile.
    input_extension (str): File extension to look for in the input_dir,
        e.g. '.profdata' or '.profraw'.
    profdata_tool_path: The path to the llvm-profdata executable.
    input_filename_pattern (str): A regex that input filenames must match.
        Should be a valid regex pattern.
    sparse (bool): Flag indicating whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    skip_validation (bool): Flag to skip the _validate_and_convert_profraws
        invocation. Only applicable when input_extension is '.profraw'.
    merge_timeout (int): Timeout (sec) for the call to merge profiles. This
      should not take more than 1 hr, hence the default of 3600 seconds.

  Returns:
    The list of profiles that had to be excluded to get the merge to
    succeed and a list of profiles that had a counter overflow.
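
  Example (a sketch; the paths below are illustrative only):
    invalid, overflows = merge_profiles(
        '/tmp/coverage', '/tmp/coverage/merged.profdata', '.profraw',
        '/path/to/llvm-profdata')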
269  """
270  profile_input_file_paths = _get_profile_paths(input_dir,
271                                                input_extension,
272                                                input_filename_pattern)
273  invalid_profraw_files = []
274  counter_overflows = []
275
276  if skip_validation:
277    logging.warning('--skip-validation has been enabled. Skipping conversion '
278                    'to ensure that profiles are valid.')
279
280  if input_extension == '.profraw' and not skip_validation:
281    profile_input_file_paths, invalid_profraw_files, counter_overflows = (
282        _validate_and_convert_profraws(profile_input_file_paths,
283                                       profdata_tool_path,
284                                       sparse=sparse))
285    logging.info((
286        'List of invalid .profraw files that failed to validate and convert: %r'
287    ), invalid_profraw_files)
288
289    if counter_overflows:
290      logging.warning('There were %d profiles with counter overflows',
291                      len(counter_overflows))
292
293  # The list of input files could be empty in the following scenarios:
294  # 1. The test target is pure Python scripts test which doesn't execute any
295  #    C/C++ binaries, such as devtools_type_check.
296  # 2. The test target executes binary and does dumps coverage profile data
297  #    files, however, all of them turned out to be invalid.
298  if not profile_input_file_paths:
299    logging.info('There is no valid profraw/profdata files to merge, skip '
300                 'invoking profdata tools.')
301    return invalid_profraw_files, counter_overflows
302
303  _call_profdata_tool(
304      profile_input_file_paths=profile_input_file_paths,
305      profile_output_file_path=output_file,
306      profdata_tool_path=profdata_tool_path,
307      sparse=sparse,
308      timeout=merge_timeout)
309
310  # Remove inputs when merging profraws as they won't be needed and they can be
311  # pretty large. If the inputs are profdata files, do not remove them as they
312  # might be used again for multiple test types coverage.
313  if input_extension == '.profraw':
314    for input_file in profile_input_file_paths:
315      os.remove(input_file)
316
317  return invalid_profraw_files, counter_overflows
318
# We want to retry shards that contain one or more profiles that cannot be
# merged (typically due to corruption described in crbug.com/937521).
def get_shards_to_retry(bad_profiles):
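  """Returns the set of Swarming task ids whose shards produced bad profiles.

  Example (the path below is illustrative):
    get_shards_to_retry(
        ['/b/s/w/ir/tmp/t/tmpX/44b643576cf39f10/profraw/default-1.profraw'])
    # -> {'44b643576cf39f10'}
  """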
  bad_shard_ids = set()

  def is_task_id(s):
    # Swarming task ids are 16 hex chars. The pythonic way to validate this is
    # to cast to int and catch a ValueError.
    try:
      assert len(s) == 16, 'Swarming task IDs are expected to be of length 16'
      _int_id = int(s, 16)
      return True
    except (AssertionError, ValueError):
      return False

  for profile in bad_profiles:
    # E.g. /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw
    _base_path, task_id, _profraw, _filename = os.path.normpath(profile).rsplit(
        os.path.sep, 3)
    # Since we are getting a task_id from a file path, which is less than ideal,
    # do some checking to at least verify that the snippet looks like a valid
    # task id.
    assert is_task_id(task_id)
    bad_shard_ids.add(task_id)
  return bad_shard_ids
344