# Copyright 2019 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for interacting with llvm-profdata"""

import logging
import multiprocessing
import os
import re
import shutil
import subprocess
import sys

_DIR_SOURCE_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..'))

_JAVA_PATH = os.path.join(_DIR_SOURCE_ROOT, 'third_party', 'jdk', 'current',
                          'bin', 'java')

# Swarming task ids are exactly 16 hexadecimal characters.
_SWARMING_TASK_ID_RE = re.compile(r'[0-9a-fA-F]{16}')

logging.basicConfig(
    format='[%(asctime)s %(levelname)s] %(message)s', level=logging.DEBUG)


def _call_profdata_tool(profile_input_file_paths,
                        profile_output_file_path,
                        profdata_tool_path,
                        sparse=False,
                        timeout=3600):
  """Calls the llvm-profdata tool.

  The input paths are written to 'input-profdata-files.txt' next to the output
  file and handed to the tool with '-f', so that the command line stays short
  no matter how many inputs there are.

  Args:
    profile_input_file_paths: A list of relative paths to the files that
        are to be merged.
    profile_output_file_path: The path to the merged file to write.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    timeout (int): timeout (sec) for the call to merge profiles. This should
      not take > 1 hr, and so defaults to 3600 seconds.

  Raises:
    CalledProcessError: An error occurred merging profiles.
    TimeoutExpired: The merge did not finish within `timeout` seconds.
  """
  # There might be too many files in input and argument limit might be
  # violated, so make the tool read a list of paths from a file.
  output_dir = os.path.dirname(profile_output_file_path)
  # Normalize to POSIX style paths for consistent results.
  input_file = os.path.join(output_dir,
                            'input-profdata-files.txt').replace('\\', '/')
  with open(input_file, 'w') as fd:
    logging.info("List of .profdata files...")
    for file_path in profile_input_file_paths:
      logging.info('%s', file_path)
      fd.write('%s\n' % file_path)

  subprocess_cmd = [
      profdata_tool_path, 'merge', '-o', profile_output_file_path,
  ]
  if sparse:
    subprocess_cmd += ['-sparse=true',]
  subprocess_cmd.extend(['-f', input_file])
  logging.info('profdata command: %r', subprocess_cmd)

  try:
    # Both stdout and stderr are captured: when an error happens llvm-profdata
    # writes the details to stderr and the error handling below reports them.
    # Timeout in seconds, 1 hr (60*60) by default.
    p = subprocess.run(subprocess_cmd,
                       capture_output=True,
                       text=True,
                       timeout=timeout,
                       check=True)
    logging.info(p.stdout)
  except subprocess.CalledProcessError as error:
    logging.info('stdout: %s', error.output)
    logging.error('Failed to merge profiles, return code (%d), error: %r',
                  error.returncode, error.stderr)
    raise
  except subprocess.TimeoutExpired as error:
    logging.info('stdout: %s', error.output)
    raise

  logging.info('Profile data is created as: "%r".', profile_output_file_path)


def _get_profile_paths(input_dir,
                       input_extension,
                       input_filename_pattern='.*'):
  """Finds all the profiles in the given directory (recursively).

  Args:
    input_dir (str): The directory to walk.
    input_extension (str): Suffix a file name must end with to be included.
    input_filename_pattern (str): Regex searched for in each file name; only
      matching files are included.

  Returns:
    A list of matching file paths, using '/' as the separator.
  """
  # Compile once instead of looking the pattern up for every file.
  name_matcher = re.compile(input_filename_pattern)
  paths = []
  for dir_path, _sub_dirs, file_names in os.walk(input_dir):
    paths.extend([
        # Normalize to POSIX style paths for consistent results.
        os.path.join(dir_path, fn).replace('\\', '/')
        for fn in file_names
        if fn.endswith(input_extension) and name_matcher.search(fn)
    ])
  return paths


def _validate_and_convert_profraws(profraw_files,
                                   profdata_tool_path,
                                   sparse=False):
  """Validates and converts profraws to profdatas.

  For each given .profraw file in the input, this method first validates it by
  trying to convert it to an indexed .profdata file, and if the validation and
  conversion succeeds, the generated .profdata file will be included in the
  output, otherwise, won't.

  This method is mainly used to filter out invalid profraw files. All input
  .profraw files are deleted once every conversion has been attempted.

  Args:
    profraw_files: A list of .profraw paths.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A tuple:
      A list of converted .profdata files of *valid* profraw files.
      A list of *invalid* profraw files.
      A list of profraw files that have counter overflows.

  Raises:
    RuntimeError: One of the inputs does not end with '.profraw'.
  """
  for profraw_file in profraw_files:
    if not profraw_file.endswith('.profraw'):
      raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)

  # Nothing to convert: do not spin up a manager and a worker pool for nothing.
  if not profraw_files:
    return [], [], []

  cpu_count = multiprocessing.cpu_count()
  counts = max(10, cpu_count - 5)  # Use 10+ processes, but leave 5 cpu cores.
  if sys.platform == 'win32':
    # TODO(crbug.com/1190269) - we can't use more than 56 child processes on
    # Windows or Python3 may hang.
    counts = min(counts, 56)
  # More workers than files would only sit idle.
  counts = min(counts, len(profraw_files))

  # A single manager serves all three shared lists; leaving the `with` block
  # shuts its server process down instead of leaking it.
  with multiprocessing.Manager() as manager:
    output_profdata_files = manager.list()
    invalid_profraw_files = manager.list()
    counter_overflows = manager.list()

    with multiprocessing.Pool(counts) as pool:
      results = [
          pool.apply_async(
              _validate_and_convert_profraw,
              (profraw_file, output_profdata_files, invalid_profraw_files,
               counter_overflows, profdata_tool_path, sparse))
          for profraw_file in profraw_files
      ]
      # Wait for every conversion to finish before the pool is torn down.
      pool.close()
      pool.join()

    # Surface any exception raised inside a worker.
    for result in results:
      result.get()

    # Copy out of the proxies while the manager is still alive.
    converted = list(output_profdata_files)
    invalid = list(invalid_profraw_files)
    overflowed = list(counter_overflows)

  # Remove inputs, as they won't be needed and they can be pretty large.
  for input_file in profraw_files:
    os.remove(input_file)

  return converted, invalid, overflowed


def _validate_and_convert_profraw(profraw_file, output_profdata_files,
                                  invalid_profraw_files, counter_overflows,
                                  profdata_tool_path, sparse=False):
  """Converts one .profraw file to .profdata and records the outcome.

  Args:
    profraw_file (str): Path to the .profraw file to convert.
    output_profdata_files: List the generated .profdata path is appended to
      when the conversion succeeds.
    invalid_profraw_files: List `profraw_file` is appended to when the
      conversion fails or reports a counter overflow.
    counter_overflows: List `profraw_file` is appended to when the tool output
      contains 'Counter overflow'.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
  """
  # Swap only the trailing extension: a blanket str.replace() would also
  # rewrite '.profraw' occurring in a directory name earlier in the path.
  raw_suffix = '.profraw'
  if profraw_file.endswith(raw_suffix):
    base_path = profraw_file[:-len(raw_suffix)]
  else:
    base_path = os.path.splitext(profraw_file)[0]
  output_profdata_file = base_path + '.profdata'

  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      output_profdata_file,
  ]
  if sparse:
    subprocess_cmd.append('--sparse')

  subprocess_cmd.append(profraw_file)
  logging.info('profdata command: %r', subprocess_cmd)

  profile_valid = False
  counter_overflow = False
  validation_output = None

  # 1. Determine if the profile is valid.
  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    validation_output = subprocess.check_output(
        subprocess_cmd, stderr=subprocess.STDOUT, encoding='UTF-8')
    if 'Counter overflow' in validation_output:
      counter_overflow = True
    else:
      profile_valid = True
  except subprocess.CalledProcessError as error:
    logging.warning('Validating and converting %r to %r failed with output: %r',
                    profraw_file, output_profdata_file, error.output)
    validation_output = error.output

  # 2. Add the profile to the appropriate list(s).
  if profile_valid:
    output_profdata_files.append(output_profdata_file)
  else:
    invalid_profraw_files.append(profraw_file)
    if counter_overflow:
      counter_overflows.append(profraw_file)

  # 3. Log appropriate message
  if not profile_valid:
    template = 'Bad profile: %r, output: %r'
    if counter_overflow:
      template = 'Counter overflow: %r, output: %r'
    logging.warning(template, profraw_file, validation_output)

    # 4. Delete profdata for invalid profiles if present.
    if os.path.exists(output_profdata_file):
      # The output file may be created before llvm-profdata determines the
      # input is invalid. Delete it so that it does not leak and affect other
      # merge scripts.
      os.remove(output_profdata_file)


def merge_java_exec_files(input_dir, output_path, jacococli_path):
  """Merges generated .exec files to output_path.

  Does nothing (apart from logging) when no .exec file is found.

  Args:
    input_dir (str): The path to traverse to find input files.
    output_path (str): Where to write the merged .exec file.
    jacococli_path: The path to jacococli.jar.

  Raises:
    CalledProcessError: merge command failed.
  """
  exec_input_file_paths = _get_profile_paths(input_dir, '.exec')
  if not exec_input_file_paths:
    logging.info('No exec file found under %s', input_dir)
    return

  cmd = [_JAVA_PATH, '-jar', jacococli_path, 'merge']
  cmd.extend(exec_input_file_paths)
  cmd.extend(['--destfile', output_path])
  subprocess.check_call(cmd, stderr=subprocess.STDOUT)


def merge_profiles(input_dir,
                   output_file,
                   input_extension,
                   profdata_tool_path,
                   input_filename_pattern='.*',
                   sparse=False,
                   skip_validation=False,
                   merge_timeout=3600):
  """Merges the profiles produced by the shards using llvm-profdata.

  Args:
    input_dir (str): The path to traverse to find input profiles.
    output_file (str): Where to write the merged profile.
    input_extension (str): File extension to look for in the input_dir.
        e.g. '.profdata' or '.profraw'
    profdata_tool_path: The path to the llvm-profdata executable.
    input_filename_pattern (str): The regex pattern of input filename. Should be
        a valid regex pattern if present.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    skip_validation (bool): flag to skip the _validate_and_convert_profraws
        invocation. only applicable when input_extension is .profraw.
    merge_timeout (int): timeout (sec) for the call to merge profiles. This
        should not take > 1 hr, and so defaults to 3600 seconds.

  Returns:
    The list of profiles that had to be excluded to get the merge to
    succeed and a list of profiles that had a counter overflow.

  Raises:
    CalledProcessError: The final llvm-profdata merge failed.
    TimeoutExpired: The final merge exceeded `merge_timeout` seconds.
  """
  profile_input_file_paths = _get_profile_paths(input_dir,
                                                input_extension,
                                                input_filename_pattern)
  invalid_profraw_files = []
  counter_overflows = []

  if skip_validation:
    logging.warning('--skip-validation has been enabled. Skipping conversion '
                    'to ensure that profiles are valid.')

  if input_extension == '.profraw' and not skip_validation:
    # From here on the inputs to merge are the converted .profdata files.
    profile_input_file_paths, invalid_profraw_files, counter_overflows = (
        _validate_and_convert_profraws(profile_input_file_paths,
                                       profdata_tool_path,
                                       sparse=sparse))
    logging.info((
        'List of invalid .profraw files that failed to validate and convert: %r'
    ), invalid_profraw_files)

    if counter_overflows:
      logging.warning('There were %d profiles with counter overflows',
                      len(counter_overflows))

  # The list of input files could be empty in the following scenarios:
  # 1. The test target is pure Python scripts test which doesn't execute any
  #    C/C++ binaries, such as devtools_type_check.
  # 2. The test target executes binary and does dumps coverage profile data
  #    files, however, all of them turned out to be invalid.
  if not profile_input_file_paths:
    logging.info('There is no valid profraw/profdata files to merge, skip '
                 'invoking profdata tools.')
    return invalid_profraw_files, counter_overflows

  _call_profdata_tool(
      profile_input_file_paths=profile_input_file_paths,
      profile_output_file_path=output_file,
      profdata_tool_path=profdata_tool_path,
      sparse=sparse,
      timeout=merge_timeout)

  # Remove inputs when merging profraws as they won't be needed and they can be
  # pretty large. If the inputs are profdata files, do not remove them as they
  # might be used again for multiple test types coverage.
  if input_extension == '.profraw':
    for input_file in profile_input_file_paths:
      os.remove(input_file)

  return invalid_profraw_files, counter_overflows


# We want to retry shards that contain one or more profiles that cannot be
# merged (typically due to corruption described in crbug.com/937521).
def get_shards_to_retry(bad_profiles):
  """Returns the set of swarming task ids that produced the bad profiles.

  Args:
    bad_profiles: Iterable of profile paths shaped like
      <base>/<task id>/<dir>/<file name>.

  Returns:
    A set of task id strings, one per distinct shard.

  Raises:
    AssertionError: A path component in the task id position is not 16
      hexadecimal characters.
    ValueError: A path has fewer than four components.
  """
  bad_shard_ids = set()

  for profile in bad_profiles:
    # E.g. /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw
    _base_path, task_id, _profraw, _filename = os.path.normpath(profile).rsplit(
        os.path.sep, 3)
    # Since we are getting a task_id from a file path, which is less than ideal,
    # do some checking to at least verify that the snippet looks like a valid
    # task id. This is an explicit raise rather than an `assert` statement so
    # that the check still runs under `python -O`.
    if not _SWARMING_TASK_ID_RE.fullmatch(task_id):
      raise AssertionError(
          'Swarming task IDs are expected to be 16 hex chars, got %r' % task_id)
    bad_shard_ids.add(task_id)
  return bad_shard_ids