#!/usr/bin/env python3
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for interacting with llvm-profdata

This script is taken from the chromium build tools and is synced
manually on an as-needed basis:
https://source.chromium.org/chromium/chromium/src/+/master:testing/merge_scripts/code_coverage/merge_lib.py
"""

import logging
import multiprocessing
import os
import re
import shutil
import subprocess

_DIR_SOURCE_ROOT = os.path.normpath(
    os.path.join(os.path.dirname(__file__), '..', '..', '..'))

_JAVA_PATH = os.path.join(_DIR_SOURCE_ROOT, 'third_party', 'jdk', 'current',
                          'bin', 'java')

logging.basicConfig(
    format='[%(asctime)s %(levelname)s] %(message)s', level=logging.DEBUG)


def _call_profdata_tool(profile_input_file_paths,
                        profile_output_file_path,
                        profdata_tool_path,
                        sparse=True):
  """Calls the llvm-profdata tool.

  Args:
    profile_input_file_paths: A list of relative paths to the files that
        are to be merged.
    profile_output_file_path: The path to the merged file to write.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A list of paths to profiles that had to be excluded to get the merge to
    succeed, suspected of being corrupted or malformed. This implementation
    never excludes anything, so the list is always empty.

  Raises:
    CalledProcessError: An error occurred merging profiles.
  """
  # Lazy logging arguments: the message is only formatted if it is emitted,
  # and a tuple of paths cannot be mistaken for multiple format arguments.
  logging.debug('Profile input paths: %r', profile_input_file_paths)
  logging.debug('Profile output path: %r', profile_output_file_path)

  subprocess_cmd = [
      profdata_tool_path, 'merge', '-o', profile_output_file_path,
  ]
  if sparse:
    subprocess_cmd += ['-sparse=true',]
  subprocess_cmd.extend(profile_input_file_paths)
  logging.info('profdata command: %r', ' '.join(subprocess_cmd))

  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    output = subprocess.check_output(subprocess_cmd, stderr=subprocess.STDOUT)
  except subprocess.CalledProcessError as error:
    logging.error('Failed to merge profiles, return code (%d), output: %r',
                  error.returncode, error.output)
    # Bare raise keeps the original traceback intact.
    raise

  logging.info('Merge succeeded with output: %r', output)
  logging.info('Profile data is created as: "%r".', profile_output_file_path)
  return []


def _get_profile_paths(input_dir,
                       input_extension,
                       input_filename_pattern='.*'):
  """Finds all the profiles in the given directory (recursively).

  Args:
    input_dir: The directory to walk.
    input_extension: Suffix a file name must end with to be included.
    input_filename_pattern: Regex that must match somewhere in the file name
        (re.search semantics, applied to the base name only).

  Returns:
    A list of paths (dir_path joined with file name) of the matching files.
  """
  # Compile once instead of looking the pattern up for every file.
  filename_regex = re.compile(input_filename_pattern)
  paths = []
  for dir_path, _sub_dirs, file_names in os.walk(input_dir):
    paths.extend([
        os.path.join(dir_path, fn)
        for fn in file_names
        if fn.endswith(input_extension) and filename_regex.search(fn)
    ])
  return paths


def _log_conversion_error(error):
  """Logs an exception that escaped a profraw conversion worker."""
  logging.error('Unexpected error while converting a profraw file: %r', error)


def _validate_and_convert_profraws(profraw_files,
                                   profdata_tool_path,
                                   sparse=True):
  """Validates and converts profraws to profdatas.

  For each given .profraw file in the input, this method first validates it by
  trying to convert it to an indexed .profdata file, and if the validation and
  conversion succeeds, the generated .profdata file will be included in the
  output, otherwise, won't.

  This method is mainly used to filter out invalid profraw files.

  Note that every input .profraw file is deleted once all conversions have
  finished, whether or not its conversion succeeded.

  Args:
    profraw_files: A list of .profraw paths.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge

  Returns:
    A tuple:
      A list of converted .profdata files of *valid* profraw files.
      A list of *invalid* profraw files.
      A list of profraw files that have counter overflows.

  Raises:
    RuntimeError: One of the inputs does not end with '.profraw'.
  """
  for profraw_file in profraw_files:
    if not profraw_file.endswith('.profraw'):
      raise RuntimeError('%r is expected to be a .profraw file.' % profraw_file)

  # Nothing to convert: avoid spawning a process pool and a manager for no
  # work. The result is the same three empty lists.
  if not profraw_files:
    return [], [], []

  cpu_count = multiprocessing.cpu_count()
  counts = max(10, cpu_count - 5)  # Use 10+ processes, but leave 5 cpu cores.

  # A single manager process serves all three shared lists.
  with multiprocessing.Manager() as manager:
    output_profdata_files = manager.list()
    invalid_profraw_files = manager.list()
    counter_overflows = manager.list()

    pool = multiprocessing.Pool(counts)
    for profraw_file in profraw_files:
      logging.info('Converting profraw file: %r', profraw_file)
      # The async results are never read, so without error_callback an
      # exception raised inside a worker would vanish without a trace.
      pool.apply_async(
          _validate_and_convert_profraw,
          (profraw_file, output_profdata_files, invalid_profraw_files,
           counter_overflows, profdata_tool_path, sparse),
          error_callback=_log_conversion_error)

    pool.close()
    pool.join()

    # Copy out of the proxies before the manager shuts down.
    results = (list(output_profdata_files), list(invalid_profraw_files),
               list(counter_overflows))

  # Remove inputs, as they won't be needed and they can be pretty large.
  for input_file in profraw_files:
    os.remove(input_file)

  return results


def _validate_and_convert_profraw(profraw_file, output_profdata_files,
                                  invalid_profraw_files, counter_overflows,
                                  profdata_tool_path, sparse=True):
  """Validates one .profraw file by converting it to .profdata.

  Runs `<profdata_tool_path> merge -o <output> [--sparse] <profraw_file>` and
  records the outcome by appending to the list arguments.

  Args:
    profraw_file: Path of the .profraw file to convert.
    output_profdata_files: List that receives the .profdata path when the
        conversion succeeds without a counter overflow.
    invalid_profraw_files: List that receives profraw_file when the conversion
        fails or reports a counter overflow.
    counter_overflows: List that receives profraw_file when the tool output
        mentions 'Counter overflow'.
    profdata_tool_path: The path to the llvm-profdata executable.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
  """
  output_profdata_file = profraw_file.replace('.profraw', '.profdata')
  subprocess_cmd = [
      profdata_tool_path,
      'merge',
      '-o',
      output_profdata_file,
  ]
  if sparse:
    subprocess_cmd.append('--sparse')

  subprocess_cmd.append(profraw_file)

  profile_valid = False
  counter_overflow = False
  validation_output = None

  logging.info('profdata command: %r', ' '.join(subprocess_cmd))

  # 1. Determine if the profile is valid.
  try:
    # Redirecting stderr is required because when error happens, llvm-profdata
    # writes the error output to stderr and our error handling logic relies on
    # that output.
    logging.info('Converting %r to %r', profraw_file, output_profdata_file)
    validation_output = subprocess.check_output(
        subprocess_cmd, stderr=subprocess.STDOUT)
    logging.info('Validating and converting %r to %r succeeded with output: %r',
                 profraw_file, output_profdata_file, validation_output)
    # check_output returns bytes here (no text mode requested), so the marker
    # must be a bytes literal; a str needle raises TypeError on Python 3.
    if b'Counter overflow' in validation_output:
      counter_overflow = True
    else:
      profile_valid = True
  except subprocess.CalledProcessError as error:
    logging.warning('Validating and converting %r to %r failed with output: %r',
                    profraw_file, output_profdata_file, error.output)
    validation_output = error.output

  # 2. Add the profile to the appropriate list(s).
  if profile_valid:
    output_profdata_files.append(output_profdata_file)
  else:
    invalid_profraw_files.append(profraw_file)
    if counter_overflow:
      counter_overflows.append(profraw_file)

  # 3. Log appropriate message
  if not profile_valid:
    template = 'Bad profile: %r, output: %r'
    if counter_overflow:
      template = 'Counter overflow: %r, output: %r'
    logging.warning(template, profraw_file, validation_output)

    # 4. Delete profdata for invalid profiles if present.
    if os.path.exists(output_profdata_file):
      # The output file may be created before llvm-profdata determines the
      # input is invalid. Delete it so that it does not leak and affect other
      # merge scripts.
      os.remove(output_profdata_file)


def merge_java_exec_files(input_dir, output_path, jacococli_path):
  """Merges generated .exec files to output_path.

  Does nothing (beyond logging) when no .exec file is found under input_dir.

  Args:
    input_dir (str): The path to traverse to find input files.
    output_path (str): Where to write the merged .exec file.
    jacococli_path: The path to jacococli.jar.

  Raises:
    CalledProcessError: merge command failed.
  """
  exec_input_file_paths = _get_profile_paths(input_dir, '.exec')
  if not exec_input_file_paths:
    logging.info('No exec file found under %s', input_dir)
    return

  cmd = [_JAVA_PATH, '-jar', jacococli_path, 'merge']
  cmd.extend(exec_input_file_paths)
  cmd.extend(['--destfile', output_path])
  output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
  logging.info('Merge succeeded with output: %r', output)


def merge_profiles(input_dir,
                   output_file,
                   input_extension,
                   profdata_tool_path,
                   input_filename_pattern='.*',
                   sparse=True,
                   skip_validation=False):
  """Merges the profiles produced by the shards using llvm-profdata.

  Args:
    input_dir (str): The path to traverse to find input profiles.
    output_file (str): Where to write the merged profile.
    input_extension (str): File extension to look for in the input_dir.
        e.g. '.profdata' or '.profraw'
    profdata_tool_path: The path to the llvm-profdata executable.
    input_filename_pattern (str): The regex pattern of input filename. Should be
        a valid regex pattern if present.
    sparse (bool): flag to indicate whether to run llvm-profdata with --sparse.
      Doc: https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge
    skip_validation (bool): flag to skip the _validate_and_convert_profraws
        invocation. only applicable when input_extension is .profraw.

  Returns:
    A tuple of two lists: the profiles that had to be excluded to get the
    merge to succeed, and the profiles that had a counter overflow.

  Raises:
    CalledProcessError: The final llvm-profdata merge failed.
  """
  profile_input_file_paths = _get_profile_paths(input_dir,
                                                input_extension,
                                                input_filename_pattern)
  invalid_profraw_files = []
  counter_overflows = []

  if skip_validation:
    logging.warning('--skip-validation has been enabled. Skipping conversion '
                    'to ensure that profiles are valid.')

  if input_extension == '.profraw' and not skip_validation:
    # From here on the inputs of the merge are the converted .profdata files;
    # the original .profraw files are deleted by the conversion step.
    profile_input_file_paths, invalid_profraw_files, counter_overflows = (
        _validate_and_convert_profraws(profile_input_file_paths,
                                       profdata_tool_path,
                                       sparse=sparse))
    logging.info('List of converted .profdata files: %r',
                 profile_input_file_paths)
    logging.info((
        'List of invalid .profraw files that failed to validate and convert: %r'
    ), invalid_profraw_files)

    if counter_overflows:
      logging.warning('There were %d profiles with counter overflows',
                      len(counter_overflows))

  # The list of input files could be empty in the following scenarios:
  # 1. The test target is a pure Python script test which doesn't execute any
  #    C/C++ binaries, such as devtools_type_check.
  # 2. The test target executes a binary and does dump coverage profile data
  #    files, however, all of them turned out to be invalid.
  if not profile_input_file_paths:
    logging.info('There is no valid profraw/profdata files to merge, skip '
                 'invoking profdata tools.')
    return invalid_profraw_files, counter_overflows

  invalid_profdata_files = _call_profdata_tool(
      profile_input_file_paths=profile_input_file_paths,
      profile_output_file_path=output_file,
      profdata_tool_path=profdata_tool_path,
      sparse=sparse)

  # Remove inputs when merging profraws as they won't be needed and they can be
  # pretty large. If the inputs are profdata files, do not remove them as they
  # might be used again for multiple test types coverage.
  if input_extension == '.profraw':
    for input_file in profile_input_file_paths:
      os.remove(input_file)

  return invalid_profraw_files + invalid_profdata_files, counter_overflows


# We want to retry shards that contain one or more profiles that cannot be
# merged (typically due to corruption described in crbug.com/937521).
def get_shards_to_retry(bad_profiles):
  """Extracts the swarming task ids of the shards that produced bad profiles.

  The task id is taken from the third-from-last component of each path, e.g.
  /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw

  Args:
    bad_profiles: Iterable of paths to profiles that could not be merged.

  Returns:
    A set of task id strings.

  Raises:
    AssertionError: A path component in the task id position does not look
        like a task id (16 characters parseable as a base-16 integer).
    ValueError: A path has fewer than four components.
  """
  bad_shard_ids = set()

  def is_task_id(s):
    # Swarming task ids are 16 hex chars. Checked explicitly rather than with
    # `assert`, which is stripped when Python runs with -O.
    if len(s) != 16:
      return False
    try:
      int(s, 16)
    except ValueError:
      return False
    return True

  for profile in bad_profiles:
    # E.g. /b/s/w/ir/tmp/t/tmpSvBRii/44b643576cf39f10/profraw/default-1.profraw
    _base_path, task_id, _profraw, _filename = os.path.normpath(profile).rsplit(
        os.path.sep, 3)
    # Since we are getting a task_id from a file path, which is less than ideal,
    # do some checking to at least verify that the snippet looks like a valid
    # task id.
    if not is_task_id(task_id):
      raise AssertionError(
          '%r (from %r) does not look like a swarming task id' %
          (task_id, profile))
    bad_shard_ids.add(task_id)
  return bad_shard_ids