#!/usr/bin/env python3

# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This module is used to upload csv files generated by performance related tests
to cns. More details about the implementation can be found in crbug.com/598504.

The overall work flow is as follows.
1. Query tko_test_attributes table for perf_csv_folder attribute. The attribute
contains a path to csv files that need to be uploaded to cns.
2. Filter the perf_csv_folder attributes only for test jobs that finished at
least an hour ago. This is to make sure the results have already been uploaded
to GS.
3. Locate the csv files in GS, and upload them to desired cns location.

After every run, the script saves the minimum test attribute id that still
needs to be processed to a local file, and repeats the workflow.

"""

import argparse
import datetime
import logging
import os
import shutil
import tempfile
import time

import common
from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import logging_config
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.frontend import setup_django_environment
from autotest_lib.frontend.tko import models as tko_models


# Number of hours that a test has to be finished for the script to process.
# This allows gs_offloader to have enough time to upload the results to GS.
CUTOFF_TIME_HOURS = 1

# Default wait time in seconds after each run.
DEFAULT_INTERVAL_SEC = 60

# Timeout in minutes for upload attempts for a given folder.
UPLOAD_TIMEOUT_MINS = 5

class CsvNonexistenceException(Exception):
    """Exception raised when csv files not found in GS."""


class CsvFolder(object):
    """A class contains the information of a folder storing csv files to be
    uploaded, and logic to upload the csv files.
    """

    # A class variable whose value is the GoogleStorage path to the test
    # results. Set by get_options() from the --gs_path argument.
    gs_path = None

    # A class variable whose value is the cns path to upload the csv files to.
    # Set by get_options() from the --cns_path argument.
    cns_path = None

    def __init__(self, test_attribute_id, perf_csv_folder, test_view):
        """Initialize a CsvFolder object.

        @param test_attribute_id: ID of test attribute record.
        @param perf_csv_folder: Path of the folder contains csv files in test
                results. It's the value of perf_csv_folder attribute from
                tko_test_attributes table.
        @param test_view: A db object from querying tko_test_view_2 for the
                related tko_test_attributes.
        """
        self.test_attribute_id = test_attribute_id
        self.perf_csv_folder = perf_csv_folder
        self.test_view = test_view


    def __str__(self):
        return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
                             self.perf_csv_folder)


    def _get_url(self):
        """Get the url to the folder storing csv files in GS.

        The url is the class-level gs_path joined with the job tag of the
        test view. For example:
        gs://chromeos-autotest-results/123-chromeos-test/host1/
        gsutil is used to download the csv files with this gs url.

        @return: The GS url string for this folder's job results.
        """
        return os.path.join(self.gs_path, self.test_view.job_tag)


    def _download(self, dest_dir):
        """Download the csv files of this folder's job to the given dest_dir.

        All csv files found recursively under the GS url are copied directly
        into dest_dir. Nothing is returned; callers list dest_dir to find the
        downloaded files.

        @param dest_dir: A directory to store the downloaded csv files.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        gs_url = self._get_url()
        # Find all csv files in given GS url recursively. ignore_status=True
        # because grep exits non-zero when nothing matches.
        files = utils.run('gsutil ls -r %s | grep -e .*\\\\.csv$' %
                          gs_url, ignore_status=True).stdout.strip().split('\n')
        if not files or files == ['']:
            # Format the message eagerly: exception constructors do not
            # interpolate logging-style arguments.
            raise CsvNonexistenceException(
                    'No csv file found in %s' % gs_url)

        # Copy files from GS to dest_dir
        for f in files:
            utils.run('gsutil cp %s %s' % (f, dest_dir))


    # NOTE(review): raiselist presumably makes the retry decorator re-raise
    # CsvNonexistenceException immediately instead of retrying - confirm
    # against autotest_lib.client.common_lib.cros.retry.
    @retry.retry(Exception, raiselist=[CsvNonexistenceException],
                 timeout_min=UPLOAD_TIMEOUT_MINS)
    def upload(self):
        """Upload the folder to cns.

        The csv files are first downloaded from GS into a temporary directory,
        then copied to cns with fileutil. The temporary directory is always
        removed, whether or not the upload succeeds.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        temp_dir = tempfile.mkdtemp(suffix='perf_csv')
        try:
            self._download(temp_dir)
            files = os.listdir(temp_dir)
            finished_time = self.test_view.job_finished_time
            # File in cns is stored under folder with format of:
            # <test_name>/<host_name>/YYYY/mm/dd/hh/mm
            path_in_cns = os.path.join(
                    self.cns_path,
                    self.test_view.test_name, self.test_view.hostname,
                    str(finished_time.year),
                    str(finished_time.month).zfill(2),
                    str(finished_time.day).zfill(2),
                    str(finished_time.hour).zfill(2),
                    str(finished_time.minute).zfill(2))
            utils.run('fileutil mkdir -p %s' % path_in_cns)
            for f in files:
                utils.run('fileutil copytodir -f %s %s' %
                          (os.path.join(temp_dir, f), path_in_cns))
        finally:
            shutil.rmtree(temp_dir)


class DBScanner(object):
    """Class contains the logic to query tko_test_attributes table for
    new perf_csv_folder attributes and create CsvFolder object for each
    new perf_csv_folder attribute.
    """

    # Minimum test_attribute id for querying tko_test_attributes table.
    min_test_attribute_id = -1

    @classmethod
    def get_perf_csv_folders(cls):
        """Query tko_test_attributes table for new entries of perf_csv_folder.

        Attributes whose job has not finished, or finished less than
        CUTOFF_TIME_HOURS ago, are skipped. Attributes without any matching
        test view are skipped with a warning.

        @return: A list of CsvFolder objects for each new entry of
                perf_csv_folder attribute in tko_test_attributes table.
        """
        attributes = tko_models.TestAttribute.objects.filter(
                attribute='perf_csv_folder', id__gte=cls.min_test_attribute_id)
        folders = []

        cutoff_time = (datetime.datetime.now() -
                       datetime.timedelta(hours=CUTOFF_TIME_HOURS))
        for attribute in attributes:
            # Slice instead of indexing so the row is fetched only once and a
            # missing row yields an empty list rather than an IndexError.
            matching_views = list(tko_models.TestView.objects.filter(
                    test_idx=attribute.test_id)[:1])
            if not matching_views:
                logging.warning('No test view found for test attribute %s '
                                '(test id %s), skipping.',
                                attribute.id, attribute.test_id)
                continue
            test_view = matching_views[0]
            finished_time = test_view.job_finished_time
            # A job without a finished time has not completed yet; comparing
            # None with a datetime would raise TypeError.
            if finished_time is None or finished_time > cutoff_time:
                continue
            folders.append(CsvFolder(attribute.id, attribute.value, test_view))
        return folders


def setup_logging(log_dir):
    """Setup logging information.

    @param log_dir: Path to the directory storing logs of this script.
    """
    config = logging_config.LoggingConfig()
    logfile = os.path.join(os.path.abspath(log_dir), 'perf_csv_uploader.log')
    config.add_file_handler(file_path=logfile, level=logging.DEBUG)


def save_min_test_attribute_id(test_attribute_id_file):
    """Save the minimum test attribute id to a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The return value of the underlying file write call.
    """
    with open(test_attribute_id_file, 'w') as f:
        return f.write(str(DBScanner.min_test_attribute_id))


def get_min_test_attribute_id(test_attribute_id_file):
    """Get the minimum test attribute id from a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The cached id as an int, or -1 if the file can not be read or
            does not contain a valid integer.
    """
    try:
        with open(test_attribute_id_file, 'r') as f:
            return int(f.read())
    except IOError:
        # min_test_attribute_id has not been set, default to -1.
        return -1
    except ValueError:
        # The cached file is empty or corrupted (e.g. the script was killed
        # while writing it); start over from the default.
        logging.warning('Invalid content in %s, default to -1.',
                        test_attribute_id_file)
        return -1


def get_options():
    """Get the command line options.

    As a side effect, CsvFolder.gs_path and CsvFolder.cns_path are set from
    the parsed options.

    @return: Command line options of the script.
    """
    parser = argparse.ArgumentParser()
    # All options are required: the script can not build GS/cns/log paths
    # without them, so fail at startup rather than deep inside the main loop.
    parser.add_argument('--gs_path', type=str, dest='gs_path', required=True,
                        help='GoogleStorage path that stores test results.')
    parser.add_argument('--cns_path', type=str, dest='cns_path',
                        required=True,
                        help='cns path to where csv files are uploaded to.')
    parser.add_argument('--log_dir', type=str, dest='log_dir', required=True,
                        help='Directory used to store logs.')

    options = parser.parse_args()
    CsvFolder.gs_path = options.gs_path
    CsvFolder.cns_path = options.cns_path

    return options


def main():
    """Main process to repeat the workflow of searching/uploading csv files.
    """
    options = get_options()
    setup_logging(options.log_dir)
    test_attribute_id_file = os.path.join(options.log_dir,
                                          'perf_csv_uploader_test_attr_id')
    DBScanner.min_test_attribute_id = get_min_test_attribute_id(
            test_attribute_id_file)

    while True:
        folders = DBScanner.get_perf_csv_folders()
        if not folders:
            logging.info('No new folders found. Wait...')
            time.sleep(DEFAULT_INTERVAL_SEC)
            continue

        failed_folders = []
        for folder in folders:
            try:
                logging.info('Uploading folder: %s', folder)
                folder.upload()
            except CsvNonexistenceException:
                # Ignore the failure if CSV files are not found in GS, but
                # leave a trace in the log.
                logging.warning('No csv files found in GS for folder %s, '
                                'skipping.', folder)
            except Exception as e:
                failed_folders.append(folder)
                logging.error('Failed to upload folder %s, error: %s',
                              folder, e)
        if failed_folders:
            # Set the min_test_attribute_id to be the smallest one that failed
            # to upload.
            min_test_attribute_id = min(folder.test_attribute_id
                                        for folder in failed_folders)
        else:
            # NOTE(review): attributes with a smaller id that were filtered
            # out by the cutoff time in get_perf_csv_folders look like they
            # would be skipped permanently once the id advances past them -
            # confirm whether that is intended.
            min_test_attribute_id = max(folder.test_attribute_id
                                        for folder in folders) + 1
        if DBScanner.min_test_attribute_id != min_test_attribute_id:
            DBScanner.min_test_attribute_id = min_test_attribute_id
            save_min_test_attribute_id(test_attribute_id_file)


if __name__ == '__main__':
    main()