xref: /aosp_15_r20/external/autotest/site_utils/perf_csv_uploader.py (revision 9c5db1993ded3edbeafc8092d69fe5de2ee02df7)
1*9c5db199SXin Li#!/usr/bin/env python3
2*9c5db199SXin Li
3*9c5db199SXin Li# Copyright 2016 The Chromium OS Authors. All rights reserved.
4*9c5db199SXin Li# Use of this source code is governed by a BSD-style license that can be
5*9c5db199SXin Li# found in the LICENSE file.
6*9c5db199SXin Li
"""
This module is used to upload csv files generated by performance related tests
to cns. More details about the implementation can be found in crbug.com/598504.

The overall workflow is as follows.
1. Query the tko_test_attributes table for perf_csv_folder attributes. Each
attribute contains a path to csv files that need to be uploaded to cns.
2. Keep only the perf_csv_folder attributes of test jobs that finished at
least an hour ago. This is to make sure the results have already been uploaded
to GS.
3. Locate the csv files in GS, and upload them to the desired cns location.

After every run, the script saves the minimum test attribute id to resume from
to a local file, and repeats the workflow.

"""
22*9c5db199SXin Li
23*9c5db199SXin Liimport argparse
24*9c5db199SXin Liimport datetime
25*9c5db199SXin Liimport logging
26*9c5db199SXin Liimport os
27*9c5db199SXin Liimport shutil
28*9c5db199SXin Liimport tempfile
29*9c5db199SXin Liimport time
30*9c5db199SXin Li
31*9c5db199SXin Liimport common
32*9c5db199SXin Lifrom autotest_lib.client.bin import utils
33*9c5db199SXin Lifrom autotest_lib.client.common_lib import logging_config
34*9c5db199SXin Lifrom autotest_lib.client.common_lib.cros import retry
35*9c5db199SXin Lifrom autotest_lib.frontend import setup_django_environment
36*9c5db199SXin Lifrom autotest_lib.frontend.tko import models as tko_models
37*9c5db199SXin Li
38*9c5db199SXin Li
# Minimum age, in hours, of a finished test before this script processes it.
# The delay gives gs_offloader enough time to upload the results to GS.
CUTOFF_TIME_HOURS = 1

# Number of seconds to wait between runs by default.
DEFAULT_INTERVAL_SEC = 60

# Upper bound, in minutes, on the upload attempts made for a single folder.
UPLOAD_TIMEOUT_MINS = 5
48*9c5db199SXin Li
class CsvNonexistenceException(Exception):
    """Raised when no csv files can be found in GS."""
51*9c5db199SXin Li
52*9c5db199SXin Li
class CsvFolder(object):
    """Information about a folder of csv files to be uploaded, together with
    the logic to download those files from GS and upload them to cns.
    """

    # A class variable whose value is the GoogleStorage path to the test
    # results. It is None until assigned by the caller.
    gs_path = None

    # A class variable whose value is the cns path to upload the csv files to.
    # It is None until assigned by the caller.
    cns_path = None

    def __init__(self, test_attribute_id, perf_csv_folder, test_view):
        """Initialize a CsvFolder object.

        @param test_attribute_id: ID of test attribute record.
        @param perf_csv_folder: Path of the folder contains csv files in test
                results. It's the value of perf_csv_folder attribute from
                tko_test_attributes table.
        @param test_view: A db object from querying tko_test_view_2 for the
                related tko_test_attributes.
        """
        self.test_attribute_id = test_attribute_id
        self.perf_csv_folder = perf_csv_folder
        self.test_view = test_view


    def __str__(self):
        """Return '<job_name>:<job_tag>:<perf_csv_folder>' for log messages."""
        return '%s:%s:%s' % (self.test_view.job_name, self.test_view.job_tag,
                             self.perf_csv_folder)


    def _get_url(self):
        """Get the url to the test results folder in GS.

        The url is the class-level gs_path joined with the job tag of the
        test view. For example:
        gs://chromeos-autotest-results/123-chromeos-test/host1/
        gsutil is used to download the csv files with this gs url.

        @return: The GS url, as a string.
        """
        return os.path.join(self.gs_path, self.test_view.job_tag)


    def _download(self, dest_dir):
        """Download the csv files found under the GS url into dest_dir.

        All csv files are copied directly into dest_dir; nothing is returned.

        @param dest_dir: A directory to store the downloaded csv files.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        gs_url = self._get_url()
        # Find all csv files in given GS url recursively. ignore_status=True
        # keeps a failing pipeline from raising here; an empty listing is
        # reported below as CsvNonexistenceException instead.
        listing = utils.run('gsutil ls -r %s | grep -e .*\\\\.csv$' %
                            gs_url, ignore_status=True).stdout
        files = [line.strip() for line in listing.splitlines()
                 if line.strip()]
        if not files:
            # Format the message eagerly: exceptions do not interpolate
            # logging-style arguments.
            raise CsvNonexistenceException('No csv file found in %s' % gs_url)

        # Copy files from GS to dest_dir.
        for f in files:
            utils.run('gsutil cp %s %s' % (f, dest_dir))


    @retry.retry(Exception, raiselist=[CsvNonexistenceException],
                 timeout_min=UPLOAD_TIMEOUT_MINS)
    def upload(self):
        """Upload the folder to cns.

        The csv files are first downloaded from GS into a temporary
        directory, which is always removed afterwards.

        @raise CsvNonexistenceException: If no csv file found in the GS.
        """
        temp_dir = tempfile.mkdtemp(suffix='perf_csv')
        try:
            self._download(temp_dir)
            finished_time = self.test_view.job_finished_time
            # File in cns is stored under folder with format of:
            # <test_name>/<host_name>/YYYY/mm/dd/HH/MM
            path_in_cns = os.path.join(
                    self.cns_path,
                    self.test_view.test_name, self.test_view.hostname,
                    str(finished_time.year),
                    str(finished_time.month).zfill(2),
                    str(finished_time.day).zfill(2),
                    str(finished_time.hour).zfill(2),
                    str(finished_time.minute).zfill(2))
            utils.run('fileutil mkdir -p %s' % path_in_cns)
            for f in os.listdir(temp_dir):
                utils.run('fileutil copytodir -f %s %s' %
                          (os.path.join(temp_dir, f), path_in_cns))
        finally:
            shutil.rmtree(temp_dir)
142*9c5db199SXin Li
143*9c5db199SXin Li
class DBScanner(object):
    """Class contains the logic to query tko_test_attributes table for
    new perf_csv_folder attributes and create CsvFolder object for each
    new perf_csv_folder attribute.
    """

    # Minimum test_attribute id for querying tko_test_attributes table.
    min_test_attribute_id = -1

    @classmethod
    def get_perf_csv_folders(cls):
        """Query tko_test_attributes table for new entries of perf_csv_folder.

        Only attributes with an id of at least min_test_attribute_id are
        considered. An attribute is skipped when it has no matching test view,
        or when its job has no finish time or finished less than
        CUTOFF_TIME_HOURS ago.

        @return: A list of CsvFolder objects for each new entry of
                 perf_csv_folder attribute in tko_test_attributes table.
        """
        attributes = tko_models.TestAttribute.objects.filter(
                attribute='perf_csv_folder', id__gte=cls.min_test_attribute_id)
        cutoff_time = (datetime.datetime.now() -
                       datetime.timedelta(hours=CUTOFF_TIME_HOURS))

        folders = []
        for attribute in attributes:
            # Slice before evaluating so that at most one row is fetched, and
            # it is fetched only once.
            test_views = list(tko_models.TestView.objects.filter(
                    test_idx=attribute.test_id)[:1])
            if not test_views:
                logging.warning('No test view found for test attribute %s '
                                '(test_idx %s), skipping.',
                                attribute.id, attribute.test_id)
                continue

            test_view = test_views[0]
            finished_time = test_view.job_finished_time
            # A missing finish time cannot be compared against the cutoff;
            # treat such a job the same as one that finished too recently.
            if finished_time is None or finished_time > cutoff_time:
                continue
            folders.append(CsvFolder(attribute.id, attribute.value,
                                     test_view))
        return folders
174*9c5db199SXin Li
175*9c5db199SXin Li
def setup_logging(log_dir):
    """Attach a DEBUG-level file handler writing to perf_csv_uploader.log.

    @param log_dir: Path to the directory storing logs of this script.
    """
    log_setup = logging_config.LoggingConfig()
    absolute_log_dir = os.path.abspath(log_dir)
    log_setup.add_file_handler(
            file_path=os.path.join(absolute_log_dir, 'perf_csv_uploader.log'),
            level=logging.DEBUG)
184*9c5db199SXin Li
185*9c5db199SXin Li
def save_min_test_attribute_id(test_attribute_id_file):
    """Write DBScanner.min_test_attribute_id to the cache file.

    The file is overwritten with the decimal string form of the id.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The value returned by the file object's write() call.
    """
    with open(test_attribute_id_file, 'w') as id_file:
        id_text = str(DBScanner.min_test_attribute_id)
        return id_file.write(id_text)
194*9c5db199SXin Li
195*9c5db199SXin Li
def get_min_test_attribute_id(test_attribute_id_file):
    """Get the minimum test attribute id from a cached file.

    @param test_attribute_id_file: Path to the file storing the value of
            min_test_attribute_id.

    @return: The integer stored in the file, or -1 if the file cannot be read
             or does not contain a valid integer.
    """
    try:
        with open(test_attribute_id_file, 'r') as f:
            return int(f.read())
    except (IOError, ValueError):
        # IOError: min_test_attribute_id has not been saved yet.
        # ValueError: the file is empty or corrupted, e.g. because a previous
        # save was interrupted after the file was truncated.
        # Either way, default to -1.
        return -1
208*9c5db199SXin Li
209*9c5db199SXin Li
def get_options():
    """Get the command line options.

    All three options are required: each of them is later passed to
    os.path functions, which fail on None. Requiring them up front turns a
    late TypeError into an immediate usage error.

    As a side effect, CsvFolder.gs_path and CsvFolder.cns_path are set from
    the parsed options.

    @return: Command line options of the script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gs_path', type=str, dest='gs_path', required=True,
                        help='GoogleStorage path that stores test results.')
    parser.add_argument('--cns_path', type=str, dest='cns_path', required=True,
                        help='cns path to where csv files are uploaded to.')
    parser.add_argument('--log_dir', type=str, dest='log_dir', required=True,
                        help='Directory used to store logs.')

    options = parser.parse_args()
    CsvFolder.gs_path = options.gs_path
    CsvFolder.cns_path = options.cns_path

    return options
228*9c5db199SXin Li
229*9c5db199SXin Li
def main():
    """Run the search/upload workflow for csv files in an endless loop.

    The minimum test attribute id to resume from is loaded from a file under
    the log directory at startup, and written back whenever it changes.
    """
    options = get_options()
    setup_logging(options.log_dir)
    test_attribute_id_file = os.path.join(options.log_dir,
                                          'perf_csv_uploader_test_attr_id')
    DBScanner.min_test_attribute_id = get_min_test_attribute_id(
            test_attribute_id_file)

    while True:
        folders = DBScanner.get_perf_csv_folders()
        if not folders:
            logging.info('No new folders found. Wait...')
            time.sleep(DEFAULT_INTERVAL_SEC)
            continue

        failed_ids = []
        for folder in folders:
            try:
                logging.info('Uploading folder: %s', folder)
                folder.upload()
            except CsvNonexistenceException:
                # Missing CSV files in GS are not treated as a failure.
                pass
            except Exception as e:
                failed_ids.append(folder.test_attribute_id)
                logging.error('Failed to upload folder %s, error: %s',
                              folder, e)

        if failed_ids:
            # Resume from the smallest attribute id that failed to upload.
            next_min_id = min(failed_ids)
        else:
            # Everything was handled; resume right after the largest id.
            # NOTE(review): attributes with smaller ids that the scanner
            # skipped in this pass would not be queried again -- confirm this
            # is intended.
            next_min_id = max(folder.test_attribute_id
                              for folder in folders) + 1

        if next_min_id == DBScanner.min_test_attribute_id:
            continue
        DBScanner.min_test_attribute_id = next_min_id
        save_min_test_attribute_id(test_attribute_id_file)
270*9c5db199SXin Li
271*9c5db199SXin Li
# Start the upload loop only when this file is executed as a script.
if __name__ == '__main__':
    main()
274