xref: /aosp_15_r20/external/protobuf/benchmarks/util/big_query_utils.py (revision 1b3f573f81763fcece89efc2b6a5209149e44ab8)
1*1b3f573fSAndroid Build Coastguard Worker#!/usr/bin/env python2.7
2*1b3f573fSAndroid Build Coastguard Worker
3*1b3f573fSAndroid Build Coastguard Workerfrom __future__ import print_function
4*1b3f573fSAndroid Build Coastguard Workerimport argparse
5*1b3f573fSAndroid Build Coastguard Workerimport json
6*1b3f573fSAndroid Build Coastguard Workerimport uuid
7*1b3f573fSAndroid Build Coastguard Workerimport httplib2
8*1b3f573fSAndroid Build Coastguard Worker
9*1b3f573fSAndroid Build Coastguard Workerfrom apiclient import discovery
10*1b3f573fSAndroid Build Coastguard Workerfrom apiclient.errors import HttpError
11*1b3f573fSAndroid Build Coastguard Workerfrom oauth2client.client import GoogleCredentials
12*1b3f573fSAndroid Build Coastguard Worker
13*1b3f573fSAndroid Build Coastguard Worker# 30 days in milliseconds
14*1b3f573fSAndroid Build Coastguard Worker_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
15*1b3f573fSAndroid Build Coastguard WorkerNUM_RETRIES = 3
16*1b3f573fSAndroid Build Coastguard Worker
17*1b3f573fSAndroid Build Coastguard Worker
18*1b3f573fSAndroid Build Coastguard Workerdef create_big_query():
19*1b3f573fSAndroid Build Coastguard Worker    """Authenticates with cloud platform and gets a BiqQuery service object
20*1b3f573fSAndroid Build Coastguard Worker  """
21*1b3f573fSAndroid Build Coastguard Worker    creds = GoogleCredentials.get_application_default()
22*1b3f573fSAndroid Build Coastguard Worker    return discovery.build(
23*1b3f573fSAndroid Build Coastguard Worker        'bigquery', 'v2', credentials=creds, cache_discovery=False)
24*1b3f573fSAndroid Build Coastguard Worker
25*1b3f573fSAndroid Build Coastguard Worker
26*1b3f573fSAndroid Build Coastguard Workerdef create_dataset(biq_query, project_id, dataset_id):
27*1b3f573fSAndroid Build Coastguard Worker    is_success = True
28*1b3f573fSAndroid Build Coastguard Worker    body = {
29*1b3f573fSAndroid Build Coastguard Worker        'datasetReference': {
30*1b3f573fSAndroid Build Coastguard Worker            'projectId': project_id,
31*1b3f573fSAndroid Build Coastguard Worker            'datasetId': dataset_id
32*1b3f573fSAndroid Build Coastguard Worker        }
33*1b3f573fSAndroid Build Coastguard Worker    }
34*1b3f573fSAndroid Build Coastguard Worker
35*1b3f573fSAndroid Build Coastguard Worker    try:
36*1b3f573fSAndroid Build Coastguard Worker        dataset_req = biq_query.datasets().insert(
37*1b3f573fSAndroid Build Coastguard Worker            projectId=project_id, body=body)
38*1b3f573fSAndroid Build Coastguard Worker        dataset_req.execute(num_retries=NUM_RETRIES)
39*1b3f573fSAndroid Build Coastguard Worker    except HttpError as http_error:
40*1b3f573fSAndroid Build Coastguard Worker        if http_error.resp.status == 409:
41*1b3f573fSAndroid Build Coastguard Worker            print('Warning: The dataset %s already exists' % dataset_id)
42*1b3f573fSAndroid Build Coastguard Worker        else:
43*1b3f573fSAndroid Build Coastguard Worker            # Note: For more debugging info, print "http_error.content"
44*1b3f573fSAndroid Build Coastguard Worker            print('Error in creating dataset: %s. Err: %s' % (dataset_id,
45*1b3f573fSAndroid Build Coastguard Worker                                                              http_error))
46*1b3f573fSAndroid Build Coastguard Worker            is_success = False
47*1b3f573fSAndroid Build Coastguard Worker    return is_success
48*1b3f573fSAndroid Build Coastguard Worker
49*1b3f573fSAndroid Build Coastguard Worker
50*1b3f573fSAndroid Build Coastguard Workerdef create_table(big_query, project_id, dataset_id, table_id, table_schema,
51*1b3f573fSAndroid Build Coastguard Worker                 description):
52*1b3f573fSAndroid Build Coastguard Worker    fields = [{
53*1b3f573fSAndroid Build Coastguard Worker        'name': field_name,
54*1b3f573fSAndroid Build Coastguard Worker        'type': field_type,
55*1b3f573fSAndroid Build Coastguard Worker        'description': field_description
56*1b3f573fSAndroid Build Coastguard Worker    } for (field_name, field_type, field_description) in table_schema]
57*1b3f573fSAndroid Build Coastguard Worker    return create_table2(big_query, project_id, dataset_id, table_id, fields,
58*1b3f573fSAndroid Build Coastguard Worker                         description)
59*1b3f573fSAndroid Build Coastguard Worker
60*1b3f573fSAndroid Build Coastguard Worker
61*1b3f573fSAndroid Build Coastguard Workerdef create_partitioned_table(big_query,
62*1b3f573fSAndroid Build Coastguard Worker                             project_id,
63*1b3f573fSAndroid Build Coastguard Worker                             dataset_id,
64*1b3f573fSAndroid Build Coastguard Worker                             table_id,
65*1b3f573fSAndroid Build Coastguard Worker                             table_schema,
66*1b3f573fSAndroid Build Coastguard Worker                             description,
67*1b3f573fSAndroid Build Coastguard Worker                             partition_type='DAY',
68*1b3f573fSAndroid Build Coastguard Worker                             expiration_ms=_EXPIRATION_MS):
69*1b3f573fSAndroid Build Coastguard Worker    """Creates a partitioned table. By default, a date-paritioned table is created with
70*1b3f573fSAndroid Build Coastguard Worker  each partition lasting 30 days after it was last modified.
71*1b3f573fSAndroid Build Coastguard Worker  """
72*1b3f573fSAndroid Build Coastguard Worker    fields = [{
73*1b3f573fSAndroid Build Coastguard Worker        'name': field_name,
74*1b3f573fSAndroid Build Coastguard Worker        'type': field_type,
75*1b3f573fSAndroid Build Coastguard Worker        'description': field_description
76*1b3f573fSAndroid Build Coastguard Worker    } for (field_name, field_type, field_description) in table_schema]
77*1b3f573fSAndroid Build Coastguard Worker    return create_table2(big_query, project_id, dataset_id, table_id, fields,
78*1b3f573fSAndroid Build Coastguard Worker                         description, partition_type, expiration_ms)
79*1b3f573fSAndroid Build Coastguard Worker
80*1b3f573fSAndroid Build Coastguard Worker
81*1b3f573fSAndroid Build Coastguard Workerdef create_table2(big_query,
82*1b3f573fSAndroid Build Coastguard Worker                  project_id,
83*1b3f573fSAndroid Build Coastguard Worker                  dataset_id,
84*1b3f573fSAndroid Build Coastguard Worker                  table_id,
85*1b3f573fSAndroid Build Coastguard Worker                  fields_schema,
86*1b3f573fSAndroid Build Coastguard Worker                  description,
87*1b3f573fSAndroid Build Coastguard Worker                  partition_type=None,
88*1b3f573fSAndroid Build Coastguard Worker                  expiration_ms=None):
89*1b3f573fSAndroid Build Coastguard Worker    is_success = True
90*1b3f573fSAndroid Build Coastguard Worker
91*1b3f573fSAndroid Build Coastguard Worker    body = {
92*1b3f573fSAndroid Build Coastguard Worker        'description': description,
93*1b3f573fSAndroid Build Coastguard Worker        'schema': {
94*1b3f573fSAndroid Build Coastguard Worker            'fields': fields_schema
95*1b3f573fSAndroid Build Coastguard Worker        },
96*1b3f573fSAndroid Build Coastguard Worker        'tableReference': {
97*1b3f573fSAndroid Build Coastguard Worker            'datasetId': dataset_id,
98*1b3f573fSAndroid Build Coastguard Worker            'projectId': project_id,
99*1b3f573fSAndroid Build Coastguard Worker            'tableId': table_id
100*1b3f573fSAndroid Build Coastguard Worker        }
101*1b3f573fSAndroid Build Coastguard Worker    }
102*1b3f573fSAndroid Build Coastguard Worker
103*1b3f573fSAndroid Build Coastguard Worker    if partition_type and expiration_ms:
104*1b3f573fSAndroid Build Coastguard Worker        body["timePartitioning"] = {
105*1b3f573fSAndroid Build Coastguard Worker            "type": partition_type,
106*1b3f573fSAndroid Build Coastguard Worker            "expirationMs": expiration_ms
107*1b3f573fSAndroid Build Coastguard Worker        }
108*1b3f573fSAndroid Build Coastguard Worker
109*1b3f573fSAndroid Build Coastguard Worker    try:
110*1b3f573fSAndroid Build Coastguard Worker        table_req = big_query.tables().insert(
111*1b3f573fSAndroid Build Coastguard Worker            projectId=project_id, datasetId=dataset_id, body=body)
112*1b3f573fSAndroid Build Coastguard Worker        res = table_req.execute(num_retries=NUM_RETRIES)
113*1b3f573fSAndroid Build Coastguard Worker        print('Successfully created %s "%s"' % (res['kind'], res['id']))
114*1b3f573fSAndroid Build Coastguard Worker    except HttpError as http_error:
115*1b3f573fSAndroid Build Coastguard Worker        if http_error.resp.status == 409:
116*1b3f573fSAndroid Build Coastguard Worker            print('Warning: Table %s already exists' % table_id)
117*1b3f573fSAndroid Build Coastguard Worker        else:
118*1b3f573fSAndroid Build Coastguard Worker            print('Error in creating table: %s. Err: %s' % (table_id,
119*1b3f573fSAndroid Build Coastguard Worker                                                            http_error))
120*1b3f573fSAndroid Build Coastguard Worker            is_success = False
121*1b3f573fSAndroid Build Coastguard Worker    return is_success
122*1b3f573fSAndroid Build Coastguard Worker
123*1b3f573fSAndroid Build Coastguard Worker
124*1b3f573fSAndroid Build Coastguard Workerdef patch_table(big_query, project_id, dataset_id, table_id, fields_schema):
125*1b3f573fSAndroid Build Coastguard Worker    is_success = True
126*1b3f573fSAndroid Build Coastguard Worker
127*1b3f573fSAndroid Build Coastguard Worker    body = {
128*1b3f573fSAndroid Build Coastguard Worker        'schema': {
129*1b3f573fSAndroid Build Coastguard Worker            'fields': fields_schema
130*1b3f573fSAndroid Build Coastguard Worker        },
131*1b3f573fSAndroid Build Coastguard Worker        'tableReference': {
132*1b3f573fSAndroid Build Coastguard Worker            'datasetId': dataset_id,
133*1b3f573fSAndroid Build Coastguard Worker            'projectId': project_id,
134*1b3f573fSAndroid Build Coastguard Worker            'tableId': table_id
135*1b3f573fSAndroid Build Coastguard Worker        }
136*1b3f573fSAndroid Build Coastguard Worker    }
137*1b3f573fSAndroid Build Coastguard Worker
138*1b3f573fSAndroid Build Coastguard Worker    try:
139*1b3f573fSAndroid Build Coastguard Worker        table_req = big_query.tables().patch(
140*1b3f573fSAndroid Build Coastguard Worker            projectId=project_id,
141*1b3f573fSAndroid Build Coastguard Worker            datasetId=dataset_id,
142*1b3f573fSAndroid Build Coastguard Worker            tableId=table_id,
143*1b3f573fSAndroid Build Coastguard Worker            body=body)
144*1b3f573fSAndroid Build Coastguard Worker        res = table_req.execute(num_retries=NUM_RETRIES)
145*1b3f573fSAndroid Build Coastguard Worker        print('Successfully patched %s "%s"' % (res['kind'], res['id']))
146*1b3f573fSAndroid Build Coastguard Worker    except HttpError as http_error:
147*1b3f573fSAndroid Build Coastguard Worker        print('Error in creating table: %s. Err: %s' % (table_id, http_error))
148*1b3f573fSAndroid Build Coastguard Worker        is_success = False
149*1b3f573fSAndroid Build Coastguard Worker    return is_success
150*1b3f573fSAndroid Build Coastguard Worker
151*1b3f573fSAndroid Build Coastguard Worker
152*1b3f573fSAndroid Build Coastguard Workerdef insert_rows(big_query, project_id, dataset_id, table_id, rows_list):
153*1b3f573fSAndroid Build Coastguard Worker    is_success = True
154*1b3f573fSAndroid Build Coastguard Worker    body = {'rows': rows_list}
155*1b3f573fSAndroid Build Coastguard Worker    try:
156*1b3f573fSAndroid Build Coastguard Worker        insert_req = big_query.tabledata().insertAll(
157*1b3f573fSAndroid Build Coastguard Worker            projectId=project_id,
158*1b3f573fSAndroid Build Coastguard Worker            datasetId=dataset_id,
159*1b3f573fSAndroid Build Coastguard Worker            tableId=table_id,
160*1b3f573fSAndroid Build Coastguard Worker            body=body)
161*1b3f573fSAndroid Build Coastguard Worker        res = insert_req.execute(num_retries=NUM_RETRIES)
162*1b3f573fSAndroid Build Coastguard Worker        if res.get('insertErrors', None):
163*1b3f573fSAndroid Build Coastguard Worker            print('Error inserting rows! Response: %s' % res)
164*1b3f573fSAndroid Build Coastguard Worker            is_success = False
165*1b3f573fSAndroid Build Coastguard Worker    except HttpError as http_error:
166*1b3f573fSAndroid Build Coastguard Worker        print('Error inserting rows to the table %s' % table_id)
167*1b3f573fSAndroid Build Coastguard Worker        is_success = False
168*1b3f573fSAndroid Build Coastguard Worker
169*1b3f573fSAndroid Build Coastguard Worker    return is_success
170*1b3f573fSAndroid Build Coastguard Worker
171*1b3f573fSAndroid Build Coastguard Worker
172*1b3f573fSAndroid Build Coastguard Workerdef sync_query_job(big_query, project_id, query, timeout=5000):
173*1b3f573fSAndroid Build Coastguard Worker    query_data = {'query': query, 'timeoutMs': timeout}
174*1b3f573fSAndroid Build Coastguard Worker    query_job = None
175*1b3f573fSAndroid Build Coastguard Worker    try:
176*1b3f573fSAndroid Build Coastguard Worker        query_job = big_query.jobs().query(
177*1b3f573fSAndroid Build Coastguard Worker            projectId=project_id,
178*1b3f573fSAndroid Build Coastguard Worker            body=query_data).execute(num_retries=NUM_RETRIES)
179*1b3f573fSAndroid Build Coastguard Worker    except HttpError as http_error:
180*1b3f573fSAndroid Build Coastguard Worker        print('Query execute job failed with error: %s' % http_error)
181*1b3f573fSAndroid Build Coastguard Worker        print(http_error.content)
182*1b3f573fSAndroid Build Coastguard Worker    return query_job
183*1b3f573fSAndroid Build Coastguard Worker
184*1b3f573fSAndroid Build Coastguard Worker
185*1b3f573fSAndroid Build Coastguard Worker    # List of (column name, column type, description) tuples
186*1b3f573fSAndroid Build Coastguard Workerdef make_row(unique_row_id, row_values_dict):
187*1b3f573fSAndroid Build Coastguard Worker    """row_values_dict is a dictionary of column name and column value.
188*1b3f573fSAndroid Build Coastguard Worker  """
189*1b3f573fSAndroid Build Coastguard Worker    return {'insertId': unique_row_id, 'json': row_values_dict}
190