1*1b3f573fSAndroid Build Coastguard Worker#!/usr/bin/env python2.7 2*1b3f573fSAndroid Build Coastguard Worker 3*1b3f573fSAndroid Build Coastguard Workerfrom __future__ import print_function 4*1b3f573fSAndroid Build Coastguard Workerimport argparse 5*1b3f573fSAndroid Build Coastguard Workerimport json 6*1b3f573fSAndroid Build Coastguard Workerimport uuid 7*1b3f573fSAndroid Build Coastguard Workerimport httplib2 8*1b3f573fSAndroid Build Coastguard Worker 9*1b3f573fSAndroid Build Coastguard Workerfrom apiclient import discovery 10*1b3f573fSAndroid Build Coastguard Workerfrom apiclient.errors import HttpError 11*1b3f573fSAndroid Build Coastguard Workerfrom oauth2client.client import GoogleCredentials 12*1b3f573fSAndroid Build Coastguard Worker 13*1b3f573fSAndroid Build Coastguard Worker# 30 days in milliseconds 14*1b3f573fSAndroid Build Coastguard Worker_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000 15*1b3f573fSAndroid Build Coastguard WorkerNUM_RETRIES = 3 16*1b3f573fSAndroid Build Coastguard Worker 17*1b3f573fSAndroid Build Coastguard Worker 18*1b3f573fSAndroid Build Coastguard Workerdef create_big_query(): 19*1b3f573fSAndroid Build Coastguard Worker """Authenticates with cloud platform and gets a BiqQuery service object 20*1b3f573fSAndroid Build Coastguard Worker """ 21*1b3f573fSAndroid Build Coastguard Worker creds = GoogleCredentials.get_application_default() 22*1b3f573fSAndroid Build Coastguard Worker return discovery.build( 23*1b3f573fSAndroid Build Coastguard Worker 'bigquery', 'v2', credentials=creds, cache_discovery=False) 24*1b3f573fSAndroid Build Coastguard Worker 25*1b3f573fSAndroid Build Coastguard Worker 26*1b3f573fSAndroid Build Coastguard Workerdef create_dataset(biq_query, project_id, dataset_id): 27*1b3f573fSAndroid Build Coastguard Worker is_success = True 28*1b3f573fSAndroid Build Coastguard Worker body = { 29*1b3f573fSAndroid Build Coastguard Worker 'datasetReference': { 30*1b3f573fSAndroid Build Coastguard Worker 'projectId': project_id, 31*1b3f573fSAndroid Build Coastguard Worker 'datasetId': dataset_id 32*1b3f573fSAndroid Build Coastguard Worker } 33*1b3f573fSAndroid Build Coastguard Worker } 34*1b3f573fSAndroid Build Coastguard Worker 35*1b3f573fSAndroid Build Coastguard Worker try: 36*1b3f573fSAndroid Build Coastguard Worker dataset_req = biq_query.datasets().insert( 37*1b3f573fSAndroid Build Coastguard Worker projectId=project_id, body=body) 38*1b3f573fSAndroid Build Coastguard Worker dataset_req.execute(num_retries=NUM_RETRIES) 39*1b3f573fSAndroid Build Coastguard Worker except HttpError as http_error: 40*1b3f573fSAndroid Build Coastguard Worker if http_error.resp.status == 409: 41*1b3f573fSAndroid Build Coastguard Worker print('Warning: The dataset %s already exists' % dataset_id) 42*1b3f573fSAndroid Build Coastguard Worker else: 43*1b3f573fSAndroid Build Coastguard Worker # Note: For more debugging info, print "http_error.content" 44*1b3f573fSAndroid Build Coastguard Worker print('Error in creating dataset: %s. Err: %s' % (dataset_id, 45*1b3f573fSAndroid Build Coastguard Worker http_error)) 46*1b3f573fSAndroid Build Coastguard Worker is_success = False 47*1b3f573fSAndroid Build Coastguard Worker return is_success 48*1b3f573fSAndroid Build Coastguard Worker 49*1b3f573fSAndroid Build Coastguard Worker 50*1b3f573fSAndroid Build Coastguard Workerdef create_table(big_query, project_id, dataset_id, table_id, table_schema, 51*1b3f573fSAndroid Build Coastguard Worker description): 52*1b3f573fSAndroid Build Coastguard Worker fields = [{ 53*1b3f573fSAndroid Build Coastguard Worker 'name': field_name, 54*1b3f573fSAndroid Build Coastguard Worker 'type': field_type, 55*1b3f573fSAndroid Build Coastguard Worker 'description': field_description 56*1b3f573fSAndroid Build Coastguard Worker } for (field_name, field_type, field_description) in table_schema] 57*1b3f573fSAndroid Build Coastguard Worker return create_table2(big_query, project_id, dataset_id, table_id, fields, 58*1b3f573fSAndroid Build Coastguard Worker description) 59*1b3f573fSAndroid Build Coastguard Worker 60*1b3f573fSAndroid Build Coastguard Worker 61*1b3f573fSAndroid Build Coastguard Workerdef create_partitioned_table(big_query, 62*1b3f573fSAndroid Build Coastguard Worker project_id, 63*1b3f573fSAndroid Build Coastguard Worker dataset_id, 64*1b3f573fSAndroid Build Coastguard Worker table_id, 65*1b3f573fSAndroid Build Coastguard Worker table_schema, 66*1b3f573fSAndroid Build Coastguard Worker description, 67*1b3f573fSAndroid Build Coastguard Worker partition_type='DAY', 68*1b3f573fSAndroid Build Coastguard Worker expiration_ms=_EXPIRATION_MS): 69*1b3f573fSAndroid Build Coastguard Worker """Creates a partitioned table. By default, a date-paritioned table is created with 70*1b3f573fSAndroid Build Coastguard Worker each partition lasting 30 days after it was last modified. 71*1b3f573fSAndroid Build Coastguard Worker """ 72*1b3f573fSAndroid Build Coastguard Worker fields = [{ 73*1b3f573fSAndroid Build Coastguard Worker 'name': field_name, 74*1b3f573fSAndroid Build Coastguard Worker 'type': field_type, 75*1b3f573fSAndroid Build Coastguard Worker 'description': field_description 76*1b3f573fSAndroid Build Coastguard Worker } for (field_name, field_type, field_description) in table_schema] 77*1b3f573fSAndroid Build Coastguard Worker return create_table2(big_query, project_id, dataset_id, table_id, fields, 78*1b3f573fSAndroid Build Coastguard Worker description, partition_type, expiration_ms) 79*1b3f573fSAndroid Build Coastguard Worker 80*1b3f573fSAndroid Build Coastguard Worker 81*1b3f573fSAndroid Build Coastguard Workerdef create_table2(big_query, 82*1b3f573fSAndroid Build Coastguard Worker project_id, 83*1b3f573fSAndroid Build Coastguard Worker dataset_id, 84*1b3f573fSAndroid Build Coastguard Worker table_id, 85*1b3f573fSAndroid Build Coastguard Worker fields_schema, 86*1b3f573fSAndroid Build Coastguard Worker description, 87*1b3f573fSAndroid Build Coastguard Worker partition_type=None, 88*1b3f573fSAndroid Build Coastguard Worker expiration_ms=None): 89*1b3f573fSAndroid Build Coastguard Worker is_success = True 90*1b3f573fSAndroid Build Coastguard Worker 91*1b3f573fSAndroid Build Coastguard Worker body = { 92*1b3f573fSAndroid Build Coastguard Worker 'description': description, 93*1b3f573fSAndroid Build Coastguard Worker 'schema': { 94*1b3f573fSAndroid Build Coastguard Worker 'fields': fields_schema 95*1b3f573fSAndroid Build Coastguard Worker }, 96*1b3f573fSAndroid Build Coastguard Worker 'tableReference': { 97*1b3f573fSAndroid Build Coastguard Worker 'datasetId': dataset_id, 98*1b3f573fSAndroid Build Coastguard Worker 'projectId': project_id, 99*1b3f573fSAndroid Build Coastguard Worker 'tableId': table_id 100*1b3f573fSAndroid Build Coastguard Worker } 101*1b3f573fSAndroid Build Coastguard Worker } 102*1b3f573fSAndroid Build Coastguard Worker 103*1b3f573fSAndroid Build Coastguard Worker if partition_type and expiration_ms: 104*1b3f573fSAndroid Build Coastguard Worker body["timePartitioning"] = { 105*1b3f573fSAndroid Build Coastguard Worker "type": partition_type, 106*1b3f573fSAndroid Build Coastguard Worker "expirationMs": expiration_ms 107*1b3f573fSAndroid Build Coastguard Worker } 108*1b3f573fSAndroid Build Coastguard Worker 109*1b3f573fSAndroid Build Coastguard Worker try: 110*1b3f573fSAndroid Build Coastguard Worker table_req = big_query.tables().insert( 111*1b3f573fSAndroid Build Coastguard Worker projectId=project_id, datasetId=dataset_id, body=body) 112*1b3f573fSAndroid Build Coastguard Worker res = table_req.execute(num_retries=NUM_RETRIES) 113*1b3f573fSAndroid Build Coastguard Worker print('Successfully created %s "%s"' % (res['kind'], res['id'])) 114*1b3f573fSAndroid Build Coastguard Worker except HttpError as http_error: 115*1b3f573fSAndroid Build Coastguard Worker if http_error.resp.status == 409: 116*1b3f573fSAndroid Build Coastguard Worker print('Warning: Table %s already exists' % table_id) 117*1b3f573fSAndroid Build Coastguard Worker else: 118*1b3f573fSAndroid Build Coastguard Worker print('Error in creating table: %s. Err: %s' % (table_id, 119*1b3f573fSAndroid Build Coastguard Worker http_error)) 120*1b3f573fSAndroid Build Coastguard Worker is_success = False 121*1b3f573fSAndroid Build Coastguard Worker return is_success 122*1b3f573fSAndroid Build Coastguard Worker 123*1b3f573fSAndroid Build Coastguard Worker 124*1b3f573fSAndroid Build Coastguard Workerdef patch_table(big_query, project_id, dataset_id, table_id, fields_schema): 125*1b3f573fSAndroid Build Coastguard Worker is_success = True 126*1b3f573fSAndroid Build Coastguard Worker 127*1b3f573fSAndroid Build Coastguard Worker body = { 128*1b3f573fSAndroid Build Coastguard Worker 'schema': { 129*1b3f573fSAndroid Build Coastguard Worker 'fields': fields_schema 130*1b3f573fSAndroid Build Coastguard Worker }, 131*1b3f573fSAndroid Build Coastguard Worker 'tableReference': { 132*1b3f573fSAndroid Build Coastguard Worker 'datasetId': dataset_id, 133*1b3f573fSAndroid Build Coastguard Worker 'projectId': project_id, 134*1b3f573fSAndroid Build Coastguard Worker 'tableId': table_id 135*1b3f573fSAndroid Build Coastguard Worker } 136*1b3f573fSAndroid Build Coastguard Worker } 137*1b3f573fSAndroid Build Coastguard Worker 138*1b3f573fSAndroid Build Coastguard Worker try: 139*1b3f573fSAndroid Build Coastguard Worker table_req = big_query.tables().patch( 140*1b3f573fSAndroid Build Coastguard Worker projectId=project_id, 141*1b3f573fSAndroid Build Coastguard Worker datasetId=dataset_id, 142*1b3f573fSAndroid Build Coastguard Worker tableId=table_id, 143*1b3f573fSAndroid Build Coastguard Worker body=body) 144*1b3f573fSAndroid Build Coastguard Worker res = table_req.execute(num_retries=NUM_RETRIES) 145*1b3f573fSAndroid Build Coastguard Worker print('Successfully patched %s "%s"' % (res['kind'], res['id'])) 146*1b3f573fSAndroid Build Coastguard Worker except HttpError as http_error: 147*1b3f573fSAndroid Build Coastguard Worker print('Error in creating table: %s. Err: %s' % (table_id, http_error)) 148*1b3f573fSAndroid Build Coastguard Worker is_success = False 149*1b3f573fSAndroid Build Coastguard Worker return is_success 150*1b3f573fSAndroid Build Coastguard Worker 151*1b3f573fSAndroid Build Coastguard Worker 152*1b3f573fSAndroid Build Coastguard Workerdef insert_rows(big_query, project_id, dataset_id, table_id, rows_list): 153*1b3f573fSAndroid Build Coastguard Worker is_success = True 154*1b3f573fSAndroid Build Coastguard Worker body = {'rows': rows_list} 155*1b3f573fSAndroid Build Coastguard Worker try: 156*1b3f573fSAndroid Build Coastguard Worker insert_req = big_query.tabledata().insertAll( 157*1b3f573fSAndroid Build Coastguard Worker projectId=project_id, 158*1b3f573fSAndroid Build Coastguard Worker datasetId=dataset_id, 159*1b3f573fSAndroid Build Coastguard Worker tableId=table_id, 160*1b3f573fSAndroid Build Coastguard Worker body=body) 161*1b3f573fSAndroid Build Coastguard Worker res = insert_req.execute(num_retries=NUM_RETRIES) 162*1b3f573fSAndroid Build Coastguard Worker if res.get('insertErrors', None): 163*1b3f573fSAndroid Build Coastguard Worker print('Error inserting rows! Response: %s' % res) 164*1b3f573fSAndroid Build Coastguard Worker is_success = False 165*1b3f573fSAndroid Build Coastguard Worker except HttpError as http_error: 166*1b3f573fSAndroid Build Coastguard Worker print('Error inserting rows to the table %s' % table_id) 167*1b3f573fSAndroid Build Coastguard Worker is_success = False 168*1b3f573fSAndroid Build Coastguard Worker 169*1b3f573fSAndroid Build Coastguard Worker return is_success 170*1b3f573fSAndroid Build Coastguard Worker 171*1b3f573fSAndroid Build Coastguard Worker 172*1b3f573fSAndroid Build Coastguard Workerdef sync_query_job(big_query, project_id, query, timeout=5000): 173*1b3f573fSAndroid Build Coastguard Worker query_data = {'query': query, 'timeoutMs': timeout} 174*1b3f573fSAndroid Build Coastguard Worker query_job = None 175*1b3f573fSAndroid Build Coastguard Worker try: 176*1b3f573fSAndroid Build Coastguard Worker query_job = big_query.jobs().query( 177*1b3f573fSAndroid Build Coastguard Worker projectId=project_id, 178*1b3f573fSAndroid Build Coastguard Worker body=query_data).execute(num_retries=NUM_RETRIES) 179*1b3f573fSAndroid Build Coastguard Worker except HttpError as http_error: 180*1b3f573fSAndroid Build Coastguard Worker print('Query execute job failed with error: %s' % http_error) 181*1b3f573fSAndroid Build Coastguard Worker print(http_error.content) 182*1b3f573fSAndroid Build Coastguard Worker return query_job 183*1b3f573fSAndroid Build Coastguard Worker 184*1b3f573fSAndroid Build Coastguard Worker 185*1b3f573fSAndroid Build Coastguard Worker # List of (column name, column type, description) tuples 186*1b3f573fSAndroid Build Coastguard Workerdef make_row(unique_row_id, row_values_dict): 187*1b3f573fSAndroid Build Coastguard Worker """row_values_dict is a dictionary of column name and column value. 188*1b3f573fSAndroid Build Coastguard Worker """ 189*1b3f573fSAndroid Build Coastguard Worker return {'insertId': unique_row_id, 'json': row_values_dict} 190