Example of using the Google API Client to access BigQuery

Note that this is not the recommended approach. You should use the BigQuery client library because that is idiomatic Python.

See the bigquery_client notebook for examples.

Authenticate and build stubs


In [56]:
PROJECT='cloud-training-demos'  # CHANGE THIS
from googleapiclient.discovery import build
service = build('bigquery', 'v2')

Get info about a dataset


In [44]:
# information about the ch04 dataset
dsinfo = service.datasets().get(datasetId="ch04", projectId=PROJECT).execute()
for info in dsinfo.items():
  print(info)


('selfLink', 'https://www.googleapis.com/bigquery/v2/projects/cloud-training-demos/datasets/ch04')
('etag', '5Ezj1PxU7/pwe1aBwcMG9Q==')
('kind', 'bigquery#dataset')
('creationTime', '1548463261350')
('id', 'cloud-training-demos:ch04')
('lastModifiedTime', '1548654986051')
('access', [{'specialGroup': 'projectWriters', 'role': 'WRITER'}, {'userByEmail': 'gcs-dt@bigquery-data-connectors.iam.gserviceaccount.com', 'role': 'WRITER'}, {'userByEmail': 'service-663413318684@gcp-sa-bigquerydatatransfer.iam.gserviceaccount.com', 'role': 'WRITER'}, {'specialGroup': 'projectOwners', 'role': 'OWNER'}, {'userByEmail': 'vlakshmanan@google.com', 'role': 'OWNER'}, {'specialGroup': 'projectReaders', 'role': 'READER'}])
('datasetReference', {'projectId': 'cloud-training-demos', 'datasetId': 'ch04'})
('location', 'US')

List tables and creation times


In [40]:
# list tables in dataset
tables = service.tables().list(datasetId="ch04", projectId=PROJECT).execute()
for t in tables['tables']:
  print(t['tableReference']['tableId'] + ' was created at ' + t['creationTime'])


cloudaudit_googleapis_com_activity_20190128 was created at 1548654277622
cloudaudit_googleapis_com_data_access_20190128 was created at 1548654276918
college_scorecard_clean was created at 1549858608756
college_scorecard_df was created at 1548696007442
college_scorecard_dts was created at 1548655943233
college_scorecard_etl was created at 1549870472227
college_scorecard_gs was created at 1548463434535
logs was created at 1548565079270

Query and get result


In [80]:
# send a query request
request={
  "useLegacySql": False, 
  "query": "SELECT start_station_name , AVG(duration) as duration , COUNT(duration) as num_trips FROM `bigquery-public-data`.london_bicycles.cycle_hire GROUP BY start_station_name ORDER BY num_trips DESC LIMIT 5" 
}
print(request)
response = service.jobs().query(projectId=PROJECT, body=request).execute()
print('----' * 10)
for r in response['rows']:
  print(r['f'][0]['v'])


{'query': 'SELECT start_station_name , AVG(duration) as duration , COUNT(duration) as num_trips FROM `bigquery-public-data`.london_bicycles.cycle_hire GROUP BY start_station_name ORDER BY num_trips DESC LIMIT 5', 'useLegacySql': False}
----------------------------------------
Belgrove Street , King's Cross
Hyde Park Corner, Hyde Park
Waterloo Station 3, Waterloo
Black Lion Gate, Kensington Gardens
Albert Gate, Hyde Park

Asynchronous query and paging through results


In [77]:
# send a query request that will not terminate within the timeout specified and will require paging
request={
  "useLegacySql": False,
  "timeoutMs": 0,
  "useQueryCache": False,
  "query": "SELECT start_station_name , AVG(duration) as duration , COUNT(duration) as num_trips FROM `bigquery-public-data`.london_bicycles.cycle_hire GROUP BY start_station_name ORDER BY num_trips DESC LIMIT 5" 
}
response = service.jobs().query(projectId=PROJECT, body=request).execute()
print(response)


{'jobReference': {'jobId': 'job_MJdV2bvUWAns4rFCu48x2v2O6yTv', 'projectId': 'cloud-training-demos', 'location': 'EU'}, 'jobComplete': False, 'kind': 'bigquery#queryResponse'}

In [78]:
jobId = response['jobReference']['jobId']
print(jobId)


job_MJdV2bvUWAns4rFCu48x2v2O6yTv

In [79]:
# get query results
while (not response['jobComplete']):
  response = service.jobs().getQueryResults(projectId=PROJECT, 
                                            jobId=jobId, 
                                            maxResults=2, 
                                            timeoutMs=5).execute()

while (True):
  # print responses
  for row in response['rows']:
    print(row['f'][0]['v']) # station name
  print('--' * 5)
  # page through responses
  if 'pageToken' in response:
    pageToken = response['pageToken']
    # get next page
    response = service.jobs().getQueryResults(projectId=PROJECT, 
                                              jobId=jobId, 
                                              maxResults=2,
                                              pageToken=pageToken,
                                              timeoutMs=5).execute()
  else:
    break


Belgrove Street , King's Cross
Hyde Park Corner, Hyde Park
----------
{'totalBytesProcessed': '903989528', 'totalRows': '5', 'etag': 'tQOWiG42eMs7tZ2A3Cj5iQ==', 'kind': 'bigquery#getQueryResultsResponse', 'rows': [{'f': [{'v': "Belgrove Street , King's Cross"}, {'v': '1011.0766960393793'}, {'v': '234458'}]}, {'f': [{'v': 'Hyde Park Corner, Hyde Park'}, {'v': '2782.7307087636773'}, {'v': '215629'}]}], 'pageToken': 'BFKX27XENAAQAAASA4EAAEEAQCAAKGQEBABBAARAWCXBK===', 'schema': {'fields': [{'mode': 'NULLABLE', 'name': 'start_station_name', 'type': 'STRING'}, {'mode': 'NULLABLE', 'name': 'duration', 'type': 'FLOAT'}, {'mode': 'NULLABLE', 'name': 'num_trips', 'type': 'INTEGER'}]}, 'jobReference': {'jobId': 'job_MJdV2bvUWAns4rFCu48x2v2O6yTv', 'projectId': 'cloud-training-demos', 'location': 'EU'}, 'jobComplete': True, 'cacheHit': False}
Waterloo Station 3, Waterloo
Black Lion Gate, Kensington Gardens
----------
{'totalBytesProcessed': '903989528', 'totalRows': '5', 'etag': 'eqV8OPEVgvnMzvNKgABQNQ==', 'kind': 'bigquery#getQueryResultsResponse', 'rows': [{'f': [{'v': 'Waterloo Station 3, Waterloo'}, {'v': '866.3761345037934'}, {'v': '201630'}]}, {'f': [{'v': 'Black Lion Gate, Kensington Gardens'}, {'v': '3588.012003556605'}, {'v': '161952'}]}], 'pageToken': 'BFKX27XENAAQAAASA4EAAEEAQCAAKGQEBACBAARAWCXBK===', 'schema': {'fields': [{'mode': 'NULLABLE', 'name': 'start_station_name', 'type': 'STRING'}, {'mode': 'NULLABLE', 'name': 'duration', 'type': 'FLOAT'}, {'mode': 'NULLABLE', 'name': 'num_trips', 'type': 'INTEGER'}]}, 'jobReference': {'jobId': 'job_MJdV2bvUWAns4rFCu48x2v2O6yTv', 'projectId': 'cloud-training-demos', 'location': 'EU'}, 'jobComplete': True, 'cacheHit': False}
Albert Gate, Hyde Park
----------
{'totalBytesProcessed': '903989528', 'totalRows': '5', 'etag': '0NrkGXvueSL96yj1aB2oRA==', 'kind': 'bigquery#getQueryResultsResponse', 'rows': [{'f': [{'v': 'Albert Gate, Hyde Park'}, {'v': '2359.4139302395765'}, {'v': '155647'}]}], 'schema': {'fields': [{'mode': 'NULLABLE', 'name': 'start_station_name', 'type': 'STRING'}, {'mode': 'NULLABLE', 'name': 'duration', 'type': 'FLOAT'}, {'mode': 'NULLABLE', 'name': 'num_trips', 'type': 'INTEGER'}]}, 'jobReference': {'jobId': 'job_MJdV2bvUWAns4rFCu48x2v2O6yTv', 'projectId': 'cloud-training-demos', 'location': 'EU'}, 'jobComplete': True, 'cacheHit': False}

Copyright 2019 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License