In [0]:
from google.colab import auth
auth.authenticate_user()
print('Authenticated')
In [0]:
%load_ext google.colab.data_table
If you would prefer to return to the classic Pandas dataframe display, you can disable this by running:
%unload_ext google.colab.data_table
In [0]:
# Display query output immediately
%%bigquery --project yourprojectid
SELECT
COUNT(*) as total_rows
FROM `bigquery-public-data.samples.gsod`
Out[0]:
In [0]:
# Save output in a variable `df`
%%bigquery --project yourprojectid df
SELECT
COUNT(*) as total_rows
FROM `bigquery-public-data.samples.gsod`
In [0]:
df
Out[0]:
See BigQuery documentation and library reference documentation.
The GSOD sample table contains weather information collected by NOAA, such as precipitation amounts and wind speeds from late 1929 to early 2010.
In [0]:
project_id = '[your project ID]'
In [0]:
from google.cloud import bigquery
client = bigquery.Client(project=project_id)
sample_count = 2000
row_count = client.query('''
SELECT
COUNT(*) as total
FROM `bigquery-public-data.samples.gsod`''').to_dataframe().total[0]
df = client.query('''
SELECT
*
FROM
`bigquery-public-data.samples.gsod`
WHERE RAND() < %d/%d
''' % (sample_count, row_count)).to_dataframe()
print('Full dataset has %d rows' % row_count)
In [0]:
df.describe()
In [0]:
df.head(10)
Out[0]:
In [0]:
# 10 highest total_precipitation samples
df.sort_values('total_precipitation', ascending=False).head(10)[['station_number', 'year', 'month', 'day', 'total_precipitation']]
Out[0]:
In [0]:
import pandas as pd
sample_count = 2000
df = pd.io.gbq.read_gbq('''
SELECT name, SUM(number) as count
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
GROUP BY name
ORDER BY count DESC
LIMIT 100
''', project_id=project_id, dialect='standard')
df.head()
Out[0]: