Full scan on 10B rows of Wikimedia logs


In [22]:
%%bq query -n wiki10B
-- Named query `wiki10B`: the ten titles with the highest total view count
-- among titles that contain the word "Cloud" with a space on both sides.
-- Because the pattern requires the surrounding spaces, titles that merely
-- start or end with "Cloud" are not matched.
SELECT
  title,
  SUM(views) AS views
FROM
  `bigquery-samples.wikipedia_benchmark.Wiki10B`
WHERE
  REGEXP_CONTAINS(title, r' Cloud ')
GROUP BY
  title
ORDER BY
  views DESC,
  title  -- tie-breaker: keeps the top-10 stable when summed view counts are equal
LIMIT
  10

In [24]:
import google.datalab.bigquery as bq

# Request the result as a DataFrame with use_cache=False, so the query is
# re-run rather than answered from a previously cached result (this is what
# makes the cell a meaningful full-scan measurement).
options = bq.QueryOutput.dataframe(use_cache=False)

# Launch the named query defined by the `%%bq query -n wiki10B` cell, then
# fetch its result.
query_job = wiki10B.execute(options)
dataframe = query_job.result()

# Bare last expression: rendered as the cell output.
dataframe


Out[24]:
title views
0 Charlie St. Cloud (film) 361
1 St. Cloud State University 357
2 The Death and Life of Charlie St. Cloud (film) 302
3 The Cloud Room 231
4 The Cloud of Unknowing 169
5 Charlie St Cloud Exclusive Scenes Full Movie 166
6 Charlie St Cloud Movie Clip Poetry 163
7 White Cloud Lake 156
8 The Cloud Minders 144
9 Panda Cloud Antivirus 135

In [ ]: