Compute GitHub Stats

Notebook setup


In [1]:
import os
import subprocess
import sys

# When running inside a Kubernetes pod (detected by the mounted service-account
# directory), install the notebook's requirements into the user site-packages.
if os.path.exists("/var/run/secrets/kubernetes.io/serviceaccount"):
    # Run pip through the kernel's own interpreter so the packages land in the
    # environment this notebook imports from; a bare "pip" is looked up on PATH
    # and may belong to a different Python installation.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--user", "-r", "requirements.txt"],
        stderr=subprocess.STDOUT,
        bufsize=1,
    )

In [2]:
# NOTE: The RuntimeWarnings (if any) are harmless. See ContinuumIO/anaconda-issues#6678.
from pandas.io import gbq
import pandas as pd
import numpy as np

from importlib import reload
import itertools

In [3]:
import getpass
import subprocess
# Configuration variables -- modify as desired.
#
# PROJECT is passed as the `project_id` of every BigQuery query in this
# notebook. By default it is whatever project the gcloud CLI reports.
PROJECT = (
    subprocess.check_output(["gcloud", "config", "get-value", "project"])
    .strip()   # remove surrounding whitespace from the command output
    .decode()  # bytes -> str
)

In [4]:
# Enable matplotlib integration without naming a backend; the recorded output
# below reports which backend was selected.
%matplotlib


Using matplotlib backend: agg

Setup Authorization

If you are using a service account, run the following in a `%%bash` cell:

Activate Service Account provided by Kubeflow.

gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}

If you are running with user credentials, run:

gcloud auth application-default login


In [74]:
import datetime

# Build the list of monthly GitHub Archive table suffixes ("YYYYMM", wrapped in
# double quotes so they can be spliced into a SQL IN (...) list), starting with
# the current month and walking backwards.
#
# Read the clock exactly once: taking the month and the year from two separate
# now() calls could pair the month of one instant with the year of another if
# the calls straddle a month/year boundary.
today = datetime.datetime.now()
month = today.month
year = today.year

# Number of months (including the current one) to cover.
num_months = 12

months = []
for i in range(num_months):
    months.append("\"{0}{1:02}\"".format(year, month))
    month -= 1
    if month == 0:
        # Stepped back past January: wrap to December of the previous year.
        month = 12
        year -= 1

Unique PR Creators


In [76]:
# Fetch every "closed" PullRequestEvent for the kubeflow org from the monthly
# GitHub Archive tables selected by `months`. The extracted JSON values come
# back as JSON text, which is why the action is matched against '"closed"'
# (including the double quotes).
table_suffixes = ",".join(months)
query = """
SELECT
    DATE(created_at) AS pr_date,
    actor.id,
    actor.login,
    JSON_EXTRACT(payload, '$.pull_request.user.id') as user_id,
    JSON_EXTRACT(payload, '$.pull_request.id') as pr_id,
    JSON_EXTRACT(payload, '$.pull_request.merged') as merged
  FROM `githubarchive.month.*`
  WHERE
    _TABLE_SUFFIX IN ({0})
    AND type = 'PullRequestEvent'
    AND org.login = 'kubeflow'
    AND JSON_EXTRACT(payload, '$.action') IN ('"closed"')
""".format(table_suffixes)

all_prs = gbq.read_gbq(query, dialect='standard', project_id=PROJECT)


/home/jovyan/.local/lib/python3.6/site-packages/pandas_gbq/gbq.py:555: UserWarning: A progress bar was requested, but there was an error loading the tqdm library. Please install tqdm to use the progress bar functionality.
  progress_bar_type=progress_bar_type,

In [77]:
# Keep only the closed PRs that were merged. The `merged` column holds the
# text extracted from the JSON payload, so it is compared with the string
# 'true' rather than a boolean.
is_merged = all_prs["merged"].values == 'true'
merged_all_prs = all_prs.iloc[is_merged]

In [78]:
# Series of PR author ids indexed by the date the PR was closed, in
# chronological order (input to the rolling-window computation).
p = pd.Series(
    data=merged_all_prs["user_id"].values,
    index=merged_all_prs["pr_date"],
).sort_index()

In [111]:
# Rolling count of distinct PR authors over a trailing 28-day window.
# Background on rolling unique counts:
# https://stackoverflow.com/questions/46470743/how-to-efficiently-compute-a-rolling-unique-count-in-a-pandas-time-series
# Need to figure out how to do a time based window
#
# TODO(jlewi): Is there a bug in the rolling window computation? creators ends
# up having the same number of rows as p; so we end up with multiple datapoints
# for each day; but the values aren't the same for each day. What is causing
# this effect?


def _unique_authors_in_window(window):
    """Return the number of distinct values contained in one rolling window."""
    return pd.Series(window).nunique()


creators = p.rolling('28d').apply(_unique_authors_in_window)

# The rolling window yields one point per input row, so rows that share a day
# are collapsed into a single point by keeping that day's maximum.
creators_df = (
    pd.DataFrame({"day": creators.index, "num_authors": creators.values})
    .groupby("day", as_index=False)
    .max()
)

In [112]:
import altair as alt

# Line chart of the 28-day unique-author count, with a point marker overlaid
# on every data point; the final expression is the cell's displayed output.
authors_line = (
    alt.Chart(creators_df, title="Unique PR Authors (Last 28 Days)")
    .mark_line()
    .encode(
        x=alt.X('day', title="Day"),
        y=alt.Y("num_authors", title="# Unique Authors"),
    )
)

authors_layered = authors_line + authors_line.mark_point()
authors_layered.interactive()


Out[112]:

Number of PRs


In [116]:
# Series of merged PR ids indexed by close date, in chronological order.
pr_impulse = pd.Series(
    data=merged_all_prs["pr_id"].values,
    index=merged_all_prs["pr_date"],
).sort_index()


def _unique_prs_in_window(window):
    """Return the number of distinct values contained in one rolling window."""
    return pd.Series(window).nunique()


# Distinct merged PRs within each trailing 28-day window.
unique_prs = pr_impulse.rolling('28d').apply(_unique_prs_in_window)

# One row per day: collapse rows that share a day by keeping the maximum.
prs_df = (
    pd.DataFrame({"day": unique_prs.index, "num_prs": unique_prs.values})
    .groupby("day", as_index=False)
    .max()
)

In [117]:
# Line chart of the 28-day merged-PR count, with a point marker overlaid on
# every data point; the final expression is the cell's displayed output.
prs_line = (
    alt.Chart(prs_df, title="Merged PRs (Last 28 Days)")
    .mark_line()
    .encode(
        x=alt.X('day', title="Day"),
        y=alt.Y("num_prs", title="# PRs"),
    )
)

prs_layered = prs_line + prs_line.mark_point()
prs_layered.interactive()


Out[117]:

Release stats per release (quarter)


In [85]:
# Quoted "YYYYMM" table suffixes for the release window: months 8 through 10
# (range end is exclusive) of the year below.
year = 2019
release_months = ["\"{0}{1:02}\"".format(year, month) for month in range(8, 11)]
        

# Fetch every "closed" PullRequestEvent for the kubeflow org from the monthly
# GitHub Archive tables selected by `release_months`. The extracted values are
# JSON text, hence the quoted '"closed"' literal in the action filter.
release_suffixes = ",".join(release_months)
query = """
SELECT
    DATE(created_at) AS pr_date,
    actor.id,
    actor.login,
    JSON_EXTRACT(payload, '$.pull_request.merged') as merged,
    JSON_EXTRACT(payload, '$.pull_request.id') as pr_id,
    JSON_EXTRACT(payload, '$.pull_request.url') as pr_url,
    JSON_EXTRACT(payload, '$.pull_request.user.id') as user_id
  FROM `githubarchive.month.*`
  WHERE
    _TABLE_SUFFIX IN ({0})
    AND type = 'PullRequestEvent'
    AND org.login = 'kubeflow'
    AND JSON_EXTRACT(payload, '$.action') IN ('"closed"')
""".format(release_suffixes)

prs = gbq.read_gbq(query, dialect='standard', project_id=PROJECT)

In [86]:
# Keep only the release-window PRs that were merged. `merged` holds text
# extracted from the JSON payload, so it is compared with the string 'true'.
release_is_merged = prs["merged"].values == 'true'
merged_prs = prs.iloc[release_is_merged]

In [87]:
# Distinct PR author ids and PR ids, first over all closed PRs and then over
# the merged subset.
unique_pr_logins = prs["user_id"].unique()
unique_prs = prs["pr_id"].unique()

merged_unique_logins = merged_prs["user_id"].unique()
merged_unique_prs = merged_prs["pr_id"].unique()

# Each summary line prints the shape of the corresponding array of uniques.
summary_lines = (
    ("Number of unique pr authors (merged & unmerged) {0}", unique_pr_logins),
    ("Number of unique prs (merged & unmerged) {0}", unique_prs),
    ("Number of unique pr authors (merged) {0}", merged_unique_logins),
    ("Number of unique prs (merged) {0}", merged_unique_prs),
)
for template, uniques in summary_lines:
    print(template.format(uniques.shape))


Number of unique pr authors (merged & unmerged) (190,)
Number of unique prs (merged & unmerged) (1788,)
Number of unique pr authors (merged) (155,)
Number of unique prs (merged) (1492,)

Get a list of distinct actions

  • Here's a list of events in the GitHub API
  • It looks like these are different from the ones in the GitHub Archive

In [88]:
# List every distinct `action` value found in the event payloads of the
# monthly tables selected by `months` (no filter on event type or org).
action_suffixes = ",".join(months)
query = """
SELECT
    distinct JSON_EXTRACT(payload, '$.action')
  FROM `githubarchive.month.*`
  WHERE
    _TABLE_SUFFIX IN ({0})    
""".format(action_suffixes)

actions = gbq.read_gbq(query, dialect='standard', project_id=PROJECT)

In [89]:
# Show the distinct `action` values returned by the query.
actions


Out[89]:
f0_
0 "opened"
1 "started"
2 None
3 "published"
4 "created"
5 "added"
6 "reopened"
7 "closed"

New Issues Last 28 Days


In [90]:
# Fetch every "opened" IssuesEvent for the kubeflow org from the monthly
# GitHub Archive tables selected by `months`.
#
# The id and url are read from the payload's `issue` object: IssuesEvent
# payloads carry the issue under "issue" (see the sampled events further down
# in this notebook), not under "pull_request", so extracting
# '$.pull_request.*' here would yield NULL for every row.
query = """
SELECT
    DATE(created_at) AS issue_date,
    actor.id,
    actor.login,
    JSON_EXTRACT(payload, '$.issue.id') as issue_id,
    JSON_EXTRACT(payload, '$.issue.url') as issue_url
  FROM `githubarchive.month.*`
  WHERE
    _TABLE_SUFFIX IN ({0})
    AND type = 'IssuesEvent'
    AND org.login = 'kubeflow'
    AND JSON_EXTRACT(payload, '$.action') IN ('"opened"')
""".format(",".join(months))

issues = gbq.read_gbq(query, dialect='standard', project_id=PROJECT)


/home/jovyan/.local/lib/python3.6/site-packages/pandas_gbq/gbq.py:555: UserWarning: A progress bar was requested, but there was an error loading the tqdm library. Please install tqdm to use the progress bar functionality.
  progress_bar_type=progress_bar_type,

In [91]:
# Number of issues opened on each date, ordered chronologically, then summed
# over a trailing 28-day window.
issue_counts = issues["issue_date"].value_counts().sort_index()
rolling_issue_count = issue_counts.rolling('28d').sum()

In [129]:
# Reshape the rolling issue counts into a (day, num_issues) frame for Altair,
# keeping one row per day.
issues_df = pd.DataFrame({"day": rolling_issue_count.index, "num_issues": rolling_issue_count.values})
issues_df = issues_df.groupby("day", as_index=False).max()

# Line chart with a point marker on every data point. The y-axis is labelled
# "# Issues": this chart plots issue counts, not PRs.
chart = alt.Chart(issues_df, title= "New Issues (Last 28 Days)")
line = chart.mark_line().encode(
  x= alt.X('day', title = "Day"),
  y=alt.Y("num_issues", title="# Issues"),
)

point = line + line.mark_point()
point.interactive()


Out[129]:

In [92]:
import matplotlib
from matplotlib import pylab

# Static matplotlib rendering of the rolling 28-day new-issue count.
matplotlib.rcParams.update({'font.size': 22})

issues_fig = pylab.figure()
issues_fig.set_size_inches(18.5, 10.5)
pylab.plot(rolling_issue_count, linewidth=5)

issues_ax = pylab.gca()
issues_ax.set_title("New Kubeflow Issues (28 Days)")
issues_ax.set_xlabel("Date")
issues_ax.set_ylabel("# Of Issues")


Out[92]:
Text(0, 0.5, '# Of Issues')

Get some sample issue events


In [93]:
# Pull 20 raw IssuesEvent rows for the kubeflow org so that their structure
# can be inspected.
sample_suffixes = ",".join(months)
query = """
SELECT
    *
  FROM `githubarchive.month.*`
  WHERE
    _TABLE_SUFFIX IN ({0})
    AND type = 'IssuesEvent'
    AND org.login = 'kubeflow'
    
    limit 20
""".format(sample_suffixes)

events = gbq.read_gbq(query, dialect='standard', project_id=PROJECT)

In [94]:
# Display the sampled event rows to inspect their structure.
events


Out[94]:
type public payload repo actor org created_at id other
0 IssuesEvent True {"action":"opened","issue":{"url":"https://api... {'id': 177896927, 'name': 'kubeflow/metadata',... {'id': 1383056, 'login': 'kwasi', 'gravatar_id... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 22:16:30+00:00 10773687172 {"actor":{"display_login":"kwasi"}}
1 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 112647343, 'name': 'kubeflow/kubeflow',... {'id': 26384082, 'login': 'stale[bot]', 'grava... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 20:18:42+00:00 10773383242 {"actor":{"display_login":"stale"}}
2 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 112647343, 'name': 'kubeflow/kubeflow',... {'id': 26384082, 'login': 'stale[bot]', 'grava... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 21:18:45+00:00 10773540968 {"actor":{"display_login":"stale"}}
3 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 178075572, 'name': 'kubeflow/kfserving'... {'id': 20407524, 'login': 'k8s-ci-robot', 'gra... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 02:01:43+00:00 10770687104 {"actor":{"display_login":"k8s-ci-robot"}}
4 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 112647343, 'name': 'kubeflow/kubeflow',... {'id': 777219, 'login': 'jlewi', 'gravatar_id'... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 20:00:49+00:00 10773336386 {"actor":{"display_login":"jlewi"}}
5 IssuesEvent True {"action":"opened","issue":{"url":"https://api... {'id': 133100880, 'name': 'kubeflow/pipelines'... {'id': 777219, 'login': 'jlewi', 'gravatar_id'... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 19:56:58+00:00 10773325771 {"actor":{"display_login":"jlewi"}}
6 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 178075572, 'name': 'kubeflow/kfserving'... {'id': 20407524, 'login': 'k8s-ci-robot', 'gra... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 16:19:40+00:00 10772712638 {"actor":{"display_login":"k8s-ci-robot"}}
7 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 133100880, 'name': 'kubeflow/pipelines'... {'id': 777219, 'login': 'jlewi', 'gravatar_id'... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 20:01:48+00:00 10773339049 {"actor":{"display_login":"jlewi"}}
8 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 128353922, 'name': 'kubeflow/website', ... {'id': 20407524, 'login': 'k8s-ci-robot', 'gra... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 02:47:41+00:00 10770792321 {"actor":{"display_login":"k8s-ci-robot"}}
9 IssuesEvent True {"action":"opened","issue":{"url":"https://api... {'id': 133100880, 'name': 'kubeflow/pipelines'... {'id': 777219, 'login': 'jlewi', 'gravatar_id'... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 20:12:01+00:00 10773366395 {"actor":{"display_login":"jlewi"}}
10 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 112647343, 'name': 'kubeflow/kubeflow',... {'id': 26384082, 'login': 'stale[bot]', 'grava... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 20:18:42+00:00 10773383195 {"actor":{"display_login":"stale"}}
11 IssuesEvent True {"action":"opened","issue":{"url":"https://api... {'id': 133100880, 'name': 'kubeflow/pipelines'... {'id': 1829149, 'login': 'Ark-kun', 'gravatar_... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 03:32:03+00:00 10770890970 {"actor":{"display_login":"Ark-kun"}}
12 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 112647343, 'name': 'kubeflow/kubeflow',... {'id': 777219, 'login': 'jlewi', 'gravatar_id'... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 20:14:39+00:00 10773372657 {"actor":{"display_login":"jlewi"}}
13 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 112647343, 'name': 'kubeflow/kubeflow',... {'id': 777219, 'login': 'jlewi', 'gravatar_id'... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-02 02:01:31+00:00 10770686609 {"actor":{"display_login":"jlewi"}}
14 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 172819949, 'name': 'kubeflow/manifests'... {'id': 20407524, 'login': 'k8s-ci-robot', 'gra... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-01 22:39:42+00:00 10770060176 {"actor":{"display_login":"k8s-ci-robot"}}
15 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 178075572, 'name': 'kubeflow/kfserving'... {'id': 20407524, 'login': 'k8s-ci-robot', 'gra... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-01 23:04:58+00:00 10770164943 {"actor":{"display_login":"k8s-ci-robot"}}
16 IssuesEvent True {"action":"opened","issue":{"url":"https://api... {'id': 178075572, 'name': 'kubeflow/kfserving'... {'id': 10563075, 'login': 'cliveseldon', 'grav... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-01 10:46:17+00:00 10765348294 {"actor":{"display_login":"cliveseldon"}}
17 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 178075572, 'name': 'kubeflow/kfserving'... {'id': 20407524, 'login': 'k8s-ci-robot', 'gra... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-01 23:08:05+00:00 10770177338 {"actor":{"display_login":"k8s-ci-robot"}}
18 IssuesEvent True {"action":"opened","issue":{"url":"https://api... {'id': 128353922, 'name': 'kubeflow/website', ... {'id': 1772874, 'login': 'alecglassford', 'gra... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-01 19:38:14+00:00 10769045766 {"actor":{"display_login":"alecglassford"}}
19 IssuesEvent True {"action":"closed","issue":{"url":"https://api... {'id': 128353922, 'name': 'kubeflow/website', ... {'id': 740031, 'login': 'thavlik', 'gravatar_i... {'id': 33164907, 'login': 'kubeflow', 'gravata... 2019-11-01 22:00:42+00:00 10769883977 {"actor":{"display_login":"thavlik"}}

Get some sample pull request events

  • Want to inspect the data

In [95]:
# Pull 20 raw PullRequestEvent rows for the kubeflow org so that their
# structure can be inspected.
sample_suffixes = ",".join(months)
query = """
SELECT
    *
  FROM `githubarchive.month.*`
  WHERE
    _TABLE_SUFFIX IN ({0})
    AND type = 'PullRequestEvent'
    AND org.login = 'kubeflow'
    
    limit 20
""".format(sample_suffixes)

events = gbq.read_gbq(query, dialect='standard', project_id=PROJECT)


/home/jovyan/.local/lib/python3.6/site-packages/pandas_gbq/gbq.py:555: UserWarning: A progress bar was requested, but there was an error loading the tqdm library. Please install tqdm to use the progress bar functionality.
  progress_bar_type=progress_bar_type,

In [96]:
import pprint
import json
# Decode the JSON payload of the sampled event at row position 3 and
# pretty-print it to inspect its fields.
sample_payload = events["payload"].values[3]
data = json.loads(sample_payload)
pprint.pprint(data)


{'action': 'closed',
 'number': 1112,
 'pull_request': {'_links': {'comments': {'href': 'https://api.github.com/repos/kubeflow/website/issues/1112/comments'},
                             'commits': {'href': 'https://api.github.com/repos/kubeflow/website/pulls/1112/commits'},
                             'html': {'href': 'https://github.com/kubeflow/website/pull/1112'},
                             'issue': {'href': 'https://api.github.com/repos/kubeflow/website/issues/1112'},
                             'review_comment': {'href': 'https://api.github.com/repos/kubeflow/website/pulls/comments{/number}'},
                             'review_comments': {'href': 'https://api.github.com/repos/kubeflow/website/pulls/1112/comments'},
                             'self': {'href': 'https://api.github.com/repos/kubeflow/website/pulls/1112'},
                             'statuses': {'href': 'https://api.github.com/repos/kubeflow/website/statuses/f7a8f4d4247b84ed31988b7e9657a3355026de9a'}},
                  'additions': 1,
                  'assignee': {'avatar_url': 'https://avatars0.githubusercontent.com/u/6138251?v=4',
                               'events_url': 'https://api.github.com/users/sarahmaddox/events{/privacy}',
                               'followers_url': 'https://api.github.com/users/sarahmaddox/followers',
                               'following_url': 'https://api.github.com/users/sarahmaddox/following{/other_user}',
                               'gists_url': 'https://api.github.com/users/sarahmaddox/gists{/gist_id}',
                               'gravatar_id': '',
                               'html_url': 'https://github.com/sarahmaddox',
                               'id': 6138251,
                               'login': 'sarahmaddox',
                               'node_id': 'MDQ6VXNlcjYxMzgyNTE=',
                               'organizations_url': 'https://api.github.com/users/sarahmaddox/orgs',
                               'received_events_url': 'https://api.github.com/users/sarahmaddox/received_events',
                               'repos_url': 'https://api.github.com/users/sarahmaddox/repos',
                               'site_admin': False,
                               'starred_url': 'https://api.github.com/users/sarahmaddox/starred{/owner}{/repo}',
                               'subscriptions_url': 'https://api.github.com/users/sarahmaddox/subscriptions',
                               'type': 'User',
                               'url': 'https://api.github.com/users/sarahmaddox'},
                  'assignees': [{'avatar_url': 'https://avatars0.githubusercontent.com/u/6138251?v=4',
                                 'events_url': 'https://api.github.com/users/sarahmaddox/events{/privacy}',
                                 'followers_url': 'https://api.github.com/users/sarahmaddox/followers',
                                 'following_url': 'https://api.github.com/users/sarahmaddox/following{/other_user}',
                                 'gists_url': 'https://api.github.com/users/sarahmaddox/gists{/gist_id}',
                                 'gravatar_id': '',
                                 'html_url': 'https://github.com/sarahmaddox',
                                 'id': 6138251,
                                 'login': 'sarahmaddox',
                                 'node_id': 'MDQ6VXNlcjYxMzgyNTE=',
                                 'organizations_url': 'https://api.github.com/users/sarahmaddox/orgs',
                                 'received_events_url': 'https://api.github.com/users/sarahmaddox/received_events',
                                 'repos_url': 'https://api.github.com/users/sarahmaddox/repos',
                                 'site_admin': False,
                                 'starred_url': 'https://api.github.com/users/sarahmaddox/starred{/owner}{/repo}',
                                 'subscriptions_url': 'https://api.github.com/users/sarahmaddox/subscriptions',
                                 'type': 'User',
                                 'url': 'https://api.github.com/users/sarahmaddox'}],
                  'author_association': 'CONTRIBUTOR',
                  'base': {'label': 'kubeflow:master',
                           'ref': 'master',
                           'repo': {'archive_url': 'https://api.github.com/repos/kubeflow/website/{archive_format}{/ref}',
                                    'archived': False,
                                    'assignees_url': 'https://api.github.com/repos/kubeflow/website/assignees{/user}',
                                    'blobs_url': 'https://api.github.com/repos/kubeflow/website/git/blobs{/sha}',
                                    'branches_url': 'https://api.github.com/repos/kubeflow/website/branches{/branch}',
                                    'clone_url': 'https://github.com/kubeflow/website.git',
                                    'collaborators_url': 'https://api.github.com/repos/kubeflow/website/collaborators{/collaborator}',
                                    'comments_url': 'https://api.github.com/repos/kubeflow/website/comments{/number}',
                                    'commits_url': 'https://api.github.com/repos/kubeflow/website/commits{/sha}',
                                    'compare_url': 'https://api.github.com/repos/kubeflow/website/compare/{base}...{head}',
                                    'contents_url': 'https://api.github.com/repos/kubeflow/website/contents/{+path}',
                                    'contributors_url': 'https://api.github.com/repos/kubeflow/website/contributors',
                                    'created_at': '2018-04-06T06:09:27Z',
                                    'default_branch': 'master',
                                    'deployments_url': 'https://api.github.com/repos/kubeflow/website/deployments',
                                    'description': "Kubeflow's public website",
                                    'disabled': False,
                                    'downloads_url': 'https://api.github.com/repos/kubeflow/website/downloads',
                                    'events_url': 'https://api.github.com/repos/kubeflow/website/events',
                                    'fork': False,
                                    'forks': 187,
                                    'forks_count': 187,
                                    'forks_url': 'https://api.github.com/repos/kubeflow/website/forks',
                                    'full_name': 'kubeflow/website',
                                    'git_commits_url': 'https://api.github.com/repos/kubeflow/website/git/commits{/sha}',
                                    'git_refs_url': 'https://api.github.com/repos/kubeflow/website/git/refs{/sha}',
                                    'git_tags_url': 'https://api.github.com/repos/kubeflow/website/git/tags{/sha}',
                                    'git_url': 'git://github.com/kubeflow/website.git',
                                    'has_downloads': True,
                                    'has_issues': True,
                                    'has_pages': False,
                                    'has_projects': True,
                                    'has_wiki': True,
                                    'homepage': None,
                                    'hooks_url': 'https://api.github.com/repos/kubeflow/website/hooks',
                                    'html_url': 'https://github.com/kubeflow/website',
                                    'id': 128353922,
                                    'issue_comment_url': 'https://api.github.com/repos/kubeflow/website/issues/comments{/number}',
                                    'issue_events_url': 'https://api.github.com/repos/kubeflow/website/issues/events{/number}',
                                    'issues_url': 'https://api.github.com/repos/kubeflow/website/issues{/number}',
                                    'keys_url': 'https://api.github.com/repos/kubeflow/website/keys{/key_id}',
                                    'labels_url': 'https://api.github.com/repos/kubeflow/website/labels{/name}',
                                    'language': 'HTML',
                                    'languages_url': 'https://api.github.com/repos/kubeflow/website/languages',
                                    'license': {'key': 'cc-by-4.0',
                                                'name': 'Creative Commons '
                                                        'Attribution 4.0 '
                                                        'International',
                                                'node_id': 'MDc6TGljZW5zZTI1',
                                                'spdx_id': 'CC-BY-4.0',
                                                'url': 'https://api.github.com/licenses/cc-by-4.0'},
                                    'merges_url': 'https://api.github.com/repos/kubeflow/website/merges',
                                    'milestones_url': 'https://api.github.com/repos/kubeflow/website/milestones{/number}',
                                    'mirror_url': None,
                                    'name': 'website',
                                    'node_id': 'MDEwOlJlcG9zaXRvcnkxMjgzNTM5MjI=',
                                    'notifications_url': 'https://api.github.com/repos/kubeflow/website/notifications{?since,all,participating}',
                                    'open_issues': 122,
                                    'open_issues_count': 122,
                                    'owner': {'avatar_url': 'https://avatars0.githubusercontent.com/u/33164907?v=4',
                                              'events_url': 'https://api.github.com/users/kubeflow/events{/privacy}',
                                              'followers_url': 'https://api.github.com/users/kubeflow/followers',
                                              'following_url': 'https://api.github.com/users/kubeflow/following{/other_user}',
                                              'gists_url': 'https://api.github.com/users/kubeflow/gists{/gist_id}',
                                              'gravatar_id': '',
                                              'html_url': 'https://github.com/kubeflow',
                                              'id': 33164907,
                                              'login': 'kubeflow',
                                              'node_id': 'MDEyOk9yZ2FuaXphdGlvbjMzMTY0OTA3',
                                              'organizations_url': 'https://api.github.com/users/kubeflow/orgs',
                                              'received_events_url': 'https://api.github.com/users/kubeflow/received_events',
                                              'repos_url': 'https://api.github.com/users/kubeflow/repos',
                                              'site_admin': False,
                                              'starred_url': 'https://api.github.com/users/kubeflow/starred{/owner}{/repo}',
                                              'subscriptions_url': 'https://api.github.com/users/kubeflow/subscriptions',
                                              'type': 'Organization',
                                              'url': 'https://api.github.com/users/kubeflow'},
                                    'private': False,
                                    'pulls_url': 'https://api.github.com/repos/kubeflow/website/pulls{/number}',
                                    'pushed_at': '2019-09-02T05:45:41Z',
                                    'releases_url': 'https://api.github.com/repos/kubeflow/website/releases{/id}',
                                    'size': 32934,
                                    'ssh_url': 'git@github.com:kubeflow/website.git',
                                    'stargazers_count': 55,
                                    'stargazers_url': 'https://api.github.com/repos/kubeflow/website/stargazers',
                                    'statuses_url': 'https://api.github.com/repos/kubeflow/website/statuses/{sha}',
                                    'subscribers_url': 'https://api.github.com/repos/kubeflow/website/subscribers',
                                    'subscription_url': 'https://api.github.com/repos/kubeflow/website/subscription',
                                    'svn_url': 'https://github.com/kubeflow/website',
                                    'tags_url': 'https://api.github.com/repos/kubeflow/website/tags',
                                    'teams_url': 'https://api.github.com/repos/kubeflow/website/teams',
                                    'trees_url': 'https://api.github.com/repos/kubeflow/website/git/trees{/sha}',
                                    'updated_at': '2019-08-30T13:38:02Z',
                                    'url': 'https://api.github.com/repos/kubeflow/website',
                                    'watchers': 55,
                                    'watchers_count': 55},
                           'sha': 'a3a1cb38dadf3e1e88ceca771fcdaab9663c9771',
                           'user': {'avatar_url': 'https://avatars0.githubusercontent.com/u/33164907?v=4',
                                    'events_url': 'https://api.github.com/users/kubeflow/events{/privacy}',
                                    'followers_url': 'https://api.github.com/users/kubeflow/followers',
                                    'following_url': 'https://api.github.com/users/kubeflow/following{/other_user}',
                                    'gists_url': 'https://api.github.com/users/kubeflow/gists{/gist_id}',
                                    'gravatar_id': '',
                                    'html_url': 'https://github.com/kubeflow',
                                    'id': 33164907,
                                    'login': 'kubeflow',
                                    'node_id': 'MDEyOk9yZ2FuaXphdGlvbjMzMTY0OTA3',
                                    'organizations_url': 'https://api.github.com/users/kubeflow/orgs',
                                    'received_events_url': 'https://api.github.com/users/kubeflow/received_events',
                                    'repos_url': 'https://api.github.com/users/kubeflow/repos',
                                    'site_admin': False,
                                    'starred_url': 'https://api.github.com/users/kubeflow/starred{/owner}{/repo}',
                                    'subscriptions_url': 'https://api.github.com/users/kubeflow/subscriptions',
                                    'type': 'Organization',
                                    'url': 'https://api.github.com/users/kubeflow'}},
                  'body': 'on v0.6.2 we still need `--use_basic_auth` during '
                          'init\n'
                          '\n'
                          '<!-- Reviewable:start -->\n'
                          '---\n'
                          'This change is\u2002[<img '
                          'src="https://reviewable.io/review_button.svg" '
                          'height="34" align="absmiddle" '
                          'alt="Reviewable"/>](https://reviewable.io/reviews/kubeflow/website/1112)\n'
                          '<!-- Reviewable:end -->\n',
                  'changed_files': 1,
                  'closed_at': '2019-09-02T05:45:41Z',
                  'comments': 2,
                  'comments_url': 'https://api.github.com/repos/kubeflow/website/issues/1112/comments',
                  'commits': 1,
                  'commits_url': 'https://api.github.com/repos/kubeflow/website/pulls/1112/commits',
                  'created_at': '2019-08-30T21:58:25Z',
                  'deletions': 1,
                  'diff_url': 'https://github.com/kubeflow/website/pull/1112.diff',
                  'head': {'label': 'kunmingg:cli_doc',
                           'ref': 'cli_doc',
                           'repo': {'archive_url': 'https://api.github.com/repos/kunmingg/website/{archive_format}{/ref}',
                                    'archived': False,
                                    'assignees_url': 'https://api.github.com/repos/kunmingg/website/assignees{/user}',
                                    'blobs_url': 'https://api.github.com/repos/kunmingg/website/git/blobs{/sha}',
                                    'branches_url': 'https://api.github.com/repos/kunmingg/website/branches{/branch}',
                                    'clone_url': 'https://github.com/kunmingg/website.git',
                                    'collaborators_url': 'https://api.github.com/repos/kunmingg/website/collaborators{/collaborator}',
                                    'comments_url': 'https://api.github.com/repos/kunmingg/website/comments{/number}',
                                    'commits_url': 'https://api.github.com/repos/kunmingg/website/commits{/sha}',
                                    'compare_url': 'https://api.github.com/repos/kunmingg/website/compare/{base}...{head}',
                                    'contents_url': 'https://api.github.com/repos/kunmingg/website/contents/{+path}',
                                    'contributors_url': 'https://api.github.com/repos/kunmingg/website/contributors',
                                    'created_at': '2018-06-12T22:24:01Z',
                                    'default_branch': 'master',
                                    'deployments_url': 'https://api.github.com/repos/kunmingg/website/deployments',
                                    'description': "Kubeflow's public website",
                                    'disabled': False,
                                    'downloads_url': 'https://api.github.com/repos/kunmingg/website/downloads',
                                    'events_url': 'https://api.github.com/repos/kunmingg/website/events',
                                    'fork': True,
                                    'forks': 0,
                                    'forks_count': 0,
                                    'forks_url': 'https://api.github.com/repos/kunmingg/website/forks',
                                    'full_name': 'kunmingg/website',
                                    'git_commits_url': 'https://api.github.com/repos/kunmingg/website/git/commits{/sha}',
                                    'git_refs_url': 'https://api.github.com/repos/kunmingg/website/git/refs{/sha}',
                                    'git_tags_url': 'https://api.github.com/repos/kunmingg/website/git/tags{/sha}',
                                    'git_url': 'git://github.com/kunmingg/website.git',
                                    'has_downloads': True,
                                    'has_issues': False,
                                    'has_pages': False,
                                    'has_projects': True,
                                    'has_wiki': True,
                                    'homepage': None,
                                    'hooks_url': 'https://api.github.com/repos/kunmingg/website/hooks',
                                    'html_url': 'https://github.com/kunmingg/website',
                                    'id': 137134035,
                                    'issue_comment_url': 'https://api.github.com/repos/kunmingg/website/issues/comments{/number}',
                                    'issue_events_url': 'https://api.github.com/repos/kunmingg/website/issues/events{/number}',
                                    'issues_url': 'https://api.github.com/repos/kunmingg/website/issues{/number}',
                                    'keys_url': 'https://api.github.com/repos/kunmingg/website/keys{/key_id}',
                                    'labels_url': 'https://api.github.com/repos/kunmingg/website/labels{/name}',
                                    'language': 'CSS',
                                    'languages_url': 'https://api.github.com/repos/kunmingg/website/languages',
                                    'license': {'key': 'apache-2.0',
                                                'name': 'Apache License 2.0',
                                                'node_id': 'MDc6TGljZW5zZTI=',
                                                'spdx_id': 'Apache-2.0',
                                                'url': 'https://api.github.com/licenses/apache-2.0'},
                                    'merges_url': 'https://api.github.com/repos/kunmingg/website/merges',
                                    'milestones_url': 'https://api.github.com/repos/kunmingg/website/milestones{/number}',
                                    'mirror_url': None,
                                    'name': 'website',
                                    'node_id': 'MDEwOlJlcG9zaXRvcnkxMzcxMzQwMzU=',
                                    'notifications_url': 'https://api.github.com/repos/kunmingg/website/notifications{?since,all,participating}',
                                    'open_issues': 0,
                                    'open_issues_count': 0,
                                    'owner': {'avatar_url': 'https://avatars2.githubusercontent.com/u/37601826?v=4',
                                              'events_url': 'https://api.github.com/users/kunmingg/events{/privacy}',
                                              'followers_url': 'https://api.github.com/users/kunmingg/followers',
                                              'following_url': 'https://api.github.com/users/kunmingg/following{/other_user}',
                                              'gists_url': 'https://api.github.com/users/kunmingg/gists{/gist_id}',
                                              'gravatar_id': '',
                                              'html_url': 'https://github.com/kunmingg',
                                              'id': 37601826,
                                              'login': 'kunmingg',
                                              'node_id': 'MDQ6VXNlcjM3NjAxODI2',
                                              'organizations_url': 'https://api.github.com/users/kunmingg/orgs',
                                              'received_events_url': 'https://api.github.com/users/kunmingg/received_events',
                                              'repos_url': 'https://api.github.com/users/kunmingg/repos',
                                              'site_admin': False,
                                              'starred_url': 'https://api.github.com/users/kunmingg/starred{/owner}{/repo}',
                                              'subscriptions_url': 'https://api.github.com/users/kunmingg/subscriptions',
                                              'type': 'User',
                                              'url': 'https://api.github.com/users/kunmingg'},
                                    'private': False,
                                    'pulls_url': 'https://api.github.com/repos/kunmingg/website/pulls{/number}',
                                    'pushed_at': '2019-08-30T21:57:04Z',
                                    'releases_url': 'https://api.github.com/repos/kunmingg/website/releases{/id}',
                                    'size': 28091,
                                    'ssh_url': 'git@github.com:kunmingg/website.git',
                                    'stargazers_count': 0,
                                    'stargazers_url': 'https://api.github.com/repos/kunmingg/website/stargazers',
                                    'statuses_url': 'https://api.github.com/repos/kunmingg/website/statuses/{sha}',
                                    'subscribers_url': 'https://api.github.com/repos/kunmingg/website/subscribers',
                                    'subscription_url': 'https://api.github.com/repos/kunmingg/website/subscription',
                                    'svn_url': 'https://github.com/kunmingg/website',
                                    'tags_url': 'https://api.github.com/repos/kunmingg/website/tags',
                                    'teams_url': 'https://api.github.com/repos/kunmingg/website/teams',
                                    'trees_url': 'https://api.github.com/repos/kunmingg/website/git/trees{/sha}',
                                    'updated_at': '2018-06-12T22:24:03Z',
                                    'url': 'https://api.github.com/repos/kunmingg/website',
                                    'watchers': 0,
                                    'watchers_count': 0},
                           'sha': 'f7a8f4d4247b84ed31988b7e9657a3355026de9a',
                           'user': {'avatar_url': 'https://avatars2.githubusercontent.com/u/37601826?v=4',
                                    'events_url': 'https://api.github.com/users/kunmingg/events{/privacy}',
                                    'followers_url': 'https://api.github.com/users/kunmingg/followers',
                                    'following_url': 'https://api.github.com/users/kunmingg/following{/other_user}',
                                    'gists_url': 'https://api.github.com/users/kunmingg/gists{/gist_id}',
                                    'gravatar_id': '',
                                    'html_url': 'https://github.com/kunmingg',
                                    'id': 37601826,
                                    'login': 'kunmingg',
                                    'node_id': 'MDQ6VXNlcjM3NjAxODI2',
                                    'organizations_url': 'https://api.github.com/users/kunmingg/orgs',
                                    'received_events_url': 'https://api.github.com/users/kunmingg/received_events',
                                    'repos_url': 'https://api.github.com/users/kunmingg/repos',
                                    'site_admin': False,
                                    'starred_url': 'https://api.github.com/users/kunmingg/starred{/owner}{/repo}',
                                    'subscriptions_url': 'https://api.github.com/users/kunmingg/subscriptions',
                                    'type': 'User',
                                    'url': 'https://api.github.com/users/kunmingg'}},
                  'html_url': 'https://github.com/kubeflow/website/pull/1112',
                  'id': 312895771,
                  'issue_url': 'https://api.github.com/repos/kubeflow/website/issues/1112',
                  'labels': [{'color': '03db12',
                              'default': False,
                              'id': 907551316,
                              'name': 'approved',
                              'node_id': 'MDU6TGFiZWw5MDc1NTEzMTY=',
                              'url': 'https://api.github.com/repos/kubeflow/website/labels/approved'},
                             {'color': '2efca4',
                              'default': False,
                              'id': 907551315,
                              'name': 'lgtm',
                              'node_id': 'MDU6TGFiZWw5MDc1NTEzMTU=',
                              'url': 'https://api.github.com/repos/kubeflow/website/labels/lgtm'},
                             {'color': 'ededed',
                              'default': False,
                              'id': 907423557,
                              'name': 'size/XS',
                              'node_id': 'MDU6TGFiZWw5MDc0MjM1NTc=',
                              'url': 'https://api.github.com/repos/kubeflow/website/labels/size/XS'}],
                  'locked': False,
                  'maintainer_can_modify': False,
                  'merge_commit_sha': '843796e0c549fa8ee98d0afc07ea4479719f66a3',
                  'mergeable': None,
                  'mergeable_state': 'unknown',
                  'merged': True,
                  'merged_at': '2019-09-02T05:45:41Z',
                  'merged_by': {'avatar_url': 'https://avatars0.githubusercontent.com/u/20407524?v=4',
                                'events_url': 'https://api.github.com/users/k8s-ci-robot/events{/privacy}',
                                'followers_url': 'https://api.github.com/users/k8s-ci-robot/followers',
                                'following_url': 'https://api.github.com/users/k8s-ci-robot/following{/other_user}',
                                'gists_url': 'https://api.github.com/users/k8s-ci-robot/gists{/gist_id}',
                                'gravatar_id': '',
                                'html_url': 'https://github.com/k8s-ci-robot',
                                'id': 20407524,
                                'login': 'k8s-ci-robot',
                                'node_id': 'MDQ6VXNlcjIwNDA3NTI0',
                                'organizations_url': 'https://api.github.com/users/k8s-ci-robot/orgs',
                                'received_events_url': 'https://api.github.com/users/k8s-ci-robot/received_events',
                                'repos_url': 'https://api.github.com/users/k8s-ci-robot/repos',
                                'site_admin': False,
                                'starred_url': 'https://api.github.com/users/k8s-ci-robot/starred{/owner}{/repo}',
                                'subscriptions_url': 'https://api.github.com/users/k8s-ci-robot/subscriptions',
                                'type': 'User',
                                'url': 'https://api.github.com/users/k8s-ci-robot'},
                  'milestone': None,
                  'node_id': 'MDExOlB1bGxSZXF1ZXN0MzEyODk1Nzcx',
                  'number': 1112,
                  'patch_url': 'https://github.com/kubeflow/website/pull/1112.patch',
                  'rebaseable': None,
                  'requested_reviewers': [{'avatar_url': 'https://avatars2.githubusercontent.com/u/2348602?v=4',
                                           'events_url': 'https://api.github.com/users/IronPan/events{/privacy}',
                                           'followers_url': 'https://api.github.com/users/IronPan/followers',
                                           'following_url': 'https://api.github.com/users/IronPan/following{/other_user}',
                                           'gists_url': 'https://api.github.com/users/IronPan/gists{/gist_id}',
                                           'gravatar_id': '',
                                           'html_url': 'https://github.com/IronPan',
                                           'id': 2348602,
                                           'login': 'IronPan',
                                           'node_id': 'MDQ6VXNlcjIzNDg2MDI=',
                                           'organizations_url': 'https://api.github.com/users/IronPan/orgs',
                                           'received_events_url': 'https://api.github.com/users/IronPan/received_events',
                                           'repos_url': 'https://api.github.com/users/IronPan/repos',
                                           'site_admin': False,
                                           'starred_url': 'https://api.github.com/users/IronPan/starred{/owner}{/repo}',
                                           'subscriptions_url': 'https://api.github.com/users/IronPan/subscriptions',
                                           'type': 'User',
                                           'url': 'https://api.github.com/users/IronPan'},
                                          {'avatar_url': 'https://avatars3.githubusercontent.com/u/11774566?v=4',
                                           'events_url': 'https://api.github.com/users/abhi-g/events{/privacy}',
                                           'followers_url': 'https://api.github.com/users/abhi-g/followers',
                                           'following_url': 'https://api.github.com/users/abhi-g/following{/other_user}',
                                           'gists_url': 'https://api.github.com/users/abhi-g/gists{/gist_id}',
                                           'gravatar_id': '',
                                           'html_url': 'https://github.com/abhi-g',
                                           'id': 11774566,
                                           'login': 'abhi-g',
                                           'node_id': 'MDQ6VXNlcjExNzc0NTY2',
                                           'organizations_url': 'https://api.github.com/users/abhi-g/orgs',
                                           'received_events_url': 'https://api.github.com/users/abhi-g/received_events',
                                           'repos_url': 'https://api.github.com/users/abhi-g/repos',
                                           'site_admin': False,
                                           'starred_url': 'https://api.github.com/users/abhi-g/starred{/owner}{/repo}',
                                           'subscriptions_url': 'https://api.github.com/users/abhi-g/subscriptions',
                                           'type': 'User',
                                           'url': 'https://api.github.com/users/abhi-g'},
                                          {'avatar_url': 'https://avatars0.githubusercontent.com/u/32310205?v=4',
                                           'events_url': 'https://api.github.com/users/lluunn/events{/privacy}',
                                           'followers_url': 'https://api.github.com/users/lluunn/followers',
                                           'following_url': 'https://api.github.com/users/lluunn/following{/other_user}',
                                           'gists_url': 'https://api.github.com/users/lluunn/gists{/gist_id}',
                                           'gravatar_id': '',
                                           'html_url': 'https://github.com/lluunn',
                                           'id': 32310205,
                                           'login': 'lluunn',
                                           'node_id': 'MDQ6VXNlcjMyMzEwMjA1',
                                           'organizations_url': 'https://api.github.com/users/lluunn/orgs',
                                           'received_events_url': 'https://api.github.com/users/lluunn/received_events',
                                           'repos_url': 'https://api.github.com/users/lluunn/repos',
                                           'site_admin': False,
                                           'starred_url': 'https://api.github.com/users/lluunn/starred{/owner}{/repo}',
                                           'subscriptions_url': 'https://api.github.com/users/lluunn/subscriptions',
                                           'type': 'User',
                                           'url': 'https://api.github.com/users/lluunn'}],
                  'requested_teams': [],
                  'review_comment_url': 'https://api.github.com/repos/kubeflow/website/pulls/comments{/number}',
                  'review_comments': 0,
                  'review_comments_url': 'https://api.github.com/repos/kubeflow/website/pulls/1112/comments',
                  'state': 'closed',
                  'statuses_url': 'https://api.github.com/repos/kubeflow/website/statuses/f7a8f4d4247b84ed31988b7e9657a3355026de9a',
                  'title': 'add --use_basic_auth flag to cli doc',
                  'updated_at': '2019-09-02T05:45:42Z',
                  'url': 'https://api.github.com/repos/kubeflow/website/pulls/1112',
                  'user': {'avatar_url': 'https://avatars2.githubusercontent.com/u/37601826?v=4',
                           'events_url': 'https://api.github.com/users/kunmingg/events{/privacy}',
                           'followers_url': 'https://api.github.com/users/kunmingg/followers',
                           'following_url': 'https://api.github.com/users/kunmingg/following{/other_user}',
                           'gists_url': 'https://api.github.com/users/kunmingg/gists{/gist_id}',
                           'gravatar_id': '',
                           'html_url': 'https://github.com/kunmingg',
                           'id': 37601826,
                           'login': 'kunmingg',
                           'node_id': 'MDQ6VXNlcjM3NjAxODI2',
                           'organizations_url': 'https://api.github.com/users/kunmingg/orgs',
                           'received_events_url': 'https://api.github.com/users/kunmingg/received_events',
                           'repos_url': 'https://api.github.com/users/kunmingg/repos',
                           'site_admin': False,
                           'starred_url': 'https://api.github.com/users/kunmingg/starred{/owner}{/repo}',
                           'subscriptions_url': 'https://api.github.com/users/kunmingg/subscriptions',
                           'type': 'User',
                           'url': 'https://api.github.com/users/kunmingg'}}}

In [97]:
# Display the 'id' value nested under the 'pull_request' key of `data`.
data["pull_request"]["id"]


Out[97]:
312895771

Get Distinct Event Types


In [98]:
# List the distinct GitHub event types (at most 20 rows) recorded for the
# kubeflow org. `months` holds the quoted YYYYMM table suffixes that select
# which monthly githubarchive tables are scanned.
query = """
SELECT
    distinct type
  FROM `githubarchive.month.*`
  WHERE
    _TABLE_SUFFIX IN ({0})    
    AND org.login = 'kubeflow'
    
    limit 20
""".format(",".join(months))

# `query` is already a str, so it is passed to BigQuery as-is.
events = gbq.read_gbq(
    query,
    dialect='standard',
    project_id=PROJECT,
)

In [99]:
# Display the distinct event types returned by the query above.
events


Out[99]:
type
0 CreateEvent
1 MemberEvent
2 IssueCommentEvent
3 ReleaseEvent
4 CommitCommentEvent
5 IssuesEvent
6 GollumEvent
7 DeleteEvent
8 PullRequestReviewCommentEvent
9 WatchEvent
10 PullRequestEvent
11 ForkEvent
12 PushEvent