In [1]:
import augur

# import everything from githubapi.py and ghtorrent.py so we can
# just copy and paste our function later
import json
import re
from dateutil.parser import parse
import pandas as pd
import github
import numpy as np
import sqlalchemy as s
import datetime
import requests
import time

%matplotlib inline
# create an Augur application so we can test our function
augur_app = augur.Application()('../augur.cfg')
github = augurApp.github()
ghtorrent = augurApp.ghtorrent()


2018-05-21 11:59:04 augur[10330] INFO GHTorrent: Connecting to augurlabs.io:3306/ghtorrent as ghdata

In [2]:
def code_reviews(self, owner, repo=None):
        url = 'https://api.github.com/repos/{}/{}/pulls'.format(owner,repo)
        json = requests.get(url, auth=('user', self.GITHUB_API_KEY)).json()

        dicts = []
        pullNums = []

        for item in json:
            info = {}
            #repoID
            info['pullNum'] = item['number']
            info['state'] = item['state']
            info['createdAt'] = item['created_at']

            dicts.append(info)
            pullNums.append(item['number'])

        countReviews = []
        for index in range(0,len(pullNums)):
            url2 = 'https://api.github.com/repos/{}/{}/pulls/{}/reviews'.format(owner,repo,pullNums[index])
            j = requests.get(url2, auth=('user', self.GITHUB_API_KEY)).json()
            countReviews = np.append(countReviews, len(j))


        return pd.DataFrame(dicts).join(pd.DataFrame(data=countReviews, columns=['num_reviews']))

In [3]:
# add our new function to the class
augur.GitHubAPI.code_reviews = code_reviews

# test our function on the initialized instance
ld = github.code_reviews('rails', 'rails')

In [68]:
ld


Out[68]:
createdAt pullNum state num_reviews
0 2018-05-18T00:13:41Z 32924 open 0.0
1 2018-05-17T23:41:13Z 32923 open 1.0
2 2018-05-17T04:08:34Z 32913 open 0.0
3 2018-05-16T22:46:33Z 32911 open 0.0
4 2018-05-15T11:12:18Z 32893 open 0.0
5 2018-05-14T11:46:45Z 32885 open 0.0
6 2018-05-14T04:00:27Z 32883 open 0.0
7 2018-05-13T22:01:19Z 32882 open 0.0
8 2018-05-13T19:41:53Z 32881 open 0.0
9 2018-05-11T18:56:53Z 32875 open 0.0
10 2018-05-11T13:14:10Z 32872 open 2.0
11 2018-05-10T22:33:13Z 32868 open 3.0
12 2018-05-10T12:32:14Z 32865 open 0.0
13 2018-05-10T06:57:21Z 32863 open 0.0
14 2018-05-09T22:14:37Z 32861 open 0.0
15 2018-05-09T21:41:33Z 32860 open 0.0
16 2018-05-09T06:08:14Z 32852 open 0.0
17 2018-05-09T04:23:37Z 32851 open 0.0
18 2018-05-09T04:15:51Z 32850 open 2.0
19 2018-05-08T11:38:29Z 32846 open 2.0
20 2018-05-08T07:28:32Z 32844 open 0.0
21 2018-05-06T09:36:43Z 32828 open 0.0
22 2018-05-05T18:12:02Z 32825 open 0.0
23 2018-05-05T12:41:13Z 32822 open 1.0
24 2018-05-05T06:41:12Z 32820 open 0.0
25 2018-05-04T01:56:12Z 32808 open 0.0
26 2018-05-03T17:40:26Z 32805 open 0.0
27 2018-05-03T17:19:23Z 32804 open 0.0
28 2018-05-02T20:31:40Z 32796 open 9.0
29 2018-05-02T19:25:05Z 32795 open 0.0

In [4]:
ld['num_reviews'].plot()


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x117be6ac8>

In [5]:
ld['deletions'].plot()


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/anaconda3/envs/augur/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2524             try:
-> 2525                 return self._engine.get_loc(key)
   2526             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'deletions'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-5-b067bb705f9b> in <module>()
----> 1 ld['deletions'].plot()

~/anaconda3/envs/augur/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2137             return self._getitem_multilevel(key)
   2138         else:
-> 2139             return self._getitem_column(key)
   2140 
   2141     def _getitem_column(self, key):

~/anaconda3/envs/augur/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2144         # get column
   2145         if self.columns.is_unique:
-> 2146             return self._get_item_cache(key)
   2147 
   2148         # duplicate columns & possible reduce dimensionality

~/anaconda3/envs/augur/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1840         res = cache.get(item)
   1841         if res is None:
-> 1842             values = self._data.get(item)
   1843             res = self._box_item_values(item, values)
   1844             cache[item] = res

~/anaconda3/envs/augur/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3841 
   3842             if not isna(item):
-> 3843                 loc = self.items.get_loc(item)
   3844             else:
   3845                 indexer = np.arange(len(self.items))[isna(self.items)]

~/anaconda3/envs/augur/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2525                 return self._engine.get_loc(key)
   2526             except KeyError:
-> 2527                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2528 
   2529         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'deletions'

In [ ]:
## GraphQL Explorer note

{
  repository(owner: "OSSHealth", name: "augur") {
    pullRequests(first: 100, after: "Y3Vyc29yOnYyOpHOCy9BZQ==") {
      edges {
        cursor
        node {
          number
          createdAt
          reviews(first: 100) {
            edges {
              node {
                createdAt
                author {
                  login
                }
                createdAt
              }
            }
          }
        }
        cursor
      }
    }
  }
}