In [147]:
import augur

# import everything from githubapi.py and ghtorrent.py so we can
# just copy and paste our function later
import json
import re
from dateutil.parser import parse
import pandas as pd
import github
import numpy as np
import sqlalchemy as s
import datetime
import requests

%matplotlib inline
# create an Augur application so we can test our function
augur_app = augur.Application()('../augur.cfg')
github = augurApp.github()
ghtorrent = augurApp.ghtorrent()

In [148]:
df = ghtorrent.issue_comment_time('rails', 'rails')
df


Out[148]:
id opened first_commented pull_request minutes_to_comment
0 2 2012-01-19 05:24:55 2012-01-19 05:30:13 0 5.0
1 3 2012-01-26 15:07:56 2012-01-26 15:09:28 0 1.0
2 4 2012-01-26 15:20:54 2012-02-02 20:58:26 0 10417.0
3 5 2012-01-26 15:37:31 2012-01-26 16:01:18 0 23.0
4 6 2012-01-26 16:28:33 2012-01-28 12:25:55 0 2637.0
5 7 2012-01-25 05:24:41 2012-01-25 16:24:26 0 659.0
6 8 2012-01-25 00:22:20 2012-01-25 00:29:26 0 7.0
7 9 2012-01-26 17:18:23 2012-01-26 20:04:45 0 166.0
8 10 2012-01-26 17:35:50 2012-01-27 08:47:57 0 912.0
9 11 2012-01-26 17:50:28 NaT 0 NaN
10 12 2012-01-26 03:07:34 2012-01-26 12:17:26 0 549.0
11 13 2012-01-26 19:31:48 2012-01-26 19:35:18 0 3.0
12 14 2012-01-26 11:32:48 2012-01-26 14:39:35 0 186.0
13 15 2012-01-26 22:27:26 2012-01-26 23:11:46 0 44.0
14 16 2012-01-20 18:17:04 2012-01-20 19:46:14 0 89.0
15 17 2012-01-27 04:30:24 2012-01-27 05:21:48 0 51.0
16 18 2012-01-27 06:36:18 2012-01-27 15:56:29 0 560.0
17 19 2012-01-27 07:20:24 2012-04-29 17:41:35 0 134541.0
18 20 2012-01-27 12:18:16 2012-01-27 15:53:16 0 215.0
19 21 2012-01-27 13:53:02 2012-01-28 17:46:31 0 1673.0
20 22 2012-01-27 15:43:10 2012-03-12 11:05:05 0 64521.0
21 23 2012-01-19 22:48:36 2012-01-20 17:17:08 0 1108.0
22 24 2012-01-27 19:50:57 2012-02-01 15:15:39 0 6924.0
23 25 2012-01-28 08:44:02 2012-01-28 09:01:58 0 17.0
24 26 2012-01-25 11:07:03 2012-01-25 16:18:47 0 311.0
25 28 2012-01-28 19:22:57 2012-01-28 19:53:23 0 30.0
26 29 2012-01-28 20:15:28 2012-01-30 11:00:06 0 2324.0
27 30 2012-01-30 02:39:04 2012-01-30 20:28:50 0 1069.0
28 31 2012-01-30 05:20:00 2012-01-30 05:28:17 0 8.0
29 32 2012-01-30 05:28:53 2012-01-30 07:05:27 0 96.0
... ... ... ... ... ...
31241 55447333 2017-08-29 00:00:18 NaT 1 NaN
31242 55447830 2017-08-29 00:13:03 NaT 1 NaN
31243 55462578 2017-08-29 07:52:56 NaT 1 NaN
31244 55468768 2017-08-29 09:54:09 NaT 1 NaN
31245 55469365 2017-08-29 10:05:17 NaT 1 NaN
31246 55469487 2017-08-29 10:07:11 NaT 1 NaN
31247 55470588 2017-08-29 10:30:44 NaT 1 NaN
31248 55497851 2017-08-29 17:47:00 NaT 1 NaN
31249 55500568 2017-08-29 18:34:07 NaT 1 NaN
31250 55504970 2017-08-29 19:48:50 2017-08-29 19:54:36 1 5.0
31251 55505284 2017-08-29 19:54:34 NaT 1 NaN
31252 55506164 2017-08-29 20:10:01 NaT 1 NaN
31253 55513305 2017-08-29 22:21:13 NaT 1 NaN
31254 55515800 2017-08-29 23:18:48 NaT 1 NaN
31255 55516860 2017-08-29 23:47:24 NaT 1 NaN
31256 55532488 2017-08-30 07:30:34 NaT 1 NaN
31257 55556742 2017-08-30 14:43:38 NaT 1 NaN
31258 55557542 2017-08-30 14:55:32 NaT 1 NaN
31259 55557962 2017-08-30 15:01:21 NaT 1 NaN
31260 55562505 2017-08-30 16:07:55 NaT 1 NaN
31261 55563127 2017-08-30 16:18:44 NaT 1 NaN
31262 55563128 2017-08-30 16:18:44 NaT 1 NaN
31263 55566908 2017-08-30 17:13:56 NaT 1 NaN
31264 55566909 2017-08-30 17:13:56 NaT 1 NaN
31265 55580102 2017-08-30 21:01:58 NaT 1 NaN
31266 55581505 2017-08-30 21:29:11 NaT 1 NaN
31267 55593367 2017-08-31 02:32:18 NaT 1 NaN
31268 55616806 2017-08-31 12:12:09 NaT 1 NaN
31269 55633098 2017-08-31 16:25:14 2017-08-31 16:52:01 1 26.0
31270 55652974 2017-08-31 22:11:55 NaT 1 NaN

31271 rows × 5 columns


In [149]:
df["opened"] = pd.to_datetime(df["opened"],errors='coerce')
df = df.set_index(['opened'])
df = df.dropna()

In [81]:
df


Out[81]:
id first_commented pull_request minutes_to_comment
opened
2012-01-19 05:24:55 2 2012-01-19 05:30:13 0 5.0
2012-01-26 15:07:56 3 2012-01-26 15:09:28 0 1.0
2012-01-26 15:20:54 4 2012-02-02 20:58:26 0 10417.0
2012-01-26 15:37:31 5 2012-01-26 16:01:18 0 23.0
2012-01-26 16:28:33 6 2012-01-28 12:25:55 0 2637.0
2012-01-25 05:24:41 7 2012-01-25 16:24:26 0 659.0
2012-01-25 00:22:20 8 2012-01-25 00:29:26 0 7.0
2012-01-26 17:18:23 9 2012-01-26 20:04:45 0 166.0
2012-01-26 17:35:50 10 2012-01-27 08:47:57 0 912.0
2012-01-26 03:07:34 12 2012-01-26 12:17:26 0 549.0
2012-01-26 19:31:48 13 2012-01-26 19:35:18 0 3.0
2012-01-26 11:32:48 14 2012-01-26 14:39:35 0 186.0
2012-01-26 22:27:26 15 2012-01-26 23:11:46 0 44.0
2012-01-20 18:17:04 16 2012-01-20 19:46:14 0 89.0
2012-01-27 04:30:24 17 2012-01-27 05:21:48 0 51.0
2012-01-27 06:36:18 18 2012-01-27 15:56:29 0 560.0
2012-01-27 07:20:24 19 2012-04-29 17:41:35 0 134541.0
2012-01-27 12:18:16 20 2012-01-27 15:53:16 0 215.0
2012-01-27 13:53:02 21 2012-01-28 17:46:31 0 1673.0
2012-01-27 15:43:10 22 2012-03-12 11:05:05 0 64521.0
2012-01-19 22:48:36 23 2012-01-20 17:17:08 0 1108.0
2012-01-27 19:50:57 24 2012-02-01 15:15:39 0 6924.0
2012-01-28 08:44:02 25 2012-01-28 09:01:58 0 17.0
2012-01-25 11:07:03 26 2012-01-25 16:18:47 0 311.0
2012-01-28 19:22:57 28 2012-01-28 19:53:23 0 30.0
2012-01-28 20:15:28 29 2012-01-30 11:00:06 0 2324.0
2012-01-30 02:39:04 30 2012-01-30 20:28:50 0 1069.0
2012-01-30 05:20:00 31 2012-01-30 05:28:17 0 8.0
2012-01-30 05:28:53 32 2012-01-30 07:05:27 0 96.0
2012-01-02 13:14:19 33 2012-01-03 16:41:49 0 1647.0
... ... ... ... ...
2017-08-18 11:46:23 54871921 2017-08-18 17:47:17 1 360.0
2017-08-18 13:32:14 54877756 2017-08-18 14:55:36 1 83.0
2017-08-18 15:51:00 54886597 2017-08-18 15:53:35 1 2.0
2017-08-18 23:56:26 54909414 2017-08-19 17:06:57 1 1030.0
2017-08-18 23:56:26 54909415 2017-08-19 17:06:57 1 1030.0
2017-08-19 05:18:20 54918769 2017-08-19 05:43:30 1 25.0
2017-08-20 00:52:13 54948443 2017-08-20 02:12:00 1 79.0
2017-08-20 14:43:59 54965758 2017-08-20 18:40:10 1 236.0
2017-08-20 14:43:59 54965759 2017-08-20 18:40:10 1 236.0
2017-08-21 18:49:52 55026018 2017-08-21 19:17:45 1 27.0
2017-08-22 00:07:02 55040649 2017-08-22 03:15:15 1 188.0
2017-08-22 00:12:45 55040834 2017-08-22 02:22:39 1 129.0
2017-08-22 00:25:06 55041186 2017-08-22 02:07:24 1 102.0
2017-08-22 13:11:57 55071969 2017-08-22 15:56:22 1 164.0
2017-08-22 18:44:59 55092568 2017-08-22 19:38:11 1 53.0
2017-08-22 18:52:34 55092993 2017-08-22 18:53:16 1 0.0
2017-08-23 00:40:34 55109035 2017-08-23 01:14:47 1 34.0
2017-08-24 00:34:06 55178370 2017-08-24 12:19:56 1 705.0
2017-08-24 10:11:37 55200671 2017-08-24 10:14:15 1 2.0
2017-08-24 14:32:24 55214510 2017-08-24 14:33:34 1 1.0
2017-08-25 15:54:38 55286526 2017-08-25 15:59:38 1 5.0
2017-08-25 18:55:39 55295635 2017-08-25 18:58:53 1 3.0
2017-08-27 06:45:46 55355078 2017-08-27 07:03:30 1 17.0
2017-08-27 12:37:14 55363405 2017-08-28 08:04:05 1 1166.0
2017-08-27 15:43:18 55368906 2017-08-27 15:48:06 1 4.0
2017-08-28 04:11:11 55390048 2017-08-29 01:31:46 1 1280.0
2017-08-28 16:33:40 55424873 2017-08-28 18:47:41 1 134.0
2017-08-28 21:45:38 55441902 2017-08-29 11:27:05 1 821.0
2017-08-29 19:48:50 55504970 2017-08-29 19:54:36 1 5.0
2017-08-31 16:25:14 55633098 2017-08-31 16:52:01 1 26.0

14596 rows × 4 columns


In [150]:
df2 = df.resample('W-MON').mean



df2


Out[150]:
<bound method f of DatetimeIndexResampler [freq=<Week: weekday=0>, axis=0, closed=right, label=right, convention=start, base=0]>

In [ ]:


In [142]:
import pylab



pylab.plot(df2.index,df2['minutes_to_comment'])
pylab.ylim(0, 150000)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-142-69622784d81a> in <module>()
      3 
      4 
----> 5 pylab.plot(df2.index,df2['minutes_to_comment'])
      6 pylab.ylim(0, 150000)

AttributeError: 'function' object has no attribute 'index'

In [ ]:


In [152]:
def code_reviews(self, owner, repo=None):
        url = 'https://api.github.com/repos/{}/{}/pulls'.format(owner,repo)
        json = requests.get(url, auth('user', self.GITHUB_API_KEY)).json()

        dicts = []

        for item in json:
            info = {}
            #repoID
            info['pullNum'] = item['number']
            info['state'] = item['state']
            info['createdAt'] = item['created_at']

            dicts.append(info)


        countReviews = []
        for index in range(0,len(pullNums)):
            url2 = 'https://api.github.com/repos/{}/{}/pulls/{}/reviews'.format(owner,repo,pullNums[index])
            j = requests.get(url2, auth=('gabe-heim', password)).json()
            countReviews = np.append(countReviews, len(j))


        return pd.DataFrame(dicts).join(pd.DataFrame(data=countReviews, columns=['num_reviews']))

In [ ]: