In [147]:
import augur
# import everything from githubapi.py and ghtorrent.py so we can
# just copy and paste our function later
import json
import re
from dateutil.parser import parse
import pandas as pd
import github
import numpy as np
import sqlalchemy as s
import datetime
import requests
%matplotlib inline
# create an Augur application so we can test our function
augur_app = augur.Application()('../augur.cfg')
github = augurApp.github()
ghtorrent = augurApp.ghtorrent()
In [148]:
df = ghtorrent.issue_comment_time('rails', 'rails')
df
Out[148]:
id
opened
first_commented
pull_request
minutes_to_comment
0
2
2012-01-19 05:24:55
2012-01-19 05:30:13
0
5.0
1
3
2012-01-26 15:07:56
2012-01-26 15:09:28
0
1.0
2
4
2012-01-26 15:20:54
2012-02-02 20:58:26
0
10417.0
3
5
2012-01-26 15:37:31
2012-01-26 16:01:18
0
23.0
4
6
2012-01-26 16:28:33
2012-01-28 12:25:55
0
2637.0
5
7
2012-01-25 05:24:41
2012-01-25 16:24:26
0
659.0
6
8
2012-01-25 00:22:20
2012-01-25 00:29:26
0
7.0
7
9
2012-01-26 17:18:23
2012-01-26 20:04:45
0
166.0
8
10
2012-01-26 17:35:50
2012-01-27 08:47:57
0
912.0
9
11
2012-01-26 17:50:28
NaT
0
NaN
10
12
2012-01-26 03:07:34
2012-01-26 12:17:26
0
549.0
11
13
2012-01-26 19:31:48
2012-01-26 19:35:18
0
3.0
12
14
2012-01-26 11:32:48
2012-01-26 14:39:35
0
186.0
13
15
2012-01-26 22:27:26
2012-01-26 23:11:46
0
44.0
14
16
2012-01-20 18:17:04
2012-01-20 19:46:14
0
89.0
15
17
2012-01-27 04:30:24
2012-01-27 05:21:48
0
51.0
16
18
2012-01-27 06:36:18
2012-01-27 15:56:29
0
560.0
17
19
2012-01-27 07:20:24
2012-04-29 17:41:35
0
134541.0
18
20
2012-01-27 12:18:16
2012-01-27 15:53:16
0
215.0
19
21
2012-01-27 13:53:02
2012-01-28 17:46:31
0
1673.0
20
22
2012-01-27 15:43:10
2012-03-12 11:05:05
0
64521.0
21
23
2012-01-19 22:48:36
2012-01-20 17:17:08
0
1108.0
22
24
2012-01-27 19:50:57
2012-02-01 15:15:39
0
6924.0
23
25
2012-01-28 08:44:02
2012-01-28 09:01:58
0
17.0
24
26
2012-01-25 11:07:03
2012-01-25 16:18:47
0
311.0
25
28
2012-01-28 19:22:57
2012-01-28 19:53:23
0
30.0
26
29
2012-01-28 20:15:28
2012-01-30 11:00:06
0
2324.0
27
30
2012-01-30 02:39:04
2012-01-30 20:28:50
0
1069.0
28
31
2012-01-30 05:20:00
2012-01-30 05:28:17
0
8.0
29
32
2012-01-30 05:28:53
2012-01-30 07:05:27
0
96.0
...
...
...
...
...
...
31241
55447333
2017-08-29 00:00:18
NaT
1
NaN
31242
55447830
2017-08-29 00:13:03
NaT
1
NaN
31243
55462578
2017-08-29 07:52:56
NaT
1
NaN
31244
55468768
2017-08-29 09:54:09
NaT
1
NaN
31245
55469365
2017-08-29 10:05:17
NaT
1
NaN
31246
55469487
2017-08-29 10:07:11
NaT
1
NaN
31247
55470588
2017-08-29 10:30:44
NaT
1
NaN
31248
55497851
2017-08-29 17:47:00
NaT
1
NaN
31249
55500568
2017-08-29 18:34:07
NaT
1
NaN
31250
55504970
2017-08-29 19:48:50
2017-08-29 19:54:36
1
5.0
31251
55505284
2017-08-29 19:54:34
NaT
1
NaN
31252
55506164
2017-08-29 20:10:01
NaT
1
NaN
31253
55513305
2017-08-29 22:21:13
NaT
1
NaN
31254
55515800
2017-08-29 23:18:48
NaT
1
NaN
31255
55516860
2017-08-29 23:47:24
NaT
1
NaN
31256
55532488
2017-08-30 07:30:34
NaT
1
NaN
31257
55556742
2017-08-30 14:43:38
NaT
1
NaN
31258
55557542
2017-08-30 14:55:32
NaT
1
NaN
31259
55557962
2017-08-30 15:01:21
NaT
1
NaN
31260
55562505
2017-08-30 16:07:55
NaT
1
NaN
31261
55563127
2017-08-30 16:18:44
NaT
1
NaN
31262
55563128
2017-08-30 16:18:44
NaT
1
NaN
31263
55566908
2017-08-30 17:13:56
NaT
1
NaN
31264
55566909
2017-08-30 17:13:56
NaT
1
NaN
31265
55580102
2017-08-30 21:01:58
NaT
1
NaN
31266
55581505
2017-08-30 21:29:11
NaT
1
NaN
31267
55593367
2017-08-31 02:32:18
NaT
1
NaN
31268
55616806
2017-08-31 12:12:09
NaT
1
NaN
31269
55633098
2017-08-31 16:25:14
2017-08-31 16:52:01
1
26.0
31270
55652974
2017-08-31 22:11:55
NaT
1
NaN
31271 rows × 5 columns
In [149]:
df["opened"] = pd.to_datetime(df["opened"],errors='coerce')
df = df.set_index(['opened'])
df = df.dropna()
In [81]:
df
Out[81]:
id
first_commented
pull_request
minutes_to_comment
opened
2012-01-19 05:24:55
2
2012-01-19 05:30:13
0
5.0
2012-01-26 15:07:56
3
2012-01-26 15:09:28
0
1.0
2012-01-26 15:20:54
4
2012-02-02 20:58:26
0
10417.0
2012-01-26 15:37:31
5
2012-01-26 16:01:18
0
23.0
2012-01-26 16:28:33
6
2012-01-28 12:25:55
0
2637.0
2012-01-25 05:24:41
7
2012-01-25 16:24:26
0
659.0
2012-01-25 00:22:20
8
2012-01-25 00:29:26
0
7.0
2012-01-26 17:18:23
9
2012-01-26 20:04:45
0
166.0
2012-01-26 17:35:50
10
2012-01-27 08:47:57
0
912.0
2012-01-26 03:07:34
12
2012-01-26 12:17:26
0
549.0
2012-01-26 19:31:48
13
2012-01-26 19:35:18
0
3.0
2012-01-26 11:32:48
14
2012-01-26 14:39:35
0
186.0
2012-01-26 22:27:26
15
2012-01-26 23:11:46
0
44.0
2012-01-20 18:17:04
16
2012-01-20 19:46:14
0
89.0
2012-01-27 04:30:24
17
2012-01-27 05:21:48
0
51.0
2012-01-27 06:36:18
18
2012-01-27 15:56:29
0
560.0
2012-01-27 07:20:24
19
2012-04-29 17:41:35
0
134541.0
2012-01-27 12:18:16
20
2012-01-27 15:53:16
0
215.0
2012-01-27 13:53:02
21
2012-01-28 17:46:31
0
1673.0
2012-01-27 15:43:10
22
2012-03-12 11:05:05
0
64521.0
2012-01-19 22:48:36
23
2012-01-20 17:17:08
0
1108.0
2012-01-27 19:50:57
24
2012-02-01 15:15:39
0
6924.0
2012-01-28 08:44:02
25
2012-01-28 09:01:58
0
17.0
2012-01-25 11:07:03
26
2012-01-25 16:18:47
0
311.0
2012-01-28 19:22:57
28
2012-01-28 19:53:23
0
30.0
2012-01-28 20:15:28
29
2012-01-30 11:00:06
0
2324.0
2012-01-30 02:39:04
30
2012-01-30 20:28:50
0
1069.0
2012-01-30 05:20:00
31
2012-01-30 05:28:17
0
8.0
2012-01-30 05:28:53
32
2012-01-30 07:05:27
0
96.0
2012-01-02 13:14:19
33
2012-01-03 16:41:49
0
1647.0
...
...
...
...
...
2017-08-18 11:46:23
54871921
2017-08-18 17:47:17
1
360.0
2017-08-18 13:32:14
54877756
2017-08-18 14:55:36
1
83.0
2017-08-18 15:51:00
54886597
2017-08-18 15:53:35
1
2.0
2017-08-18 23:56:26
54909414
2017-08-19 17:06:57
1
1030.0
2017-08-18 23:56:26
54909415
2017-08-19 17:06:57
1
1030.0
2017-08-19 05:18:20
54918769
2017-08-19 05:43:30
1
25.0
2017-08-20 00:52:13
54948443
2017-08-20 02:12:00
1
79.0
2017-08-20 14:43:59
54965758
2017-08-20 18:40:10
1
236.0
2017-08-20 14:43:59
54965759
2017-08-20 18:40:10
1
236.0
2017-08-21 18:49:52
55026018
2017-08-21 19:17:45
1
27.0
2017-08-22 00:07:02
55040649
2017-08-22 03:15:15
1
188.0
2017-08-22 00:12:45
55040834
2017-08-22 02:22:39
1
129.0
2017-08-22 00:25:06
55041186
2017-08-22 02:07:24
1
102.0
2017-08-22 13:11:57
55071969
2017-08-22 15:56:22
1
164.0
2017-08-22 18:44:59
55092568
2017-08-22 19:38:11
1
53.0
2017-08-22 18:52:34
55092993
2017-08-22 18:53:16
1
0.0
2017-08-23 00:40:34
55109035
2017-08-23 01:14:47
1
34.0
2017-08-24 00:34:06
55178370
2017-08-24 12:19:56
1
705.0
2017-08-24 10:11:37
55200671
2017-08-24 10:14:15
1
2.0
2017-08-24 14:32:24
55214510
2017-08-24 14:33:34
1
1.0
2017-08-25 15:54:38
55286526
2017-08-25 15:59:38
1
5.0
2017-08-25 18:55:39
55295635
2017-08-25 18:58:53
1
3.0
2017-08-27 06:45:46
55355078
2017-08-27 07:03:30
1
17.0
2017-08-27 12:37:14
55363405
2017-08-28 08:04:05
1
1166.0
2017-08-27 15:43:18
55368906
2017-08-27 15:48:06
1
4.0
2017-08-28 04:11:11
55390048
2017-08-29 01:31:46
1
1280.0
2017-08-28 16:33:40
55424873
2017-08-28 18:47:41
1
134.0
2017-08-28 21:45:38
55441902
2017-08-29 11:27:05
1
821.0
2017-08-29 19:48:50
55504970
2017-08-29 19:54:36
1
5.0
2017-08-31 16:25:14
55633098
2017-08-31 16:52:01
1
26.0
14596 rows × 4 columns
In [150]:
df2 = df.resample('W-MON').mean
df2
Out[150]:
<bound method f of DatetimeIndexResampler [freq=<Week: weekday=0>, axis=0, closed=right, label=right, convention=start, base=0]>
In [ ]:
In [142]:
import pylab
pylab.plot(df2.index,df2['minutes_to_comment'])
pylab.ylim(0, 150000)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-142-69622784d81a> in <module>()
3
4
----> 5 pylab.plot(df2.index,df2['minutes_to_comment'])
6 pylab.ylim(0, 150000)
AttributeError: 'function' object has no attribute 'index'
In [ ]:
In [152]:
def code_reviews(self, owner, repo=None):
url = 'https://api.github.com/repos/{}/{}/pulls'.format(owner,repo)
json = requests.get(url, auth('user', self.GITHUB_API_KEY)).json()
dicts = []
for item in json:
info = {}
#repoID
info['pullNum'] = item['number']
info['state'] = item['state']
info['createdAt'] = item['created_at']
dicts.append(info)
countReviews = []
for index in range(0,len(pullNums)):
url2 = 'https://api.github.com/repos/{}/{}/pulls/{}/reviews'.format(owner,repo,pullNums[index])
j = requests.get(url2, auth=('gabe-heim', password)).json()
countReviews = np.append(countReviews, len(j))
return pd.DataFrame(dicts).join(pd.DataFrame(data=countReviews, columns=['num_reviews']))
In [ ]:
Content source: OSSHealth/ghdata
Similar notebooks: