In [12]:
import augur
import pandas as pd
import numpy as np
augur_app = augur.Application()('../augur.cfg')
github = augurApp.github()
ghtorrent = augurApp.ghtorrent()
In [13]:
contributors = ghtorrent.contributors("rails", "rails")
sums = contributors.sum()
contributors
Out[13]:
user
commits
issues
commit_comments
issue_comments
pull_requests
pull_request_comments
total
0
8153
6825.0
127.0
313.0
13152.0
1.0
0.0
20418.0
1
45381
2192.0
202.0
130.0
4633.0
0.0
0.0
7157.0
2
129
4737.0
9.0
66.0
1478.0
0.0
0.0
6290.0
3
16484
4213.0
12.0
99.0
1474.0
1.0
0.0
5799.0
4
8179
4071.0
91.0
84.0
1352.0
0.0
6.0
5604.0
5
8150
3222.0
11.0
7.0
2102.0
0.0
0.0
5342.0
6
4034367
0.0
0.0
0.0
5316.0
0.0
0.0
5316.0
7
8148
1726.0
98.0
59.0
3025.0
0.0
0.0
4908.0
8
323865
1302.0
24.0
130.0
3094.0
0.0
0.0
4550.0
9
8174
2127.0
5.0
131.0
1941.0
0.0
0.0
4204.0
10
8941
975.0
19.0
54.0
2805.0
0.0
0.0
3853.0
11
165022
1113.0
8.0
79.0
2167.0
0.0
7.0
3374.0
12
884966
0.0
509.0
0.0
2804.0
0.0
0.0
3313.0
13
86929
705.0
7.0
54.0
2539.0
0.0
0.0
3305.0
14
17141
258.0
69.0
9.0
2763.0
0.0
0.0
3099.0
15
1240486
599.0
17.0
8.0
2365.0
0.0
0.0
2989.0
16
8158
1627.0
4.0
32.0
1017.0
0.0
0.0
2680.0
17
5113
932.0
186.0
18.0
1326.0
0.0
2.0
2464.0
18
13201
504.0
402.0
11.0
1281.0
0.0
0.0
2198.0
19
30351173
2150.0
0.0
0.0
0.0
0.0
0.0
2150.0
20
129693
577.0
7.0
68.0
1389.0
0.0
0.0
2041.0
21
64341
555.0
129.0
40.0
1272.0
0.0
0.0
1996.0
22
38980
556.0
39.0
100.0
1157.0
1.0
0.0
1853.0
23
7987
422.0
71.0
46.0
1241.0
0.0
0.0
1780.0
24
8147
878.0
22.0
17.0
831.0
0.0
0.0
1748.0
25
23468
1077.0
46.0
28.0
584.0
0.0
0.0
1735.0
26
143208
1003.0
31.0
7.0
691.0
0.0
0.0
1732.0
27
116520
523.0
56.0
33.0
1069.0
1.0
0.0
1682.0
28
1635
331.0
26.0
29.0
1090.0
0.0
0.0
1476.0
29
1565
1282.0
17.0
4.0
167.0
0.0
0.0
1470.0
...
...
...
...
...
...
...
...
...
15451
52805
0.0
0.0
1.0
0.0
0.0
0.0
1.0
15452
28956
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15453
112259
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15454
244624
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15455
1686573
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15456
7790608
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15457
463666
0.0
0.0
0.0
0.0
1.0
0.0
1.0
15458
263142
1.0
0.0
0.0
0.0
0.0
0.0
1.0
15459
1680838
1.0
0.0
0.0
0.0
0.0
0.0
1.0
15460
3119912
0.0
0.0
0.0
0.0
1.0
0.0
1.0
15461
17736
1.0
0.0
0.0
0.0
0.0
0.0
1.0
15462
813978
0.0
1.0
0.0
0.0
0.0
0.0
1.0
15463
1358
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15464
167422
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15465
356632
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15466
16152
0.0
0.0
0.0
0.0
1.0
0.0
1.0
15467
3316878
0.0
0.0
0.0
0.0
1.0
0.0
1.0
15468
18254
1.0
0.0
0.0
0.0
0.0
0.0
1.0
15469
3151007
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15470
20149
0.0
0.0
0.0
0.0
1.0
0.0
1.0
15471
279181
1.0
0.0
0.0
0.0
0.0
0.0
1.0
15472
260888
0.0
1.0
0.0
0.0
0.0
0.0
1.0
15473
3474569
0.0
0.0
0.0
0.0
1.0
0.0
1.0
15474
8082100
0.0
1.0
0.0
0.0
0.0
0.0
1.0
15475
3214
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15476
63919
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15477
170892
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15478
368908
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15479
964451
0.0
0.0
0.0
1.0
0.0
0.0
1.0
15480
24827
0.0
0.0
0.0
0.0
1.0
0.0
1.0
15481 rows × 8 columns
In [14]:
def classify(row):
role = 'user'
ratio = row / sums
if (ratio['issue_comments'] > 0.05):
role = 'tester'
if (row['pull_requests'] >= 1 and row['commits'] == 0):
role = 'rejected_contributor'
if (row['pull_requests'] >= 1 and row['commits'] >= 1):
role = 'contributor'
if (ratio['pull_requests'] > 0.10 or ratio['commits'] > 0.01):
role = 'major_contributor'
if (ratio['commits'] > 0.02 or ratio['pull_request_comments'] > 0.15):
role = 'maintainer'
return pd.Series({'login': row['login'], 'role': role})
In [15]:
roles = contributors.apply(classify, axis=1)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
2565 try:
-> 2566 return libts.get_value_box(s, key)
2567 except IndexError:
pandas/_libs/tslib.pyx in pandas._libs.tslib.get_value_box()
pandas/_libs/tslib.pyx in pandas._libs.tslib.get_value_box()
TypeError: 'str' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-15-e13c68759f88> in <module>()
----> 1 roles = contributors.apply(classify, axis=1)
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
4875 f, axis,
4876 reduce=reduce,
-> 4877 ignore_failures=ignore_failures)
4878 else:
4879 return self._apply_broadcast(f, axis)
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _apply_standard(self, func, axis, ignore_failures, reduce)
4971 try:
4972 for i, v in enumerate(series_gen):
-> 4973 results[i] = func(v)
4974 keys.append(v.name)
4975 except Exception as e:
<ipython-input-14-44b6084ff7b2> in classify(row)
13 role = 'maintainer'
14
---> 15 return pd.Series({'login': row['login'], 'role': role})
16
~/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
621 key = com._apply_if_callable(key, self)
622 try:
--> 623 result = self.index.get_value(self, key)
624
625 if not is_scalar(result):
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
2572 raise InvalidIndexError(key)
2573 else:
-> 2574 raise e1
2575 except Exception: # pragma: no cover
2576 raise e1
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
2558 try:
2559 return self._engine.get_value(s, k,
-> 2560 tz=getattr(series.dtype, 'tz', None))
2561 except KeyError as e1:
2562 if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: ('login', 'occurred at index 0')
In [11]:
roles
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-11-6c14d8f62772> in <module>()
----> 1 roles
NameError: name 'roles' is not defined
In [ ]:
Content source: OSSHealth/ghdata
Similar notebooks: