In [12]:
import augur
import pandas as pd
import numpy as np

augur_app = augur.Application()('../augur.cfg')
github = augurApp.github()
ghtorrent = augurApp.ghtorrent()

In [13]:
contributors = ghtorrent.contributors("rails", "rails")
sums = contributors.sum()

contributors


Out[13]:
user commits issues commit_comments issue_comments pull_requests pull_request_comments total
0 8153 6825.0 127.0 313.0 13152.0 1.0 0.0 20418.0
1 45381 2192.0 202.0 130.0 4633.0 0.0 0.0 7157.0
2 129 4737.0 9.0 66.0 1478.0 0.0 0.0 6290.0
3 16484 4213.0 12.0 99.0 1474.0 1.0 0.0 5799.0
4 8179 4071.0 91.0 84.0 1352.0 0.0 6.0 5604.0
5 8150 3222.0 11.0 7.0 2102.0 0.0 0.0 5342.0
6 4034367 0.0 0.0 0.0 5316.0 0.0 0.0 5316.0
7 8148 1726.0 98.0 59.0 3025.0 0.0 0.0 4908.0
8 323865 1302.0 24.0 130.0 3094.0 0.0 0.0 4550.0
9 8174 2127.0 5.0 131.0 1941.0 0.0 0.0 4204.0
10 8941 975.0 19.0 54.0 2805.0 0.0 0.0 3853.0
11 165022 1113.0 8.0 79.0 2167.0 0.0 7.0 3374.0
12 884966 0.0 509.0 0.0 2804.0 0.0 0.0 3313.0
13 86929 705.0 7.0 54.0 2539.0 0.0 0.0 3305.0
14 17141 258.0 69.0 9.0 2763.0 0.0 0.0 3099.0
15 1240486 599.0 17.0 8.0 2365.0 0.0 0.0 2989.0
16 8158 1627.0 4.0 32.0 1017.0 0.0 0.0 2680.0
17 5113 932.0 186.0 18.0 1326.0 0.0 2.0 2464.0
18 13201 504.0 402.0 11.0 1281.0 0.0 0.0 2198.0
19 30351173 2150.0 0.0 0.0 0.0 0.0 0.0 2150.0
20 129693 577.0 7.0 68.0 1389.0 0.0 0.0 2041.0
21 64341 555.0 129.0 40.0 1272.0 0.0 0.0 1996.0
22 38980 556.0 39.0 100.0 1157.0 1.0 0.0 1853.0
23 7987 422.0 71.0 46.0 1241.0 0.0 0.0 1780.0
24 8147 878.0 22.0 17.0 831.0 0.0 0.0 1748.0
25 23468 1077.0 46.0 28.0 584.0 0.0 0.0 1735.0
26 143208 1003.0 31.0 7.0 691.0 0.0 0.0 1732.0
27 116520 523.0 56.0 33.0 1069.0 1.0 0.0 1682.0
28 1635 331.0 26.0 29.0 1090.0 0.0 0.0 1476.0
29 1565 1282.0 17.0 4.0 167.0 0.0 0.0 1470.0
... ... ... ... ... ... ... ... ...
15451 52805 0.0 0.0 1.0 0.0 0.0 0.0 1.0
15452 28956 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15453 112259 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15454 244624 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15455 1686573 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15456 7790608 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15457 463666 0.0 0.0 0.0 0.0 1.0 0.0 1.0
15458 263142 1.0 0.0 0.0 0.0 0.0 0.0 1.0
15459 1680838 1.0 0.0 0.0 0.0 0.0 0.0 1.0
15460 3119912 0.0 0.0 0.0 0.0 1.0 0.0 1.0
15461 17736 1.0 0.0 0.0 0.0 0.0 0.0 1.0
15462 813978 0.0 1.0 0.0 0.0 0.0 0.0 1.0
15463 1358 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15464 167422 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15465 356632 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15466 16152 0.0 0.0 0.0 0.0 1.0 0.0 1.0
15467 3316878 0.0 0.0 0.0 0.0 1.0 0.0 1.0
15468 18254 1.0 0.0 0.0 0.0 0.0 0.0 1.0
15469 3151007 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15470 20149 0.0 0.0 0.0 0.0 1.0 0.0 1.0
15471 279181 1.0 0.0 0.0 0.0 0.0 0.0 1.0
15472 260888 0.0 1.0 0.0 0.0 0.0 0.0 1.0
15473 3474569 0.0 0.0 0.0 0.0 1.0 0.0 1.0
15474 8082100 0.0 1.0 0.0 0.0 0.0 0.0 1.0
15475 3214 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15476 63919 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15477 170892 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15478 368908 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15479 964451 0.0 0.0 0.0 1.0 0.0 0.0 1.0
15480 24827 0.0 0.0 0.0 0.0 1.0 0.0 1.0

15481 rows × 8 columns


In [14]:
def classify(row):
    role = 'user'
    ratio = row / sums
    if (ratio['issue_comments'] > 0.05):
        role = 'tester'
    if (row['pull_requests'] >= 1 and row['commits'] == 0):
        role = 'rejected_contributor'
    if (row['pull_requests'] >= 1 and row['commits'] >= 1):
        role = 'contributor'
    if (ratio['pull_requests'] > 0.10 or ratio['commits'] > 0.01):
        role = 'major_contributor'
    if (ratio['commits'] > 0.02 or ratio['pull_request_comments'] > 0.15):
        role = 'maintainer'
    
    return pd.Series({'login': row['login'], 'role': role})

In [15]:
roles = contributors.apply(classify, axis=1)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   2565             try:
-> 2566                 return libts.get_value_box(s, key)
   2567             except IndexError:

pandas/_libs/tslib.pyx in pandas._libs.tslib.get_value_box()

pandas/_libs/tslib.pyx in pandas._libs.tslib.get_value_box()

TypeError: 'str' object cannot be interpreted as an integer

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-15-e13c68759f88> in <module>()
----> 1 roles = contributors.apply(classify, axis=1)

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
   4875                         f, axis,
   4876                         reduce=reduce,
-> 4877                         ignore_failures=ignore_failures)
   4878             else:
   4879                 return self._apply_broadcast(f, axis)

~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _apply_standard(self, func, axis, ignore_failures, reduce)
   4971             try:
   4972                 for i, v in enumerate(series_gen):
-> 4973                     results[i] = func(v)
   4974                     keys.append(v.name)
   4975             except Exception as e:

<ipython-input-14-44b6084ff7b2> in classify(row)
     13         role = 'maintainer'
     14 
---> 15     return pd.Series({'login': row['login'], 'role': role})
     16 

~/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in __getitem__(self, key)
    621         key = com._apply_if_callable(key, self)
    622         try:
--> 623             result = self.index.get_value(self, key)
    624 
    625             if not is_scalar(result):

~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   2572                     raise InvalidIndexError(key)
   2573                 else:
-> 2574                     raise e1
   2575             except Exception:  # pragma: no cover
   2576                 raise e1

~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   2558         try:
   2559             return self._engine.get_value(s, k,
-> 2560                                           tz=getattr(series.dtype, 'tz', None))
   2561         except KeyError as e1:
   2562             if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: ('login', 'occurred at index 0')

In [11]:
roles


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-6c14d8f62772> in <module>()
----> 1 roles

NameError: name 'roles' is not defined

In [ ]: