notebook.community

Edit and run



In [9]:

    
import augur

# import everything that githubapi.py imports so we can just copy and paste our function later
import json
import re
from dateutil.parser import parse
import pandas as pd
import github
import numpy as np
import datetime
import requests



In [5]:

    
augur_app = augur.Application()('../augur.config.json')
# we only need an instance of the GitHubAPI class
github = augurApp.githubapi()



In [3]:

    
# We are writing this function to be run as part of the GitHub class, so 
# the "self" in this function will be an instance of an augur.GitHubAPI
def lines_deleted(self, owner, repo=None): 
    """
    Additions and deletions each week

    :param owner: The name of the project owner
    :param repo: The name of the repo
    :return: DataFrame with each row being am issue
    """
    # get the data we need from the GitHub API
    # see <project_root>/augur/githubapi.py for examples using the GraphQL API
    url = "https://api.github.com/repos/{}/{}/stats/code_frequency".format(owner, repo)
    json = requests.get(url, auth=('user', self.GITHUB_API_KEY)).json()
    # get our data into a dataframe
    df = pd.DataFrame(json, columns=['date', 'additions', 'deletions'])
    # all timeseries metrics need a 'date' column
    df['date'] = pd.to_datetime(df['date'], unit='s', infer_datetime_format=True)
    # normalize our data
    df['deletions'] = df['deletions'] * -1
    # return the dataframe
    return df



In [4]:

    
## Note that this is an example of how to call the function

lines_deleted(github,"rails", "rails")









    Out[4]:







  
    
      
      date
      additions
      deletions
    
  
  
    
      0
      2004-11-21
      31261
      253
    
    
      1
      2004-11-28
      271
      113
    
    
      2
      2004-12-05
      3436
      981
    
    
      3
      2004-12-12
      3142
      1885
    
    
      4
      2004-12-19
      1476
      315
    
    
      5
      2004-12-26
      3079
      1802
    
    
      6
      2005-01-02
      1542
      764
    
    
      7
      2005-01-09
      4690
      2312
    
    
      8
      2005-01-16
      690
      369
    
    
      9
      2005-01-23
      2089
      1645
    
    
      10
      2005-01-30
      0
      0
    
    
      11
      2005-02-06
      3128
      1116
    
    
      12
      2005-02-13
      24299
      16060
    
    
      13
      2005-02-20
      6461
      4656
    
    
      14
      2005-02-27
      1232
      250
    
    
      15
      2005-03-06
      1851
      807
    
    
      16
      2005-03-13
      641
      240
    
    
      17
      2005-03-20
      4011
      2006
    
    
      18
      2005-03-27
      3819
      2336
    
    
      19
      2005-04-03
      1553
      801
    
    
      20
      2005-04-10
      1093
      340
    
    
      21
      2005-04-17
      3143
      1128
    
    
      22
      2005-04-24
      611
      130
    
    
      23
      2005-05-01
      700
      246
    
    
      24
      2005-05-08
      1535
      52
    
    
      25
      2005-05-15
      662
      184
    
    
      26
      2005-05-22
      777
      403
    
    
      27
      2005-05-29
      865
      311
    
    
      28
      2005-06-05
      690
      365
    
    
      29
      2005-06-12
      1619
      513
    
    
      ...
      ...
      ...
      ...
    
    
      674
      2017-10-22
      1338
      928
    
    
      675
      2017-10-29
      419
      255
    
    
      676
      2017-11-05
      979
      464
    
    
      677
      2017-11-12
      4611
      2801
    
    
      678
      2017-11-19
      688
      195
    
    
      679
      2017-11-26
      1753
      594
    
    
      680
      2017-12-03
      873
      396
    
    
      681
      2017-12-10
      1852
      781
    
    
      682
      2017-12-17
      1102
      189
    
    
      683
      2017-12-24
      411
      344
    
    
      684
      2017-12-31
      327
      210
    
    
      685
      2018-01-07
      959
      739
    
    
      686
      2018-01-14
      739
      216
    
    
      687
      2018-01-21
      3085
      2820
    
    
      688
      2018-01-28
      413
      2157
    
    
      689
      2018-02-04
      1035
      532
    
    
      690
      2018-02-11
      1407
      1238
    
    
      691
      2018-02-18
      4303
      3616
    
    
      692
      2018-02-25
      1273
      1030
    
    
      693
      2018-03-04
      1752
      480
    
    
      694
      2018-03-11
      839
      673
    
    
      695
      2018-03-18
      512
      320
    
    
      696
      2018-03-25
      228
      196
    
    
      697
      2018-04-01
      974
      502
    
    
      698
      2018-04-08
      479
      202
    
    
      699
      2018-04-15
      1290
      779
    
    
      700
      2018-04-22
      2127
      1991
    
    
      701
      2018-04-29
      310
      145
    
    
      702
      2018-05-06
      307
      202
    
    
      703
      2018-05-13
      291
      269
    
  

704 rows × 3 columns



In [9]:

    
stan = augurApp.github()



In [10]:

    
def bus_factor(self, owner, repo, filename=None, start=None, end=None, threshold=50):
        """
        Calculates bus factor by adding up percentages from highest to lowest until they exceed threshold

        :param owner: repo owner username
        :param repo: repo name
        :param filename: optional; file or directory for function to run on
        :param start: optional; start time for analysis
        :param end: optional; end time for analysis
        :param threshold: Default 50;
        """

        if start != None:
            start = parse(start)
        else:
            start = github.GithubObject.NotSet

        if end != None:
            end = parse(end)
        else:
            end = github.GithubObject.NotSet

        commits = self.__api.get_repo((owner + "/" + repo)).get_commits(since=start, until=end)

        if filename != None:
            self.__api.get_repo((owner + "/" + repo)).get_contents(filename)

        df = []

        if filename != None:
            for commit in commits:
                for file in commit.files:
                    if file.filename == filename:
                        try:
                            df.append({'userid': commit.author.id})
                        except AttributeError:
                            pass
                        break
        else:
            for commit in commits:
                try:
                    df.append({'userid': commit.author.id})
                except AttributeError:
                    pass

        df = pd.DataFrame(df)

        df = df.groupby(['userid']).userid.count() / df.groupby(['userid']).userid.count().sum() * 100

        i = 0
        for num in df.cumsum():
            i = i + 1
            if num >= threshold:
                worst = i
                break

        i = 0
        for num in df.sort_values(ascending=True).cumsum():
            i = i + 1
            if num >= threshold:
                best = i
                break

        bus_factor = [{'worst': worst, 'best' : best}]

        return pd.DataFrame(bus_factor)



In [11]:

    
bus_factor(stan, "rails", "rails")









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-11-6402d82c815e> in <module>()
----> 1 bus_factor(stan, "rails", "rails")

<ipython-input-10-faaec3518e98> in bus_factor(self, owner, repo, filename, start, end, threshold)
     14         start = parse(start)
     15     else:
---> 16         start = github.GithubObject.NotSet
     17 
     18     if end != None:

AttributeError: 'GitHubAPI' object has no attribute 'GithubObject'



In [ ]:



In [ ]:



In [ ]:

	date	additions	deletions
0	2004-11-21	31261	253
1	2004-11-28	271	113
2	2004-12-05	3436	981
3	2004-12-12	3142	1885
4	2004-12-19	1476	315
5	2004-12-26	3079	1802
6	2005-01-02	1542	764
7	2005-01-09	4690	2312
8	2005-01-16	690	369
9	2005-01-23	2089	1645
10	2005-01-30	0	0
11	2005-02-06	3128	1116
12	2005-02-13	24299	16060
13	2005-02-20	6461	4656
14	2005-02-27	1232	250
15	2005-03-06	1851	807
16	2005-03-13	641	240
17	2005-03-20	4011	2006
18	2005-03-27	3819	2336
19	2005-04-03	1553	801
20	2005-04-10	1093	340
21	2005-04-17	3143	1128
22	2005-04-24	611	130
23	2005-05-01	700	246
24	2005-05-08	1535	52
25	2005-05-15	662	184
26	2005-05-22	777	403
27	2005-05-29	865	311
28	2005-06-05	690	365
29	2005-06-12	1619	513
...	...	...	...
674	2017-10-22	1338	928
675	2017-10-29	419	255
676	2017-11-05	979	464
677	2017-11-12	4611	2801
678	2017-11-19	688	195
679	2017-11-26	1753	594
680	2017-12-03	873	396
681	2017-12-10	1852	781
682	2017-12-17	1102	189
683	2017-12-24	411	344
684	2017-12-31	327	210
685	2018-01-07	959	739
686	2018-01-14	739	216
687	2018-01-21	3085	2820
688	2018-01-28	413	2157
689	2018-02-04	1035	532
690	2018-02-11	1407	1238
691	2018-02-18	4303	3616
692	2018-02-25	1273	1030
693	2018-03-04	1752	480
694	2018-03-11	839	673
695	2018-03-18	512	320
696	2018-03-25	228	196
697	2018-04-01	974	502
698	2018-04-08	479	202
699	2018-04-15	1290	779
700	2018-04-22	2127	1991
701	2018-04-29	310	145
702	2018-05-06	307	202
703	2018-05-13	291	269