In [9]:
import augur

# import everything that githubapi.py imports so we can just copy and paste our function later
import json
import re
from dateutil.parser import parse
import pandas as pd
import github
import numpy as np
import datetime
import requests

In [5]:
augur_app = augur.Application()('../augur.config.json')
# we only need an instance of the GitHubAPI class
github = augurApp.githubapi()

In [3]:
# We are writing this function to be run as part of the GitHub class, so 
# the "self" in this function will be an instance of an augur.GitHubAPI
def lines_deleted(self, owner, repo=None): 
    """
    Additions and deletions each week

    :param owner: The name of the project owner
    :param repo: The name of the repo
    :return: DataFrame with each row being am issue
    """
    # get the data we need from the GitHub API
    # see <project_root>/augur/githubapi.py for examples using the GraphQL API
    url = "https://api.github.com/repos/{}/{}/stats/code_frequency".format(owner, repo)
    json = requests.get(url, auth=('user', self.GITHUB_API_KEY)).json()
    # get our data into a dataframe
    df = pd.DataFrame(json, columns=['date', 'additions', 'deletions'])
    # all timeseries metrics need a 'date' column
    df['date'] = pd.to_datetime(df['date'], unit='s', infer_datetime_format=True)
    # normalize our data
    df['deletions'] = df['deletions'] * -1
    # return the dataframe
    return df

In [4]:
## Note that this is an example of how to call the function

lines_deleted(github,"rails", "rails")


Out[4]:
date additions deletions
0 2004-11-21 31261 253
1 2004-11-28 271 113
2 2004-12-05 3436 981
3 2004-12-12 3142 1885
4 2004-12-19 1476 315
5 2004-12-26 3079 1802
6 2005-01-02 1542 764
7 2005-01-09 4690 2312
8 2005-01-16 690 369
9 2005-01-23 2089 1645
10 2005-01-30 0 0
11 2005-02-06 3128 1116
12 2005-02-13 24299 16060
13 2005-02-20 6461 4656
14 2005-02-27 1232 250
15 2005-03-06 1851 807
16 2005-03-13 641 240
17 2005-03-20 4011 2006
18 2005-03-27 3819 2336
19 2005-04-03 1553 801
20 2005-04-10 1093 340
21 2005-04-17 3143 1128
22 2005-04-24 611 130
23 2005-05-01 700 246
24 2005-05-08 1535 52
25 2005-05-15 662 184
26 2005-05-22 777 403
27 2005-05-29 865 311
28 2005-06-05 690 365
29 2005-06-12 1619 513
... ... ... ...
674 2017-10-22 1338 928
675 2017-10-29 419 255
676 2017-11-05 979 464
677 2017-11-12 4611 2801
678 2017-11-19 688 195
679 2017-11-26 1753 594
680 2017-12-03 873 396
681 2017-12-10 1852 781
682 2017-12-17 1102 189
683 2017-12-24 411 344
684 2017-12-31 327 210
685 2018-01-07 959 739
686 2018-01-14 739 216
687 2018-01-21 3085 2820
688 2018-01-28 413 2157
689 2018-02-04 1035 532
690 2018-02-11 1407 1238
691 2018-02-18 4303 3616
692 2018-02-25 1273 1030
693 2018-03-04 1752 480
694 2018-03-11 839 673
695 2018-03-18 512 320
696 2018-03-25 228 196
697 2018-04-01 974 502
698 2018-04-08 479 202
699 2018-04-15 1290 779
700 2018-04-22 2127 1991
701 2018-04-29 310 145
702 2018-05-06 307 202
703 2018-05-13 291 269

704 rows × 3 columns


In [9]:
stan = augurApp.github()

In [10]:
def bus_factor(self, owner, repo, filename=None, start=None, end=None, threshold=50):
        """
        Calculates bus factor by adding up percentages from highest to lowest until they exceed threshold

        :param owner: repo owner username
        :param repo: repo name
        :param filename: optional; file or directory for function to run on
        :param start: optional; start time for analysis
        :param end: optional; end time for analysis
        :param threshold: Default 50;
        """

        if start != None:
            start = parse(start)
        else:
            start = github.GithubObject.NotSet

        if end != None:
            end = parse(end)
        else:
            end = github.GithubObject.NotSet

        commits = self.__api.get_repo((owner + "/" + repo)).get_commits(since=start, until=end)

        if filename != None:
            self.__api.get_repo((owner + "/" + repo)).get_contents(filename)

        df = []

        if filename != None:
            for commit in commits:
                for file in commit.files:
                    if file.filename == filename:
                        try:
                            df.append({'userid': commit.author.id})
                        except AttributeError:
                            pass
                        break
        else:
            for commit in commits:
                try:
                    df.append({'userid': commit.author.id})
                except AttributeError:
                    pass

        df = pd.DataFrame(df)

        df = df.groupby(['userid']).userid.count() / df.groupby(['userid']).userid.count().sum() * 100

        i = 0
        for num in df.cumsum():
            i = i + 1
            if num >= threshold:
                worst = i
                break

        i = 0
        for num in df.sort_values(ascending=True).cumsum():
            i = i + 1
            if num >= threshold:
                best = i
                break

        bus_factor = [{'worst': worst, 'best' : best}]

        return pd.DataFrame(bus_factor)

In [11]:
bus_factor(stan, "rails", "rails")


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-11-6402d82c815e> in <module>()
----> 1 bus_factor(stan, "rails", "rails")

<ipython-input-10-faaec3518e98> in bus_factor(self, owner, repo, filename, start, end, threshold)
     14         start = parse(start)
     15     else:
---> 16         start = github.GithubObject.NotSet
     17 
     18     if end != None:

AttributeError: 'GitHubAPI' object has no attribute 'GithubObject'

In [ ]:


In [ ]:


In [ ]: