In [9]:
import augur
# import everything that githubapi.py imports so we can just copy and paste our function later
import json
import re
from dateutil.parser import parse
import pandas as pd
import github
import numpy as np
import datetime
import requests
In [5]:
augur_app = augur.Application('../augur.config.json')
# we only need an instance of the GitHubAPI class
# (named github_api so it does not shadow the PyGithub "github" module imported above)
github_api = augur_app.githubapi()
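The GitHubAPI instance presumably picks its token up from augur.config.json and exposes it as GITHUB_API_KEY, which the function below uses for authentication. A quick, optional sanity check that the key was actually loaded (nothing here is part of Augur's API beyond the GITHUB_API_KEY attribute already used below):
# confirm the instance carries a token before we start making authenticated calls
print(type(github_api).__name__)
print(bool(getattr(github_api, 'GITHUB_API_KEY', None)))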
In [3]:
# We are writing this function to be run as part of the GitHubAPI class, so
# the "self" in this function will be an instance of augur.GitHubAPI
def lines_deleted(self, owner, repo=None):
    """
    Additions and deletions each week

    :param owner: The name of the project owner
    :param repo: The name of the repo
    :return: DataFrame with one row per week of additions and deletions
    """
    # get the data we need from the GitHub API
    # see <project_root>/augur/githubapi.py for examples using the GraphQL API
    url = "https://api.github.com/repos/{}/{}/stats/code_frequency".format(owner, repo)
    data = requests.get(url, auth=('user', self.GITHUB_API_KEY)).json()
    # get our data into a dataframe
    df = pd.DataFrame(data, columns=['date', 'additions', 'deletions'])
    # all timeseries metrics need a 'date' column
    df['date'] = pd.to_datetime(df['date'], unit='s')
    # normalize our data: the API reports deletions as negative numbers
    df['deletions'] = df['deletions'] * -1
    # return the dataframe
    return df
In [4]:
# Note that this is an example of how to call the function while it is still standalone
lines_deleted(github_api, "rails", "rails")
Out[4]:
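For reference, the code_frequency endpoint returns one [week, additions, deletions] entry per week, with the week given as a Unix timestamp and deletions reported as negative numbers. A minimal sketch of that shape and of the normalization performed above, using made-up values rather than a real API response:
sample = [
    [1514764800, 120, -35],  # [week start (Unix seconds), lines added, lines deleted]
    [1515369600, 48, -10],
]
sample_df = pd.DataFrame(sample, columns=['date', 'additions', 'deletions'])
sample_df['date'] = pd.to_datetime(sample_df['date'], unit='s')
sample_df['deletions'] = sample_df['deletions'] * -1  # flip deletions to positive counts
sample_df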
In [9]:
# this metric is written for Augur's GitHub class, which wraps the PyGithub client
stan = augur_app.github()
In [10]:
def bus_factor(self, owner, repo, filename=None, start=None, end=None, threshold=50):
    """
    Calculates bus factor by adding up each contributor's share of commits, from
    highest to lowest (worst case) and lowest to highest (best case), until the
    running total exceeds the threshold

    :param owner: repo owner username
    :param repo: repo name
    :param filename: optional; file or directory for the function to run on
    :param start: optional; start time for analysis
    :param end: optional; end time for analysis
    :param threshold: percentage of commits that must be covered; default 50
    :return: DataFrame with the worst-case and best-case bus factor
    """
    if start is not None:
        start = parse(start)
    else:
        start = github.GithubObject.NotSet

    if end is not None:
        end = parse(end)
    else:
        end = github.GithubObject.NotSet

    # self.__api is the PyGithub client held by Augur's GitHub class
    commits = self.__api.get_repo(owner + "/" + repo).get_commits(since=start, until=end)
    if filename is not None:
        # raises an exception early if the file or directory does not exist
        self.__api.get_repo(owner + "/" + repo).get_contents(filename)

    df = []

    if filename is not None:
        # only count commits that touched the given file or directory
        for commit in commits:
            for file in commit.files:
                if file.filename == filename:
                    try:
                        df.append({'userid': commit.author.id})
                    except AttributeError:
                        # commit author is not mapped to a GitHub user
                        pass
                    break
    else:
        for commit in commits:
            try:
                df.append({'userid': commit.author.id})
            except AttributeError:
                pass

    df = pd.DataFrame(df)

    # each contributor's share of commits, as a percentage
    df = df.groupby(['userid']).userid.count() / df.groupby(['userid']).userid.count().sum() * 100

    # worst case: how few contributors cover the threshold (largest shares first)
    i = 0
    for num in df.sort_values(ascending=False).cumsum():
        i = i + 1
        if num >= threshold:
            worst = i
            break

    # best case: how many contributors it takes when the smallest shares are counted first
    i = 0
    for num in df.sort_values(ascending=True).cumsum():
        i = i + 1
        if num >= threshold:
            best = i
            break

    bus_factor = [{'worst': worst, 'best': best}]

    return pd.DataFrame(bus_factor)
In [11]:
bus_factor(stan, "rails", "rails")
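To see what the cumulative-percentage loops are doing without hitting the GitHub API, here is the same threshold logic applied to a made-up set of contribution shares (the percentages are invented, and the loops are vectorised here purely for brevity):
shares = pd.Series({'user_a': 40, 'user_b': 30, 'user_c': 20, 'user_d': 10})
# worst case: the biggest contributors are counted first, so few people cover 50%
worst = int((shares.sort_values(ascending=False).cumsum() < 50).sum()) + 1
# best case: the smallest contributors are counted first, so more people are needed
best = int((shares.sort_values(ascending=True).cumsum() < 50).sum()) + 1
pd.DataFrame([{'worst': worst, 'best': best}])  # worst = 2, best = 3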