In [ ]:
import os
from tinydb import TinyDB
import pandas as pd
import time
from DashPykpi.kpistats import KpiStats, GitURLs, GraphKPIs
Need to cover the repos associated with the GitHub user accounts listed below.
✔ Indicates the repos are covered by the rc-softdev-admin account...
In [ ]:
# Build the list of repository URLs from the GitURLs class (imported from
# DashPykpi.kpistats) and report how many were returned.
url_fetch = GitURLs()
urls = url_fetch.urls
print("Retrieved {0} urls.".format(len(urls)))
In [ ]:
# These projects don't exist anymore and break get_repo_stats.
# Drop each one only if it is present: list.remove() raises ValueError for a
# missing item, which would otherwise make this cell fail when it is re-run
# or when the fetched list no longer contains these URLs.
for dead_url in ('https://github.com/UCL/ucl',
                 'https://github.com/UCL-RITS/ucl-rits'):
    if dead_url in urls:
        urls.remove(dead_url)
# Bare expression so the notebook displays the remaining URLs.
urls
In [ ]:
# needed = [
# "https://github.com/UCL-RITS",
# "https://github.com/astro-informatics",
# "https://github.com/bempp",
# "https://github.com/DCPROGS",
# "https://github.com/OPTIMET",
# "https://github.com/UCL-Biochemical-Engineering",
# "https://github.com/UCL-CSS",
# "https://github.com/UCLProgrammingHub",
# "https://github.com/UCL-HIC",
# "https://github.com/EIT-team",
# "https://github.com/Astrophysics-UCL",
# "https://github.com/Bahler-Lab",
# "https://github.com/CCPPETMR",
# "https://github.com/Euclid-OULE3",
# "https://github.com/FieldingChemistryUCL",
# "https://github.com/PancreaticProject",
# "https://github.com/ShapsUcl",
# "https://github.com/msslsolar",
# "https://github.com/mssl-plasma-group",
# "https://github.com/UCL-ShippingGroup"]
# need_accounts = [n.split('/')[3] for n in needed]
# got_accounts = [url.split('/')[-2] for url in urls]
In [ ]:
# from github3 import GitHub
# fn = open("secret_key")
# g = GitHub(token=fn.read().split()[0])
# for thing in g.iter_user_repos(login='benlaken'):
# print(thing)
# for need in need_accounts:
# if not need in got_accounts:
# print("Missing {0} repos ".format(need, need in got_accounts))
# print(" Attempting to access - will work if public")
# for thing in g.iter_user_repos(login=need):
# print(" {0}".format(thing))
In [ ]:
#for url in urls:
# user_str, repo_str = url.split('/')[-2:]
# print(url, user_str, repo_str)
In [ ]:
# Run the code to generate a database if needed...
# NOTE(review): presumably this is the step that writes the
# 'tinydb_for_KPI.json' file opened in the next cell — confirm.
test = KpiStats(urls=urls)
test.work(verbose=False, debug=False, status=True)
In [ ]:
# Open the TinyDB store kept in 'tinydb_for_KPI.json' (path is relative to the
# notebook's working directory).
db = TinyDB('tinydb_for_KPI.json')
In [ ]:
# Load every record held in the database into a single DataFrame.
df = pd.DataFrame(db.all())
In [ ]:
# Preview the first few rows to sanity-check the loaded columns.
df.head()
In [ ]:
#for n in df['repo_owner']:
# print(n)
In [ ]:
# Tally commits over the "active" repos, i.e. those whose weekly commit
# counts add up to more than one.
running = 0      # total commits across all active repos
num_repos = 0    # number of active repos
tmp_hold = {}    # repo name -> that repo's weekly commit counts
# Walk names and weekly counts in lockstep. Looking the name up with
# df['repo_name'][n] from an enumerate() position is a label lookup, so it is
# only right while df has the default 0..N-1 index.
for repo_name, weekly in zip(df['repo_name'], df['weekly_commits']):
    commits = sum(weekly)  # summed once, reused for the test and the tally
    if commits > 1:
        tmp_hold[repo_name] = weekly
        running += commits
        num_repos += 1
print("{0:3,} commits, in {1} active repos (out of {2} total repos), during past 52 weeks".format(
    running, num_repos, len(df)))
In [ ]:
import numpy as np
In [ ]:
# Gather the weekly commit series of every active repo (more than one commit
# in total), stack them into a 2-D array and add them up week by week.
active_series = [weekly for weekly in df['weekly_commits'] if sum(weekly) > 1]
tmp = np.sum(np.array(active_series), axis=0)
# Single-entry mapping so the combined series carries a display label.
all_weekly_commits = {"All repos": tmp}
In [ ]:
# Collapse the combined weekly series into bins of `width` consecutive weeks.
width = 4
data = tmp
# Keep only as many leading entries as fill whole bins, so reshape() succeeds.
usable = (data.size // width) * width
result = data[:usable].reshape(-1, width).sum(axis=1)
result
print(52/4)
In [ ]:
# Bare expression so the notebook displays the combined weekly series.
all_weekly_commits
In [ ]:
#from bokeh.charts import Area, show, output_notebook, defaults
#import numpy as np
# `show` and `output_notebook` are defined in bokeh.io. bokeh.charts merely
# re-exported them and was removed from Bokeh in 0.12.9, so import them from
# bokeh.io, which works on both older and newer releases.
from bokeh.io import show, output_notebook
# Send Bokeh output to the notebook so later show() calls render inline.
output_notebook()
In [ ]:
# Build the KPI plotting helper and show its weekly-activity figure.
# NOTE(review): per_repo=False presumably aggregates over all repos and bin=4
# presumably groups weeks in fours, mirroring the manual binning earlier in
# this notebook — confirm against GraphKPIs.weekly_activity.
bk = GraphKPIs()
show(bk.weekly_activity(per_repo=False, verbose=True, bin=4))