Import modules


In [ ]:
import os
from tinydb import TinyDB
import pandas as pd
import time
from DashPykpi.kpistats import KpiStats, GitURLs, GraphKPIs

In [ ]:
# Collect the repository URLs exposed by the GitURLs class and report how
# many were retrieved.
url_fetch = GitURLs()
urls = url_fetch.urls
print("Retrieved {0} urls.".format(len(urls)))

In [ ]:
# These projects don't exist anymore and break get_repo_stats.
# Each one is removed only if present: a bare urls.remove() raises ValueError
# when the URL is absent, e.g. when this cell is re-run on the same kernel or
# the fetched list no longer contains it.
dead_urls = (
    'https://github.com/UCL/ucl',
    'https://github.com/UCL-RITS/ucl-rits',
)
for dead_url in dead_urls:
    if dead_url in urls:
        # In-place removal, so the list object bound to `urls` stays the same.
        urls.remove(dead_url)
urls

In [ ]:
# needed = [
#     "https://github.com/UCL-RITS",
#     "https://github.com/astro-informatics",
#     "https://github.com/bempp",
#     "https://github.com/DCPROGS",
#     "https://github.com/OPTIMET",
#     "https://github.com/UCL-Biochemical-Engineering",
#     "https://github.com/UCL-CSS",
#     "https://github.com/UCLProgrammingHub",
#     "https://github.com/UCL-HIC",
#     "https://github.com/EIT-team",
#     "https://github.com/Astrophysics-UCL",
#     "https://github.com/Bahler-Lab",
#     "https://github.com/CCPPETMR",
#     "https://github.com/Euclid-OULE3",
#     "https://github.com/FieldingChemistryUCL",
#     "https://github.com/PancreaticProject",
#     "https://github.com/ShapsUcl",
#     "https://github.com/msslsolar",
#     "https://github.com/mssl-plasma-group",
#     "https://github.com/UCL-ShippingGroup"]
    
# need_accounts = [n.split('/')[3] for n in needed]
# got_accounts = [url.split('/')[-2] for url in urls]

In [ ]:
# from github3 import GitHub
# fn = open("secret_key")
# g = GitHub(token=fn.read().split()[0])

# for thing in g.iter_user_repos(login='benlaken'):
#     print(thing)
    
# for need in need_accounts:
#     if not need in got_accounts:
#         print("Missing {0} repos ".format(need, need in got_accounts))
#         print("    Attempting to access - will work if public")
        
#         for thing in g.iter_user_repos(login=need):
#             print("    {0}".format(thing))

In [ ]:
#for url in urls:
#    user_str, repo_str = url.split('/')[-2:]
#    print(url, user_str, repo_str)

In [ ]:
# Run the code to generate a database if needed...
# NOTE(review): presumably this writes the TinyDB file read further down
# ('tinydb_for_KPI.json') — confirm against KpiStats.
test = KpiStats(urls=urls)
test.work(verbose=False, debug=False, status=True)

Plotting section


In [ ]:
# Open the TinyDB store kept in 'tinydb_for_KPI.json' (path is relative to
# the notebook's working directory).
db = TinyDB('tinydb_for_KPI.json')

In [ ]:
# Load every record returned by db.all() into a DataFrame.
df = pd.DataFrame(db.all())

In [ ]:
# Preview the first rows of the loaded records.
df.head()

In [ ]:
#for n in df['repo_owner']:
#    print(n)

In [ ]:
# Tally commits over every repo whose weekly series sums to more than one
# commit, keeping each such repo's weekly series keyed by its name.
tmp_hold = {}
running = 0
num_repos = 0
repo_names = df['repo_name']
for n, weekly in enumerate(df['weekly_commits']):
    repo_total = sum(weekly)
    if not repo_total > 1:
        # Repos with at most one commit are not counted as active.
        continue
    tmp_hold[repo_names[n]] = weekly
    running += repo_total
    num_repos += 1

summary = "{0:3,} commits, in {1} active repos (out of {2} total repos), during past 52 weeks"
print(summary.format(running, num_repos, len(df)))

In [ ]:
import numpy as np

In [ ]:
# Keep the weekly series of repos with more than one commit, then sum them
# along axis 0 to get one combined series.
# NOTE(review): assumes every weekly series has the same length — confirm.
active_weekly = [weekly for weekly in df['weekly_commits'] if sum(weekly) > 1]
tmp = np.array(active_weekly).sum(axis=0)
all_weekly_commits = {"All repos": tmp}

In [ ]:
# Collapse the combined weekly series into bins of `width` consecutive weeks.
width = 4
data = tmp

# Trailing entries that do not fill a whole bin are dropped before reshaping
# into rows of `width` and summing each row.
result = data[:(data.size // width) * width].reshape(-1, width).sum(axis=1)

# Series length divided by bin width (13.0 for 52 weeks and width 4), taken
# from the data rather than the previously hard-coded 52/4.
print(data.size / width)

# Must be the cell's last expression, otherwise the notebook never displays it.
result

In [ ]:
# Display the combined weekly commit series stored under the "All repos" key.
all_weekly_commits

Stacked area chart


In [ ]:
# `show` and `output_notebook` are provided by bokeh.io; bokeh.charts only
# re-exported them and has since been removed from Bokeh, so importing them
# from bokeh.charts fails on current releases.
from bokeh.io import show, output_notebook

# Direct Bokeh output to the notebook.
output_notebook()

In [ ]:
# Build the KPI grapher, request its weekly-activity figure for all repos
# combined (per_repo=False) with bin=4, and display it.
bk = GraphKPIs()
weekly_plot = bk.weekly_activity(per_repo=False, verbose=True, bin=4)
show(weekly_plot)