In [1]:
import os
import warnings

import requests


# GitHub REST endpoint that lists the forks of the repository under analysis.
FORKS_URL = "https://api.github.com/repos/authman/DAT210x/forks"
# Alternative repositories that can be analysed the same way:
#FORKS_URL = "https://api.github.com/repos/spring-projects/spring-petclinic/forks"
#FORKS_URL = "https://api.github.com/repos/feststelltaste/software-analytics/forks"

# The personal access token is read from the GITHUB_TOKEN environment variable
# so that no credential is ever stored in the notebook itself.
api_token = os.environ.get("GITHUB_TOKEN")
# Without a token no Authorization header is sent at all (a placeholder token
# would be rejected by the API); the requests are then unauthenticated.
headers = {'Authorization': 'token %s' % api_token} if api_token else {}

def get_json_data(url, per_page=100, session=None, request_headers=None, timeout=30):
    """Download every page of a paginated list endpoint and merge the items.

    Pages are requested one after another, starting at page 1, until the
    server answers with an empty page or with an error status.

    Parameters
    ----------
    url : str
        Endpoint URL without the pagination parameters.
    per_page : int, optional
        Number of items requested per page.
    session : object, optional
        Any object with a requests-compatible ``get`` method, for example a
        ``requests.Session``. Defaults to the ``requests`` module.
    request_headers : dict, optional
        HTTP headers to send. Defaults to the module-level ``headers``.
    timeout : float, optional
        Seconds to wait for the server before a request is abandoned.

    Returns
    -------
    list
        The items of all successfully fetched pages, in the order received.
        When a request fails, a warning is emitted and the items collected
        up to that point are returned.
    """
    client = requests if session is None else session
    if request_headers is None:
        request_headers = headers

    json_data = []
    page = 1
    while True:
        response = client.get(
            url,
            # Passing the query as params keeps URLs that already carry a
            # query string valid.
            params={"page": page, "per_page": per_page},
            headers=request_headers,
            timeout=timeout,
        )
        if not response.ok:
            # Best effort: keep what was fetched, but make the failure visible.
            warnings.warn(
                "Request for page {} of {} failed with HTTP status {}; "
                "returning the {} item(s) fetched so far.".format(
                    page, url, response.status_code, len(json_data)
                )
            )
            break

        # Decode only successful responses: an error body is not guaranteed
        # to be valid JSON.
        json_response = response.json()
        if not json_response:
            # An empty page marks the end of the listing.
            break

        json_data.extend(json_response)
        page += 1

    return json_data

In [2]:
# Fetch the complete fork listing, then preview the first 500 characters of
# its textual representation.
json_data = get_json_data(FORKS_URL)
repr(json_data)[:500]


Out[2]:
"[{'id': 133137393, 'name': 'DAT210x', 'full_name': 'domi65/DAT210x', 'owner': {'login': 'domi65', 'id': 33196607, 'avatar_url': 'https://avatars1.githubusercontent.com/u/33196607?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/domi65', 'html_url': 'https://github.com/domi65', 'followers_url': 'https://api.github.com/users/domi65/followers', 'following_url': 'https://api.github.com/users/domi65/following{/other_user}', 'gists_url': 'https://api.github.com/users/domi65/gists{/gist_id"

In [3]:
import pandas as pd

try:
    # Public location of the function since pandas 1.0, which deprecated the
    # pandas.io.json import path.
    from pandas import json_normalize
except ImportError:
    # Older pandas releases expose the function only here.
    from pandas.io.json import json_normalize

# Flatten the nested fork records into a table with one row per fork.
fork_data = json_normalize(json_data)

fork_data.head()


Out[3]:
archive_url archived assignees_url blobs_url branches_url clone_url collaborators_url comments_url commits_url compare_url ... subscribers_url subscription_url svn_url tags_url teams_url trees_url updated_at url watchers watchers_count
0 https://api.github.com/repos/domi65/DAT210x/{a... False https://api.github.com/repos/domi65/DAT210x/as... https://api.github.com/repos/domi65/DAT210x/gi... https://api.github.com/repos/domi65/DAT210x/br... https://github.com/domi65/DAT210x.git https://api.github.com/repos/domi65/DAT210x/co... https://api.github.com/repos/domi65/DAT210x/co... https://api.github.com/repos/domi65/DAT210x/co... https://api.github.com/repos/domi65/DAT210x/co... ... https://api.github.com/repos/domi65/DAT210x/su... https://api.github.com/repos/domi65/DAT210x/su... https://github.com/domi65/DAT210x https://api.github.com/repos/domi65/DAT210x/tags https://api.github.com/repos/domi65/DAT210x/teams https://api.github.com/repos/domi65/DAT210x/gi... 2018-05-12T10:52:37Z https://api.github.com/repos/domi65/DAT210x 0 0
1 https://api.github.com/repos/zarubakICT/DAT210... False https://api.github.com/repos/zarubakICT/DAT210... https://api.github.com/repos/zarubakICT/DAT210... https://api.github.com/repos/zarubakICT/DAT210... https://github.com/zarubakICT/DAT210x.git https://api.github.com/repos/zarubakICT/DAT210... https://api.github.com/repos/zarubakICT/DAT210... https://api.github.com/repos/zarubakICT/DAT210... https://api.github.com/repos/zarubakICT/DAT210... ... https://api.github.com/repos/zarubakICT/DAT210... https://api.github.com/repos/zarubakICT/DAT210... https://github.com/zarubakICT/DAT210x https://api.github.com/repos/zarubakICT/DAT210... https://api.github.com/repos/zarubakICT/DAT210... https://api.github.com/repos/zarubakICT/DAT210... 2018-05-11T14:02:30Z https://api.github.com/repos/zarubakICT/DAT210x 0 0
2 https://api.github.com/repos/Rashmi-77/DAT210x... False https://api.github.com/repos/Rashmi-77/DAT210x... https://api.github.com/repos/Rashmi-77/DAT210x... https://api.github.com/repos/Rashmi-77/DAT210x... https://github.com/Rashmi-77/DAT210x.git https://api.github.com/repos/Rashmi-77/DAT210x... https://api.github.com/repos/Rashmi-77/DAT210x... https://api.github.com/repos/Rashmi-77/DAT210x... https://api.github.com/repos/Rashmi-77/DAT210x... ... https://api.github.com/repos/Rashmi-77/DAT210x... https://api.github.com/repos/Rashmi-77/DAT210x... https://github.com/Rashmi-77/DAT210x https://api.github.com/repos/Rashmi-77/DAT210x... https://api.github.com/repos/Rashmi-77/DAT210x... https://api.github.com/repos/Rashmi-77/DAT210x... 2018-05-10T21:41:16Z https://api.github.com/repos/Rashmi-77/DAT210x 0 0
3 https://api.github.com/repos/jer-win/DAT210x/{... False https://api.github.com/repos/jer-win/DAT210x/a... https://api.github.com/repos/jer-win/DAT210x/g... https://api.github.com/repos/jer-win/DAT210x/b... https://github.com/jer-win/DAT210x.git https://api.github.com/repos/jer-win/DAT210x/c... https://api.github.com/repos/jer-win/DAT210x/c... https://api.github.com/repos/jer-win/DAT210x/c... https://api.github.com/repos/jer-win/DAT210x/c... ... https://api.github.com/repos/jer-win/DAT210x/s... https://api.github.com/repos/jer-win/DAT210x/s... https://github.com/jer-win/DAT210x https://api.github.com/repos/jer-win/DAT210x/tags https://api.github.com/repos/jer-win/DAT210x/t... https://api.github.com/repos/jer-win/DAT210x/g... 2018-05-09T23:47:40Z https://api.github.com/repos/jer-win/DAT210x 0 0
4 https://api.github.com/repos/joewjh/DAT210x/{a... False https://api.github.com/repos/joewjh/DAT210x/as... https://api.github.com/repos/joewjh/DAT210x/gi... https://api.github.com/repos/joewjh/DAT210x/br... https://github.com/joewjh/DAT210x.git https://api.github.com/repos/joewjh/DAT210x/co... https://api.github.com/repos/joewjh/DAT210x/co... https://api.github.com/repos/joewjh/DAT210x/co... https://api.github.com/repos/joewjh/DAT210x/co... ... https://api.github.com/repos/joewjh/DAT210x/su... https://api.github.com/repos/joewjh/DAT210x/su... https://github.com/joewjh/DAT210x https://api.github.com/repos/joewjh/DAT210x/tags https://api.github.com/repos/joewjh/DAT210x/teams https://api.github.com/repos/joewjh/DAT210x/gi... 2018-05-06T13:45:48Z https://api.github.com/repos/joewjh/DAT210x 0 0

5 rows × 93 columns


In [4]:
# Number of forks retrieved (one row per fork).
fork_data.shape[0]


Out[4]:
368

In [5]:
import json
COMMITS_URL_SUFFIX = "/commits"

# One DataFrame of commits per fork that actually returned commits.
commits_dfs = []

for url in fork_data['url']:
    commits_json = get_json_data(url + COMMITS_URL_SUFFIX)
    if not commits_json:
        # Nothing was fetched for this fork (no commits or a failed request);
        # skipping it keeps empty frames out of the concatenation.
        continue
    commit_df = json_normalize(commits_json)
    # Tag every commit with the API URL of the fork it was fetched from.
    # Note: this replaces the commit's own 'url' field.
    commit_df['url'] = url
    commits_dfs.append(commit_df)

if commits_dfs:
    # ignore_index gives the combined frame unique row labels instead of
    # repeating 0..n for every fork.
    commits_of_all_repos = pd.concat(commits_dfs, ignore_index=True)
else:
    # pd.concat raises on an empty list; fall back to an empty frame.
    commits_of_all_repos = pd.DataFrame(columns=['url'])
len(commits_of_all_repos)


Out[5]:
7079

In [8]:
# Persist the collected commits as a CSV snapshot without the row index, so
# later cells can work from the file instead of querying the API again.
commits_of_all_repos.to_csv(path_or_buf="commits_of_all_repos.csv", index=None)

In [10]:
# Load the commit snapshot back from disk and show its first five rows.
csv_data = pd.read_csv(filepath_or_buffer="commits_of_all_repos.csv")
csv_data.head(n=5)


Out[10]:
author author.avatar_url author.events_url author.followers_url author.following_url author.gists_url author.gravatar_id author.html_url author.id author.login ... committer.repos_url committer.site_admin committer.starred_url committer.subscriptions_url committer.type committer.url html_url parents sha url
0 NaN https://avatars1.githubusercontent.com/u/18032... https://api.github.com/users/authman/events{/p... https://api.github.com/users/authman/followers https://api.github.com/users/authman/following... https://api.github.com/users/authman/gists{/gi... NaN https://github.com/authman 1803297.0 authman ... https://api.github.com/users/authman/repos False https://api.github.com/users/authman/starred{/... https://api.github.com/users/authman/subscript... User https://api.github.com/users/authman https://github.com/domi65/DAT210x/commit/00ded... [{'sha': 'c9dbcd9adeac1f50ca3608ab01c01446a6f1... 00dedb8817aadbff7b0296a7790b8d6c8b365516 https://api.github.com/repos/domi65/DAT210x
1 NaN https://avatars1.githubusercontent.com/u/18032... https://api.github.com/users/authman/events{/p... https://api.github.com/users/authman/followers https://api.github.com/users/authman/following... https://api.github.com/users/authman/gists{/gi... NaN https://github.com/authman 1803297.0 authman ... https://api.github.com/users/authman/repos False https://api.github.com/users/authman/starred{/... https://api.github.com/users/authman/subscript... User https://api.github.com/users/authman https://github.com/domi65/DAT210x/commit/c9dbc... [{'sha': '9cb109bc07484cc5198766d1bd99a87f7450... c9dbcd9adeac1f50ca3608ab01c01446a6f16db4 https://api.github.com/repos/domi65/DAT210x
2 NaN https://avatars1.githubusercontent.com/u/18032... https://api.github.com/users/authman/events{/p... https://api.github.com/users/authman/followers https://api.github.com/users/authman/following... https://api.github.com/users/authman/gists{/gi... NaN https://github.com/authman 1803297.0 authman ... https://api.github.com/users/authman/repos False https://api.github.com/users/authman/starred{/... https://api.github.com/users/authman/subscript... User https://api.github.com/users/authman https://github.com/domi65/DAT210x/commit/9cb10... [{'sha': '36d3bc18dac01b22bc1bbe0c607228806627... 9cb109bc07484cc5198766d1bd99a87f74500bba https://api.github.com/repos/domi65/DAT210x
3 NaN https://avatars1.githubusercontent.com/u/18032... https://api.github.com/users/authman/events{/p... https://api.github.com/users/authman/followers https://api.github.com/users/authman/following... https://api.github.com/users/authman/gists{/gi... NaN https://github.com/authman 1803297.0 authman ... https://api.github.com/users/authman/repos False https://api.github.com/users/authman/starred{/... https://api.github.com/users/authman/subscript... User https://api.github.com/users/authman https://github.com/domi65/DAT210x/commit/36d3b... [{'sha': '2e1f2df8099a99baddb6757f0f74eab5749c... 36d3bc18dac01b22bc1bbe0c607228806627fb27 https://api.github.com/repos/domi65/DAT210x
4 NaN https://avatars1.githubusercontent.com/u/18032... https://api.github.com/users/authman/events{/p... https://api.github.com/users/authman/followers https://api.github.com/users/authman/following... https://api.github.com/users/authman/gists{/gi... NaN https://github.com/authman 1803297.0 authman ... https://api.github.com/users/authman/repos False https://api.github.com/users/authman/starred{/... https://api.github.com/users/authman/subscript... User https://api.github.com/users/authman https://github.com/domi65/DAT210x/commit/2e1f2... [{'sha': 'f3ac2d018fc74def2b7d6a564b1c93b48a7e... 2e1f2df8099a99baddb6757f0f74eab5749c2185 https://api.github.com/repos/domi65/DAT210x

5 rows × 56 columns


In [162]:
%matplotlib inline
csv_data['commit.author.date'] = pd.to_datetime(csv_data['commit.author.date'])
csv_data = csv_data[csv_data['author.login'] != "authman"]
commits_per_time = csv_data.set_index('commit.author.date').resample("D").count()
commits_per_time


Out[162]:
author author.avatar_url author.events_url author.followers_url author.following_url author.gists_url author.gravatar_id author.html_url author.id author.login ... committer.repos_url committer.site_admin committer.starred_url committer.subscriptions_url committer.type committer.url html_url parents sha url
commit.author.date
2016-08-27 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 3 3 3 3
2016-08-28 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 3 3 3 3
2016-08-29 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2016-08-30 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-08-31 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 3 3 3 3
2016-09-01 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-02 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 4 4 4 4
2016-09-03 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 2 2 2 2
2016-09-04 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 5 5 5 5
2016-09-05 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 5 5 5 5
2016-09-06 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 3 3 3 3
2016-09-07 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 3 3 3 3
2016-09-08 0 2 2 2 2 2 0 2 2 2 ... 2 2 2 2 2 2 3 3 3 3
2016-09-09 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-10 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2016-09-11 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 2 2 2 2
2016-09-12 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2016-09-13 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-14 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-15 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-17 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-18 0 2 2 2 2 2 0 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
2016-09-19 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2016-09-20 0 2 2 2 2 2 0 2 2 2 ... 2 2 2 2 2 2 2 2 2 2
2016-09-21 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-22 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-23 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-24 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2016-09-25 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2018-04-11 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-13 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-14 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2018-04-15 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-17 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-19 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-20 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-21 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-22 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-23 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-24 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-25 0 1 1 1 1 1 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2018-04-26 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-27 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-28 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-29 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-04-30 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 2 2 2 2
2018-05-01 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-02 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-03 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-04 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-05 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-06 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-07 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-08 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-09 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2018-05-10 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 1 1 1

622 rows × 55 columns


In [73]:


In [161]:
import matplotlib.pyplot as plt

# Cut-off date for the plot; later commits are left out. The timestamp adopts
# the timezone of the index (None for a naive index) because pandas refuses to
# compare timezone-aware with timezone-naive datetimes.
course_end = pd.Timestamp("Sep 30, 2016", tz=commits_per_time.index.tz)
# Eight weekly marker dates ending at or before the cut-off, kept
# timezone-naive for drawing.
dates = pd.date_range(end=course_end.tz_localize(None), periods=8, freq="W")
exam_time = commits_per_time[commits_per_time.index <= course_end]

fig, ax = plt.subplots()

# Draw on the axes created above instead of relying on the implicit
# "current axes" of pyplot.
exam_time['url'].plot(ax=ax)
ax.set(
    title="Commits per day in forks (until course end)",
    xlabel="Commit author date",
    ylabel="Number of commits",
)

# One vertical line per weekly marker date.
for date in dates:
    ax.axvline(date, color="red", alpha=0.5)

plt.show()