In [1]:
import os

import requests
# Fork-listing endpoint of the repository under analysis. Swap in one of the
# commented alternatives below to analyse a different project.
FORKS_URL = "https://api.github.com/repos/authman/DAT210x/forks"
#FORKS_URL = "https://api.github.com/repos/spring-projects/spring-petclinic/forks"
#FORKS_URL = "https://api.github.com/repos/feststelltaste/software-analytics/forks"

# Take the personal access token from the environment so that it never has to
# be pasted into (and accidentally committed with) the notebook.
api_token = os.environ.get("GITHUB_TOKEN", "")

# Only send an Authorization header when a token is actually available; a
# placeholder token would be sent as invalid credentials on every request.
headers = {'Authorization': 'token %s' % api_token} if api_token else {}
def get_json_data(url, per_page=100):
    """Collect the items of every page of a paginated JSON list endpoint.

    Pages are requested one after another, starting at page 1 and using the
    module-level ``headers``, until a page comes back empty or a request
    fails.

    Parameters
    ----------
    url : str
        Endpoint URL. The ``page`` and ``per_page`` query parameters are
        added by this function.
    per_page : int, optional
        Number of items requested per page (default 100).

    Returns
    -------
    list
        The concatenated items of all successfully fetched pages. If a
        request fails, a warning is emitted and the items collected so far
        are returned (best effort), so one bad URL does not abort a crawl
        over many URLs.
    """
    import warnings

    json_data = []
    page = 1
    while True:
        # Let requests build the query string instead of concatenating it by
        # hand, so URLs that already carry a query are handled correctly.
        response = requests.get(
            url,
            params={"page": page, "per_page": per_page},
            headers=headers,
        )
        if not response.ok:
            # Check the status before touching the body: error responses are
            # not guaranteed to contain JSON.
            warnings.warn(
                "Request to {} (page {}) failed with HTTP status {}; "
                "returning the {} item(s) fetched so far.".format(
                    url, page, response.status_code, len(json_data)
                )
            )
            break
        page_items = response.json()
        if not page_items:
            # An empty page marks the end of the listing.
            break
        json_data.extend(page_items)
        page += 1
    return json_data
In [2]:
# Download the complete fork listing of the analysed repository.
json_data = get_json_data(url=FORKS_URL)
# Show only the first 500 characters of the raw result as a sanity check.
repr(json_data)[:500]
Out[2]:
"[{'id': 133137393, 'name': 'DAT210x', 'full_name': 'domi65/DAT210x', 'owner': {'login': 'domi65', 'id': 33196607, 'avatar_url': 'https://avatars1.githubusercontent.com/u/33196607?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/domi65', 'html_url': 'https://github.com/domi65', 'followers_url': 'https://api.github.com/users/domi65/followers', 'following_url': 'https://api.github.com/users/domi65/following{/other_user}', 'gists_url': 'https://api.github.com/users/domi65/gists{/gist_id"
In [3]:
import pandas as pd
try:
    # Public location of the function since pandas 1.0.
    from pandas import json_normalize
except ImportError:
    # Older pandas releases only expose it under pandas.io.json.
    from pandas.io.json import json_normalize

# Flatten the nested fork records into a table: one row per fork, nested
# objects become dotted column names.
fork_data = json_normalize(json_data)
fork_data.head()
Out[3]:
archive_url
archived
assignees_url
blobs_url
branches_url
clone_url
collaborators_url
comments_url
commits_url
compare_url
...
subscribers_url
subscription_url
svn_url
tags_url
teams_url
trees_url
updated_at
url
watchers
watchers_count
0
https://api.github.com/repos/domi65/DAT210x/{a...
False
https://api.github.com/repos/domi65/DAT210x/as...
https://api.github.com/repos/domi65/DAT210x/gi...
https://api.github.com/repos/domi65/DAT210x/br...
https://github.com/domi65/DAT210x.git
https://api.github.com/repos/domi65/DAT210x/co...
https://api.github.com/repos/domi65/DAT210x/co...
https://api.github.com/repos/domi65/DAT210x/co...
https://api.github.com/repos/domi65/DAT210x/co...
...
https://api.github.com/repos/domi65/DAT210x/su...
https://api.github.com/repos/domi65/DAT210x/su...
https://github.com/domi65/DAT210x
https://api.github.com/repos/domi65/DAT210x/tags
https://api.github.com/repos/domi65/DAT210x/teams
https://api.github.com/repos/domi65/DAT210x/gi...
2018-05-12T10:52:37Z
https://api.github.com/repos/domi65/DAT210x
0
0
1
https://api.github.com/repos/zarubakICT/DAT210...
False
https://api.github.com/repos/zarubakICT/DAT210...
https://api.github.com/repos/zarubakICT/DAT210...
https://api.github.com/repos/zarubakICT/DAT210...
https://github.com/zarubakICT/DAT210x.git
https://api.github.com/repos/zarubakICT/DAT210...
https://api.github.com/repos/zarubakICT/DAT210...
https://api.github.com/repos/zarubakICT/DAT210...
https://api.github.com/repos/zarubakICT/DAT210...
...
https://api.github.com/repos/zarubakICT/DAT210...
https://api.github.com/repos/zarubakICT/DAT210...
https://github.com/zarubakICT/DAT210x
https://api.github.com/repos/zarubakICT/DAT210...
https://api.github.com/repos/zarubakICT/DAT210...
https://api.github.com/repos/zarubakICT/DAT210...
2018-05-11T14:02:30Z
https://api.github.com/repos/zarubakICT/DAT210x
0
0
2
https://api.github.com/repos/Rashmi-77/DAT210x...
False
https://api.github.com/repos/Rashmi-77/DAT210x...
https://api.github.com/repos/Rashmi-77/DAT210x...
https://api.github.com/repos/Rashmi-77/DAT210x...
https://github.com/Rashmi-77/DAT210x.git
https://api.github.com/repos/Rashmi-77/DAT210x...
https://api.github.com/repos/Rashmi-77/DAT210x...
https://api.github.com/repos/Rashmi-77/DAT210x...
https://api.github.com/repos/Rashmi-77/DAT210x...
...
https://api.github.com/repos/Rashmi-77/DAT210x...
https://api.github.com/repos/Rashmi-77/DAT210x...
https://github.com/Rashmi-77/DAT210x
https://api.github.com/repos/Rashmi-77/DAT210x...
https://api.github.com/repos/Rashmi-77/DAT210x...
https://api.github.com/repos/Rashmi-77/DAT210x...
2018-05-10T21:41:16Z
https://api.github.com/repos/Rashmi-77/DAT210x
0
0
3
https://api.github.com/repos/jer-win/DAT210x/{...
False
https://api.github.com/repos/jer-win/DAT210x/a...
https://api.github.com/repos/jer-win/DAT210x/g...
https://api.github.com/repos/jer-win/DAT210x/b...
https://github.com/jer-win/DAT210x.git
https://api.github.com/repos/jer-win/DAT210x/c...
https://api.github.com/repos/jer-win/DAT210x/c...
https://api.github.com/repos/jer-win/DAT210x/c...
https://api.github.com/repos/jer-win/DAT210x/c...
...
https://api.github.com/repos/jer-win/DAT210x/s...
https://api.github.com/repos/jer-win/DAT210x/s...
https://github.com/jer-win/DAT210x
https://api.github.com/repos/jer-win/DAT210x/tags
https://api.github.com/repos/jer-win/DAT210x/t...
https://api.github.com/repos/jer-win/DAT210x/g...
2018-05-09T23:47:40Z
https://api.github.com/repos/jer-win/DAT210x
0
0
4
https://api.github.com/repos/joewjh/DAT210x/{a...
False
https://api.github.com/repos/joewjh/DAT210x/as...
https://api.github.com/repos/joewjh/DAT210x/gi...
https://api.github.com/repos/joewjh/DAT210x/br...
https://github.com/joewjh/DAT210x.git
https://api.github.com/repos/joewjh/DAT210x/co...
https://api.github.com/repos/joewjh/DAT210x/co...
https://api.github.com/repos/joewjh/DAT210x/co...
https://api.github.com/repos/joewjh/DAT210x/co...
...
https://api.github.com/repos/joewjh/DAT210x/su...
https://api.github.com/repos/joewjh/DAT210x/su...
https://github.com/joewjh/DAT210x
https://api.github.com/repos/joewjh/DAT210x/tags
https://api.github.com/repos/joewjh/DAT210x/teams
https://api.github.com/repos/joewjh/DAT210x/gi...
2018-05-06T13:45:48Z
https://api.github.com/repos/joewjh/DAT210x
0
0
5 rows × 93 columns
In [4]:
# Number of forks that were retrieved (one row per fork).
fork_data.shape[0]
Out[4]:
368
In [5]:
import json
# Path appended to a fork's API URL to reach its commit listing.
COMMITS_URL_SUFFIX = "/commits"

# One frame per fork: download all commits, flatten them and overwrite the
# 'url' column with the API URL of the fork the commits were fetched from.
commits_dfs = [
    json_normalize(get_json_data(fork_url + COMMITS_URL_SUFFIX)).assign(url=fork_url)
    for fork_url in fork_data['url']
]

# Stack the per-fork frames into a single table and report its row count.
commits_of_all_repos = pd.concat(commits_dfs)
commits_of_all_repos.shape[0]
Out[5]:
7079
In [8]:
# Persist the crawl result so later cells can work from disk instead of
# querying the API again. The row index carries no information (it restarts
# for every fork), so it is left out via the documented boolean flag.
commits_of_all_repos.to_csv("commits_of_all_repos.csv", index=False)
In [10]:
# Reload the stored commit table from disk and preview its first rows.
csv_data = pd.read_csv(filepath_or_buffer="commits_of_all_repos.csv")
csv_data.head(n=5)
Out[10]:
author
author.avatar_url
author.events_url
author.followers_url
author.following_url
author.gists_url
author.gravatar_id
author.html_url
author.id
author.login
...
committer.repos_url
committer.site_admin
committer.starred_url
committer.subscriptions_url
committer.type
committer.url
html_url
parents
sha
url
0
NaN
https://avatars1.githubusercontent.com/u/18032...
https://api.github.com/users/authman/events{/p...
https://api.github.com/users/authman/followers
https://api.github.com/users/authman/following...
https://api.github.com/users/authman/gists{/gi...
NaN
https://github.com/authman
1803297.0
authman
...
https://api.github.com/users/authman/repos
False
https://api.github.com/users/authman/starred{/...
https://api.github.com/users/authman/subscript...
User
https://api.github.com/users/authman
https://github.com/domi65/DAT210x/commit/00ded...
[{'sha': 'c9dbcd9adeac1f50ca3608ab01c01446a6f1...
00dedb8817aadbff7b0296a7790b8d6c8b365516
https://api.github.com/repos/domi65/DAT210x
1
NaN
https://avatars1.githubusercontent.com/u/18032...
https://api.github.com/users/authman/events{/p...
https://api.github.com/users/authman/followers
https://api.github.com/users/authman/following...
https://api.github.com/users/authman/gists{/gi...
NaN
https://github.com/authman
1803297.0
authman
...
https://api.github.com/users/authman/repos
False
https://api.github.com/users/authman/starred{/...
https://api.github.com/users/authman/subscript...
User
https://api.github.com/users/authman
https://github.com/domi65/DAT210x/commit/c9dbc...
[{'sha': '9cb109bc07484cc5198766d1bd99a87f7450...
c9dbcd9adeac1f50ca3608ab01c01446a6f16db4
https://api.github.com/repos/domi65/DAT210x
2
NaN
https://avatars1.githubusercontent.com/u/18032...
https://api.github.com/users/authman/events{/p...
https://api.github.com/users/authman/followers
https://api.github.com/users/authman/following...
https://api.github.com/users/authman/gists{/gi...
NaN
https://github.com/authman
1803297.0
authman
...
https://api.github.com/users/authman/repos
False
https://api.github.com/users/authman/starred{/...
https://api.github.com/users/authman/subscript...
User
https://api.github.com/users/authman
https://github.com/domi65/DAT210x/commit/9cb10...
[{'sha': '36d3bc18dac01b22bc1bbe0c607228806627...
9cb109bc07484cc5198766d1bd99a87f74500bba
https://api.github.com/repos/domi65/DAT210x
3
NaN
https://avatars1.githubusercontent.com/u/18032...
https://api.github.com/users/authman/events{/p...
https://api.github.com/users/authman/followers
https://api.github.com/users/authman/following...
https://api.github.com/users/authman/gists{/gi...
NaN
https://github.com/authman
1803297.0
authman
...
https://api.github.com/users/authman/repos
False
https://api.github.com/users/authman/starred{/...
https://api.github.com/users/authman/subscript...
User
https://api.github.com/users/authman
https://github.com/domi65/DAT210x/commit/36d3b...
[{'sha': '2e1f2df8099a99baddb6757f0f74eab5749c...
36d3bc18dac01b22bc1bbe0c607228806627fb27
https://api.github.com/repos/domi65/DAT210x
4
NaN
https://avatars1.githubusercontent.com/u/18032...
https://api.github.com/users/authman/events{/p...
https://api.github.com/users/authman/followers
https://api.github.com/users/authman/following...
https://api.github.com/users/authman/gists{/gi...
NaN
https://github.com/authman
1803297.0
authman
...
https://api.github.com/users/authman/repos
False
https://api.github.com/users/authman/starred{/...
https://api.github.com/users/authman/subscript...
User
https://api.github.com/users/authman
https://github.com/domi65/DAT210x/commit/2e1f2...
[{'sha': 'f3ac2d018fc74def2b7d6a564b1c93b48a7e...
2e1f2df8099a99baddb6757f0f74eab5749c2185
https://api.github.com/repos/domi65/DAT210x
5 rows × 56 columns
In [162]:
%matplotlib inline
csv_data['commit.author.date'] = pd.to_datetime(csv_data['commit.author.date'])
csv_data = csv_data[csv_data['author.login'] != "authman"]
commits_per_time = csv_data.set_index('commit.author.date').resample("D").count()
commits_per_time
Out[162]:
author
author.avatar_url
author.events_url
author.followers_url
author.following_url
author.gists_url
author.gravatar_id
author.html_url
author.id
author.login
...
committer.repos_url
committer.site_admin
committer.starred_url
committer.subscriptions_url
committer.type
committer.url
html_url
parents
sha
url
commit.author.date
2016-08-27
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
3
3
3
3
2016-08-28
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
3
3
3
3
2016-08-29
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
1
1
1
1
2016-08-30
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-08-31
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
3
3
3
3
2016-09-01
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-02
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
4
4
4
4
2016-09-03
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
2
2
2
2
2016-09-04
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
5
5
5
5
2016-09-05
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
5
5
5
5
2016-09-06
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
3
3
3
3
2016-09-07
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
3
3
3
3
2016-09-08
0
2
2
2
2
2
0
2
2
2
...
2
2
2
2
2
2
3
3
3
3
2016-09-09
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-10
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
1
1
1
1
2016-09-11
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
2
2
2
2
2016-09-12
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
1
1
1
1
2016-09-13
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-14
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-15
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-16
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-17
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-18
0
2
2
2
2
2
0
2
2
2
...
2
2
2
2
2
2
2
2
2
2
2016-09-19
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
1
1
1
1
2016-09-20
0
2
2
2
2
2
0
2
2
2
...
2
2
2
2
2
2
2
2
2
2
2016-09-21
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-22
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-23
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-24
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2016-09-25
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
1
1
1
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
2018-04-11
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-12
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-13
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-14
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
1
1
1
1
2018-04-15
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-16
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-17
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-18
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-19
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-20
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-21
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-22
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-23
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-24
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-25
0
1
1
1
1
1
0
1
1
1
...
1
1
1
1
1
1
1
1
1
1
2018-04-26
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-27
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-28
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-29
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-04-30
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
2
2
2
2
2018-05-01
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-02
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-03
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-04
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-05
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-06
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-07
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-08
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-09
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
2018-05-10
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
1
1
1
1
622 rows × 55 columns
In [73]:
In [161]:
import matplotlib.pyplot as plt
# Cut-off date for the plot. It adopts the timezone of the index (None for a
# naive index) so the comparison below works for both naive and tz-aware data.
course_end = pd.Timestamp("Sep 30, 2016", tz=commits_per_time.index.tz)
# The last eight weekly (freq="W") dates up to the cut-off, drawn as markers.
dates = pd.date_range(end=course_end, periods=8, freq="W")
exam_time = commits_per_time[commits_per_time.index <= course_end]

fig, ax = plt.subplots()
# Draw onto the axes created above rather than relying on the implicit
# "current axes" of the pyplot state machine.
exam_time['url'].plot(ax=ax)
for date in dates:
    ax.axvline(date, color="red", alpha=0.5)
ax.set(
    title="Commits per day in forks up to the cut-off date",
    xlabel="commit author date",
    ylabel="commits per day",
)
plt.show()
Content source: feststelltaste/software-analytics
Similar notebooks: