This notebook generates the summary statistics used in the six-month releases of PySAL, which is now a meta-package.
It assumes the subpackages have been git cloned into a directory below the location of this notebook, and it requires network connectivity for some of the reporting.
Run this notebook after 100-gitcount.ipynb.
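Before running anything below, it can help to confirm that the inputs from 100-gitcount.ipynb are in place. This is a minimal sketch, assuming that notebook writes its pickles next to this one and that package_versions.txt lives in the same directory; the file names are simply the ones read further down.

import os

required = ['issues_closed.p', 'pulls_closed.p', 'package_versions.txt']
missing = [f for f in required if not os.path.exists(f)]
assert not missing, "Run 100-gitcount.ipynb first; missing: {}".format(missing)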
In [1]:
from __future__ import print_function
import os
import json
import re
import sys
import pandas
import subprocess
from subprocess import check_output
#import yaml
from datetime import datetime, timedelta
from dateutil.parser import parse
import pytz
utc=pytz.UTC
try:
    from urllib import urlopen
except ImportError:
    from urllib.request import urlopen
In [2]:
PYSAL_RELEASE = '2020-07-27'
release_date = datetime.strptime(PYSAL_RELEASE+" 0:0:0", "%Y-%m-%d %H:%M:%S")
In [3]:
release_date
Out[3]:
In [4]:
CWD = os.path.abspath(os.path.curdir)
In [5]:
CWD
Out[5]:
In [6]:
start_date = '2020-02-09'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since
Out[6]:
In [7]:
with open('package_versions.txt', 'r') as package_list:
    packages = dict([line.strip().split() for line in package_list.readlines()])
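The parsing above assumes package_versions.txt holds one whitespace-separated "package version" pair per line, for example (illustrative names and versions only, not the actual release pins):

# package_versions.txt, illustrative contents:
#
#   libpysal 4.3.0
#   esda 2.3.0
#
# which parses to {'libpysal': '4.3.0', 'esda': '2.3.0', ...}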
In [8]:
packages
Out[8]:
In [9]:
import pickle
In [10]:
issues_closed = pickle.load(open("issues_closed.p", 'rb'))
pulls_closed = pickle.load(open('pulls_closed.p', 'rb'))
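The cells below only rely on a handful of fields from these records (number, title, closed_at, html_url). A quick spot-check of one entry, assuming the pickles are non-empty dicts of lists keyed by package, as the later loops expect:

# Spot-check one closed-issue record; the field names are the ones used by the
# filtering and report-building cells further down.
sample = next(iter(issues_closed.values()))[0]
print(sample['number'], sample['title'], sample['closed_at'], sample['html_url'])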
In [11]:
type(issues_closed)
Out[11]:
In [12]:
issues_closed.keys()
Out[12]:
In [13]:
from release_info import get_pypi_info, get_github_info, clone_masters
In [14]:
clone_masters()
In [15]:
github_releases = get_github_info()
In [16]:
pypi_releases = get_pypi_info()
In [17]:
pypi_releases['esda']
Out[17]:
In [18]:
# keep only issues closed on or before the PyPI release date
In [19]:
from datetime import datetime
In [20]:
cut_off = pypi_releases['esda']['released']
In [21]:
pysal_date = datetime.strptime('2020-02-09T12:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
#ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
In [22]:
pysal_rel = {'version': 'v2.2.0',
             'release_date': pysal_date}
github_releases['pysal'] = pysal_rel
In [23]:
github_releases
Out[23]:
In [24]:
packages['pysal'] = '2.3.0'
In [25]:
from datetime import datetime
datetime.fromtimestamp(0)
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
final_pulls = {}
final_issues = {}
for package in packages:
    filtered_issues = []
    filtered_pulls = []
    released = github_releases[package]['release_date']
    package_pulls = pulls_closed[package]
    package_issues = issues_closed[package]
    for issue in package_issues:
        #print(issue['number'], issue['title'], issue['closed_at'])
        closed = datetime.strptime(issue['closed_at'], ISO8601)
        if closed <= released:
            filtered_issues.append(issue)
    final_issues[package] = filtered_issues
    for pull in package_pulls:
        #print(pull['number'], pull['title'], pull['closed_at'])
        closed = datetime.strptime(pull['closed_at'], ISO8601)
        if closed <= released:
            filtered_pulls.append(pull)
    final_pulls[package] = filtered_pulls
    print(package, released, len(package_issues), len(filtered_issues), len(package_pulls),
          len(filtered_pulls))
In [26]:
issue_details = final_issues
pull_details = final_pulls
In [27]:
packages
Out[27]:
In [28]:
github_releases['pysal']['release_date'] = release_date
In [29]:
released
Out[29]:
In [30]:
# commits
cmd = ['git', 'log', '--oneline', since_date]
activity = {}
total_commits = 0
tag_dates = {}
for subpackage in packages:
    released = github_releases[subpackage]['release_date']
    tag_date = released.strftime("%Y-%m-%d")
    tag_dates[subpackage] = tag_date
    #print(tag_date)
    #tag_date = tag_dates[subpackage]
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    ncommits = len(check_output(cmd_until).splitlines())
    ncommits_total = len(check_output(cmd).splitlines())
    print(subpackage, ncommits_total, ncommits, tag_date)
    total_commits += ncommits
    activity[subpackage] = ncommits
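For a given subpackage, the command assembled above corresponds to running something like the following from inside tmp/<subpackage> (the until date varies per package), with the commit count being the number of lines printed:

#   git log --oneline --since="2020-02-09" --until="<tag_date>"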
In [31]:
CWD
Out[31]:
In [32]:
# commits
cmd = ['git', 'log', '--oneline', since_date]
activity = {}
total_commits = 0
for subpackage in packages:
    tag_date = tag_dates[subpackage]
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    ncommits = len(check_output(cmd_until).splitlines())
    ncommits_total = len(check_output(cmd).splitlines())
    print(subpackage, ncommits_total, ncommits, tag_date)
    total_commits += ncommits
    activity[subpackage] = ncommits
In [33]:
cmd_until
Out[33]:
In [34]:
identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),
              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),
              'Wei Kang': ('Wei Kang', 'weikang9009'),
              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas'),
              'Antti Härkönen': ('antth', 'Antti Härkönen', 'Antti Härkönen', 'Antth'),
              'Juan C Duque': ('Juan C Duque', 'Juan Duque'),
              'Renan Xavier Cortes': ('Renan Xavier Cortes', 'renanxcortes', 'Renan Xavier Cortes'),
              'Taylor Oshan': ('Tayloroshan', 'Taylor Oshan', 'TaylorOshan'),
              'Tom Gertin': ('@Tomgertin', 'Tom Gertin', '@tomgertin')
              }

def regularize_identity(string):
    string = string.decode()
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(' ')) > 1:
        string = string.title()
    return string.lstrip('* ')
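A couple of hypothetical author lines, in the `* %aN` format produced by the author command defined in the next cell, show what the regularization does; the byte strings are made up, not real log output:

print(regularize_identity(b'* sjsrey'))       # -> Serge Rey
print(regularize_identity(b'* weikang9009'))  # -> Wei Kang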
In [35]:
author_cmd = ['git', 'log', '--format=* %aN', since_date]
In [36]:
author_cmd.append('blank')  # placeholder for the --until argument, overwritten per package below
In [37]:
author_cmd
Out[37]:
In [38]:
from collections import Counter
In [39]:
tag_dates
Out[39]:
In [40]:
authors_global = set()
authors = {}
global_counter = Counter()
counters = dict()
cmd = ['git', 'log', '--oneline', since_date]
total_commits = 0
activity = {}
for subpackage in packages:
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    ncommits = len(check_output(cmd).splitlines())
    tag_date = tag_dates[subpackage]
    tag_date = (datetime.strptime(tag_date, '%Y-%m-%d') + timedelta(days=1)).strftime('%Y-%m-%d')
    author_cmd[-1] = '--until="{tag_date}"'.format(tag_date=tag_date)
    #cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    print(author_cmd)
    all_authors = check_output(author_cmd).splitlines()
    counter = Counter([regularize_identity(author) for author in all_authors])
    global_counter += counter
    counters.update({subpackage: counter})
    unique_authors = sorted(set(all_authors))
    authors[subpackage] = unique_authors
    authors_global.update(unique_authors)
    total_commits += ncommits
    activity[subpackage] = ncommits
In [41]:
counter
Out[41]:
In [42]:
authors_global
Out[42]:
In [43]:
activity
Out[43]:
In [44]:
counters
Out[44]:
In [45]:
counters
Out[45]:
In [46]:
def get_tag(title, level="##", as_string=True):
    words = title.split()
    tag = "-".join([word.lower() for word in words])
    heading = level + " " + title
    line = "\n\n<a name=\"{}\"></a>".format(tag)
    lines = [line]
    lines.append(heading)
    if as_string:
        return "\n".join(lines)
    else:
        return lines
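For example, get_tag produces an HTML anchor followed by a Markdown heading:

get_tag("Summary Statistics")
# -> '\n\n<a name="summary-statistics"></a>\n## Summary Statistics'
get_tag("esda", "###", as_string=False)
# -> ['\n\n<a name="esda"></a>', '### esda']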
In [47]:
subs = issue_details.keys()
table = []
txt = []
lines = get_tag("Changes by Package", as_string=False)
for sub in subs:
    total = issue_details[sub]
    pr = pull_details[sub]
    row = [sub, activity[sub], len(total), len(pr)]
    table.append(row)
    #line = "\n<a name=\"{sub}\"></a>".format(sub=sub)
    #lines.append(line)
    #line = "### {sub}".format(sub=sub)
    #lines.append(line)
    lines.extend(get_tag(sub.lower(), "###", as_string=False))
    for issue in total:
        url = issue['html_url']
        title = issue['title']
        number = issue['number']
        line = "* [#{number}:]({url}) {title} ".format(title=title,
                                                       number=number,
                                                       url=url)
        lines.append(line)
In [48]:
line
Out[48]:
In [49]:
table
Out[49]:
In [50]:
os.chdir(CWD)
import pandas
In [51]:
df = pandas.DataFrame(table, columns=['package', 'commits', 'total issues', 'pulls'])
In [52]:
df.head()
Out[52]:
In [53]:
df.sort_values(['commits','pulls'], ascending=False)\
.to_html('./commit_table.html', index=None)
In [54]:
df.sum()
Out[54]:
In [55]:
contributor_table = pandas.DataFrame.from_dict(counters).fillna(0).astype(int).T
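After the transpose, contributor_table has one row per package and one column per regularized author name, with zero-filled integer commit counts. A tiny, purely illustrative example of the same construction (made-up counts):

demo = {'esda': Counter({'Serge Rey': 3}),
        'pysal': Counter({'Wei Kang': 5, 'Serge Rey': 1})}
pandas.DataFrame.from_dict(demo).fillna(0).astype(int).T
#        Serge Rey  Wei Kang
# esda           3         0
# pysal          1         5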
In [56]:
contributor_table.to_html('./contributor_table.html')
In [57]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits')
Out[57]:
In [58]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits').to_html('./commits_by_person.html')
In [59]:
totals
Out[59]:
In [60]:
n_commits = df.commits.sum()
n_issues = df['total issues'].sum()
n_pulls = df.pulls.sum()
In [61]:
n_commits
Out[61]:
In [62]:
#Overall, there were 719 commits that closed 240 issues, together with 105 pull requests across 12 packages since our last release on 2017-11-03.
#('{0} Here is a really long '
# 'sentence with {1}').format(3, 5))
line = ('Overall, there were {n_commits} commits that closed {n_issues} issues,'
        ' together with {n_pulls} pull requests since our last release'
        ' on {since_date}.\n'.format(n_commits=n_commits, n_issues=n_issues,
                                     n_pulls=n_pulls, since_date=start_date))
In [63]:
line
Out[63]:
In [64]:
with open('changes.md', 'w') as fout:
    fout.write(line)
    fout.write("\n".join(lines))
    fout.write(get_tag("Summary Statistics"))
    with open('commit_table.html') as table:
        table_lines = table.readlines()
        title = "Package Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))
    with open('commits_by_person.html') as table:
        table_lines = table.readlines()
        title = "Contributor Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))
    with open('contributor_table.html') as table:
        table_lines = table.readlines()
        title = "Contributor by Package Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))
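The generated changes.md therefore has roughly this shape (a sketch of the structure assembled above; the package name and issue number are placeholders):

    Overall, there were N commits that closed M issues, ...

    <a name="changes-by-package"></a>
    ## Changes by Package

    <a name="esda"></a>
    ### esda
    * [#NNN:](https://github.com/...) issue title

    <a name="summary-statistics"></a>
    ## Summary Statistics
    ### Package Activity                 (commit_table.html)
    ### Contributor Activity             (commits_by_person.html)
    ### Contributor by Package Activity  (contributor_table.html)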