This notebook generates the summary statistics used in the 6-month releases of PySAL, which became a meta-package in July 2017.
It assumes the subpackages have been git cloned into a tmp/ directory below the location of this notebook, and it requires network connectivity for some of the reporting.
Run this notebook after gitcount.ipynb.
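For context, the cells below expect one clone per subpackage under tmp/, keyed by the names in packages.yml. A minimal sketch of how that layout could be prepared is shown here; the GitHub clone URL pattern is an assumption and is not part of this notebook itself:

import os
import subprocess
import yaml

with open('packages.yml') as package_file:
    packages = yaml.safe_load(package_file)

os.makedirs('tmp', exist_ok=True)
for package, subpackages in packages.items():
    for subpackage in subpackages.split():
        target = os.path.join('tmp', subpackage)
        if not os.path.exists(target):
            # Assumption: the subpackages are hosted under the pysal GitHub organization.
            url = 'https://github.com/pysal/{pkg}.git'.format(pkg=subpackage)
            subprocess.run(['git', 'clone', url, target], check=True)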
In [1]:
from __future__ import print_function
import os
import json
import re
import sys
import pandas
import subprocess
from subprocess import check_output
import yaml
from datetime import datetime, timedelta
from dateutil.parser import parse
import pytz
utc = pytz.UTC
In [2]:
from datetime import datetime, timedelta
from time import sleep
from subprocess import check_output
try:
    from urllib import urlopen
except ImportError:
    from urllib.request import urlopen
import ssl
import yaml
context = ssl._create_unverified_context()
In [3]:
with open('packages.yml') as package_file:
    packages = yaml.safe_load(package_file)
In [4]:
CWD = os.path.abspath(os.path.curdir)
In [5]:
CWD
Out[5]:
Our last main release was 2017-11-03.
In [6]:
start_date = '2017-11-03'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since
Out[6]:
In [7]:
import pickle
In [8]:
# issue and pull request details gathered by gitcount.ipynb
issue_details = pickle.load(open("issue_details.p", "rb"))
pull_details = pickle.load(open("pull_details.p", "rb"))
In [9]:
# get dates of tags
with open('subtags', 'r') as tag_name:
    tags = tag_name.readlines()
In [10]:
tag_dates = {}
#root = '/home/serge/Dropbox/p/pysal/src/pysal/tmp/'
root = CWD + "/tmp/"
for record in tags:
    pkg, tag = record.strip().split()
    tag = tag.split('/')[-1]
    pkdir = root + pkg
    cmd = "git log -1 --format=%ai {tag}".format(tag=tag)
    os.chdir(pkdir)
    #print(cmd)
    result = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE)
    tag_string = result.stdout.decode('utf-8')
    tag_date = tag_string.split()[0]
    tag_dates[pkg] = tag_date
    print(pkg, tag, tag_date)
os.chdir(CWD)
In [11]:
# get issues for a package and filter on tag date
for pkg in tag_dates.keys():
    issues = issue_details[pkg]
    tag_date = utc.localize(parse(tag_dates[pkg]))
    keep = []
    for issue in issues:
        closed = parse(issue['closed_at'])
        if closed <= tag_date:
            keep.append(issue)
    print(pkg, len(issues), len(keep))
    issue_details[pkg] = keep
    keep = []
    pulls = pull_details[pkg]
    for pull in pulls:
        closed = parse(pull['closed_at'])
        if closed <= tag_date:
            keep.append(pull)
    print(pkg, len(pulls), len(keep))
    pull_details[pkg] = keep
In [12]:
# commits
cmd = ['git', 'log', '--oneline', since_date]
activity = {}
total_commits = 0
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        tag_date = tag_dates[subpackage]
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
        ncommits = len(check_output(cmd_until).splitlines())
        ncommits_total = len(check_output(cmd).splitlines())
        print(subpackage, ncommits_total, ncommits)
        total_commits += ncommits
        activity[subpackage] = ncommits
In [13]:
cmd_until
Out[13]:
In [14]:
# map author name/alias variants onto a single canonical identity
identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),
              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),
              'Wei Kang': ('Wei Kang', 'weikang9009'),
              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas')
              }

def regularize_identity(string):
    string = string.decode()
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(' ')) > 1:
        string = string.title()
    return string.lstrip('* ')
In [15]:
author_cmd = ['git', 'log', '--format=* %aN', since_date]
In [16]:
# placeholder argument; it is overwritten with a per-package --until below
author_cmd.append('blank')
In [17]:
author_cmd
Out[17]:
In [18]:
from collections import Counter
In [19]:
authors_global = set()
authors = {}
global_counter = Counter()
counters = dict()
cmd = ['git', 'log', '--oneline', since_date]
total_commits = 0
activity = {}
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        ncommits = len(check_output(cmd).splitlines())
        tag_date = tag_dates[subpackage]
        author_cmd[-1] = '--until="{tag_date}"'.format(tag_date=tag_date)
        #cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
        all_authors = check_output(author_cmd).splitlines()
        counter = Counter([regularize_identity(author) for author in all_authors])
        global_counter += counter
        counters.update({'.'.join((package, subpackage)): counter})
        unique_authors = sorted(set(all_authors))
        authors[subpackage] = unique_authors
        authors_global.update(unique_authors)
        total_commits += ncommits
        activity[subpackage] = ncommits
In [20]:
authors_global
Out[20]:
In [21]:
activity
Out[21]:
In [22]:
counters
Out[22]:
In [24]:
issues[0]
Out[24]:
In [78]:
def get_tag(title, level="##", as_string=True):
    words = title.split()
    tag = "-".join([word.lower() for word in words])
    heading = level + " " + title
    line = "\n\n<a name=\"{}\"></a>".format(tag)
    lines = [line]
    lines.append(heading)
    if as_string:
        return "\n".join(lines)
    else:
        return lines
In [79]:
subs = issue_details.keys()
table = []
txt = []
lines = get_tag("Changes by Package", as_string=False)
for sub in subs:
    total = issue_details[sub]
    pr = pull_details[sub]
    row = [sub, activity[sub], len(total), len(pr)]
    table.append(row)
    #line = "\n<a name=\"{sub}\"></a>".format(sub=sub)
    #lines.append(line)
    #line = "### {sub}".format(sub=sub)
    #lines.append(line)
    lines.extend(get_tag(sub.lower(), "###", as_string=False))
    for issue in total:
        url = issue['html_url']
        title = issue['title']
        number = issue['number']
        line = "* {title} [(#{number})]({url})".format(title=title,
                                                       number=number,
                                                       url=url)
        lines.append(line)
In [80]:
table
Out[80]:
In [81]:
os.chdir(CWD)
import pandas
In [82]:
df = pandas.DataFrame(table, columns=['package', 'commits', 'total issues', 'pulls'])
In [83]:
df.sort_values(['commits', 'pulls'], ascending=False)\
  .to_html('./commit_table.html', index=None)
In [84]:
df.sum()
Out[84]:
In [85]:
contributor_table = pandas.DataFrame.from_dict(counters).fillna(0).astype(int).T
In [86]:
contributor_table.to_html('./contributor_table.html')
In [87]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits')
Out[87]:
In [88]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits').to_html('./commits_by_person.html')
In [89]:
n_commits = df.commits.sum()
n_issues = df['total issues'].sum()
n_pulls = df.pulls.sum()
In [90]:
n_commits
Out[90]:
In [91]:
#Overall, there were 719 commits that closed 240 issues, together with 105 pull requests across 12 packages since our last release on 2017-11-03.
#('{0} Here is a really long '
# 'sentence with {1}').format(3, 5))
line = ('Overall, there were {n_commits} commits that closed {n_issues} issues,'
        ' together with {n_pulls} pull requests since our last release'
        ' on {since_date}.\n'.format(n_commits=n_commits, n_issues=n_issues,
                                     n_pulls=n_pulls, since_date=start_date))
In [92]:
line
Out[92]:
In [93]:
with open('changes.md', 'w') as fout:
    fout.write(line)
    fout.write("\n".join(lines))
    fout.write(get_tag("Summary Statistics"))
    with open('commit_table.html') as table:
        table_lines = table.readlines()
        title = "Package Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))
    with open('commits_by_person.html') as table:
        table_lines = table.readlines()
        title = "Contributor Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))
    with open('contributor_table.html') as table:
        table_lines = table.readlines()
        title = "Contributor by Package Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))