This notebook pulls the summary statistics used in the 6-month releases of PySAL, which is now (as of 2017-07) a meta package.
It assumes the subpackages have been git cloned into a directory below the location of this notebook, and it requires network connectivity for some of the reporting.
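Specifically, the loop further down changes into tmp/&lt;subpackage&gt; for each entry in packages.yml, so the clones are expected under a tmp/ directory next to this notebook. A minimal sanity check of that layout, assuming the whitespace-separated packages.yml format used later in this notebook, might look like this:

import os
import yaml

with open('packages.yml') as f:
    groupings = yaml.safe_load(f)

# every subpackage listed in packages.yml should have a clone under tmp/
missing = [sub for grouping in groupings.values()
           for sub in grouping.split()
           if not os.path.isdir(os.path.join('tmp', sub))]
print('missing clones:', missing or 'none')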
In [1]:
from __future__ import print_function
import os
import json
import re
import sys
import pandas
from datetime import datetime, timedelta
from time import sleep
from subprocess import check_output
try:
    from urllib import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3
import ssl
import yaml
context = ssl._create_unverified_context()
In [2]:
with open('packages.yml') as package_file:
    packages = yaml.load(package_file)
In [3]:
CWD = os.path.abspath(os.path.curdir)
Our last main release was 2017-11-03:
In [4]:
start_date = '2017-11-03'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since
Out[4]:
datetime.datetime(2017, 11, 3, 0, 0)
In [ ]:
In [5]:
from datetime import datetime, timedelta
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"
PER_PAGE = 100
element_pat = re.compile(r'<(.+?)>')
rel_pat = re.compile(r'rel=[\'"](\w+)[\'"]')
In [6]:
def parse_link_header(headers):
    link_s = headers.get('link', '')
    urls = element_pat.findall(link_s)
    rels = rel_pat.findall(link_s)
    d = {}
    for rel, url in zip(rels, urls):
        d[rel] = url
    return d

def get_paged_request(url):
    """Get a full list of results, handling the GitHub API v3's paging."""
    results = []
    while url:
        # print("fetching %s" % url, file=sys.stderr)
        f = urlopen(url)
        results.extend(json.load(f))
        links = parse_link_header(f.headers)
        url = links.get('next')
    return results

def get_issues(project="pysal/pysal", state="closed", pulls=False):
    """Get a list of the issues from the GitHub API."""
    which = 'pulls' if pulls else 'issues'
    url = "https://api.github.com/repos/%s/%s?state=%s&per_page=%i" % (project, which, state, PER_PAGE)
    return get_paged_request(url)

def _parse_datetime(s):
    """Parse dates in the format returned by the GitHub API."""
    if s:
        return datetime.strptime(s, ISO8601)
    else:
        return datetime.fromtimestamp(0)

def issues2dict(issues):
    """Convert a list of issues to a dict, keyed by issue number."""
    idict = {}
    for i in issues:
        idict[i['number']] = i
    return idict

def is_pull_request(issue):
    """Return True if the given issue is a pull request."""
    # issues returned by the v3 API carry a 'pull_request' key when they are PRs
    return 'pull_request' in issue

def issues_closed_since(period=timedelta(days=365), project="pysal/pysal", pulls=False):
    """Get all issues closed since a particular point in time.

    period can either be a datetime object, or a timedelta object.
    In the latter case, it is used as a time before the present.
    """
    which = 'pulls' if pulls else 'issues'
    if isinstance(period, timedelta):
        period = datetime.now() - period
    url = "https://api.github.com/repos/%s/%s?state=closed&sort=updated&since=%s&per_page=%i" % (project, which, period.strftime(ISO8601), PER_PAGE)
    allclosed = get_paged_request(url)
    # allclosed = get_issues(project=project, state='closed', pulls=pulls, since=period)
    filtered = [i for i in allclosed if _parse_datetime(i['closed_at']) > period]
    # exclude rejected PRs
    if pulls:
        filtered = [pr for pr in filtered if pr['merged_at']]
    return filtered

def sorted_by_field(issues, field='closed_at', reverse=False):
    """Return a list of issues sorted by closing date."""
    return sorted(issues, key=lambda i: i[field], reverse=reverse)

def report(issues, show_urls=False):
    """Summary report about a list of issues, printing number and title."""
    # titles may have unicode in them, so we must encode everything below
    if show_urls:
        for i in issues:
            role = 'ghpull' if 'merged_at' in i else 'ghissue'
            print('* :%s:`%d`: %s' % (role, i['number'],
                                      i['title'].encode('utf-8')))
    else:
        for i in issues:
            print('* %d: %s' % (i['number'], i['title'].encode('utf-8')))
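report() is defined above but not exercised in this section; a quick example of how the helpers compose for a single repository (pysal/libpysal is used purely as an illustrative project here, and the call requires network access and the since datetime defined above):

# list closed issues for one repository since the last release, newest first (sketch)
closed = issues_closed_since(since, project='pysal/libpysal', pulls=False)
report(sorted_by_field(closed, reverse=True), show_urls=False)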
In [7]:
all_issues = {}
all_pulls = {}
total_commits = 0
issue_details = {}
pull_details = {}
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        prj = 'pysal/{subpackage}'.format(subpackage=subpackage)
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        # sub_issues = issues_closed_since(project=prj, period=since)
        # sleep(5)
        issues = issues_closed_since(since, project=prj, pulls=False)
        pulls = issues_closed_since(since, project=prj, pulls=True)
        issues = sorted_by_field(issues, reverse=True)
        pulls = sorted_by_field(pulls, reverse=True)
        issue_details[subpackage] = issues
        pull_details[subpackage] = pulls
        n_issues, n_pulls = map(len, (issues, pulls))
        n_total = n_issues + n_pulls
        all_issues[subpackage] = n_total, n_pulls
os.chdir(CWD)
The issues are pulled since the last release date of the meta package. However, each package going into the meta release has a specific release tag that pins the code making it into the release. We don't want to report commits made after a package's tag date, so we have to do some filtering here before building our change-log statistics for the meta package.
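As a rough sketch of that filtering, assuming a tag_dates mapping from subpackage name to its release-tag date (the mapping below is hypothetical; the real dates would come from each subpackage's tag, e.g. via git or the GitHub releases API), and reusing _parse_datetime, issue_details, and pull_details from the cells above:

from datetime import datetime

# hypothetical cutoff dates; real values would be read from each subpackage's release tag
tag_dates = {
    'libpysal': datetime(2018, 4, 1),  # illustrative date only
}

def filter_by_tag_date(records, subpackage, tag_dates):
    """Keep only records closed on or before the subpackage's tag date (sketch)."""
    cutoff = tag_dates.get(subpackage)
    if cutoff is None:
        return records
    return [r for r in records if _parse_datetime(r['closed_at']) <= cutoff]

filtered_issues = {sub: filter_by_tag_date(recs, sub, tag_dates)
                   for sub, recs in issue_details.items()}
filtered_pulls = {sub: filter_by_tag_date(recs, sub, tag_dates)
                  for sub, recs in pull_details.items()}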
For now, let's pickle the issue and pull records so they can be filtered later without hitting the GitHub API again.
In [8]:
import pickle

with open("issue_details.p", "wb") as f:
    pickle.dump(issue_details, f)
with open("pull_details.p", "wb") as f:
    pickle.dump(pull_details, f)
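When the filtering step runs later, possibly in a fresh session, the records can be restored from these pickles instead of hitting the API again; a minimal sketch:

import pickle

with open("issue_details.p", "rb") as f:
    issue_details = pickle.load(f)
with open("pull_details.p", "rb") as f:
    pull_details = pickle.load(f)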
In [ ]: