PySAL Change Log Statistics: Table Generation

This notebook generates the summary statistics for use in the 6-month releases of PySAL, which is now (2017-07) a meta package.

It assumes the subpackages have been git cloned in a directory below the location of this notebook. It also requires network connectivity for some of the reporting.

Run this notebook after gitcount.ipynb


In [1]:
from __future__ import print_function
import os
import json
import re
import sys
import pandas
import subprocess
from subprocess import check_output

import yaml
from datetime import datetime, timedelta

from dateutil.parser import parse
import pytz

utc=pytz.UTC

In [2]:
from datetime import datetime, timedelta
from time import sleep
from subprocess import check_output
try:
    from urllib import urlopen
except:
    from urllib.request import urlopen

import ssl
import yaml

context = ssl._create_unverified_context()

In [3]:
with open('packages.yml') as package_file:
    packages = yaml.load(package_file)

In [4]:
CWD = os.path.abspath(os.path.curdir)

In [5]:
CWD


Out[5]:
'/Users/serge/Dropbox/p/pysal/src/pysal'

Our last main release was 2017-11-03:


In [6]:
start_date = '2017-11-03'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since


Out[6]:
datetime.datetime(2017, 11, 3, 0, 0)

In [7]:
import pickle

In [8]:
issue_details = pickle.load( open( "issue_details.p", "rb" ) )
pull_details = pickle.load( open( "pull_details.p", "rb" ) )

In [9]:
# get dates of tags
with open('subtags', 'r') as tag_name:
        tags = tag_name.readlines()

In [10]:
tag_dates = {}
#root = '/home/serge/Dropbox/p/pysal/src/pysal/tmp/'
root = CWD + "/tmp/"
for record in tags:
    pkg, tag = record.strip().split()
    tag = tag.split('/')[-1]
    pkdir = root+pkg
    cmd = "git log -1 --format=%ai {tag}".format(tag=tag)
    os.chdir(pkdir)
    #print(cmd)
    result = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE)
    tag_string = result.stdout.decode('utf-8')
    tag_date = tag_string.split()[0]
    tag_dates[pkg] = tag_date
    print(pkg, tag, tag_date)

os.chdir(CWD)


libpysal v4.0.1 2018-10-27
esda v2.0.1 2018-11-04
giddy v2.0.0 2018-08-26
inequality v1.0.0 2018-10-31
pointpats v2.0.0 2018-11-13
spaghetti v1.1.0 2018-10-31
mapclassify v2.0.1 2018-10-28
spreg v1.0.4 2018-08-24
spglm v1.0.6 2018-10-31
spint v1.0.5 2019-01-04
splot v1.0.0 2018-11-30
mgwr v2.0.2 2019-01-05
spvcm v0.2.1 2019-01-04

In [11]:
# get issues for a package and filter on tag date


for pkg in tag_dates.keys():
    issues = issue_details[pkg]
    tag_date = utc.localize(parse(tag_dates[pkg]))
    keep = []
    for issue in issues:
        closed = parse(issue['closed_at'])
        if closed <= tag_date:
            keep.append(issue)
    print(pkg, len(issues), len(keep))
    issue_details[pkg] = keep
    keep = []
    pulls = pull_details[pkg]
    for pull in pulls:
        closed = parse(pull['closed_at'])
        if closed <= tag_date:
            keep.append(pull)
    print(pkg, len(pulls), len(keep)) 
    pull_details[pkg] = keep


libpysal 83 79
libpysal 61 57
esda 33 29
esda 22 19
giddy 58 38
giddy 47 31
inequality 11 6
inequality 9 5
pointpats 15 10
pointpats 14 10
spaghetti 205 94
spaghetti 101 34
mapclassify 15 9
mapclassify 13 8
spreg 11 8
spreg 7 6
spglm 15 12
spglm 11 9
spint 18 14
spint 14 11
splot 33 27
splot 25 21
mgwr 41 39
mgwr 26 24
spvcm 4 3
spvcm 2 1

In [12]:
# commits
cmd = ['git', 'log', '--oneline', since_date]

activity = {}
total_commits = 0
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        tag_date = tag_dates[subpackage]
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
        ncommits = len(check_output(cmd_until).splitlines())
        ncommits_total = len(check_output(cmd).splitlines())
        print(subpackage, ncommits_total, ncommits)
        total_commits += ncommits
        activity[subpackage] = ncommits


libpysal 276 276
esda 80 80
giddy 137 134
inequality 36 32
pointpats 40 40
spaghetti 282 282
mapclassify 48 48
splot 247 242
spreg 57 57
spglm 70 69
spint 63 62
mgwr 245 245
spvcm 55 55

In [13]:
cmd_until


Out[13]:
['git', 'log', '--oneline', '--since="2017-11-03"', '--until="2019-01-04"']

In [14]:
identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),
              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),
              'Wei Kang': ('Wei Kang', 'weikang9009'),
              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas')
}

def regularize_identity(string):
    string = string.decode()
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(' '))>1:
        string = string.title()
    return string.lstrip('* ')

In [15]:
author_cmd = ['git', 'log', '--format=* %aN', since_date]

In [16]:
author_cmd.append('blank')

In [17]:
author_cmd


Out[17]:
['git', 'log', '--format=* %aN', '--since="2017-11-03"', 'blank']

In [18]:
from collections import Counter

In [19]:
authors_global = set()
authors = {}
global_counter = Counter()
counters = dict()
cmd = ['git', 'log', '--oneline', since_date]
total_commits = 0
activity = {}
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        ncommits = len(check_output(cmd).splitlines())
        
        
        tag_date = tag_dates[subpackage]
        author_cmd[-1] = '--until="{tag_date}"'.format(tag_date=tag_date)
        #cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]

        
        all_authors = check_output(author_cmd).splitlines()
        counter = Counter([regularize_identity(author) for author in all_authors])
        global_counter += counter
        counters.update({'.'.join((package,subpackage)): counter})
        unique_authors = sorted(set(all_authors))
        authors[subpackage] =  unique_authors
        authors_global.update(unique_authors)
        total_commits += ncommits
        activity[subpackage] = ncommits

In [20]:
authors_global


Out[20]:
{b'* Dani Arribas-Bel',
 b'* Hu Shao',
 b'* James Gaboardi',
 b'* Levi John Wolf',
 b'* Philip Kahn',
 b'* Serge Rey',
 b'* Sergio Rey',
 b'* Stefanie Lumnitz',
 b'* Taylor Oshan',
 b'* Wei Kang',
 b'* Ziqi Li',
 b'* eli knaap',
 b'* jsignell',
 b'* ljwolf',
 b'* serge',
 b'* thequackdaddy',
 b'* weikang9009'}

In [21]:
activity


Out[21]:
{'libpysal': 276,
 'esda': 80,
 'giddy': 137,
 'inequality': 36,
 'pointpats': 40,
 'spaghetti': 282,
 'mapclassify': 48,
 'splot': 247,
 'spreg': 57,
 'spglm': 70,
 'spint': 63,
 'mgwr': 245,
 'spvcm': 55}

In [22]:
counters


Out[22]:
{'lib.libpysal': Counter({'Serge Rey': 101,
          'Levi John Wolf': 112,
          'Wei Kang': 18,
          'James Gaboardi': 8,
          'Eli Knaap': 12,
          'Dani Arribas-Bel': 19,
          'Taylor Oshan': 4,
          'Stefanie Lumnitz': 2}),
 'explore.esda': Counter({'Serge Rey': 50,
          'James Gaboardi': 1,
          'Wei Kang': 8,
          'Levi John Wolf': 11,
          'Stefanie Lumnitz': 10}),
 'explore.giddy': Counter({'Wei Kang': 103,
          'Stefanie Lumnitz': 10,
          'Serge Rey': 18,
          'Levi John Wolf': 1,
          'Eli Knaap': 2}),
 'explore.inequality': Counter({'Wei Kang': 11,
          'Serge Rey': 20,
          'Levi John Wolf': 1}),
 'explore.pointpats': Counter({'Wei Kang': 26,
          'Hu Shao': 5,
          'Levi John Wolf': 7,
          'Serge Rey': 2}),
 'explore.spaghetti': Counter({'James Gaboardi': 278,
          'Levi John Wolf': 3,
          'Wei Kang': 1}),
 'viz.mapclassify': Counter({'Wei Kang': 23,
          'Serge Rey': 22,
          'Levi John Wolf': 3}),
 'viz.splot': Counter({'Dani Arribas-Bel': 6,
          'Stefanie Lumnitz': 218,
          'Levi John Wolf': 9,
          'Serge Rey': 7,
          'Thequackdaddy': 1,
          'Jsignell': 1}),
 'model.spreg': Counter({'Levi John Wolf': 40, 'Serge Rey': 17}),
 'model.spglm': Counter({'Taylor Oshan': 60,
          'Wei Kang': 1,
          'James Gaboardi': 7,
          'Levi John Wolf': 1}),
 'model.spint': Counter({'Wei Kang': 4,
          'Levi John Wolf': 9,
          'Taylor Oshan': 42,
          'James Gaboardi': 7}),
 'model.mgwr': Counter({'Levi John Wolf': 13,
          'James Gaboardi': 12,
          'Wei Kang': 30,
          'Taylor Oshan': 137,
          'Philip Kahn': 2,
          'Ziqi Li': 51}),
 'model.spvcm': Counter({'Levi John Wolf': 52,
          'James Gaboardi': 1,
          'Serge Rey': 2})}

In [23]:
counters


Out[23]:
{'lib.libpysal': Counter({'Serge Rey': 101,
          'Levi John Wolf': 112,
          'Wei Kang': 18,
          'James Gaboardi': 8,
          'Eli Knaap': 12,
          'Dani Arribas-Bel': 19,
          'Taylor Oshan': 4,
          'Stefanie Lumnitz': 2}),
 'explore.esda': Counter({'Serge Rey': 50,
          'James Gaboardi': 1,
          'Wei Kang': 8,
          'Levi John Wolf': 11,
          'Stefanie Lumnitz': 10}),
 'explore.giddy': Counter({'Wei Kang': 103,
          'Stefanie Lumnitz': 10,
          'Serge Rey': 18,
          'Levi John Wolf': 1,
          'Eli Knaap': 2}),
 'explore.inequality': Counter({'Wei Kang': 11,
          'Serge Rey': 20,
          'Levi John Wolf': 1}),
 'explore.pointpats': Counter({'Wei Kang': 26,
          'Hu Shao': 5,
          'Levi John Wolf': 7,
          'Serge Rey': 2}),
 'explore.spaghetti': Counter({'James Gaboardi': 278,
          'Levi John Wolf': 3,
          'Wei Kang': 1}),
 'viz.mapclassify': Counter({'Wei Kang': 23,
          'Serge Rey': 22,
          'Levi John Wolf': 3}),
 'viz.splot': Counter({'Dani Arribas-Bel': 6,
          'Stefanie Lumnitz': 218,
          'Levi John Wolf': 9,
          'Serge Rey': 7,
          'Thequackdaddy': 1,
          'Jsignell': 1}),
 'model.spreg': Counter({'Levi John Wolf': 40, 'Serge Rey': 17}),
 'model.spglm': Counter({'Taylor Oshan': 60,
          'Wei Kang': 1,
          'James Gaboardi': 7,
          'Levi John Wolf': 1}),
 'model.spint': Counter({'Wei Kang': 4,
          'Levi John Wolf': 9,
          'Taylor Oshan': 42,
          'James Gaboardi': 7}),
 'model.mgwr': Counter({'Levi John Wolf': 13,
          'James Gaboardi': 12,
          'Wei Kang': 30,
          'Taylor Oshan': 137,
          'Philip Kahn': 2,
          'Ziqi Li': 51}),
 'model.spvcm': Counter({'Levi John Wolf': 52,
          'James Gaboardi': 1,
          'Serge Rey': 2})}

In [24]:
issues[0]


Out[24]:
{'url': 'https://api.github.com/repos/pysal/spvcm/issues/6',
 'repository_url': 'https://api.github.com/repos/pysal/spvcm',
 'labels_url': 'https://api.github.com/repos/pysal/spvcm/issues/6/labels{/name}',
 'comments_url': 'https://api.github.com/repos/pysal/spvcm/issues/6/comments',
 'events_url': 'https://api.github.com/repos/pysal/spvcm/issues/6/events',
 'html_url': 'https://github.com/pysal/spvcm/pull/6',
 'id': 381923508,
 'node_id': 'MDExOlB1bGxSZXF1ZXN0MjMxNzY1NDM4',
 'number': 6,
 'title': 'enh: swap libpysal in for pysal',
 'user': {'login': 'sjsrey',
  'id': 118042,
  'node_id': 'MDQ6VXNlcjExODA0Mg==',
  'avatar_url': 'https://avatars1.githubusercontent.com/u/118042?v=4',
  'gravatar_id': '',
  'url': 'https://api.github.com/users/sjsrey',
  'html_url': 'https://github.com/sjsrey',
  'followers_url': 'https://api.github.com/users/sjsrey/followers',
  'following_url': 'https://api.github.com/users/sjsrey/following{/other_user}',
  'gists_url': 'https://api.github.com/users/sjsrey/gists{/gist_id}',
  'starred_url': 'https://api.github.com/users/sjsrey/starred{/owner}{/repo}',
  'subscriptions_url': 'https://api.github.com/users/sjsrey/subscriptions',
  'organizations_url': 'https://api.github.com/users/sjsrey/orgs',
  'repos_url': 'https://api.github.com/users/sjsrey/repos',
  'events_url': 'https://api.github.com/users/sjsrey/events{/privacy}',
  'received_events_url': 'https://api.github.com/users/sjsrey/received_events',
  'type': 'User',
  'site_admin': False},
 'labels': [],
 'state': 'closed',
 'locked': False,
 'assignee': None,
 'assignees': [],
 'milestone': None,
 'comments': 13,
 'created_at': '2018-11-18T03:08:27Z',
 'updated_at': '2019-01-04T19:22:46Z',
 'closed_at': '2019-01-04T19:22:46Z',
 'author_association': 'MEMBER',
 'pull_request': {'url': 'https://api.github.com/repos/pysal/spvcm/pulls/6',
  'html_url': 'https://github.com/pysal/spvcm/pull/6',
  'diff_url': 'https://github.com/pysal/spvcm/pull/6.diff',
  'patch_url': 'https://github.com/pysal/spvcm/pull/6.patch'},
 'body': ''}

In [78]:
def get_tag(title, level="##", as_string=True):
    words = title.split()
    tag = "-".join([word.lower() for word in words])
    heading = level+" "+title
    line = "\n\n<a name=\"{}\"></a>".format(tag)
    lines = [line]
    lines.append(heading)
    if as_string:
        return "\n".join(lines)
    else:
        return lines

In [79]:
subs = issue_details.keys()
table = []
txt = []
lines = get_tag("Changes by Package", as_string=False)

for sub in subs:
    total= issue_details[sub]
    pr = pull_details[sub]
    
    row = [sub, activity[sub], len(total), len(pr)]
    table.append(row)
    #line = "\n<a name=\"{sub}\"></a>".format(sub=sub)
    #lines.append(line)
    #line = "### {sub}".format(sub=sub)
    #lines.append(line)
    lines.extend(get_tag(sub.lower(), "###", as_string=False))
    for issue in total:
        url = issue['html_url']
        title = issue['title']
        number = issue['number']
        line = "* {title} [(#{number})]({url})".format(title=title,
                                                     number=number,
                                                     url=url)
        lines.append(line)

In [80]:
table


Out[80]:
[['libpysal', 276, 79, 57],
 ['esda', 80, 29, 19],
 ['giddy', 137, 38, 31],
 ['inequality', 36, 6, 5],
 ['pointpats', 40, 10, 10],
 ['spaghetti', 282, 94, 34],
 ['mapclassify', 48, 9, 8],
 ['splot', 247, 27, 21],
 ['spreg', 57, 8, 6],
 ['spglm', 70, 12, 9],
 ['spint', 63, 14, 11],
 ['mgwr', 245, 39, 24],
 ['spvcm', 55, 3, 1]]

In [81]:
os.chdir(CWD)

import pandas

In [82]:
df = pandas.DataFrame(table, columns=['package', 'commits', 'total issues', 'pulls'])

In [83]:
df.sort_values(['commits','pulls'], ascending=False)\
  .to_html('./commit_table.html', index=None)

In [84]:
df.sum()


Out[84]:
package         libpysalesdagiddyinequalitypointpatsspaghettim...
commits                                                      1636
total issues                                                  368
pulls                                                         236
dtype: object

In [85]:
contributor_table = pandas.DataFrame.from_dict(counters).fillna(0).astype(int).T

In [86]:
contributor_table.to_html('./contributor_table.html')

In [87]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits')


Out[87]:
commits
Dani Arribas-Bel 25
Eli Knaap 14
Hu Shao 5
James Gaboardi 314
Jsignell 1
Levi John Wolf 262
Philip Kahn 2
Serge Rey 239
Stefanie Lumnitz 240
Taylor Oshan 243
Thequackdaddy 1
Wei Kang 225
Ziqi Li 51

In [88]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits').to_html('./commits_by_person.html')

In [89]:
n_commits = df.commits.sum()
n_issues = df['total issues'].sum()
n_pulls = df.pulls.sum()

In [90]:
n_commits


Out[90]:
1636

In [91]:
#Overall, there were 719 commits that closed 240 issues, together with 105 pull requests across 12 packages since our last release on 2017-11-03.
#('{0} Here is a really long '
#           'sentence with {1}').format(3, 5))
line = ('Overall, there were {n_commits} commits that closed {n_issues} issues,'  
    ' together with {n_pulls} pull requests since our last release' 
        ' on {since_date}.\n'.format(n_commits=n_commits, n_issues=n_issues,
        n_pulls=n_pulls, since_date = start_date))

In [92]:
line


Out[92]:
'Overall, there were 1636 commits that closed 368 issues, together with 236 pull requests since our last release on 2017-11-03.\n'

append html files to end of changes.md with tags for toc


In [93]:
with open('changes.md', 'w') as fout:
    fout.write(line)
    fout.write("\n".join(lines))
    fout.write(get_tag("Summary Statistics"))
    
    with open('commit_table.html') as table:
        table_lines = table.readlines()
        title = "Package Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))
                
    with open('commits_by_person.html') as table:
        table_lines = table.readlines()
        title = "Contributor Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))
        
    with open('contributor_table.html') as table:
        table_lines = table.readlines()
        title = "Contributor by Package Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))