This notebook generates the summary statistics used in the 6-month releases of PySAL, which became a meta-package in July 2017.
It assumes the subpackages have been git cloned into a tmp/ directory below the location of this notebook, and it requires network connectivity for some of the reporting.
Run this notebook after gitcount.ipynb.
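For context, the cells below expect one clone per subpackage under tmp/, keyed by the names in packages.yml. A minimal sketch of how that layout could be prepared is shown here; the GitHub clone URL pattern is an assumption and is not part of this notebook itself:

import os
import subprocess
import yaml

with open('packages.yml') as package_file:
    packages = yaml.safe_load(package_file)

os.makedirs('tmp', exist_ok=True)
for package, subpackages in packages.items():
    for subpackage in subpackages.split():
        target = os.path.join('tmp', subpackage)
        if not os.path.exists(target):
            # Assumption: the subpackages are hosted under the pysal GitHub organization.
            url = 'https://github.com/pysal/{pkg}.git'.format(pkg=subpackage)
            subprocess.run(['git', 'clone', url, target], check=True)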
In [1]:
from __future__ import print_function
import os
import json
import re
import sys
import pandas
import subprocess
from subprocess import check_output
import yaml
from datetime import datetime, timedelta
from dateutil.parser import parse
import pytz
utc = pytz.UTC
In [2]:
from datetime import datetime, timedelta
from time import sleep
from subprocess import check_output
try:
    from urllib import urlopen
except ImportError:
    from urllib.request import urlopen
import ssl
import yaml
context = ssl._create_unverified_context()
In [3]:
with open('packages.yml') as package_file:
    packages = yaml.safe_load(package_file)
In [4]:
CWD = os.path.abspath(os.path.curdir)
In [5]:
CWD
Out[5]:
Our last main release was 2017-11-03.
In [6]:
start_date = '2017-11-03'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since
Out[6]:
In [7]:
import pickle
In [8]:
# issue and pull request details gathered by gitcount.ipynb
issue_details = pickle.load(open("issue_details.p", "rb"))
pull_details = pickle.load(open("pull_details.p", "rb"))
In [9]:
# get dates of tags
with open('subtags', 'r') as tag_name:
    tags = tag_name.readlines()
In [10]:
tag_dates = {}
#root = '/home/serge/Dropbox/p/pysal/src/pysal/tmp/'
root = CWD + "/tmp/"
for record in tags:
    pkg, tag = record.strip().split()
    tag = tag.split('/')[-1]
    pkdir = root + pkg
    cmd = "git log -1 --format=%ai {tag}".format(tag=tag)
    os.chdir(pkdir)
    #print(cmd)
    result = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE)
    tag_string = result.stdout.decode('utf-8')
    tag_date = tag_string.split()[0]
    tag_dates[pkg] = tag_date
    print(pkg, tag, tag_date)
os.chdir(CWD)
In [11]:
# get issues for a package and filter on tag date
for pkg in tag_dates.keys():
    issues = issue_details[pkg]
    tag_date = utc.localize(parse(tag_dates[pkg]))
    keep = []
    for issue in issues:
        closed = parse(issue['closed_at'])
        if closed <= tag_date:
            keep.append(issue)
    print(pkg, len(issues), len(keep))
    issue_details[pkg] = keep
    keep = []
    pulls = pull_details[pkg]
    for pull in pulls:
        closed = parse(pull['closed_at'])
        if closed <= tag_date:
            keep.append(pull)
    print(pkg, len(pulls), len(keep))
    pull_details[pkg] = keep
In [12]:
# commits
cmd = ['git', 'log', '--oneline', since_date]
activity = {}
total_commits = 0
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        tag_date = tag_dates[subpackage]
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
        ncommits = len(check_output(cmd_until).splitlines())
        ncommits_total = len(check_output(cmd).splitlines())
        print(subpackage, ncommits_total, ncommits)
        total_commits += ncommits
        activity[subpackage] = ncommits
In [13]:
cmd_until
Out[13]:
In [14]:
# map author name/alias variants onto a single canonical identity
identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),
              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),
              'Wei Kang': ('Wei Kang', 'weikang9009'),
              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas')
              }

def regularize_identity(string):
    string = string.decode()
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(' ')) > 1:
        string = string.title()
    return string.lstrip('* ')
In [15]:
author_cmd = ['git', 'log', '--format=* %aN', since_date]
In [16]:
# placeholder argument; it is overwritten with a per-package --until below
author_cmd.append('blank')
In [17]:
author_cmd
Out[17]:
In [18]:
from collections import Counter
In [19]:
authors_global = set()
authors = {}
global_counter = Counter()
counters = dict()
cmd = ['git', 'log', '--oneline', since_date]
total_commits = 0
activity = {}
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        os.chdir(CWD)
        os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
        ncommits = len(check_output(cmd).splitlines())
        tag_date = tag_dates[subpackage]
        author_cmd[-1] = '--until="{tag_date}"'.format(tag_date=tag_date)
        #cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
        all_authors = check_output(author_cmd).splitlines()
        counter = Counter([regularize_identity(author) for author in all_authors])
        global_counter += counter
        counters.update({'.'.join((package, subpackage)): counter})
        unique_authors = sorted(set(all_authors))
        authors[subpackage] = unique_authors
        authors_global.update(unique_authors)
        total_commits += ncommits
        activity[subpackage] = ncommits
In [20]:
authors_global
Out[20]:
In [21]:
activity
Out[21]:
In [22]:
counters
Out[22]:
In [24]:
issues[0]
Out[24]:
In [78]:
def get_tag(title, level="##", as_string=True):
    words = title.split()
    tag = "-".join([word.lower() for word in words])
    heading = level + " " + title
    line = "\n\n<a name=\"{}\"></a>".format(tag)
    lines = [line]
    lines.append(heading)
    if as_string:
        return "\n".join(lines)
    else:
        return lines
In [79]:
subs = issue_details.keys()
table = []
txt = []
lines = get_tag("Changes by Package", as_string=False)
for sub in subs:
    total = issue_details[sub]
    pr = pull_details[sub]
    row = [sub, activity[sub], len(total), len(pr)]
    table.append(row)
    #line = "\n<a name=\"{sub}\"></a>".format(sub=sub)
    #lines.append(line)
    #line = "### {sub}".format(sub=sub)
    #lines.append(line)
    lines.extend(get_tag(sub.lower(), "###", as_string=False))
    for issue in total:
        url = issue['html_url']
        title = issue['title']
        number = issue['number']
        line = "* {title} [(#{number})]({url})".format(title=title,
                                                       number=number,
                                                       url=url)
        lines.append(line)
In [80]:
table
Out[80]:
In [81]:
os.chdir(CWD)
import pandas
In [82]:
df = pandas.DataFrame(table, columns=['package', 'commits', 'total issues', 'pulls'])
In [83]:
df.sort_values(['commits', 'pulls'], ascending=False)\
  .to_html('./commit_table.html', index=None)
In [84]:
df.sum()
Out[84]:
In [85]:
contributor_table = pandas.DataFrame.from_dict(counters).fillna(0).astype(int).T
In [86]:
contributor_table.to_html('./contributor_table.html')
In [87]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits')
Out[87]:
In [88]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits').to_html('./commits_by_person.html')
In [89]:
n_commits = df.commits.sum()
n_issues = df['total issues'].sum()
n_pulls = df.pulls.sum()
In [90]:
n_commits
Out[90]:
In [91]:
#Overall, there were 719 commits that closed 240 issues, together with 105 pull requests across 12 packages since our last release on 2017-11-03.
#('{0} Here is a really long '
# 'sentence with {1}').format(3, 5))
line = ('Overall, there were {n_commits} commits that closed {n_issues} issues,'
        ' together with {n_pulls} pull requests since our last release'
        ' on {since_date}.\n'.format(n_commits=n_commits, n_issues=n_issues,
                                     n_pulls=n_pulls, since_date=start_date))
In [92]:
line
Out[92]:
In [93]:
with open('changes.md', 'w') as fout:
    fout.write(line)
    fout.write("\n".join(lines))
    fout.write(get_tag("Summary Statistics"))
    with open('commit_table.html') as table:
        table_lines = table.readlines()
        title = "Package Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))
    with open('commits_by_person.html') as table:
        table_lines = table.readlines()
        title = "Contributor Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))
    with open('contributor_table.html') as table:
        table_lines = table.readlines()
        title = "Contributor by Package Activity"
        fout.write(get_tag(title, "###"))
        fout.write("\n")
        fout.write("".join(table_lines))