PySAL Change Log Statistics: Table Generation

This notebook generates the summary statistics for use in the 6-month releases of PySAL, which is now a meta package.

It assumes the subpackages have been git cloned in a directory below the location of this notebook. It also requires network connectivity for some of the reporting.

Run this notebook after gitcount.ipynb



In [5]:

    
from __future__ import print_function
import os
import json
import re
import sys
import pandas
import subprocess
from subprocess import check_output

#import yaml
from datetime import datetime, timedelta

from dateutil.parser import parse
import pytz

utc=pytz.UTC



In [6]:

    
from datetime import datetime, timedelta
from time import sleep
from subprocess import check_output
try:
    from urllib import urlopen
except:
    from urllib.request import urlopen

import ssl
#import yaml

context = ssl._create_unverified_context()

with open('../packages.yml') as package_file: packages = yaml.load(package_file)



In [7]:

    
CWD = os.path.abspath(os.path.curdir)



In [8]:

    
CWD









    Out[8]:





'/home/serge/Dropbox/p/pysal/src/pysal/tools'

Our last main release was 2019-01-30:



In [9]:

    
start_date = '2019-07-29'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since









    Out[9]:





datetime.datetime(2019, 7, 29, 0, 0)



In [11]:

    
with open('package_versions.txt', 'r') as package_list:
    packages = dict([line.strip().split() for line in package_list.readlines()])



In [23]:

    
import pickle
issue_details = pickle.load( open( "issue_details.p", "rb" ) )
pull_details = pickle.load( open( "pull_details.p", "rb" ) )



In [24]:

    
packages









    Out[24]:





{'libpysal': '4.2.1',
 'esda': '2.2.0',
 'giddy': '2.3.0',
 'inequality': '1.0.0',
 'pointpats': '2.1.0',
 'segregation': '1.1.1',
 'spaghetti': '1.4.0',
 'mgwr': '2.1.1',
 'spglm': '1.0.7',
 'spint': '1.0.6',
 'spreg': '1.0.4',
 'spvcm': '0.2.1.post1',
 'tobler': '0.2.0',
 'mapclassify': '2.2.0',
 'splot': '1.1.2'}

get dates of tags

with open('subtags', 'r') as tag_name: tags = tag_name.readlines()



In [25]:

    
tag_dates = {}
#root = '/home/serge/Dropbox/p/pysal/src/pysal/tmp/'
root = CWD + "/tmp/"
#for record in tags:
for pkg in packages:
    #pkg, tag = record.strip().split()
    tag = packages[pkg]
    print(pkg, tag)
    if pkg=='spvcm':
        tag = '0.2.1post1'
    
    #tag = tag.split('/')[-1]
    pkdir = root+pkg
    try:
        cmd = "git log -1 --format=%ai v{tag}".format(tag=tag)
        os.chdir(pkdir)
        result = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE)
    except:
        cmd = "git log -1 --format=%ai {tag}".format(tag=tag)
        os.chdir(pkdir)
        result = subprocess.run(cmd, check=True, shell=True, stdout=subprocess.PIPE)
    tag_string = result.stdout.decode('utf-8')
    tag_date = tag_string.split()[0]
    tag_dates[pkg] = tag_date
    print(pkg, tag, tag_date)

os.chdir(CWD)









    



libpysal 4.2.1
libpysal 4.2.1 2020-01-04
esda 2.2.0
esda 2.2.0 2019-12-18
giddy 2.3.0
giddy 2.3.0 2019-12-20
inequality 1.0.0
inequality 1.0.0 2018-10-31
pointpats 2.1.0
pointpats 2.1.0 2019-07-01
segregation 1.1.1
segregation 1.1.1 2019-07-19
spaghetti 1.4.0
spaghetti 1.4.0 2019-12-31
mgwr 2.1.1
mgwr 2.1.1 2019-07-18
spglm 1.0.7
spglm 1.0.7 2019-07-18
spint 1.0.6
spint 1.0.6 2019-07-22
spreg 1.0.4
spreg 1.0.4 2018-08-24
spvcm 0.2.1.post1
spvcm 0.2.1post1 2019-01-04
tobler 0.2.0
tobler 0.2.0 2020-01-05
mapclassify 2.2.0
mapclassify 2.2.0 2020-01-02
splot 1.1.2
splot 1.1.2 2020-01-18



In [26]:

    
# get issues for a package and filter on tag date


for pkg in tag_dates.keys():
    issues = issue_details[pkg]
    tag_date = utc.localize(parse(tag_dates[pkg]))
    keep = []
    for issue in issues:
        closed = parse(issue['closed_at'])
        if closed <= tag_date:
            keep.append(issue)
    print(pkg, len(issues), len(keep))
    issue_details[pkg] = keep
    keep = []
    pulls = pull_details[pkg]
    for pull in pulls:
        closed = parse(pull['closed_at'])
        if closed <= tag_date:
            keep.append(pull)
    print(pkg, len(pulls), len(keep)) 
    pull_details[pkg] = keep









    



libpysal 61 43
libpysal 35 24
esda 12 8
esda 8 4
giddy 17 10
giddy 12 7
inequality 1 0
inequality 1 0
pointpats 6 0
pointpats 6 0
segregation 22 0
segregation 11 0
spaghetti 92 42
spaghetti 57 25
mgwr 1 0
mgwr 1 0
spglm 1 0
spglm 1 0
spint 1 0
spint 1 0
spreg 4 0
spreg 3 0
spvcm 1 0
spvcm 1 0
tobler 43 38
tobler 34 29
mapclassify 12 11
mapclassify 9 8
splot 17 11
splot 11 8



In [28]:

    
# commits
cmd = ['git', 'log', '--oneline', since_date]

activity = {}
total_commits = 0
for subpackage in packages:
    tag_date = tag_dates[subpackage]
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    ncommits = len(check_output(cmd_until).splitlines())
    ncommits_total = len(check_output(cmd).splitlines())
    print(subpackage, ncommits_total, ncommits, tag_date)
    total_commits += ncommits
    activity[subpackage] = ncommits









    



libpysal 170 170 2020-01-04
esda 21 18 2019-12-18
giddy 31 28 2019-12-20
inequality 0 0 2018-10-31
pointpats 0 0 2019-07-01
segregation 0 0 2019-07-19
spaghetti 172 154 2019-12-31
mgwr 0 0 2019-07-18
spglm 0 0 2019-07-18
spint 0 0 2019-07-22
spreg 0 0 2018-08-24
spvcm 0 0 2019-01-04
tobler 178 170 2020-01-05
mapclassify 32 31 2020-01-02
splot 42 36 2020-01-18



In [29]:

    
cmd_until









    Out[29]:





['git', 'log', '--oneline', '--since="2019-07-29"', '--until="2020-01-18"']



In [30]:

    
identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),
              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),
              'Wei Kang': ('Wei Kang', 'weikang9009'),
              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas'),
              'Antti Härkönen': ( 'antth', 'Antti Härkönen', 'Antti Härkönen', 'Antth'  ),
              'Juan C Duque': ('Juan C Duque', "Juan Duque"),
              'Renan Xavier Cortes': ('Renan Xavier Cortes', 'renanxcortes', 'Renan Xavier Cortes'   ),
              'Taylor Oshan': ('Tayloroshan', 'Taylor Oshan', 'TaylorOshan'),
              'Tom Gertin': ('@Tomgertin', 'Tom Gertin', '@tomgertin')
}

def regularize_identity(string):
    string = string.decode()
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(' '))>1:
        string = string.title()
    return string.lstrip('* ')



In [31]:

    
author_cmd = ['git', 'log', '--format=* %aN', since_date]



In [32]:

    
author_cmd.append('blank')



In [33]:

    
author_cmd









    Out[33]:





['git', 'log', '--format=* %aN', '--since="2019-07-29"', 'blank']



In [34]:

    
from collections import Counter



In [35]:

    
authors_global = set()
authors = {}
global_counter = Counter()
counters = dict()
cmd = ['git', 'log', '--oneline', since_date]
total_commits = 0
activity = {}
for subpackage in packages:
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    ncommits = len(check_output(cmd).splitlines())


    tag_date = tag_dates[subpackage]
    author_cmd[-1] = '--until="{tag_date}"'.format(tag_date=tag_date)
    #cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]


    all_authors = check_output(author_cmd).splitlines()
    counter = Counter([regularize_identity(author) for author in all_authors])
    global_counter += counter
    counters.update({'.'.join((package,subpackage)): counter})
    unique_authors = sorted(set(all_authors))
    authors[subpackage] =  unique_authors
    authors_global.update(unique_authors)
    total_commits += ncommits
    activity[subpackage] = ncommits



In [36]:

    
authors_global









    Out[36]:





{b'* James Gaboardi',
 b'* Leonardo Uieda',
 b'* Levi John Wolf',
 b'* Martin Fleischmann',
 b'* Renan Xavier Cortes',
 b'* Serge Rey',
 b'* Sergio Rey',
 b'* Siddharths8212376',
 b'* Stefanie Lumnitz',
 b'* Wei Kang',
 b'* eli knaap',
 b'* ljwolf',
 b'* rahul799',
 b'* renanxcortes',
 b'* weikang9009'}



In [37]:

    
activity









    Out[37]:





{'libpysal': 170,
 'esda': 21,
 'giddy': 31,
 'inequality': 0,
 'pointpats': 0,
 'segregation': 0,
 'spaghetti': 172,
 'mgwr': 0,
 'spglm': 0,
 'spint': 0,
 'spreg': 0,
 'spvcm': 0,
 'tobler': 178,
 'mapclassify': 32,
 'splot': 42}



In [38]:

    
counters









    Out[38]:





{'libpysal.libpysal': Counter({'Serge Rey': 116,
          'James Gaboardi': 43,
          'Levi John Wolf': 3,
          'Wei Kang': 6,
          'Siddharths8212376': 2}),
 'libpysal.esda': Counter({'Serge Rey': 5,
          'Levi John Wolf': 3,
          'James Gaboardi': 9,
          'Wei Kang': 1}),
 'libpysal.giddy': Counter({'Wei Kang': 23,
          'James Gaboardi': 3,
          'Serge Rey': 2}),
 'libpysal.inequality': Counter(),
 'libpysal.pointpats': Counter(),
 'libpysal.segregation': Counter(),
 'libpysal.spaghetti': Counter({'James Gaboardi': 152, 'Rahul799': 2}),
 'libpysal.mgwr': Counter(),
 'libpysal.spglm': Counter(),
 'libpysal.spint': Counter(),
 'libpysal.spreg': Counter(),
 'libpysal.spvcm': Counter(),
 'libpysal.tobler': Counter({'Eli Knaap': 98,
          'Serge Rey': 31,
          'Renan Xavier Cortes': 41}),
 'libpysal.mapclassify': Counter({'Serge Rey': 21,
          'James Gaboardi': 3,
          'Wei Kang': 4,
          'Martin Fleischmann': 3}),
 'libpysal.splot': Counter({'Stefanie Lumnitz': 24,
          'Serge Rey': 1,
          'James Gaboardi': 7,
          'Martin Fleischmann': 1,
          'Leonardo Uieda': 1,
          'Levi John Wolf': 1,
          'Wei Kang': 1})}



In [39]:

    
counters









    Out[39]:





{'libpysal.libpysal': Counter({'Serge Rey': 116,
          'James Gaboardi': 43,
          'Levi John Wolf': 3,
          'Wei Kang': 6,
          'Siddharths8212376': 2}),
 'libpysal.esda': Counter({'Serge Rey': 5,
          'Levi John Wolf': 3,
          'James Gaboardi': 9,
          'Wei Kang': 1}),
 'libpysal.giddy': Counter({'Wei Kang': 23,
          'James Gaboardi': 3,
          'Serge Rey': 2}),
 'libpysal.inequality': Counter(),
 'libpysal.pointpats': Counter(),
 'libpysal.segregation': Counter(),
 'libpysal.spaghetti': Counter({'James Gaboardi': 152, 'Rahul799': 2}),
 'libpysal.mgwr': Counter(),
 'libpysal.spglm': Counter(),
 'libpysal.spint': Counter(),
 'libpysal.spreg': Counter(),
 'libpysal.spvcm': Counter(),
 'libpysal.tobler': Counter({'Eli Knaap': 98,
          'Serge Rey': 31,
          'Renan Xavier Cortes': 41}),
 'libpysal.mapclassify': Counter({'Serge Rey': 21,
          'James Gaboardi': 3,
          'Wei Kang': 4,
          'Martin Fleischmann': 3}),
 'libpysal.splot': Counter({'Stefanie Lumnitz': 24,
          'Serge Rey': 1,
          'James Gaboardi': 7,
          'Martin Fleischmann': 1,
          'Leonardo Uieda': 1,
          'Levi John Wolf': 1,
          'Wei Kang': 1})}



In [40]:

    
def get_tag(title, level="##", as_string=True):
    words = title.split()
    tag = "-".join([word.lower() for word in words])
    heading = level+" "+title
    line = "\n\n<a name=\"{}\"></a>".format(tag)
    lines = [line]
    lines.append(heading)
    if as_string:
        return "\n".join(lines)
    else:
        return lines



In [41]:

    
subs = issue_details.keys()
table = []
txt = []
lines = get_tag("Changes by Package", as_string=False)

for sub in subs:
    total= issue_details[sub]
    pr = pull_details[sub]
    
    row = [sub, activity[sub], len(total), len(pr)]
    table.append(row)
    #line = "\n<a name=\"{sub}\"></a>".format(sub=sub)
    #lines.append(line)
    #line = "### {sub}".format(sub=sub)
    #lines.append(line)
    lines.extend(get_tag(sub.lower(), "###", as_string=False))
    for issue in total:
        url = issue['html_url']
        title = issue['title']
        number = issue['number']
        line = "* [#{number}:]({url}) {title} ".format(title=title,
                                                     number=number,
                                                     url=url)
        lines.append(line)



In [42]:

    
line









    Out[42]:





'* [#82:](https://github.com/pysal/splot/issues/82) `plot_moran_simulation` weird dimensions '



In [43]:

    
table









    Out[43]:





[['libpysal', 170, 43, 24],
 ['esda', 21, 8, 4],
 ['giddy', 31, 10, 7],
 ['inequality', 0, 0, 0],
 ['pointpats', 0, 0, 0],
 ['segregation', 0, 0, 0],
 ['spaghetti', 172, 42, 25],
 ['mgwr', 0, 0, 0],
 ['spglm', 0, 0, 0],
 ['spint', 0, 0, 0],
 ['spreg', 0, 0, 0],
 ['spvcm', 0, 0, 0],
 ['tobler', 178, 38, 29],
 ['mapclassify', 32, 11, 8],
 ['splot', 42, 11, 8]]



In [44]:

    
os.chdir(CWD)

import pandas



In [45]:

    
df = pandas.DataFrame(table, columns=['package', 'commits', 'total issues', 'pulls'])



In [46]:

    
df.sort_values(['commits','pulls'], ascending=False)\
  .to_html('./commit_table.html', index=None)



In [47]:

    
df.sum()









    Out[47]:





package         libpysalesdagiddyinequalitypointpatssegregatio...
commits                                                       646
total issues                                                  163
pulls                                                         105
dtype: object



In [48]:

    
contributor_table = pandas.DataFrame.from_dict(counters).fillna(0).astype(int).T



In [49]:

    
contributor_table.to_html('./contributor_table.html')



In [50]:

    
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits')









    Out[50]:







  
    
      
      commits
    
  
  
    
      Eli Knaap
      98
    
    
      James Gaboardi
      217
    
    
      Leonardo Uieda
      1
    
    
      Levi John Wolf
      7
    
    
      Martin Fleischmann
      4
    
    
      Rahul799
      2
    
    
      Renan Xavier Cortes
      41
    
    
      Serge Rey
      176
    
    
      Siddharths8212376
      2
    
    
      Stefanie Lumnitz
      24
    
    
      Wei Kang
      35



In [51]:

    
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits').to_html('./commits_by_person.html')



In [52]:

    
totals









    Out[52]:





Serge Rey              176
James Gaboardi         217
Levi John Wolf           7
Wei Kang                35
Siddharths8212376        2
Rahul799                 2
Eli Knaap               98
Renan Xavier Cortes     41
Martin Fleischmann       4
Stefanie Lumnitz        24
Leonardo Uieda           1
dtype: int64



In [53]:

    
n_commits = df.commits.sum()
n_issues = df['total issues'].sum()
n_pulls = df.pulls.sum()



In [54]:

    
n_commits









    Out[54]:





646



In [55]:

    
#Overall, there were 719 commits that closed 240 issues, together with 105 pull requests across 12 packages since our last release on 2017-11-03.
#('{0} Here is a really long '
#           'sentence with {1}').format(3, 5))
line = ('Overall, there were {n_commits} commits that closed {n_issues} issues,'  
    ' together with {n_pulls} pull requests since our last release' 
        ' on {since_date}.\n'.format(n_commits=n_commits, n_issues=n_issues,
        n_pulls=n_pulls, since_date = start_date))



In [56]:

    
line









    Out[56]:





'Overall, there were 646 commits that closed 163 issues, together with 105 pull requests since our last release on 2019-07-29.\n'

append html files to end of changes.md with tags for toc



In [57]:

    
with open('changes.md', 'w') as fout:
    fout.write(line)
    fout.write("\n".join(lines))
    fout.write(get_tag("Summary Statistics"))
    
    with open('commit_table.html') as table:
        table_lines = table.readlines()
        title = "Package Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))
                
    with open('commits_by_person.html') as table:
        table_lines = table.readlines()
        title = "Contributor Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))
        
    with open('contributor_table.html') as table:
        table_lines = table.readlines()
        title = "Contributor by Package Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))



In [ ]:



In [ ]:



In [ ]:

	commits
Eli Knaap	98
James Gaboardi	217
Leonardo Uieda	1
Levi John Wolf	7
Martin Fleischmann	4
Rahul799	2
Renan Xavier Cortes	41
Serge Rey	176
Siddharths8212376	2
Stefanie Lumnitz	24
Wei Kang	35