PySAL Change Log Statistics: Table Generation

This notebook generates the summary statistics for use in the 6-month releases of PySAL, which is now a meta package.

It assumes the subpackages have been git cloned in a directory below the location of this notebook. It also requires network connectivity for some of the reporting.

Run this notebook after 100-gitcount.ipynb


In [1]:
from __future__ import print_function
import os
import json
import re
import sys
import pandas
import subprocess
from subprocess import check_output

#import yaml
from datetime import datetime, timedelta

from dateutil.parser import parse
import pytz

utc=pytz.UTC

try:
    from urllib import urlopen
except:
    from urllib.request import urlopen

In [2]:
PYSAL_RELEASE = '2020-07-27'
release_date = datetime.strptime(PYSAL_RELEASE+" 0:0:0", "%Y-%m-%d %H:%M:%S")

In [3]:
release_date


Out[3]:
datetime.datetime(2020, 7, 27, 0, 0)

In [4]:
CWD = os.path.abspath(os.path.curdir)

In [5]:
CWD


Out[5]:
'/home/serge/Dropbox/p/pysal/src/pysal/tools'

In [6]:
start_date = '2020-02-09'
since_date = '--since="{start}"'.format(start=start_date)
since_date
since = datetime.strptime(start_date+" 0:0:0", "%Y-%m-%d %H:%M:%S")
since


Out[6]:
datetime.datetime(2020, 2, 9, 0, 0)

In [7]:
with open('package_versions.txt', 'r') as package_list:
    packages = dict([line.strip().split() for line in package_list.readlines()])

In [8]:
packages


Out[8]:
{'libpysal': '4.3.0',
 'access': '1.1.1',
 'esda': '2.3.1',
 'giddy': '2.3.3',
 'inequality': '1.0.0',
 'pointpats': '2.2.0',
 'segregation': '1.3.0',
 'spaghetti': '1.5.0',
 'mgwr': '2.1.1',
 'spglm': '1.0.7',
 'spint': '1.0.6',
 'spreg': '1.1.1',
 'spvcm': '0.3.0',
 'tobler': '0.3.1',
 'mapclassify': '2.3.0',
 'splot': '1.1.3'}

In [9]:
import pickle

In [10]:
issues_closed = pickle.load(open("issues_closed.p", 'rb'))
pulls_closed = pickle.load(open('pulls_closed.p', 'rb'))

In [11]:
type(issues_closed)


Out[11]:
dict

In [12]:
issues_closed.keys()


Out[12]:
dict_keys(['libpysal', 'access', 'esda', 'giddy', 'inequality', 'pointpats', 'segregation', 'spaghetti', 'mgwr', 'spglm', 'spint', 'spreg', 'spvcm', 'tobler', 'mapclassify', 'splot', 'pysal'])

In [13]:
from release_info import get_pypi_info, get_github_info, clone_masters

In [14]:
clone_masters()


libpysal 4.3.0
git clone --branch master https://github.com/pysal/libpysal.git tmp/libpysal
access 1.1.1
git clone --branch master https://github.com/pysal/access.git tmp/access
esda 2.3.1
git clone --branch master https://github.com/pysal/esda.git tmp/esda
giddy 2.3.3
git clone --branch master https://github.com/pysal/giddy.git tmp/giddy
inequality 1.0.0
git clone --branch master https://github.com/pysal/inequality.git tmp/inequality
pointpats 2.2.0
git clone --branch master https://github.com/pysal/pointpats.git tmp/pointpats
segregation 1.3.0
git clone --branch master https://github.com/pysal/segregation.git tmp/segregation
spaghetti 1.5.0
git clone --branch master https://github.com/pysal/spaghetti.git tmp/spaghetti
mgwr 2.1.1
git clone --branch master https://github.com/pysal/mgwr.git tmp/mgwr
spglm 1.0.7
git clone --branch master https://github.com/pysal/spglm.git tmp/spglm
spint 1.0.6
git clone --branch master https://github.com/pysal/spint.git tmp/spint
spreg 1.1.1
git clone --branch master https://github.com/pysal/spreg.git tmp/spreg
spvcm 0.3.0
git clone --branch master https://github.com/pysal/spvcm.git tmp/spvcm
tobler 0.3.1
git clone --branch master https://github.com/pysal/tobler.git tmp/tobler
mapclassify 2.3.0
git clone --branch master https://github.com/pysal/mapclassify.git tmp/mapclassify
splot 1.1.3
git clone --branch master https://github.com/pysal/splot.git tmp/splot
pysal 2.3.0
git clone --branch master https://github.com/pysal/pysal.git tmp/pysal

In [15]:
github_releases = get_github_info()


https://api.github.com/repos/pysal/libpysal/releases/latest
https://api.github.com/repos/pysal/access/releases/latest
https://api.github.com/repos/pysal/esda/releases/latest
https://api.github.com/repos/pysal/giddy/releases/latest
https://api.github.com/repos/pysal/inequality/releases/latest
https://api.github.com/repos/pysal/pointpats/releases/latest
https://api.github.com/repos/pysal/segregation/releases/latest
https://api.github.com/repos/pysal/spaghetti/releases/latest
https://api.github.com/repos/pysal/mgwr/releases/latest
https://api.github.com/repos/pysal/spglm/releases/latest
https://api.github.com/repos/pysal/spint/releases/latest
https://api.github.com/repos/pysal/spreg/releases/latest
https://api.github.com/repos/pysal/spvcm/releases/latest
https://api.github.com/repos/pysal/tobler/releases/latest
https://api.github.com/repos/pysal/mapclassify/releases/latest
https://api.github.com/repos/pysal/splot/releases/latest
https://api.github.com/repos/pysal/pysal/releases/latest

In [16]:
pypi_releases = get_pypi_info()

In [17]:
pypi_releases['esda']


Out[17]:
{'version': '2.3.1', 'released': datetime.datetime(2020, 7, 12, 16, 55, 5)}

In [18]:
# only issues closed before or on release date on pypi

In [19]:
from datetime import datetime

In [20]:
cut_off = pypi_releases['esda']['released']

In [21]:
pysal_date = datetime.strptime('2020-02-09T12:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
#ISO8601 = "%Y-%m-%dT%H:%M:%SZ"

In [22]:
pysal_rel = {'version': 'v2.2.0',
            'release_date': pysal_date}
github_releases['pysal'] = pysal_rel

In [23]:
github_releases


Out[23]:
{'libpysal': {'version': 'v4.3.0',
  'url': 'https://api.github.com/repos/pysal/libpysal/tarball/v4.3.0',
  'release_date': datetime.datetime(2020, 6, 28, 18, 43, 24)},
 'access': {'version': 'v1.1.1',
  'url': 'https://api.github.com/repos/pysal/access/tarball/v1.1.1',
  'release_date': datetime.datetime(2020, 7, 12, 17, 23, 50)},
 'esda': {'version': 'v2.3.1',
  'url': 'https://api.github.com/repos/pysal/esda/tarball/v2.3.1',
  'release_date': datetime.datetime(2020, 7, 12, 16, 36, 4)},
 'giddy': {'version': 'v2.3.3',
  'url': 'https://api.github.com/repos/pysal/giddy/tarball/v2.3.3',
  'release_date': datetime.datetime(2020, 6, 10, 4, 59, 45)},
 'inequality': {'version': 'v1.0.0',
  'url': 'https://api.github.com/repos/pysal/inequality/tarball/v1.0.0',
  'release_date': datetime.datetime(2018, 10, 31, 22, 28, 18)},
 'pointpats': {'version': 'v2.2.0',
  'url': 'https://api.github.com/repos/pysal/pointpats/tarball/v2.2.0',
  'release_date': datetime.datetime(2020, 7, 27, 22, 17, 33)},
 'segregation': {'version': 'v1.3.0',
  'url': 'https://api.github.com/repos/pysal/segregation/tarball/v1.3.0',
  'release_date': datetime.datetime(2020, 7, 1, 5, 10, 37)},
 'spaghetti': {'version': 'v1.5.0',
  'url': 'https://api.github.com/repos/pysal/spaghetti/tarball/v1.5.0',
  'release_date': datetime.datetime(2020, 5, 4, 0, 55, 49)},
 'mgwr': {'version': 'v2.1.1',
  'url': 'https://api.github.com/repos/pysal/mgwr/tarball/v2.1.1',
  'release_date': datetime.datetime(2019, 7, 18, 18, 42, 36)},
 'spglm': {'version': 'v1.0.7',
  'url': 'https://api.github.com/repos/pysal/spglm/tarball/v1.0.7',
  'release_date': datetime.datetime(2019, 7, 18, 17, 29, 39)},
 'spint': {'version': 'v1.0.6',
  'url': 'https://api.github.com/repos/pysal/spint/tarball/v1.0.6',
  'release_date': datetime.datetime(2019, 7, 23, 0, 51, 26)},
 'spreg': {'version': 'v1.1.1',
  'url': 'https://api.github.com/repos/pysal/spreg/tarball/v1.1.1',
  'release_date': datetime.datetime(2020, 2, 24, 22, 4, 26)},
 'spvcm': {'version': 'v0.3.0',
  'url': 'https://api.github.com/repos/pysal/spvcm/tarball/v0.3.0',
  'release_date': datetime.datetime(2020, 2, 2, 19, 42, 39)},
 'tobler': {'version': 'v0.3.1',
  'url': 'https://api.github.com/repos/pysal/tobler/tarball/v0.3.1',
  'release_date': datetime.datetime(2020, 7, 1, 5, 0, 47)},
 'mapclassify': {'version': '2.3.0',
  'url': 'https://api.github.com/repos/pysal/mapclassify/tarball/2.3.0',
  'release_date': datetime.datetime(2020, 6, 13, 19, 11, 28)},
 'splot': {'version': 'v.1.1.3',
  'url': 'https://api.github.com/repos/pysal/splot/tarball/v.1.1.3',
  'release_date': datetime.datetime(2020, 3, 23, 11, 53, 30)},
 'pysal': {'version': 'v2.2.0',
  'release_date': datetime.datetime(2020, 2, 9, 12, 0)}}

In [24]:
packages['pysal'] = '2.3.0'

In [25]:
from datetime import datetime
datetime.fromtimestamp(0)
ISO8601 = "%Y-%m-%dT%H:%M:%SZ"


final_pulls = {}
final_issues = {}
for package in packages:
    filtered_issues = []
    filtered_pulls = []
    released = github_releases[package]['release_date']
    package_pulls = pulls_closed[package]
    package_issues = issues_closed[package]
    for issue in package_issues:
        #print(issue['number'], issue['title'], issue['closed_at'])
        closed = datetime.strptime(issue['closed_at'], ISO8601)
        if closed <= released:
            filtered_issues.append(issue)
    final_issues[package] = filtered_issues
    for pull in package_pulls:
        #print(pull['number'], pull['title'], pull['closed_at'])
        closed = datetime.strptime(pull['closed_at'], ISO8601)
        if closed <= released:
            filtered_pulls.append(pull)
    final_pulls[package] = filtered_pulls
    print(package, released, len(package_issues), len(filtered_issues), len(package_pulls),
         len(filtered_pulls))


libpysal 2020-06-28 18:43:24 70 55 33 25
access 2020-07-12 17:23:50 6 6 5 5
esda 2020-07-12 16:36:04 30 27 18 16
giddy 2020-06-10 04:59:45 31 28 27 25
inequality 2018-10-31 22:28:18 0 0 0 0
pointpats 2020-07-27 22:17:33 11 9 8 7
segregation 2020-07-01 05:10:37 5 5 1 1
spaghetti 2020-05-04 00:55:49 99 78 65 47
mgwr 2019-07-18 18:42:36 2 0 2 0
spglm 2019-07-18 17:29:39 1 0 0 0
spint 2019-07-23 00:51:26 0 0 0 0
spreg 2020-02-24 22:04:26 16 3 8 3
spvcm 2020-02-02 19:42:39 0 0 0 0
tobler 2020-07-01 05:00:47 27 26 17 16
mapclassify 2020-06-13 19:11:28 32 32 17 17
splot 2020-03-23 11:53:30 11 4 8 4
pysal 2020-02-09 12:00:00 60 0 10 0

In [26]:
issue_details = final_issues
pull_details = final_pulls

In [27]:
packages


Out[27]:
{'libpysal': '4.3.0',
 'access': '1.1.1',
 'esda': '2.3.1',
 'giddy': '2.3.3',
 'inequality': '1.0.0',
 'pointpats': '2.2.0',
 'segregation': '1.3.0',
 'spaghetti': '1.5.0',
 'mgwr': '2.1.1',
 'spglm': '1.0.7',
 'spint': '1.0.6',
 'spreg': '1.1.1',
 'spvcm': '0.3.0',
 'tobler': '0.3.1',
 'mapclassify': '2.3.0',
 'splot': '1.1.3',
 'pysal': '2.3.0'}

In [28]:
github_releases['pysal']['release_date'] = release_date

In [29]:
released


Out[29]:
datetime.datetime(2020, 2, 9, 12, 0)

In [30]:
# commits
cmd = ['git', 'log', '--oneline', since_date]

activity = {}
total_commits = 0
tag_dates = {}
for subpackage in packages:
    released = github_releases[subpackage]['release_date']
    tag_date = released.strftime("%Y-%m-%d")
    tag_dates[subpackage] = tag_date
    #print(tag_date)
    #tag_date = tag_dates[subpackage]
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    ncommits = len(check_output(cmd_until).splitlines())
    ncommits_total = len(check_output(cmd).splitlines())
    print(subpackage, ncommits_total, ncommits, tag_date)
    total_commits += ncommits
    activity[subpackage] = ncommits


libpysal 190 173 2020-06-28
access 79 77 2020-07-12
esda 160 156 2020-07-12
giddy 103 103 2020-06-10
inequality 0 0 2018-10-31
pointpats 100 100 2020-07-27
segregation 24 24 2020-07-01
spaghetti 412 359 2020-05-04
mgwr 8 0 2019-07-18
spglm 0 0 2019-07-18
spint 0 0 2019-07-23
spreg 23 12 2020-02-24
spvcm 0 0 2020-02-02
tobler 64 62 2020-07-01
mapclassify 114 114 2020-06-13
splot 17 14 2020-03-23
pysal 49 49 2020-07-27

In [ ]:


In [31]:
CWD


Out[31]:
'/home/serge/Dropbox/p/pysal/src/pysal/tools'

In [32]:
# commits
cmd = ['git', 'log', '--oneline', since_date]

activity = {}
total_commits = 0
for subpackage in packages:
    tag_date = tag_dates[subpackage]
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    ncommits = len(check_output(cmd_until).splitlines())
    ncommits_total = len(check_output(cmd).splitlines())
    print(subpackage, ncommits_total, ncommits, tag_date)
    total_commits += ncommits
    activity[subpackage] = ncommits


libpysal 190 173 2020-06-28
access 79 77 2020-07-12
esda 160 156 2020-07-12
giddy 103 103 2020-06-10
inequality 0 0 2018-10-31
pointpats 100 100 2020-07-27
segregation 24 24 2020-07-01
spaghetti 412 359 2020-05-04
mgwr 8 0 2019-07-18
spglm 0 0 2019-07-18
spint 0 0 2019-07-23
spreg 23 12 2020-02-24
spvcm 0 0 2020-02-02
tobler 64 62 2020-07-01
mapclassify 114 114 2020-06-13
splot 17 14 2020-03-23
pysal 49 49 2020-07-27

In [33]:
cmd_until


Out[33]:
['git', 'log', '--oneline', '--since="2020-02-09"', '--until="2020-07-27"']

In [34]:
identities = {'Levi John Wolf': ('ljwolf', 'Levi John Wolf'),
              'Serge Rey': ('Serge Rey', 'Sergio Rey', 'sjsrey', 'serge'),
              'Wei Kang': ('Wei Kang', 'weikang9009'),
              'Dani Arribas-Bel': ('Dani Arribas-Bel', 'darribas'),
              'Antti Härkönen': ( 'antth', 'Antti Härkönen', 'Antti Härkönen', 'Antth'  ),
              'Juan C Duque': ('Juan C Duque', "Juan Duque"),
              'Renan Xavier Cortes': ('Renan Xavier Cortes', 'renanxcortes', 'Renan Xavier Cortes'   ),
              'Taylor Oshan': ('Tayloroshan', 'Taylor Oshan', 'TaylorOshan'),
              'Tom Gertin': ('@Tomgertin', 'Tom Gertin', '@tomgertin')
}

def regularize_identity(string):
    string = string.decode()
    for name, aliases in identities.items():
        for alias in aliases:
            if alias in string:
                string = string.replace(alias, name)
    if len(string.split(' '))>1:
        string = string.title()
    return string.lstrip('* ')

In [35]:
author_cmd = ['git', 'log', '--format=* %aN', since_date]

In [36]:
author_cmd.append('blank')

In [37]:
author_cmd


Out[37]:
['git', 'log', '--format=* %aN', '--since="2020-02-09"', 'blank']

In [38]:
from collections import Counter

In [39]:
tag_dates


Out[39]:
{'libpysal': '2020-06-28',
 'access': '2020-07-12',
 'esda': '2020-07-12',
 'giddy': '2020-06-10',
 'inequality': '2018-10-31',
 'pointpats': '2020-07-27',
 'segregation': '2020-07-01',
 'spaghetti': '2020-05-04',
 'mgwr': '2019-07-18',
 'spglm': '2019-07-18',
 'spint': '2019-07-23',
 'spreg': '2020-02-24',
 'spvcm': '2020-02-02',
 'tobler': '2020-07-01',
 'mapclassify': '2020-06-13',
 'splot': '2020-03-23',
 'pysal': '2020-07-27'}

In [40]:
authors_global = set()
authors = {}
global_counter = Counter()
counters = dict()
cmd = ['git', 'log', '--oneline', since_date]
total_commits = 0
activity = {}
for subpackage in packages:
    os.chdir(CWD)
    os.chdir('tmp/{subpackage}'.format(subpackage=subpackage))
    ncommits = len(check_output(cmd).splitlines())
    tag_date = tag_dates[subpackage]
    tag_date = (datetime.strptime(tag_date, '%Y-%m-%d') + timedelta(days=1)).strftime('%Y-%m-%d')
    author_cmd[-1] = '--until="{tag_date}"'.format(tag_date=tag_date)
    #cmd_until = cmd + ['--until="{tag_date}"'.format(tag_date=tag_date)]
    print(author_cmd)


    all_authors = check_output(author_cmd).splitlines()
    counter = Counter([regularize_identity(author) for author in all_authors])
    global_counter += counter
    counters.update({subpackage: counter})
    unique_authors = sorted(set(all_authors))
    authors[subpackage] =  unique_authors
    authors_global.update(unique_authors)
    total_commits += ncommits
    activity[subpackage] = ncommits


['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-06-29"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-07-13"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-07-13"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-06-11"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2018-11-01"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-07-28"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-07-02"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-05-05"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2019-07-19"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2019-07-19"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2019-07-24"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-02-25"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-02-03"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-07-02"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-06-14"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-03-24"']
['git', 'log', '--format=* %aN', '--since="2020-02-09"', '--until="2020-07-28"']

In [41]:
counter


Out[41]:
Counter({'Wei Kang': 8,
         'Serge Rey': 25,
         'Eli Knaap': 7,
         'James Gaboardi': 5,
         'Serge': 4})

In [42]:
authors_global


Out[42]:
{b'* Arfon Smith',
 b'* Bryan Bennett',
 b'* Dani Arribas-Bel',
 b'* Elliott Sales de Andrade',
 b'* James Gaboardi',
 b'* Jamie Saxon',
 b'* Jeffery Sauer',
 b'* Levi John Wolf',
 b'* Martin Fleischmann',
 b'* Pattyf',
 b'* Pedro Amaral',
 b'* Serge',
 b'* Serge Rey',
 b'* Sergio Rey',
 b'* Stefanie Lumnitz',
 b'* Sugam Srivastava',
 b'* Wagner',
 b'* Wei Kang',
 b'* dependabot[bot]',
 b'* eli knaap',
 b'* jkoschinsky',
 b'* ljwolf',
 b'* vidal-anguiano',
 b'* weikang9009'}

In [43]:
activity


Out[43]:
{'libpysal': 190,
 'access': 79,
 'esda': 160,
 'giddy': 103,
 'inequality': 0,
 'pointpats': 100,
 'segregation': 24,
 'spaghetti': 412,
 'mgwr': 8,
 'spglm': 0,
 'spint': 0,
 'spreg': 23,
 'spvcm': 0,
 'tobler': 64,
 'mapclassify': 114,
 'splot': 17,
 'pysal': 49}

In [44]:
counters


Out[44]:
{'libpysal': Counter({'Serge Rey': 88,
          'James Gaboardi': 58,
          'Martin Fleischmann': 2,
          'Dani Arribas-Bel': 1,
          'Levi John Wolf': 16,
          'Bryan Bennett': 1,
          'Jeffery Sauer': 1,
          'Elliott Sales De Andrade': 4,
          'Wagner': 2}),
 'access': Counter({'Jkoschinsky': 6,
          'Jamie Saxon': 41,
          'Vidal-Anguiano': 32}),
 'esda': Counter({'Serge Rey': 35,
          'Serge': 5,
          'James Gaboardi': 13,
          'Wei Kang': 1,
          'Dependabot[Bot]': 1,
          'Levi John Wolf': 61,
          'Dani Arribas-Bel': 38,
          'Jeffery Sauer': 1,
          'Eli Knaap': 1}),
 'giddy': Counter({'Wei Kang': 102, 'Serge Rey': 1}),
 'inequality': Counter(),
 'pointpats': Counter({'Serge Rey': 16,
          'Serge': 1,
          'Levi John Wolf': 75,
          'Wei Kang': 4,
          'Sugam Srivastava': 4}),
 'segregation': Counter({'Eli Knaap': 24}),
 'spaghetti': Counter({'James Gaboardi': 357, 'Serge Rey': 2}),
 'mgwr': Counter(),
 'spglm': Counter(),
 'spint': Counter(),
 'spreg': Counter({'Pedro Amaral': 6, 'Eli Knaap': 6}),
 'spvcm': Counter(),
 'tobler': Counter({'Eli Knaap': 60, 'Pattyf': 1, 'Serge Rey': 1}),
 'mapclassify': Counter({'Serge Rey': 84,
          'Martin Fleischmann': 8,
          'James Gaboardi': 20,
          'Eli Knaap': 2}),
 'splot': Counter({'Stefanie Lumnitz': 11,
          'Arfon Smith': 1,
          'James Gaboardi': 1,
          'Levi John Wolf': 1}),
 'pysal': Counter({'Wei Kang': 8,
          'Serge Rey': 25,
          'Eli Knaap': 7,
          'James Gaboardi': 5,
          'Serge': 4})}

In [45]:
counters


Out[45]:
{'libpysal': Counter({'Serge Rey': 88,
          'James Gaboardi': 58,
          'Martin Fleischmann': 2,
          'Dani Arribas-Bel': 1,
          'Levi John Wolf': 16,
          'Bryan Bennett': 1,
          'Jeffery Sauer': 1,
          'Elliott Sales De Andrade': 4,
          'Wagner': 2}),
 'access': Counter({'Jkoschinsky': 6,
          'Jamie Saxon': 41,
          'Vidal-Anguiano': 32}),
 'esda': Counter({'Serge Rey': 35,
          'Serge': 5,
          'James Gaboardi': 13,
          'Wei Kang': 1,
          'Dependabot[Bot]': 1,
          'Levi John Wolf': 61,
          'Dani Arribas-Bel': 38,
          'Jeffery Sauer': 1,
          'Eli Knaap': 1}),
 'giddy': Counter({'Wei Kang': 102, 'Serge Rey': 1}),
 'inequality': Counter(),
 'pointpats': Counter({'Serge Rey': 16,
          'Serge': 1,
          'Levi John Wolf': 75,
          'Wei Kang': 4,
          'Sugam Srivastava': 4}),
 'segregation': Counter({'Eli Knaap': 24}),
 'spaghetti': Counter({'James Gaboardi': 357, 'Serge Rey': 2}),
 'mgwr': Counter(),
 'spglm': Counter(),
 'spint': Counter(),
 'spreg': Counter({'Pedro Amaral': 6, 'Eli Knaap': 6}),
 'spvcm': Counter(),
 'tobler': Counter({'Eli Knaap': 60, 'Pattyf': 1, 'Serge Rey': 1}),
 'mapclassify': Counter({'Serge Rey': 84,
          'Martin Fleischmann': 8,
          'James Gaboardi': 20,
          'Eli Knaap': 2}),
 'splot': Counter({'Stefanie Lumnitz': 11,
          'Arfon Smith': 1,
          'James Gaboardi': 1,
          'Levi John Wolf': 1}),
 'pysal': Counter({'Wei Kang': 8,
          'Serge Rey': 25,
          'Eli Knaap': 7,
          'James Gaboardi': 5,
          'Serge': 4})}

In [46]:
def get_tag(title, level="##", as_string=True):
    words = title.split()
    tag = "-".join([word.lower() for word in words])
    heading = level+" "+title
    line = "\n\n<a name=\"{}\"></a>".format(tag)
    lines = [line]
    lines.append(heading)
    if as_string:
        return "\n".join(lines)
    else:
        return lines

In [47]:
subs = issue_details.keys()
table = []
txt = []
lines = get_tag("Changes by Package", as_string=False)

for sub in subs:
    total= issue_details[sub]
    pr = pull_details[sub]
    
    row = [sub, activity[sub], len(total), len(pr)]
    table.append(row)
    #line = "\n<a name=\"{sub}\"></a>".format(sub=sub)
    #lines.append(line)
    #line = "### {sub}".format(sub=sub)
    #lines.append(line)
    lines.extend(get_tag(sub.lower(), "###", as_string=False))
    for issue in total:
        url = issue['html_url']
        title = issue['title']
        number = issue['number']
        line = "* [#{number}:]({url}) {title} ".format(title=title,
                                                     number=number,
                                                     url=url)
        lines.append(line)

In [48]:
line


Out[48]:
'* [#99:](https://github.com/pysal/splot/pull/99) Remove the links around figures in the JOSS paper '

In [49]:
table


Out[49]:
[['libpysal', 190, 55, 25],
 ['access', 79, 6, 5],
 ['esda', 160, 27, 16],
 ['giddy', 103, 28, 25],
 ['inequality', 0, 0, 0],
 ['pointpats', 100, 9, 7],
 ['segregation', 24, 5, 1],
 ['spaghetti', 412, 78, 47],
 ['mgwr', 8, 0, 0],
 ['spglm', 0, 0, 0],
 ['spint', 0, 0, 0],
 ['spreg', 23, 3, 3],
 ['spvcm', 0, 0, 0],
 ['tobler', 64, 26, 16],
 ['mapclassify', 114, 32, 17],
 ['splot', 17, 4, 4],
 ['pysal', 49, 0, 0]]

In [50]:
os.chdir(CWD)

import pandas

In [51]:
df = pandas.DataFrame(table, columns=['package', 'commits', 'total issues', 'pulls'])

In [52]:
df.head()


Out[52]:
package commits total issues pulls
0 libpysal 190 55 25
1 access 79 6 5
2 esda 160 27 16
3 giddy 103 28 25
4 inequality 0 0 0

In [53]:
df.sort_values(['commits','pulls'], ascending=False)\
  .to_html('./commit_table.html', index=None)

In [54]:
df.sum()


Out[54]:
package         libpysalaccessesdagiddyinequalitypointpatssegr...
commits                                                      1343
total issues                                                  273
pulls                                                         166
dtype: object

In [55]:
contributor_table = pandas.DataFrame.from_dict(counters).fillna(0).astype(int).T

In [56]:
contributor_table.to_html('./contributor_table.html')

In [57]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits')


Out[57]:
commits
Arfon Smith 1
Bryan Bennett 1
Dani Arribas-Bel 39
Dependabot[Bot] 1
Eli Knaap 100
Elliott Sales De Andrade 4
James Gaboardi 454
Jamie Saxon 41
Jeffery Sauer 2
Jkoschinsky 6
Levi John Wolf 153
Martin Fleischmann 10
Pattyf 1
Pedro Amaral 6
Serge 10
Serge Rey 252
Stefanie Lumnitz 11
Sugam Srivastava 4
Vidal-Anguiano 32
Wagner 2
Wei Kang 115

In [58]:
totals = contributor_table.sum(axis=0).T
totals.sort_index().to_frame('commits').to_html('./commits_by_person.html')

In [59]:
totals


Out[59]:
Serge Rey                   252
James Gaboardi              454
Martin Fleischmann           10
Dani Arribas-Bel             39
Levi John Wolf              153
Bryan Bennett                 1
Jeffery Sauer                 2
Elliott Sales De Andrade      4
Wagner                        2
Jkoschinsky                   6
Jamie Saxon                  41
Vidal-Anguiano               32
Serge                        10
Wei Kang                    115
Dependabot[Bot]               1
Eli Knaap                   100
Sugam Srivastava              4
Pedro Amaral                  6
Pattyf                        1
Stefanie Lumnitz             11
Arfon Smith                   1
dtype: int64

In [60]:
n_commits = df.commits.sum()
n_issues = df['total issues'].sum()
n_pulls = df.pulls.sum()

In [61]:
n_commits


Out[61]:
1343

In [62]:
#Overall, there were 719 commits that closed 240 issues, together with 105 pull requests across 12 packages since our last release on 2017-11-03.
#('{0} Here is a really long '
#           'sentence with {1}').format(3, 5))
line = ('Overall, there were {n_commits} commits that closed {n_issues} issues,'  
    ' together with {n_pulls} pull requests since our last release' 
        ' on {since_date}.\n'.format(n_commits=n_commits, n_issues=n_issues,
        n_pulls=n_pulls, since_date = start_date))

In [63]:
line


Out[63]:
'Overall, there were 1343 commits that closed 273 issues, together with 166 pull requests since our last release on 2020-02-09.\n'

append html files to end of changes.md with tags for toc


In [64]:
with open('changes.md', 'w') as fout:
    fout.write(line)
    fout.write("\n".join(lines))
    fout.write(get_tag("Summary Statistics"))
    
    with open('commit_table.html') as table:
        table_lines = table.readlines()
        title = "Package Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))
                
    with open('commits_by_person.html') as table:
        table_lines = table.readlines()
        title = "Contributor Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))
        
    with open('contributor_table.html') as table:
        table_lines = table.readlines()
        title = "Contributor by Package Activity"
        fout.write(get_tag(title,"###"))
        fout.write("\n")
        fout.write("".join(table_lines))

In [ ]:


In [ ]: