Download commit info from a list of GitHub organizations and user accounts


In [1]:
import github3
import pandas as pd
from collections import deque

In [ ]:
# Create a GitHub OAuth (personal access) token here: https://github.com/settings/tokens

In [2]:
# Read the OAuth token from the GITHUB_TOKEN environment variable, falling
# back to an interactive prompt, so the secret is never stored in the notebook.
import os
from getpass import getpass

gh = github3.login(token=os.environ.get('GITHUB_TOKEN') or getpass('GitHub OAuth token: '))

In [3]:
def fetch_commits_for_user(user, blacklisted_repos=None):
    """Collect commit metadata for every repository returned for ``user``.

    Repositories are listed through the module-level ``gh`` session with
    ``gh.repositories_by(user)``. Every commit of every repository that is
    not blacklisted contributes one entry to each returned column. Progress
    is reported with one ``print`` per repository.

    Parameters
    ----------
    user : str
        Account name passed to ``gh.repositories_by``.
    blacklisted_repos : iterable of str, optional
        Substrings; a repository is skipped when any of them occurs in its
        lower-cased ``full_name``. Matching is case-insensitive. ``None``
        (the default) selects ``['homebrew']``; pass an empty list to
        process every repository.

    Returns
    -------
    dict of str -> collections.deque
        Parallel columns keyed ``'repo'``, ``'authors'``, ``'messages'``,
        ``'dates'``, ``'committers'`` and ``'shas'``. Position ``i`` of each
        deque describes the same commit.
    """
    # Only fall back to the default when nothing was passed, so that an
    # explicit empty list really means "skip nothing".
    if blacklisted_repos is None:
        blacklisted_repos = ['homebrew']
    # Repository names are lower-cased before matching, so the patterns
    # must be lower-cased as well or mixed-case patterns could never match.
    blacklist = [pattern.lower() for pattern in blacklisted_repos]

    all_repos = list(gh.repositories_by(user))
    total = len(all_repos)

    repos = deque()
    authors = deque()
    messages = deque()
    dates = deque()
    committers = deque()
    shas = deque()

    for position, repo in enumerate(all_repos, start=1):
        repo_name = repo.full_name.lower()
        if any(pattern in repo_name for pattern in blacklist):
            print('skipping %s. It is blacklisted. %s of %s' % (repo, position, total))
            continue
        print('processing %s. %s of %s' % (repo, position, total))

        try:
            # Materialise the listing inside the try so that a client error
            # raised while it is being consumed is caught here as well.
            commits = list(repo.commits())
        except github3.exceptions.ClientError as err:
            # Report the skip instead of dropping the repository silently.
            print('skipping %s. Could not list its commits: %s' % (repo, err))
            continue

        for commit in commits:
            git_commit = commit.commit
            committer = git_commit.committer['name']
            try:
                author = commit.author.login
            except AttributeError:
                # No author object is attached to this commit; use the
                # committer's name instead.
                author = committer

            repos.append(repo_name)
            committers.append(committer)
            authors.append(author)
            messages.append(git_commit.message)
            dates.append(git_commit.committer['date'])
            shas.append(commit.sha)

    return {
        'repo': repos,
        'authors': authors,
        'messages': messages,
        'dates': dates,
        'committers': committers,
        'shas': shas,
    }

In [4]:
# Collect commit metadata for every repository listed under the 'soft-matter'
# account. No blacklist is passed, so the default applies and repositories
# whose name contains 'homebrew' are skipped.
softmatter_commits = fetch_commits_for_user('soft-matter')


processing soft-matter/mr. 1 of 8
processing soft-matter/pims. 2 of 8
processing soft-matter/pims_nd2. 3 of 8
processing soft-matter/slicerator. 4 of 8
processing soft-matter/sm_core. 5 of 8
processing soft-matter/trackpy. 6 of 8
processing soft-matter/trackpy-examples. 7 of 8
processing soft-matter/yaml-serialize. 8 of 8

In [5]:
# Commit metadata for all repositories under the 'matplotlib' account.
matplotlib_commits = fetch_commits_for_user('matplotlib')


processing matplotlib/basemap. 1 of 15
processing matplotlib/cmocean. 2 of 15
processing matplotlib/cycler. 3 of 15
processing matplotlib/devdocs. 4 of 15
processing matplotlib/freetypy. 5 of 15
processing matplotlib/matplotlib. 6 of 15
processing matplotlib/matplotlib-jenkins. 7 of 15
processing matplotlib/matplotlib.github.com. 8 of 15
processing matplotlib/mplsizer. 9 of 15
processing matplotlib/mpl_mac_testing. 10 of 15
processing matplotlib/natgrid. 11 of 15
processing matplotlib/sampledoc. 12 of 15
processing matplotlib/sample_data. 13 of 15
processing matplotlib/trendvis. 14 of 15
processing matplotlib/viscm. 15 of 15

In [6]:
# Commit metadata for all repositories under the 'NSLS-II' account.
nsls2_commits = fetch_commits_for_user('NSLS-II')


processing NSLS-II/album. 1 of 26
processing NSLS-II/bluesky. 2 of 26
processing NSLS-II/brokerStreamServer. 3 of 26
processing NSLS-II/Bug-Reports. 4 of 26
processing NSLS-II/carchivetools. 5 of 26
processing NSLS-II/channelarchiver. 6 of 26
processing NSLS-II/conda-prescriptions. 7 of 26
processing NSLS-II/configs. 8 of 26
processing NSLS-II/databroker. 9 of 26
processing NSLS-II/datamuxer. 10 of 26
processing NSLS-II/dataportal. 11 of 26
processing NSLS-II/docs. 12 of 26
processing NSLS-II/EXLog. 13 of 26
processing NSLS-II/filestore. 14 of 26
processing NSLS-II/lz4-plugin. 15 of 26
processing NSLS-II/metadatastore. 16 of 26
processing NSLS-II/NSLS-II.github.io. 17 of 26
processing NSLS-II/ophyd. 18 of 26
processing NSLS-II/ophyd-examples. 19 of 26
processing NSLS-II/pyepics. 20 of 26
processing NSLS-II/pyOlog. 21 of 26
processing NSLS-II/replay. 22 of 26
processing NSLS-II/sampleManager. 23 of 26
processing NSLS-II/suitcase. 24 of 26
processing NSLS-II/vertical-integration. 25 of 26
processing NSLS-II/wishlist. 26 of 26

In [7]:
# Commit metadata for all repositories under the 'scikit-xray' account.
skxray_commits = fetch_commits_for_user('scikit-xray')


processing scikit-xray/scikit-xray. 1 of 5
processing scikit-xray/scikit-xray-bench. 2 of 5
processing scikit-xray/scikit-xray-examples. 3 of 5
processing scikit-xray/scikit-xray-feedstock. 4 of 5
processing scikit-xray/scikit-xray-feedstock-dev. 5 of 5

In [8]:
# Commit metadata for all repositories under the 'Nikea' account.
Nikea_commits = fetch_commits_for_user('Nikea')


processing Nikea/benchmark_scripts. 1 of 8
processing Nikea/history. 2 of 8
processing Nikea/nikea.github.io. 3 of 8
processing Nikea/nsls2_gui. 4 of 8
processing Nikea/pyXPCS. 5 of 8
processing Nikea/VisTrails. 6 of 8
processing Nikea/VTTools. 7 of 8
processing Nikea/xray-vision. 8 of 8

In [9]:
# Same collection for six 'NSLS-II-*' accounts (CSX, CHX, HXN, SRX, XPD, IXS).
# Each call returns its own dict of columns, kept under a separate name.
csx_commits = fetch_commits_for_user('NSLS-II-CSX')
chx_commits = fetch_commits_for_user('NSLS-II-CHX')
hxn_commits = fetch_commits_for_user('NSLS-II-HXN')
srx_commits = fetch_commits_for_user('NSLS-II-SRX')
xpd_commits = fetch_commits_for_user('NSLS-II-XPD')
ixs_commits = fetch_commits_for_user('NSLS-II-IXS')


processing NSLS-II-CSX/acopian. 1 of 27
processing NSLS-II-CSX/baffleslits. 2 of 27
processing NSLS-II-CSX/Bug-Reports. 3 of 27
processing NSLS-II-CSX/CAAutoConfig. 4 of 27
processing NSLS-II-CSX/CableDatabase. 5 of 27
processing NSLS-II-CSX/calibration. 6 of 27
processing NSLS-II-CSX/conda-recipes. 7 of 27
processing NSLS-II-CSX/csxtools. 8 of 27
processing NSLS-II-CSX/FastCCDConfig. 9 of 27
processing NSLS-II-CSX/ipython_ophyd. 10 of 27
processing NSLS-II-CSX/lakeshore336. 11 of 27
processing NSLS-II-CSX/libcin. 12 of 27
processing NSLS-II-CSX/logbook. 13 of 27
processing NSLS-II-CSX/lupit. 14 of 27
processing NSLS-II-CSX/NSLS-II-CSX.github.io. 15 of 27
processing NSLS-II-CSX/nsls2NotifyMe. 16 of 27
processing NSLS-II-CSX/omegaM4061. 17 of 27
processing NSLS-II-CSX/ophyd. 18 of 27
processing NSLS-II-CSX/pyepics. 19 of 27
processing NSLS-II-CSX/pyVLSPGM. 20 of 27
processing NSLS-II-CSX/qcinview. 21 of 27
processing NSLS-II-CSX/SRW. 22 of 27
processing NSLS-II-CSX/stanfordDG645. 23 of 27
processing NSLS-II-CSX/testIoc. 24 of 27
processing NSLS-II-CSX/timestamp. 25 of 27
processing NSLS-II-CSX/undcontrol. 26 of 27
processing NSLS-II-CSX/vlspgm. 27 of 27
processing NSLS-II-CHX/chxtools. 1 of 3
processing NSLS-II-CHX/eiger_tools. 2 of 3
processing NSLS-II-CHX/ipython_ophyd. 3 of 3
processing NSLS-II-HXN/hxntools. 1 of 3
processing NSLS-II-HXN/ipython_ophyd. 2 of 3
processing NSLS-II-HXN/PyXRF. 3 of 3
processing NSLS-II-SRX/controlsui. 1 of 4
processing NSLS-II-SRX/ipython_ophyd. 2 of 4
processing NSLS-II-SRX/nsls2-xf-utils. 3 of 4
processing NSLS-II-SRX/srxtools. 4 of 4
processing NSLS-II-XPD/ipython_ophyd. 1 of 2
processing NSLS-II-XPD/xpdtools. 2 of 2
processing NSLS-II-IXS/ipython_ophyd. 1 of 1

In [10]:
# Commit metadata for all repositories under the 'VisTrails' account.
vistrails_commits = fetch_commits_for_user('VisTrails')


processing VisTrails/DAT. 1 of 2
processing VisTrails/VisTrails. 2 of 2

In [13]:
# Commit metadata for all repositories under the 'ericdill' account. Several
# repository names in the output below (e.g. bluesky, ophyd) also appear under
# accounts fetched above, so repeated SHAs are presumably collected here; they
# are removed later by the drop_duplicates('shas') step.
ericdill_commits = fetch_commits_for_user('ericdill')


processing ericdill/album. 1 of 83
processing ericdill/anaconda-build. 2 of 83
processing ericdill/archiver. 3 of 83
processing ericdill/asv. 4 of 83
processing ericdill/auto-enaml. 5 of 83
processing ericdill/benchmark_scripts. 6 of 83
processing ericdill/bluesky. 7 of 83
processing ericdill/bokeh. 8 of 83
processing ericdill/boltons. 9 of 83
processing ericdill/brokerStreamServer. 10 of 83
processing ericdill/chxtools. 11 of 83
processing ericdill/committery. 12 of 83
processing ericdill/CompStats. 13 of 83
processing ericdill/conda-builder. 14 of 83
processing ericdill/conda-prescriptions. 15 of 83
processing ericdill/conda-recipes. 16 of 83
processing ericdill/conda-smithy. 17 of 83
processing ericdill/controlsui. 18 of 83
processing ericdill/csxtools. 19 of 83
processing ericdill/databroker. 20 of 83
processing ericdill/datamuxer. 21 of 83
processing ericdill/demoCSX. 22 of 83
processing ericdill/diffpy.srfit. 23 of 83
processing ericdill/diffpy.srxplanar. 24 of 83
processing ericdill/diffpy.Structure. 25 of 83
processing ericdill/docker-demo-images. 26 of 83
processing ericdill/docs. 27 of 83
processing ericdill/docs.anaconda.org. 28 of 83
processing ericdill/DocumentsInLabNotebooks. 29 of 83
processing ericdill/dummyBroker. 30 of 83
processing ericdill/eapy. 31 of 83
processing ericdill/enaml. 32 of 83
processing ericdill/fileStore. 33 of 83
processing ericdill/filestore-v0.0.2-feedstock. 34 of 83
processing ericdill/github-changelog-generator. 35 of 83
processing ericdill/hexrd. 36 of 83
processing ericdill/history. 37 of 83
processing ericdill/hkl. 38 of 83
processing ericdill/hkl-conda. 39 of 83
processing ericdill/image_inspector. 40 of 83
processing ericdill/kivy. 41 of 83
processing ericdill/libcin. 42 of 83
processing ericdill/matplotlib. 43 of 83
processing ericdill/mdsbenchmark. 44 of 83
processing ericdill/metadatastore. 45 of 83
processing ericdill/miniature-hipster. 46 of 83
processing ericdill/mongoengine. 47 of 83
processing ericdill/notes-dev. 48 of 83
processing ericdill/NSLS-II.github.io. 49 of 83
processing ericdill/ophyd. 50 of 83
processing ericdill/pandas. 51 of 83
processing ericdill/pims. 52 of 83
processing ericdill/PlasticSim. 53 of 83
processing ericdill/psmouse-dkms-alpsv7. 54 of 83
processing ericdill/pyatoms. 55 of 83
processing ericdill/pyepics. 56 of 83
processing ericdill/pymca. 57 of 83
processing ericdill/pyOlog. 58 of 83
processing ericdill/pyqtgraph. 59 of 83
processing ericdill/pyRafters. 60 of 83
processing ericdill/pyspec. 61 of 83
processing ericdill/pytest. 62 of 83
processing ericdill/python_qt_binding. 63 of 83
processing ericdill/pyxda. 64 of 83
processing ericdill/pyXPCS. 65 of 83
processing ericdill/PyXRF. 66 of 83
processing ericdill/RamDog. 67 of 83
processing ericdill/replay. 68 of 83
processing ericdill/scikit-image. 69 of 83
processing ericdill/scikit-xray. 70 of 83
processing ericdill/scikit-xray-examples. 71 of 83
processing ericdill/scipy_2015_sklearn_tutorial. 72 of 83
processing ericdill/spotpick. 73 of 83
processing ericdill/suitcase. 74 of 83
processing ericdill/super_state_machine. 75 of 83
processing ericdill/tomopy. 76 of 83
processing ericdill/tutorials. 77 of 83
processing ericdill/versioneer. 78 of 83
processing ericdill/vispy. 79 of 83
processing ericdill/VisTrails. 80 of 83
processing ericdill/VTTools. 81 of 83
processing ericdill/whatsmyversion. 82 of 83
processing ericdill/xray-vision. 83 of 83

In [14]:
# Commit metadata for all repositories under the 'danielballan' account.
danielballan_commits = fetch_commits_for_user('danielballan')


processing danielballan/asv. 1 of 94
processing danielballan/auto-enaml. 2 of 94
processing danielballan/banyan. 3 of 94
processing danielballan/blog. 4 of 94
processing danielballan/bluesky. 5 of 94
processing danielballan/bokeh. 6 of 94
processing danielballan/channelarchiver. 7 of 94
processing danielballan/conda-build-missing. 8 of 94
processing danielballan/conda-docs. 9 of 94
processing danielballan/conda-kernels. 10 of 94
processing danielballan/conda-prescriptions. 11 of 94
processing danielballan/conda-recipes. 12 of 94
processing danielballan/conda-smithy. 13 of 94
processing danielballan/counterpoint. 14 of 94
processing danielballan/cycler. 15 of 94
processing danielballan/datamuxer. 16 of 94
processing danielballan/dataportal. 17 of 94
processing danielballan/diffpy.srxplanar. 18 of 94
processing danielballan/docker-demo-images. 19 of 94
processing danielballan/docs. 20 of 94
processing danielballan/Event-Organiser. 21 of 94
processing danielballan/fetch. 22 of 94
processing danielballan/filestore. 23 of 94
processing danielballan/hashdist. 24 of 94
processing danielballan/hashstack. 25 of 94
processing danielballan/hexrd. 26 of 94
processing danielballan/hickle. 27 of 94
processing danielballan/history. 28 of 94
processing danielballan/image_inspector. 29 of 94
processing danielballan/ipython. 30 of 94
processing danielballan/IPython-notebook-extensions. 31 of 94
processing danielballan/ipython-widgetmode. 32 of 94
processing danielballan/ipython_extensions. 33 of 94
processing danielballan/ipywidgets. 34 of 94
processing danielballan/jhu-thesis-template. 35 of 94
processing danielballan/jsonsocket. 36 of 94
processing danielballan/jupyter. 37 of 94
processing danielballan/jupyterhub. 38 of 94
processing danielballan/jupyter_client. 39 of 94
processing danielballan/jupyter_core. 40 of 94
processing danielballan/leaftletwidget. 41 of 94
processing danielballan/lmfit-py. 42 of 94
processing danielballan/matplotlib. 43 of 94
processing danielballan/mdsbenchmark. 44 of 94
processing danielballan/metadataStore. 45 of 94
processing danielballan/mpld3. 46 of 94
processing danielballan/mr. 47 of 94
processing danielballan/multipledispatch. 48 of 94
processing danielballan/needle. 49 of 94
processing danielballan/notebook. 50 of 94
processing danielballan/numpy. 51 of 94
processing danielballan/Obvious-CI. 52 of 94
processing danielballan/OpenTLD. 53 of 94
processing danielballan/ophyd. 54 of 94
processing danielballan/package_with_continuous_delivery. 55 of 94
processing danielballan/pandas. 56 of 94
processing danielballan/photoactivation-paper. 57 of 94
processing danielballan/photomosaic. 58 of 94
processing danielballan/pims. 59 of 94
processing danielballan/planner. 60 of 94
processing danielballan/probablyscripts. 61 of 94
processing danielballan/PyAV. 62 of 94
processing danielballan/pyOlog. 63 of 94
processing danielballan/pypsalg. 64 of 94
processing danielballan/python-appveyor-conda-example. 65 of 94
processing danielballan/python-gedcom. 66 of 94
processing danielballan/python-video-converter. 67 of 94
processing danielballan/remotekernel. 68 of 94
processing danielballan/replay. 69 of 94
processing danielballan/rk. 70 of 94
processing danielballan/scikit-image. 71 of 94
processing danielballan/scikit-xray. 72 of 94
processing danielballan/scikit-xray-examples. 73 of 94
processing danielballan/scipy-tutorial-2014. 74 of 94
processing danielballan/scipy_2015_sklearn_tutorial. 75 of 94
processing danielballan/seaborn. 76 of 94
processing danielballan/sharelatex. 77 of 94
processing danielballan/slicerator. 78 of 94
processing danielballan/sm_core. 79 of 94
processing danielballan/snase-notebooks. 80 of 94
processing danielballan/soft-matter.github.io-trackpy. 81 of 94
processing danielballan/statsmodels. 82 of 94
processing danielballan/thebe. 83 of 94
processing danielballan/trackpy. 84 of 94
processing danielballan/trackpy-bench. 85 of 94
processing danielballan/trackpy-examples. 86 of 94
processing danielballan/video. 87 of 94
processing danielballan/vistools. 88 of 94
processing danielballan/VisTrails. 89 of 94
processing danielballan/widget-history. 90 of 94
processing danielballan/xray-vision. 91 of 94
processing danielballan/xraylib. 92 of 94
processing danielballan/yaml-serialize. 93 of 94
processing danielballan/zvi_reader. 94 of 94

In [15]:
# Four more accounts, fetched one after another; each result dict is kept
# under its own name for the concatenation step further down.
dchabot_commits = fetch_commits_for_user('dchabot')
arkilic_commits = fetch_commits_for_user('arkilic')
cowanml_commits = fetch_commits_for_user('cowanml')
areaDetector_commits = fetch_commits_for_user('areaDetector')


processing dchabot/adsim. 1 of 15
processing dchabot/areadetector-1-9-1. 2 of 15
processing dchabot/bluesky. 3 of 15
processing dchabot/caautoconfig. 4 of 15
processing dchabot/cls-orbitcontrol. 5 of 15
processing dchabot/diffcalc. 6 of 15
processing dchabot/dotfiles. 7 of 15
processing dchabot/hkl. 8 of 15
processing dchabot/motor-synapps. 9 of 15
processing dchabot/motorsim. 10 of 15
processing dchabot/ophyd. 11 of 15
processing dchabot/pyepics. 12 of 15
processing dchabot/python-pcaspy. 13 of 15
processing dchabot/quadem. 14 of 15
processing dchabot/synapps-mca. 15 of 15
processing arkilic/filestore. 1 of 5
processing arkilic/mdsbenchmark. 2 of 5
processing arkilic/metadatastore. 3 of 5
processing arkilic/pvaPy. 4 of 5
processing arkilic/v4table_example. 5 of 5
processing cowanml/cookiecutter-pylibrary. 1 of 6
processing cowanml/lsdc. 2 of 6
processing cowanml/metadatastore. 3 of 6
processing cowanml/samplemanager. 4 of 6
processing cowanml/samplemangler. 5 of 6
processing cowanml/sysv-rc-softioc. 6 of 6
processing areaDetector/ADADSC. 1 of 35
processing areaDetector/ADAndor. 2 of 35
processing areaDetector/ADAndor3. 3 of 35
processing areaDetector/ADBinaries. 4 of 35
processing areaDetector/ADBruker. 5 of 35
processing areaDetector/ADCore. 6 of 35
processing areaDetector/ADDexela. 7 of 35
processing areaDetector/ADExample. 8 of 35
processing areaDetector/ADFastCCD. 9 of 35
processing areaDetector/ADFireWireWin. 10 of 35
processing areaDetector/ADLambda. 11 of 35
processing areaDetector/ADLightField. 12 of 35
processing areaDetector/ADmar345. 13 of 35
processing areaDetector/ADmarCCD. 14 of 35
processing areaDetector/ADMerlin. 15 of 35
processing areaDetector/ADMythen. 16 of 35
processing areaDetector/ADnED. 17 of 35
processing areaDetector/ADPerkinElmer. 18 of 35
processing areaDetector/ADPICam. 19 of 35
processing areaDetector/ADPilatus. 20 of 35
processing areaDetector/ADPixirad. 21 of 35
processing areaDetector/ADPluginEdge. 22 of 35
processing areaDetector/ADPointGrey. 23 of 35
processing areaDetector/ADProsilica. 24 of 35
processing areaDetector/ADPSL. 25 of 35
processing areaDetector/ADPvAccess. 26 of 35
processing areaDetector/ADPvCam. 27 of 35
processing areaDetector/ADQImaging. 28 of 35
processing areaDetector/ADRoper. 29 of 35
processing areaDetector/ADURL. 30 of 35
processing areaDetector/aravisGigE. 31 of 35
processing areaDetector/areaDetector. 32 of 35
processing areaDetector/ffmpegServer. 33 of 35
processing areaDetector/ffmpegViewer. 34 of 35
processing areaDetector/firewireDCAM. 35 of 35

In [28]:
# Commit metadata for all repositories under the 'synchbot' account.
synchbot_commits = fetch_commits_for_user('synchbot')


processing synchbot/metadataclientv4. 1 of 3
processing synchbot/metadataservice. 2 of 3
processing synchbot/metadatastorev4. 3 of 3

In [41]:
# Commit metadata for all repositories under the 'klauer' account.
klauer_commits = fetch_commits_for_user('klauer')


processing klauer/ADMerlin. 1 of 31
processing klauer/anc300. 2 of 31
processing klauer/atr142. 3 of 31
processing klauer/bluesky. 4 of 31
processing klauer/build_opi. 5 of 31
processing klauer/conda-prescriptions. 6 of 31
processing klauer/ddrive. 7 of 31
processing klauer/dotfiles. 8 of 31
processing klauer/ECLI. 9 of 31
processing klauer/EZ4axis. 10 of 31
processing klauer/f460. 11 of 31
processing klauer/hkl. 12 of 31
processing klauer/ipplan. 13 of 31
processing klauer/mmc100. 14 of 31
processing klauer/ophyd. 15 of 31
processing klauer/pmcv. 16 of 31
processing klauer/PMD101. 17 of 31
processing klauer/pmd90. 18 of 31
processing klauer/ppmac. 19 of 31
processing klauer/PVRename. 20 of 31
processing klauer/pyepics. 21 of 31
processing klauer/pympx. 22 of 31
processing klauer/pyosxdict. 23 of 31
processing klauer/pyzygo. 24 of 31
processing klauer/qolibri. 25 of 31
processing klauer/simple_scaler. 26 of 31
processing klauer/sios. 27 of 31
processing klauer/smarpod. 28 of 31
processing klauer/stepscan. 29 of 31
processing klauer/tpmac. 30 of 31
processing klauer/xspress3. 31 of 31

In [51]:
# Commit metadata for all repositories under the 'giltis' account.
giltis_commits = fetch_commits_for_user('giltis')


processing giltis/asv. 1 of 36
processing giltis/benchmark_scripts. 2 of 36
processing giltis/bluesky. 3 of 36
processing giltis/channelarchiver. 4 of 36
processing giltis/conda-prescriptions. 5 of 36
processing giltis/data-exchange. 6 of 36
processing giltis/dataportal. 7 of 36
processing giltis/docs. 8 of 36
processing giltis/dotfiles. 9 of 36
processing giltis/filestore. 10 of 36
processing giltis/git-remote-bzr. 11 of 36
processing giltis/git-remote-hg. 12 of 36
processing giltis/Glogg. 13 of 36
processing giltis/ImageVis3D. 14 of 36
processing giltis/matplotlib. 15 of 36
processing giltis/metadatastore. 16 of 36
processing giltis/netcdf4-python. 17 of 36
processing giltis/openmicroscopy. 18 of 36
processing giltis/ophyd. 19 of 36
processing giltis/pims. 20 of 36
processing giltis/pyOlog. 21 of 36
processing giltis/pyqtgraph. 22 of 36
processing giltis/pyRafters. 23 of 36
processing giltis/replay. 24 of 36
processing giltis/scikit-fuzzy. 25 of 36
processing giltis/scikit-image. 26 of 36
processing giltis/scikit-xray. 27 of 36
processing giltis/scikit-xray-examples. 28 of 36
processing giltis/scipy. 29 of 36
processing giltis/suitcase. 30 of 36
processing giltis/tomopy. 31 of 36
processing giltis/userpackages. 32 of 36
processing giltis/vim-colorschemes. 33 of 36
processing giltis/VisTrails. 34 of 36
processing giltis/VTTools. 35 of 36
processing giltis/xray-vision. 36 of 36

In [52]:
# Order in which the per-account results are concatenated into one table.
# The order matters: the later de-duplication calls drop_duplicates('shas')
# without a `keep` argument, so pandas keeps the first occurrence, and a commit
# collected under several accounts stays attributed to whichever account
# appears earliest in this list.
commit_order = [
    skxray_commits,
    softmatter_commits,
    vistrails_commits,
    matplotlib_commits,
    areaDetector_commits,
    nsls2_commits,
    Nikea_commits,
    chx_commits,
    csx_commits,
    hxn_commits,
    srx_commits,
    ixs_commits,
    xpd_commits,
    synchbot_commits,
    ericdill_commits,
    danielballan_commits,
    dchabot_commits,
    arkilic_commits,
    cowanml_commits,
    klauer_commits,
    giltis_commits
]

In [53]:
from collections import defaultdict

In [54]:
# Concatenate the per-account columns, in `commit_order`, into one list per
# column name and build the table from them. The accumulator has its own name
# so that `df` only ever refers to the DataFrame.
combined_columns = defaultdict(list)
for account_commits in commit_order:
    for column_name, column in account_commits.items():
        combined_columns[column_name].extend(column)
df = pd.DataFrame(combined_columns)
    
# dfs = {repo_name: pd.DataFrame({column_name: pd.Series(column) for column_name, column in repo_data.items()}) 
#        for repo_name, repo_data in repo_info.items()}

In [55]:
# Row count of the combined table: one row per collected commit, with commits
# that were collected more than once still included.
len(df)


Out[55]:
380817

In [56]:
# Keep only the first row seen for each commit SHA, and take an independent
# copy so that later column assignments do not touch `df`.
cleaned_df = df.drop_duplicates(subset=['shas'], keep='first').copy()

In [57]:
# Row count after rows with a repeated SHA have been dropped.
len(cleaned_df)


Out[57]:
236270

In [58]:
# # remove the user name where the repo came from
# repo_names = [repo.split('/')[-1] for repo in cleaned_df.repo]
# cleaned_df.update({'repo': pd.Series(repo_names)})

In [59]:
# Alias table used when normalising the `authors` column: keys are lower-cased
# names as they can appear in that column, values are the identifier they are
# replaced with (presumably the person's GitHub login; three of the four
# values match account names fetched above).
map_people = {
    'daniel allan': 'danielballan',
    'arman arkilic': 'arkilic',
    'daron chabot': 'dchabot',
    'thomas caswell': 'tacaswell',
}

In [60]:
# Normalise the `authors` column: lower-case every entry and replace known
# full names with the alias given in `map_people`.
authors = []
for author, committer in zip(cleaned_df.authors, cleaned_df.committers):
    if author == 'unknown':
        # No usable author was recorded; fall back to the committer's name.
        author = committer
    # Convert to str *before* lower-casing so that a non-string value
    # (e.g. a missing name) cannot raise AttributeError.
    author = str(author).lower()
    authors.append(str(map_people.get(author, author)).lower())
cleaned_df['authors'] = authors

In [61]:
# Write the de-duplicated, author-normalised table to 'cleaned-commit-info.csv'
# (a relative path). No `index` argument is passed, so pandas' default applies
# and the row index is written as well.
cleaned_df.to_csv('cleaned-commit-info.csv')

In [ ]:


In [ ]: