In [282]:
import os
import string

from github import Github
# Credentials are read from the environment so they are never saved in the
# notebook. Export GITHUB_USER and GITHUB_PASSWORD before starting the kernel;
# a missing variable raises KeyError here instead of failing later.
g = Github(os.environ['GITHUB_USER'], os.environ['GITHUB_PASSWORD'])

In [293]:
# Broad keyword searches, issued in the same order as before through a short
# alias. None of these four results is read by a cell visible in this part of
# the notebook.
_search = g.legacy_search_repos
github_astronomy = _search('astronomy')
github_space = _search('space')
github_astro = _search('astro')
github_astrophysics = _search('astrophysics')

In [294]:
def get_repo_dict(github_instance):
    """Index a collection of repository objects by repository name.

    Args:
        github_instance: iterable of repository objects exposing ``name``,
            ``_rawData`` (a dict) and ``updated_at``.

    Returns:
        dict mapping each repository name to a copy of that repository's raw
        attribute dict, with ``'updated_at'`` set to the object's
        ``updated_at`` attribute. Repositories sharing a name overwrite one
        another; the last one iterated wins.
    """
    repos_by_name = {}
    for repo in github_instance:
        # Work on a copy: writing into repo._rawData would modify the
        # library object's private state. The snapshot is taken before
        # updated_at is read, the same order the original code used.
        record = dict(repo._rawData)
        record['updated_at'] = repo.updated_at
        repos_by_name[repo.name] = record
    return repos_by_name

In [295]:
# Topic searches whose results are merged into a single name -> record table.
# Terms are processed in order, so a later search overwrites an earlier entry
# that has the same repository name.
SEARCH_TERMS = (
    'astrophysics',
    'radio astronomy',
    'cosmology',
    'gamma ray',
    'hubble space telescope',
)

output_dict = {}
for term in SEARCH_TERMS:
    output_dict.update(get_repo_dict(g.legacy_search_repos(term)))

In [394]:
# Repeats the earlier 'astro' search and rebinds github_astro; the result is
# not read by any cell visible in this part of the notebook.
github_astro = g.legacy_search_repos('astro')

In [395]:
def sort_by_key(key, repos=None):
    """Return ``(name, record)`` pairs ordered by ``record[key]``, largest first.

    Args:
        key: field looked up in every record; a record without it raises
            KeyError.
        repos: mapping of repository name to record dict. Defaults to the
            notebook-level ``output_dict`` when omitted.

    Returns:
        list of ``(name, record)`` tuples in descending order of the field.
    """
    if repos is None:
        repos = output_dict
    # items() behaves the same on Python 2 and also works on Python 3.
    return sorted(repos.items(), key=lambda item: item[1][key], reverse=True)

Basic Analysis


In [396]:
# Number of distinct repository names collected across the merged searches.
# Call form prints the same text on Python 2 and also runs on Python 3.
print('Total Repos: {}'.format(len(output_dict)))


Total Repos: 167

Top-10 Lists


In [397]:
def print_top_ten(key, count=10):
    """Print ``name : value`` for the repositories ranking highest on ``key``.

    Args:
        key: record field to rank by and to display.
        count: how many rows to print; defaults to ten.
    """
    # Slice before formatting so only the rows that are shown get rendered;
    # a row outside the top ``count`` can no longer abort the call.
    lines = ['{} : {}'.format(item[1]['name'], item[1][key])
             for item in sort_by_key(key)[:count]]
    for line in lines:
        print(line)

In [398]:
# Ten highest 'size' values among the collected repositories.
print_top_ten('size')


cosmo-ML : 513412
outreach : 202832
gICLEAN : 149032
estar-project : 75316
Ay190 : 70100
CosmologyExercise : 60104
AstroPhysics : 53328
asterisk : 38575
carma-miriad : 29145
fermi-hero : 28506

In [399]:
# Ten highest 'forks' counts among the collected repositories.
print_top_ten('forks')


astroML : 21
GammaRay : 16
CosmoloPy : 10
astropysics : 10
astro : 7
cosmotools : 5
236-Notes : 4
diskEvolution : 4
xGPU : 3
2013-08-23-harvard : 3

In [400]:
# Ten highest 'watchers' counts among the collected repositories.
print_top_ten('watchers')


GammaRay : 80
astroML : 65
astropysics : 24
CosmoloPy : 16
grif : 10
DAL1 : 8
ccogs : 7
cubep3m : 7
diskEvolution : 6
IRA : 6

In [401]:
# Ten highest 'open_issues' counts among the collected repositories.
print_top_ten('open_issues')


gammapy : 29
gamma-speed : 16
DAL1 : 13
sncosmo : 12
fermi-hero : 10
GammaRay : 10
astroML : 6
AstroTaverna-AA-article : 3
miriad-python : 3
planets : 3

In [402]:
# Ten most recent 'updated_at' timestamps among the collected repositories.
print_top_ten('updated_at')


cosmotools : 2013-09-17 21:34:02
ADS_Collab_Map : 2013-09-17 20:49:18
cosmoslik : 2013-09-17 20:35:31
GammaRay : 2013-09-17 18:15:52
RadiogenicHeatProduction : 2013-09-17 17:08:45
homebrew-tap : 2013-09-17 16:57:44
gICLEAN : 2013-09-17 15:42:11
pygrb : 2013-09-17 08:13:03
sncosmo : 2013-09-17 03:58:50
megalib : 2013-09-16 23:36:00

In [403]:
# Show one full record as an example of the fields stored per repository.
# Call form prints the same text on Python 2 and also runs on Python 3.
print(output_dict['astroML'])


{'fork': False, 'watchers': 65, 'description': u'Machine learning, statistics, and data mining for astronomy and astrophysics', 'language': u'Python', 'has_downloads': True, 'url': u'/repos/astroML/astroML', 'created_at': u'2012-10-17T22:33:50Z', 'updated_at': datetime.datetime(2013, 9, 16, 6, 19, 3), 'private': False, 'name': u'astroML', 'pushed_at': u'2013-08-24T16:22:00Z', 'open_issues': 6, 'has_wiki': True, 'owner': {'url': u'/users/astroML', 'login': u'astroML'}, 'has_issues': True, 'forks': 21, 'homepage': None, 'size': 2245}

Description Analysis


In [386]:
# Stop words left out of the description word count, grouped roughly by role.
# '' is listed because a token made only of punctuation is empty once
# get_word_dict has stripped the punctuation from it.
ARTICLES = ['', 'a', 'an', 'at', 'the', 'that', 'this']
# Frequent terms, mostly the search topics themselves.
COMMON = ['as', 'astronomy', 'astrophysics', 'cosmology', 'cosmological']
CONJUNCTIONS = ['about', 'and', 'but', 'or']
PREPOSITIONS = ['by', 'from', 'for', 'in', 'of', 'on', 'with', 'to']
PRONOUNS = ['i', 'it', 'my', 'your']
VERBS = ['have', 'is', 'using']
# Flat list that get_word_dict checks every candidate word against.
SKIP_WORD_LIST = ARTICLES + COMMON + CONJUNCTIONS + PREPOSITIONS + PRONOUNS + VERBS

def get_word_dict(repos=None, skip_words=None):
    """Count how often each word occurs across repository descriptions.

    Each description is split on whitespace. Every token has its non-ASCII
    characters and punctuation removed and is lower-cased before counting.

    Args:
        repos: mapping of repository name to record dict; each record may
            carry a ``'description'`` string or None. Defaults to the
            notebook-level ``output_dict``.
        skip_words: iterable of words to leave out of the count. Defaults to
            the notebook-level ``SKIP_WORD_LIST``.

    Returns:
        dict mapping each remaining word to its number of occurrences.
    """
    if repos is None:
        repos = output_dict
    if skip_words is None:
        skip_words = SKIP_WORD_LIST
    # Built once so the membership test in the inner loop is constant time.
    skip = frozenset(skip_words)

    word_dict = {}
    for repo in repos.values():
        description = repo.get('description')
        if description is None:
            continue
        for token in description.split():
            # Non-ASCII characters become '?', which the punctuation filter
            # below then removes. str() keeps a native string on Python 2/3.
            ascii_token = str(token.encode('ascii', 'replace').decode('ascii'))
            word = ''.join(
                ch for ch in ascii_token if ch not in string.punctuation
            ).lower()
            if word not in skip:
                word_dict[word] = word_dict.get(word, 0) + 1
    return word_dict

def plot_tuples(tuples_to_plot, label_offset=3, rotation=70):
    """Plot the second element of each tuple as a red dot, labelled by the first.

    Args:
        tuples_to_plot: sequence of ``(label, value)`` tuples; points are
            drawn left to right in the order given.
        label_offset: vertical distance added to ``value`` when placing the
            label text. Defaults to 3.
        rotation: rotation passed to the label text. Defaults to 70.
    """
    # Imported here so the function does not depend on a `%pylab` cell having
    # been run beforehand to put `plt` in the namespace.
    import matplotlib.pyplot as plt

    values = [item[1] for item in tuples_to_plot]
    plt.clf()
    plt.grid(True)
    plt.plot(range(len(values)), values, 'r.')
    for x, item in enumerate(tuples_to_plot):
        plt.text(x, item[1] + label_offset, item[0], rotation=rotation)
    plt.draw()

# A word must occur more often than this to be plotted.
MIN_WORD_COUNT = 3

word_dict = get_word_dict()
# Keep only the frequent words, most common first. sorted() is stable, so
# words with equal counts stay in the order the dict yielded them.
tuples_to_plot = sorted(
    (item for item in word_dict.items() if item[1] > MIN_WORD_COUNT),
    key=lambda tup: tup[1],
    reverse=True
)

In [388]:
# NOTE(review): %pylab pulls numpy and matplotlib names into the interactive
# namespace, and this cell sits after the plotting helper. Explicit imports in
# the first cell would be clearer. TODO confirm what else relies on these names.
%pylab


Welcome to pylab, a matplotlib-based Python environment [backend: MacOSX].
For more information, type 'help(pylab)'.

In [349]:


In [ ]: