In [1]:
try:
    from urllib.request import urlopen, Request
except ImportError:
    from urllib2 import urlopen, Request
from bs4 import BeautifulSoup

In [32]:
def getCommitInfo(url, branch='master'):
    """
    Get the title and the time of every commit shown on the
    ``commits/<branch>`` page of a GitHub repository.

    Parameters
    ----------
    url: str
      Link to repository
    branch: str, optional
      Branch whose commit page is fetched. Defaults to ``'master'``.

    Return
    ------
    names: list of str
      One title per ``commit-title`` paragraph, in page order. The title is
      the text of the first ``message`` link of the paragraph; when the
      paragraph has no such link, the stripped text of the paragraph itself
      is used so that the list keeps one entry per commit.
    times: list of str
      The ``datetime`` attribute of every ``relative-time`` element, in
      page order.

    Raises
    ------
    ValueError
      If the page does not contain the expected commit listing container.
    """
    if not url.endswith('/'):
        url += '/'
    url += 'commits/' + branch

    # Download the page. The response is closed explicitly (rather than with
    # a ``with`` block) because the urllib2 fallback used on Python 2 does not
    # return a context manager.
    response = urlopen(url)
    try:
        page = response.read()
    finally:
        response.close()

    # Create the parser
    soup = BeautifulSoup(page, 'html.parser')

    # Table with all the commits
    class_prop = {"class": "commits-listing commits-listing-padded js-navigation-container js-active-navigation-container"}
    commits_table = soup.find('div', attrs=class_prop)
    if commits_table is None:
        raise ValueError("Could not find the commit listing in the page at %s" % url)

    # Commit title: exactly one entry per commit paragraph. Iterating over the
    # link's children (as was done before) could add several entries, or none,
    # for a single commit and break the pairing with ``times``.
    names = []
    for text_box in commits_table.find_all('p', attrs={'class': 'commit-title'}):
        link = text_box.find('a', attrs={'class': 'message'})
        if link is None:
            names.append(text_box.get_text().strip())
        else:
            names.append(link.get_text())

    # Commit time
    times = [stamp.attrs['datetime']
             for stamp in commits_table.find_all('relative-time')]

    return names, times

In [9]:
# Try the scraper on one repository; the returned (names, times) tuple is the
# last expression of the cell, so it is displayed as the cell output.
url = 'https://github.com/rudyphd/X-class'
getCommitInfo(url)


Out[9]:
([u'Adding Notebook',
  u'Changing tabs to spaces',
  u'Next data and hacking files',
  u'Python Notebook for Classifying Attempt',
  u'Moving things around.',
  u'Updating README',
  u'Adding essential data and script files',
  u'Update README.md',
  u'Update README.md',
  u'Update README.md',
  u'Adding some results files...',
  u'first commit'],
 [u'2016-09-02T00:22:39Z',
  u'2016-09-02T00:17:17Z',
  u'2016-09-02T00:11:18Z',
  u'2016-08-31T01:35:07Z',
  u'2016-08-31T00:48:20Z',
  u'2016-08-31T00:46:28Z',
  u'2016-08-31T00:44:13Z',
  u'2016-08-31T00:14:23Z',
  u'2016-08-31T00:07:30Z',
  u'2016-08-31T00:06:47Z',
  u'2016-08-31T00:01:59Z',
  u'2016-08-30T23:59:10Z'])

In [35]:
# Run the scraper on a second repository, keep both returned lists, and print
# their lengths to compare the number of titles with the number of timestamps.
url = 'https://github.com/astrohackweek/astrohackweek2016'
names, times = getCommitInfo(url)
print(len(names), len(times))


(31, 31)

In [36]:
# Display the commit titles returned by getCommitInfo for the repository above.
names


Out[36]:
[u'Merge pull request',
 u'Adding Theano example to autodiff notebook',
 u'move autograd material to top',
 u'git ignore notebook checkpoints',
 u'add forward diff notebook',
 u'add autograd material to autodiff breakout',
 u'Merge pull request',
 u'tensor flow and autograd.',
 u'Merge pull request',
 u'Merge pull request',
 u'optimization slides added (grigor)',
 u'The notebook that produced the figure in optimization-concept.svg.',
 u'Add the slides for optimization basic concepts.',
 u'Merge pull request',
 u'Merge pull request',
 u'adding sampling notebooks',
 u'Merge pull request',
 u'The exercise and pdf with slides',
 u"Merge remote-tracking branch 'AstroHackWeek/master'",
 u'Added maths and GPy kernel comparison',
 u'Merge pull request',
 u'Added manual GP computation',
 u'Merge pull request',
 u'Add interactive jupyter notebook tutorials',
 u'add gaussian process breakout notebook',
 u'Merge pull request',
 u'added some outlier detection',
 u'initial commit of lecture material for machine learning',
 u'add gitter chat badge to README',
 u'create directories for materials',
 u'Initial commit']

In [ ]: