Commit Happiness: Part II

Authors: Franco Bellomo, Adrian Price-Whelan, Madhura Killedar

Search Hackpad for links to GitHub repos, scrape commit messages, and analyse them


In [1]:
import getCommitMessages as gcm
import numpy as np
import textblob
from datetime import datetime
from pytz import timezone
import matplotlib as mpl
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
textfile = "./github_links.txt"
repo_links = np.loadtxt(textfile,dtype="string")

In [3]:
commit_infos = []
for link in repo_links:
    messages, times = gcm.getCommitInfo(link)
    user_repo = link[19:]
    repo_data = (messages,times,user_repo)
    commit_infos.append(repo_data)

In [4]:
all_commits = []
all_times = []
all_users = []
all_feelings = []
repos_feels = []

for repo_data in commit_infos:
    commits = repo_data[0]
    user_repo = repo_data[2]
    all_commits += commits
    all_times += repo_data[1]
    all_users += [repo_data[2]]*len(commits)
    feelings = []
    
    for message in commits:
        feel = textblob.TextBlob(message).sentiment.polarity
        all_feelings.append(feel)
        if not message.startswith("Merge"):
             feelings.append(feel)

    average_feels = sum(feelings)/len(feelings)
    repos_feels.append(average_feels)
    #print "%s has average feelings = %f"%(user_repo,average_feels)

In [5]:
len(all_users), len(all_commits), len(all_feelings)


Out[5]:
(912, 912, 912)

In [6]:
ind = np.argmax(np.array(all_feelings))
feels = all_feelings[ind]
commit = all_commits[ind]
user = all_users[ind]
print "user %s had the happiest commit (%f): \n %s"%(user,feels,commit)


user astrohackweek/astrohackweek2015 had the happiest commit (1.000000): 
 Added awesome blog link from Chris

In [7]:
ind = np.argmin(np.array(all_feelings))
feels = all_feelings[ind]
commit = all_commits[ind]
user = all_users[ind]
print "user %s had the saddest commit (%f): \n %s"%(user,feels,commit)


user pmelchior/skymapper had the saddest commit (-0.800000): 
 introduces base class ConicProjection

In [8]:
all_happiest = np.argmax(np.array(repos_feels))

for happy in all_happiest:
    happy_repo_data = commit_infos[happy]
    happy_user = happy_repo_data[2]
    commits = happy_repo_data[0]
    happinesses = []
    
    for commit in commits:
        happinesses.append(textblob.TextBlob(commit).sentiment.polarity)
        
    which_happiest = np.argmax(np.array(happinesses))
    happiest_commit = commits[which_happiest]
    happiest_time = happy_repo_data[1][which_happiest]
    print "%s is the happiest. Their happiest commit is: \n %s"%(happy_user,happiest_commit)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-e0c8e38e307f> in <module>()
      1 all_happiest = np.argmax(np.array(repos_feels))
      2 
----> 3 for happy in all_happiest:
      4     happy_repo_data = commit_infos[happy]
      5     happy_user = happy_repo_data[2]

TypeError: 'numpy.int64' object is not iterable

In [ ]:
all_saddest = np.argmin(np.array(repos_feels))

for sad in all_saddest:
    sad_repo_data = commit_infos[sad]
    sad_user = sad_repo_data[2]
    commits = sad_repo_data[0]
    sadnesses = []
    
    for commit in commits:
        sadnesses.append(textblob.TextBlob(commit).sentiment.polarity)
        
    which_saddest = np.argmin(np.array((sadnesses))
    saddest_commit = commits[which_saddest]
    saddest_time = sad_repo_data[1][which_saddest]
    
    print "%s is the saddest. Their saddest commit is: \n %s"%(sad_user,saddest_commit)

In [ ]:
all_datetimes = []

for time in all_times:
    dt_obj = datetime.strptime(time, "%Y-%m-%dT%H:%M:%SZ")
    all_datetimes.append(dt_obj)

In [ ]:
fig, ax = plt.subplots(figsize=(13,8))
all_datenums = mpl.dates.date2num(all_datetimes)
ax.plot_date(all_datenums, all_feelings, ms=4)
ax.set_xlim([datetime(2016,8,29).toordinal(), datetime(2016,9,3).toordinal()])
vldate = datetime(2016,8,30).toordinal()
ax.plot((vldate,vldate),(-1,1),'k--')
vldate = datetime(2016,8,31).toordinal()
ax.plot((vldate,vldate),(-1,1),'k--')
vldate = datetime(2016,9,1).toordinal()
ax.plot((vldate,vldate),(-1,1),'k--')
vldate = datetime(2016,9,2).toordinal()
ax.plot((vldate,vldate),(-1,1),'k--')
ax.set_xlabel("Time",fontsize=20)
ax.set_ylabel("Happiness",fontsize=20)
plt.gcf().autofmt_xdate()

In [ ]:
seconds_in_day = 3600*24
standard = datetime(2016,8,28)
#standard = timezone('US/Eastern').localize(datetime(2016,8,28))
#all_datesecs = [timezone('US/Eastern').localize(i) - standard for i in all_datetimes]
all_datesecs = [i - standard for i in all_datetimes]
all_seconds = [i.total_seconds()%seconds_in_day for i in all_datesecs]
fig2, ax2 = plt.subplots(figsize=(12,8))
ax2.scatter(all_seconds, all_feelings, s=18, color="#AA0000")
ax2.set_xlabel("Time of day (seconds)",fontsize=20)
ax2.set_ylabel("Happiness",fontsize=20)
ax2.set_xlim([0,seconds_in_day])
ax2.set_ylim([-1,1])

In [ ]: