In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import pylab as p

In [2]:
# Read the project listings and their outcomes from the sibling data directory,
# in that order, into `projects` and `outcome`.
projects, outcome = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/projects.csv', '../data/outcomes.csv')
)

In [3]:
# Inner-join the outcomes onto the projects.  No `on=` is given, so pandas joins
# on whatever column names the two frames have in common.
projects = pd.merge(projects, outcome, how='inner')

In [4]:
# Order the rows by date_posted (ascending).  DataFrame.sort() has been removed
# from pandas; sort_values() is the supported replacement and orders rows the
# same way.  Row index labels are left as they were (no reset_index).
projects = projects.sort_values('date_posted')

In [5]:
# Encoder that is fitted on date_posted below to map each distinct value to an integer code.
le = LabelEncoder()

In [6]:
# Cut every date_posted string down to its first 7 characters in one vectorized
# step (presumably the 'YYYY-MM' prefix -- confirm against the CSV date format).
# This replaces a per-row Python loop that assigned through chained indexing
# (projects.date_posted[i] = ...): that form is very slow on large frames,
# raises SettingWithCopyWarning, is not guaranteed to write back into the frame,
# and only works when the index labels are exactly 0..n-1.
projects['date_posted'] = projects['date_posted'].str[:7]

In [7]:
# Swap each date_posted value for the integer code the LabelEncoder assigns to it.
projects['date_posted'] = le.fit_transform(projects['date_posted'])

In [8]:
# Narrow the frame to the two columns the remaining cells read:
# the encoded date_posted and the is_exciting flag.
projects = projects.loc[:, ['date_posted', 'is_exciting']]

In [9]:
# Fraction of rows with is_exciting == 't' for every date_posted code.
#
# The comparison yields a boolean Series, and the mean of booleans within a
# group is (number of 't' rows) / (number of rows in the group) -- the same
# ratio the previous per-code loop computed, but in a single pass instead of
# re-filtering the whole frame once per code.
#
# groupby returns groups sorted by key, and date_posted holds the codes
# 0..max with none missing, so list position k is the ratio for code k.
# `true` remains a plain list of Python floats for the plotting cell.
true = (
    projects['is_exciting'].eq('t')
    .groupby(projects['date_posted'])
    .mean()
    .tolist()
)

In [10]:
# Label both axes so the figure can be read without the surrounding code.
# The labels are set first so that p.plot stays the cell's last expression and
# the cell keeps echoing the list of Line2D objects it returns.
p.xlabel('label-encoded date_posted (7-char prefix)')
p.ylabel("fraction of projects with is_exciting == 't'")
p.plot(true)


Out[10]:
[<matplotlib.lines.Line2D at 0xee98350>]

In [11]:
# Display the figure drawn by the preceding p.plot call.
p.show()