In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import pylab as p
In [2]:
# Read the two input tables from the relative data directory: the project
# records first, then their outcomes. A later cell joins them into one frame.
projects, outcome = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/projects.csv', '../data/outcomes.csv')
)
In [3]:
# Inner-join the outcomes onto the projects. No `on=` key is given, so pandas
# joins on every column name the two frames have in common; rows without a
# match on both sides are dropped.
projects = pd.merge(projects, outcome, how='inner')
In [4]:
# Order the rows by date_posted. DataFrame.sort() was deprecated in pandas 0.17
# and removed in 0.20, so it raises AttributeError on current pandas;
# sort_values() is the supported equivalent (ascending by default, returns a
# new frame, original index labels are kept).
projects = projects.sort_values('date_posted')
In [5]:
# Unfitted label encoder; a later cell fits it on date_posted to replace each
# distinct value with an integer code.
le = LabelEncoder()
In [6]:
# Keep only the first 7 characters of every date_posted value
# (presumably 'YYYY-MM', i.e. month granularity -- confirm against the CSV).
#
# This replaces a per-row loop that wrote through chained indexing
# (`projects.date_posted[i] = ...`). That form raises SettingWithCopyWarning,
# does not modify the frame under pandas copy-on-write, and performs one
# Python-level label lookup per row. The vectorised .str slice does the same
# truncation in one pass and leaves missing values as NaN instead of raising.
projects['date_posted'] = projects['date_posted'].str[:7]
In [7]:
# Swap each date_posted value for the integer code LabelEncoder assigns to it.
# Codes are handed out over the sorted distinct values, so they run 0..k-1 in
# the lexicographic order of the original strings.
projects = projects.assign(date_posted=le.fit_transform(projects['date_posted']))
In [8]:
# Narrow the frame to the two columns used below, in this order:
# the encoded posting period and the is_exciting flag.
projects = projects.loc[:, ['date_posted', 'is_exciting']]
In [9]:
# Fraction of projects whose is_exciting flag equals 't', one entry per
# date_posted code, ordered by ascending code (groupby sorts its keys).
#
# The mean of the boolean mask is (rows flagged 't') / (all rows in the group),
# which is the ratio the original computed by re-filtering the whole frame once
# per code and dividing two `count()[0]` values. A single groupby pass avoids
# that repeated scan, avoids positional `[0]` access on a label-indexed Series
# (deprecated in pandas), and removes the duplicated special case for code 0.
#
# The result is kept as a plain list under the name `true` because the plotting
# cell reads it from that name.
is_exciting_flag = projects['is_exciting'] == 't'
true = is_exciting_flag.groupby(projects['date_posted']).mean().tolist()
In [10]:
# Line plot of the per-period exciting ratio. Only y-values are passed, so the
# x axis is the position in `true`, i.e. the encoded date_posted value.
p.plot(true)
Out[10]:
In [11]:
# Display the figure built by the preceding plot call.
p.show()