In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import pylab as plt

In [2]:
# Read the projects table and the outcomes table from the data directory.
projects, outcome = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/projects.csv', '../data/outcomes.csv')
)

In [3]:
# Inner join: keep only the projects that have a row in the outcomes table.
# No `on=` is given, so pandas joins on the column(s) the two frames share
# (presumably just `projectid` -- confirm against the CSV headers).
projects = pd.merge(projects, outcome, how='inner')

In [4]:
# Order the rows chronologically by posting date.
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
projects = projects.sort_values('date_posted')

In [5]:
# Display the column names of the merged frame.
projects.columns


Out[5]:
Index([projectid, teacher_acctid, schoolid, school_ncesid, school_latitude, school_longitude, school_city, school_state, school_zip, school_metro, school_district, school_county, school_charter, school_magnet, school_year_round, school_nlns, school_kipp, school_charter_ready_promise, teacher_prefix, teacher_teach_for_america, teacher_ny_teaching_fellow, primary_focus_subject, primary_focus_area, secondary_focus_subject, secondary_focus_area, resource_type, poverty_level, grade_level, fulfillment_labor_materials, total_price_excluding_optional_support, total_price_including_optional_support, students_reached, eligible_double_your_impact_match, eligible_almost_home_match, date_posted, is_exciting, at_least_1_teacher_referred_donor, fully_funded, at_least_1_green_donation, great_chat, three_or_more_non_teacher_referred_donors, one_non_teacher_referred_donor_giving_100_plus, donation_from_thoughtful_donor, great_messages_proportion, teacher_referred_count, non_teacher_referred_count], dtype=object)

In [6]:
# Outcome flags whose rate over time is plotted further down.
outcome_flag_cols = [
    'is_exciting',
    'at_least_1_teacher_referred_donor',
    'fully_funded',
    'at_least_1_green_donation',
    'great_chat',
    'three_or_more_non_teacher_referred_donors',
    'one_non_teacher_referred_donor_giving_100_plus',
    'donation_from_thoughtful_donor',
]

# Keep only the posting date plus the outcome flags.  The explicit .copy()
# makes this an independent frame, so the column assignments in later cells
# modify it directly instead of raising SettingWithCopyWarning.
projects = projects[['date_posted'] + outcome_flag_cols].copy()

In [7]:
# Encoder used below to turn the date_posted labels into integer codes.
le = LabelEncoder()

In [8]:
# Keep only the first 7 characters of each posting date
# (presumably 'YYYY-MM', i.e. the month -- confirm the date format).
# Vectorised string slicing replaces the original row-by-row loop, which
# relied on chained assignment (`projects.date_posted[i] = ...`): that form
# is not guaranteed to write back into the frame and is very slow on large
# tables.
projects['date_posted'] = projects['date_posted'].str[:7]

In [9]:
# Replace each distinct date_posted label with an integer code.
# NOTE(review): LabelEncoder numbers the sorted unique labels, so for
# 'YYYY-MM' strings the codes should follow chronological order -- confirm.
month_codes = le.fit_transform(projects['date_posted'])
projects['date_posted'] = month_codes

In [10]:
# For every outcome flag, plot the share of projects per encoded posting
# period whose flag equals 't', one subplot per flag on a two-column grid.
outcome_cols = [col for col in projects.columns if col != 'date_posted']

n_grid_cols = 2
# Ceiling division with `//`: plt.subplot needs integer grid dimensions, and
# plain `/` yields a float under Python 3.
n_grid_rows = (len(outcome_cols) + n_grid_cols - 1) // n_grid_cols

for plotNumb, col in enumerate(outcome_cols, start=1):
    # The mean of the boolean mask within each period is the fraction of
    # rows equal to 't'.  Missing values compare unequal, so they count in
    # the denominator only -- the same ratio as count('t') / count(rows).
    # groupby sorts by the period code, so the points run from code 0 upward.
    share_true = (projects[col] == 't').groupby(projects['date_posted']).mean()

    plt.subplot(n_grid_rows, n_grid_cols, plotNumb)
    plt.plot(share_true.values, 'b.-')
    plt.title(col)

In [11]:
# Render the figure assembled by the subplot calls in the previous cell.
plt.show()

In [11]: