In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn import cross_validation
from sklearn.metrics import roc_auc_score
import re
In [2]:
# Load the competition tables used by the cells below.
# `DataFrame.sort(col)` was deprecated in pandas 0.17 and removed in 0.20;
# `sort_values(col)` is the replacement and orders rows the same way
# (ascending by the column, original index labels kept).
# The commented-out reads are tables not used by the cells visible here; they
# are kept for reference and updated to the same API in case they are re-enabled.
#donations = pd.read_csv('../data/donations.csv').sort_values('projectid')
projects = pd.read_csv('../data/projects.csv').sort_values('projectid')
#outcomes = pd.read_csv('../data/outcomes.csv').sort_values('projectid')
resources = pd.read_csv('../data/resources.csv')  # left in file order (not sorted)
#sample = pd.read_csv('../data/sampleSubmission.csv').sort_values('projectid')
#essays = pd.read_csv('../data/essays.csv').sort_values('projectid')
In [3]:
# Distinct values found in the resources table's `project_resource_type` column,
# in order of first appearance.
# NOTE(review): pandas' unique() reports a missing value (NaN) as its own
# entry if the column has any — confirm whether that is wanted here.
s = resources['project_resource_type'].unique()
In [4]:
# Display how many distinct resource-type values were collected in `s`.
len(s)
Out[4]:
In [5]:
# Zero-initialised matrix with one row per row of `projects`.
# The column count 7 is hard-coded; presumably it is meant to match the number
# of distinct resource types (`s.size`) — TODO confirm against that cell's output.
arr = np.zeros(shape=(len(projects), 7))
In [6]:
# Sanity check: display the (rows, columns) dimensions of `arr`.
arr.shape
Out[6]:
In [7]:
# Unfitted label encoder; it is fitted on the project ids in the following cell.
le = LabelEncoder()
In [8]:
# Fit the encoder on the `projectid` column of `projects`. `fit` returns the
# encoder itself, so the cell's displayed output is the fitted encoder.
le.fit(projects['projectid'])
Out[8]:
In [ ]: