Shelter Animal Outcomes 6

Support Vector Machines


In [1]:
from sklearn.svm import SVC
from sklearn import cross_validation
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import make_pipeline
import pandas as pd

In [2]:
df_train = pd.read_csv('../Shelter_train.csv')
df_test = pd.read_csv('../Shelter_test.csv')

In [3]:
X = df_train.ix[:, :-1]
y = df_train.ix[:, -1]
df_test = df_test.drop('ID', 1)

In [4]:
clf = SVC(probability=True)
cross_validation.cross_val_score(clf, X, y, scoring="log_loss")


Out[4]:
array([-0.99546749, -0.98174512, -0.97886519])

In [5]:
%timeit clf.fit(X, y)


1 loop, best of 3: 3min 48s per loop

In [9]:
X_new = SelectKBest(chi2, k=7).fit_transform(X, y)
cross_validation.cross_val_score(clf, X_new, y, scoring="log_loss")


Out[9]:
array([-0.99748181, -0.98330549, -0.98156068])

In [10]:
selector = SelectKBest(chi2, k=7)
predictor = make_pipeline(selector, clf)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()


Out[10]:
Adoption Died Euthanasia Return_to_owner Transfer
1 0.017044 0.008535 0.073380 0.114324 0.786717
2 0.514006 0.004215 0.036188 0.211757 0.233833
3 0.591613 0.006544 0.039136 0.118934 0.243773
4 0.108488 0.007611 0.064966 0.378202 0.440732
5 0.566251 0.005415 0.035276 0.233439 0.159619

In [11]:
output.to_csv('../submission-SVC.2.0.csv', index_label = 'ID')