In [1]:
from sklearn.svm import SVC
from sklearn import cross_validation
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.pipeline import make_pipeline
import pandas as pd
In [2]:
# Load the shelter training and test tables. Both paths are relative to the
# directory the notebook is started from.
TRAIN_CSV = '../Shelter_train.csv'
TEST_CSV = '../Shelter_test.csv'

df_train = pd.read_csv(TRAIN_CSV)
df_test = pd.read_csv(TEST_CSV)
In [3]:
# Split the training frame by position: every column except the last is a
# feature (X), the last column is the target (y).
# NOTE(review): assumes the outcome label is the final column of the training
# CSV -- confirm against the file.
# `.iloc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in
# 1.0. For a frame whose column labels are not integers `.ix` already selected
# by position, so the selected columns are the same.
X = df_train.iloc[:, :-1]
y = df_train.iloc[:, -1]

# Remove the identifier column from the test frame before it is passed to the
# model. The axis is given by keyword because pandas 2.x no longer accepts it
# positionally. `errors='ignore'` keeps the cell re-runnable: `df_test` is
# reassigned here, so on a second run 'ID' is already gone.
df_test = df_test.drop('ID', axis=1, errors='ignore')
In [4]:
# Baseline model: an SVC with default hyper-parameters, scored by
# cross-validated log loss on all features.
# probability=True is needed because the log-loss scorer calls predict_proba.
# The probability estimates are produced with an internal shuffled
# cross-validation, so random_state is pinned; without it the scores below and
# the final submission change from run to run.
clf = SVC(probability=True, random_state=42)

# NOTE(review): `sklearn.cross_validation` and the scorer name "log_loss" were
# deprecated in scikit-learn 0.18 and removed in 0.20. When the environment is
# upgraded, switch to `sklearn.model_selection.cross_val_score` with
# scoring="neg_log_loss".
cross_validation.cross_val_score(clf, X, y, scoring="log_loss")
Out[4]:
In [5]:
# Time a fit of the baseline SVC on the full training set.
# %timeit executes the statement repeatedly, so this cell costs a multiple of
# one fit; it also leaves `clf` fitted on (X, y) as a side effect.
%timeit clf.fit(X, y)
In [9]:
# Projection of the training features onto the 7 columns with the highest
# chi2 score. It is fitted on ALL rows and labels, so it is kept only for
# inspection and must not be used to compute validation scores.
X_new = SelectKBest(chi2, k=7).fit_transform(X, y)

# Cross-validate selection + SVC as one pipeline, so that SelectKBest is
# re-fitted on the training part of every fold. Scoring `clf` on the
# pre-selected `X_new` would let the validation labels influence which
# features are chosen and make the score optimistically biased.
# cross_val_score works on clones, so `clf` itself is left untouched.
cross_validation.cross_val_score(
    make_pipeline(SelectKBest(chi2, k=7), clf), X, y, scoring="log_loss"
)
Out[9]:
In [10]:
# Final model: chi2 selection of 7 features followed by the SVC, trained on
# the complete training set and applied to the test frame.
selector = SelectKBest(score_func=chi2, k=7)
predictor = make_pipeline(selector, clf).fit(X, y)  # Pipeline.fit returns the pipeline itself
predictions = predictor.predict_proba(df_test)

# One probability column per outcome.
# NOTE(review): the header order presumably mirrors the classifier's class
# order (clf.classes_) -- confirm before submitting.
outcome_labels = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
output = pd.DataFrame(predictions, columns=outcome_labels)

# Shift the default 0-based row index to 1-based and name it 'ID'.
# NOTE(review): assumes the test file's IDs are exactly 1..n in row order --
# verify against the dropped 'ID' column.
output.index = (output.index + 1).rename('ID')
output.head()
Out[10]:
In [11]:
# Write the probability table as the submission file; the index is emitted as
# the first column under the header 'ID'.
output.to_csv(path_or_buf='../submission-SVC.2.0.csv', index_label='ID')