Shelter Animal Outcomes 11

Bagging


In [1]:
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn import cross_validation
from sklearn.feature_selection import SelectKBest, chi2, RFECV
from sklearn.cross_validation import StratifiedKFold
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import make_pipeline
import pandas as pd

In [2]:
df_train = pd.read_csv('../Shelter_train.csv')
df_test = pd.read_csv('../Shelter_test.csv')

In [3]:
X = df_train.ix[:, :-1]
y = df_train.ix[:, -1]
df_test = df_test.drop('ID', 1)

Logistic Regression


In [4]:
clf = BaggingClassifier(LogisticRegression(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(clf, X, y, scoring="log_loss")


Out[4]:
array([-1.03859505, -1.05642992, -1.08024019])

In [5]:
X_new = SelectKBest(chi2, k=7).fit_transform(X, y)
cross_validation.cross_val_score(clf, X_new, y, scoring="log_loss")


Out[5]:
array([-1.06820144, -1.07306632, -1.09494661])

In [6]:
selector = SelectKBest(chi2, k=7)
predictor = make_pipeline(selector, clf)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()


Out[6]:
Adoption Died Euthanasia Return_to_owner Transfer
1 0.307454 0.006654 0.070184 0.210140 0.405568
2 0.468738 0.001875 0.044335 0.319245 0.165807
3 0.510772 0.007821 0.043575 0.095783 0.342049
4 0.339458 0.007107 0.063332 0.189334 0.400768
5 0.513114 0.004399 0.033684 0.232800 0.216002

In [7]:
output.to_csv('../submission-Bagging-LogisticRegression.2.0.csv', index_label = 'ID')

SVC


In [8]:
svc = BaggingClassifier(SVC(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(svc, X, y, scoring="log_loss")


Out[8]:
array([-9.96199163, -8.57309252, -8.83333762])

In [9]:
X_new = SelectKBest(chi2, k=7).fit_transform(X, y)
cross_validation.cross_val_score(svc, X_new, y, scoring="log_loss")


Out[9]:
array([-8.3853414 , -9.92669214, -8.38454693])

In [10]:
selectorSVC = SelectKBest(chi2, k=7)
predictor = make_pipeline(selectorSVC, svc)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()


Out[10]:
Adoption Died Euthanasia Return_to_owner Transfer
1 0.4 0.0 0.0 0.1 0.5
2 1.0 0.0 0.0 0.0 0.0
3 0.7 0.0 0.0 0.0 0.3
4 0.3 0.0 0.0 0.1 0.6
5 0.9 0.0 0.0 0.0 0.1

In [11]:
output.to_csv('../submission-Bagging-SVC.2.0.csv', index_label = 'ID')

Decision Tree


In [12]:
decisionTree = BaggingClassifier(DecisionTreeClassifier(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(decisionTree, X, y, scoring="log_loss")


Out[12]:
array([-0.99228152, -1.04551678, -1.00705403])

In [13]:
X_new = SelectKBest(chi2, k=4).fit_transform(X, y)
cross_validation.cross_val_score(decisionTree, X_new, y, scoring="log_loss")


Out[13]:
array([-1.00795675, -0.99000558, -1.01431619])

In [14]:
selectorDTree = SelectKBest(chi2, k=4)
predictor = make_pipeline(selectorDTree, decisionTree)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()


Out[14]:
Adoption Died Euthanasia Return_to_owner Transfer
1 0.252313 0.005126 0.077127 0.233295 0.432140
2 0.503346 0.000730 0.030821 0.268996 0.196107
3 0.504070 0.005231 0.054165 0.147862 0.288672
4 0.239439 0.013327 0.108950 0.200081 0.438203
5 0.497003 0.002079 0.055932 0.221618 0.223368

In [15]:
output.to_csv('../submission-Bagging-DecisionTree.2.0.csv', index_label = 'ID')

KNN


In [18]:
knn = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(knn, X, y, scoring="log_loss")


Out[18]:
array([-1.21654176, -1.34443792, -1.440332  ])

In [19]:
X_new = SelectKBest(chi2, k=8).fit_transform(X, y)
cross_validation.cross_val_score(knn, X_new, y, scoring="log_loss")


Out[19]:
array([-1.3564969 , -1.24266913, -1.34157001])

In [20]:
selectorKNN = SelectKBest(chi2, k=8)
predictor = make_pipeline(selectorKNN, knn)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()


Out[20]:
Adoption Died Euthanasia Return_to_owner Transfer
1 0.26 0.0 0.02 0.30 0.42
2 0.56 0.0 0.10 0.12 0.22
3 0.56 0.0 0.04 0.12 0.28
4 0.36 0.0 0.16 0.20 0.28
5 0.50 0.0 0.04 0.24 0.22

In [21]:
output.to_csv('../submission-Bagging-KNN.2.0.csv', index_label = 'ID')

Naive Bayes


In [22]:
gaussianNB = BaggingClassifier(GaussianNB(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(gaussianNB, X, y, scoring="log_loss")


Out[22]:
array([-1.18349602, -1.20159273, -1.36292606])

In [23]:
X_new = SelectKBest(chi2, k=4).fit_transform(X, y)
cross_validation.cross_val_score(gaussianNB, X_new, y, scoring="log_loss")


Out[23]:
array([-1.02663551, -1.02429311, -1.02377623])

In [24]:
selectorNB = SelectKBest(chi2, k=4)
predictor = make_pipeline(selectorNB, gaussianNB)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()


Out[24]:
Adoption Died Euthanasia Return_to_owner Transfer
1 0.141223 0.004971 0.086521 0.274160 0.493125
2 0.471021 0.002381 0.036342 0.313238 0.177017
3 0.552440 0.009027 0.047226 0.055115 0.336192
4 0.213122 0.007911 0.078015 0.306603 0.394349
5 0.507467 0.001545 0.029447 0.319208 0.142332

In [25]:
output.to_csv('../submission-Bagging-Naive-Bayes.2.0.csv', index_label = 'ID')