Shelter Animal Outcomes 11

Bagging



In [1]:

    
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn import cross_validation
from sklearn.feature_selection import SelectKBest, chi2, RFECV
from sklearn.cross_validation import StratifiedKFold
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import make_pipeline
import pandas as pd



In [2]:

    
df_train = pd.read_csv('../Shelter_train.csv')
df_test = pd.read_csv('../Shelter_test.csv')



In [3]:

    
X = df_train.ix[:, :-1]
y = df_train.ix[:, -1]
df_test = df_test.drop('ID', 1)

Logistic Regression



In [4]:

    
clf = BaggingClassifier(LogisticRegression(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(clf, X, y, scoring="log_loss")









    Out[4]:





array([-1.03859505, -1.05642992, -1.08024019])



In [5]:

    
X_new = SelectKBest(chi2, k=7).fit_transform(X, y)
cross_validation.cross_val_score(clf, X_new, y, scoring="log_loss")









    Out[5]:





array([-1.06820144, -1.07306632, -1.09494661])



In [6]:

    
selector = SelectKBest(chi2, k=7)
predictor = make_pipeline(selector, clf)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()









    Out[6]:






  
    
      
      Adoption
      Died
      Euthanasia
      Return_to_owner
      Transfer
    
  
  
    
      1
      0.307454
      0.006654
      0.070184
      0.210140
      0.405568
    
    
      2
      0.468738
      0.001875
      0.044335
      0.319245
      0.165807
    
    
      3
      0.510772
      0.007821
      0.043575
      0.095783
      0.342049
    
    
      4
      0.339458
      0.007107
      0.063332
      0.189334
      0.400768
    
    
      5
      0.513114
      0.004399
      0.033684
      0.232800
      0.216002



In [7]:

    
output.to_csv('../submission-Bagging-LogisticRegression.2.0.csv', index_label = 'ID')

SVC



In [8]:

    
svc = BaggingClassifier(SVC(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(svc, X, y, scoring="log_loss")









    Out[8]:





array([-9.96199163, -8.57309252, -8.83333762])



In [9]:

    
X_new = SelectKBest(chi2, k=7).fit_transform(X, y)
cross_validation.cross_val_score(svc, X_new, y, scoring="log_loss")









    Out[9]:





array([-8.3853414 , -9.92669214, -8.38454693])



In [10]:

    
selectorSVC = SelectKBest(chi2, k=7)
predictor = make_pipeline(selectorSVC, svc)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()









    Out[10]:






  
    
      
      Adoption
      Died
      Euthanasia
      Return_to_owner
      Transfer
    
  
  
    
      1
      0.4
      0.0
      0.0
      0.1
      0.5
    
    
      2
      1.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      0.7
      0.0
      0.0
      0.0
      0.3
    
    
      4
      0.3
      0.0
      0.0
      0.1
      0.6
    
    
      5
      0.9
      0.0
      0.0
      0.0
      0.1



In [11]:

    
output.to_csv('../submission-Bagging-SVC.2.0.csv', index_label = 'ID')

Decision Tree



In [12]:

    
decisionTree = BaggingClassifier(DecisionTreeClassifier(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(decisionTree, X, y, scoring="log_loss")









    Out[12]:





array([-0.99228152, -1.04551678, -1.00705403])



In [13]:

    
X_new = SelectKBest(chi2, k=4).fit_transform(X, y)
cross_validation.cross_val_score(decisionTree, X_new, y, scoring="log_loss")









    Out[13]:





array([-1.00795675, -0.99000558, -1.01431619])



In [14]:

    
selectorDTree = SelectKBest(chi2, k=4)
predictor = make_pipeline(selectorDTree, decisionTree)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()









    Out[14]:






  
    
      
      Adoption
      Died
      Euthanasia
      Return_to_owner
      Transfer
    
  
  
    
      1
      0.252313
      0.005126
      0.077127
      0.233295
      0.432140
    
    
      2
      0.503346
      0.000730
      0.030821
      0.268996
      0.196107
    
    
      3
      0.504070
      0.005231
      0.054165
      0.147862
      0.288672
    
    
      4
      0.239439
      0.013327
      0.108950
      0.200081
      0.438203
    
    
      5
      0.497003
      0.002079
      0.055932
      0.221618
      0.223368



In [15]:

    
output.to_csv('../submission-Bagging-DecisionTree.2.0.csv', index_label = 'ID')

KNN



In [18]:

    
knn = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(knn, X, y, scoring="log_loss")









    Out[18]:





array([-1.21654176, -1.34443792, -1.440332  ])



In [19]:

    
X_new = SelectKBest(chi2, k=8).fit_transform(X, y)
cross_validation.cross_val_score(knn, X_new, y, scoring="log_loss")









    Out[19]:





array([-1.3564969 , -1.24266913, -1.34157001])



In [20]:

    
selectorKNN = SelectKBest(chi2, k=8)
predictor = make_pipeline(selectorKNN, knn)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()









    Out[20]:






  
    
      
      Adoption
      Died
      Euthanasia
      Return_to_owner
      Transfer
    
  
  
    
      1
      0.26
      0.0
      0.02
      0.30
      0.42
    
    
      2
      0.56
      0.0
      0.10
      0.12
      0.22
    
    
      3
      0.56
      0.0
      0.04
      0.12
      0.28
    
    
      4
      0.36
      0.0
      0.16
      0.20
      0.28
    
    
      5
      0.50
      0.0
      0.04
      0.24
      0.22



In [21]:

    
output.to_csv('../submission-Bagging-KNN.2.0.csv', index_label = 'ID')

Naive Bayes



In [22]:

    
gaussianNB = BaggingClassifier(GaussianNB(), max_samples=0.5, max_features=0.5)
cross_validation.cross_val_score(gaussianNB, X, y, scoring="log_loss")









    Out[22]:





array([-1.18349602, -1.20159273, -1.36292606])



In [23]:

    
X_new = SelectKBest(chi2, k=4).fit_transform(X, y)
cross_validation.cross_val_score(gaussianNB, X_new, y, scoring="log_loss")









    Out[23]:





array([-1.02663551, -1.02429311, -1.02377623])



In [24]:

    
selectorNB = SelectKBest(chi2, k=4)
predictor = make_pipeline(selectorNB, gaussianNB)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer'])
output.index.names = ['ID']
output.index += 1
output.head()









    Out[24]:






  
    
      
      Adoption
      Died
      Euthanasia
      Return_to_owner
      Transfer
    
  
  
    
      1
      0.141223
      0.004971
      0.086521
      0.274160
      0.493125
    
    
      2
      0.471021
      0.002381
      0.036342
      0.313238
      0.177017
    
    
      3
      0.552440
      0.009027
      0.047226
      0.055115
      0.336192
    
    
      4
      0.213122
      0.007911
      0.078015
      0.306603
      0.394349
    
    
      5
      0.507467
      0.001545
      0.029447
      0.319208
      0.142332



In [25]:

    
output.to_csv('../submission-Bagging-Naive-Bayes.2.0.csv', index_label = 'ID')

	Adoption	Died	Euthanasia	Return_to_owner	Transfer
1	0.307454	0.006654	0.070184	0.210140	0.405568
2	0.468738	0.001875	0.044335	0.319245	0.165807
3	0.510772	0.007821	0.043575	0.095783	0.342049
4	0.339458	0.007107	0.063332	0.189334	0.400768
5	0.513114	0.004399	0.033684	0.232800	0.216002

	Adoption	Return_to_owner	Transfer
1	0.4	0.1	0.5
2	1.0	0.0	0.0
3	0.7	0.0	0.3
4	0.3	0.1	0.6
5	0.9	0.0	0.1

	Adoption	Died	Euthanasia	Return_to_owner	Transfer
1	0.252313	0.005126	0.077127	0.233295	0.432140
2	0.503346	0.000730	0.030821	0.268996	0.196107
3	0.504070	0.005231	0.054165	0.147862	0.288672
4	0.239439	0.013327	0.108950	0.200081	0.438203
5	0.497003	0.002079	0.055932	0.221618	0.223368

	Adoption	Euthanasia	Return_to_owner	Transfer
1	0.26	0.02	0.30	0.42
2	0.56	0.10	0.12	0.22
3	0.56	0.04	0.12	0.28
4	0.36	0.16	0.20	0.28
5	0.50	0.04	0.24	0.22

	Adoption	Died	Euthanasia	Return_to_owner	Transfer
1	0.141223	0.004971	0.086521	0.274160	0.493125
2	0.471021	0.002381	0.036342	0.313238	0.177017
3	0.552440	0.009027	0.047226	0.055115	0.336192
4	0.213122	0.007911	0.078015	0.306603	0.394349
5	0.507467	0.001545	0.029447	0.319208	0.142332

	Adoption	Return_to_owner	Transfer
1	0.4	0.1	0.5
2	1.0	0.0	0.0
3	0.7	0.0	0.3
4	0.3	0.1	0.6
5	0.9	0.0	0.1

	Adoption	Return_to_owner	Transfer
1	0.4	0.1	0.5
2	1.0	0.0	0.0
3	0.7	0.0	0.3
4	0.3	0.1	0.6
5	0.9	0.0	0.1