In [1]:
    
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB 
from sklearn import cross_validation
from sklearn.feature_selection import SelectKBest, chi2, RFECV
from sklearn.cross_validation import StratifiedKFold
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import make_pipeline
import pandas as pd
    
In [2]:
    
# Read the training and test tables from CSV files in the parent directory.
train_csv, test_csv = '../Shelter_train.csv', '../Shelter_test.csv'
df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)
    
In [3]:
    
# Split the training frame by position: every column except the last becomes
# the feature matrix X, and the last column becomes the target y.
# `.iloc` is used because `.ix` is deprecated and absent from pandas >= 1.0.
X = df_train.iloc[:, :-1]
y = df_train.iloc[:, -1]
# Remove the 'ID' column from the test frame. The axis is passed by keyword
# because recent pandas no longer accepts it positionally.
# NOTE(review): presumably this makes df_test's columns line up with X -- confirm.
# Re-running this cell without reloading df_test raises KeyError, since 'ID'
# has already been dropped from the rebound frame.
df_test = df_test.drop('ID', axis=1)
    
In [4]:
    
# Bagged logistic regression: each ensemble member is fit on a random draw of
# 50% of the samples and 50% of the features.
# random_state is pinned so the random draws (and therefore the scores and the
# submission built from this model) are repeatable across kernel restarts.
clf = BaggingClassifier(
    LogisticRegression(),
    max_samples=0.5,
    max_features=0.5,
    random_state=0,
)
# Baseline: cross-validated log-loss score using all feature columns.
cross_validation.cross_val_score(clf, X, y, scoring="log_loss")
    
    Out[4]:
In [5]:
    
# Cross-validated log-loss score with chi2 selection of the 7 best features.
# The selector sits inside the pipeline so it is re-fit on the training part
# of each fold. Fitting it on all of (X, y) before cross-validation would let
# the held-out labels influence which features are kept and bias the score.
cross_validation.cross_val_score(
    make_pipeline(SelectKBest(chi2, k=7), clf),
    X,
    y,
    scoring="log_loss",
)
    
    Out[5]:
In [6]:
    
# Fit "select 7 best features -> bagged logistic regression" on all training
# rows, then tabulate the predicted class probabilities for the test rows.
# NOTE(review): the column labels assume predict_proba's column order matches
# this list -- confirm against the classes present in y.
class_columns = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
selector = SelectKBest(chi2, k=7)
predictor = make_pipeline(selector, clf)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=class_columns)
# Name the index 'ID' and shift it so it starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
    
    Out[6]:
In [7]:
    
# Write the current submission table to CSV with the index column headed 'ID'.
submission_path = '../submission-Bagging-LogisticRegression.2.0.csv'
output.to_csv(submission_path, index_label='ID')
    
In [8]:
    
# Bagged support-vector classifier: each ensemble member is fit on a random
# draw of 50% of the samples and 50% of the features.
# random_state is pinned so the random draws are repeatable across runs.
# NOTE(review): SVC() is created without probability=True -- confirm that the
# probabilities feeding the log-loss score are derived the way you intend.
svc = BaggingClassifier(
    SVC(),
    max_samples=0.5,
    max_features=0.5,
    random_state=0,
)
# Baseline: cross-validated log-loss score using all feature columns.
cross_validation.cross_val_score(svc, X, y, scoring="log_loss")
    
    Out[8]:
In [9]:
    
# Cross-validated log-loss score with chi2 selection of the 7 best features.
# The selector sits inside the pipeline so it is re-fit on the training part
# of each fold. Fitting it on all of (X, y) before cross-validation would let
# the held-out labels influence which features are kept and bias the score.
cross_validation.cross_val_score(
    make_pipeline(SelectKBest(chi2, k=7), svc),
    X,
    y,
    scoring="log_loss",
)
    
    Out[9]:
In [10]:
    
# Fit "select 7 best features -> bagged SVC" on all training rows, then
# tabulate the predicted class probabilities for the test rows.
# NOTE(review): the column labels assume predict_proba's column order matches
# this list -- confirm against the classes present in y.
class_columns = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
selectorSVC = SelectKBest(chi2, k=7)
predictor = make_pipeline(selectorSVC, svc)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=class_columns)
# Name the index 'ID' and shift it so it starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
    
    Out[10]:
In [11]:
    
# Write the current submission table to CSV with the index column headed 'ID'.
submission_path = '../submission-Bagging-SVC.2.0.csv'
output.to_csv(submission_path, index_label='ID')
    
In [12]:
    
# Bagged decision trees: each ensemble member is fit on a random draw of 50%
# of the samples and 50% of the features.
# random_state is pinned so the random draws are repeatable across runs.
decisionTree = BaggingClassifier(
    DecisionTreeClassifier(),
    max_samples=0.5,
    max_features=0.5,
    random_state=0,
)
# Baseline: cross-validated log-loss score using all feature columns.
cross_validation.cross_val_score(decisionTree, X, y, scoring="log_loss")
    
    Out[12]:
In [13]:
    
# Cross-validated log-loss score with chi2 selection of the 4 best features.
# The selector sits inside the pipeline so it is re-fit on the training part
# of each fold. Fitting it on all of (X, y) before cross-validation would let
# the held-out labels influence which features are kept and bias the score.
cross_validation.cross_val_score(
    make_pipeline(SelectKBest(chi2, k=4), decisionTree),
    X,
    y,
    scoring="log_loss",
)
    
    Out[13]:
In [14]:
    
# Fit "select 4 best features -> bagged decision trees" on all training rows,
# then tabulate the predicted class probabilities for the test rows.
# NOTE(review): the column labels assume predict_proba's column order matches
# this list -- confirm against the classes present in y.
class_columns = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
selectorDTree = SelectKBest(chi2, k=4)
predictor = make_pipeline(selectorDTree, decisionTree)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=class_columns)
# Name the index 'ID' and shift it so it starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
    
    Out[14]:
In [15]:
    
# Write the current submission table to CSV with the index column headed 'ID'.
submission_path = '../submission-Bagging-DecisionTree.2.0.csv'
output.to_csv(submission_path, index_label='ID')
    
In [18]:
    
# Bagged k-nearest-neighbours: each ensemble member is fit on a random draw of
# 50% of the samples and 50% of the features.
# random_state is pinned so the random draws are repeatable across runs.
knn = BaggingClassifier(
    KNeighborsClassifier(),
    max_samples=0.5,
    max_features=0.5,
    random_state=0,
)
# Baseline: cross-validated log-loss score using all feature columns.
cross_validation.cross_val_score(knn, X, y, scoring="log_loss")
    
    Out[18]:
In [19]:
    
# Cross-validated log-loss score with chi2 selection of the 8 best features.
# The selector sits inside the pipeline so it is re-fit on the training part
# of each fold. Fitting it on all of (X, y) before cross-validation would let
# the held-out labels influence which features are kept and bias the score.
cross_validation.cross_val_score(
    make_pipeline(SelectKBest(chi2, k=8), knn),
    X,
    y,
    scoring="log_loss",
)
    
    Out[19]:
In [20]:
    
# Fit "select 8 best features -> bagged k-nearest-neighbours" on all training
# rows, then tabulate the predicted class probabilities for the test rows.
# NOTE(review): the column labels assume predict_proba's column order matches
# this list -- confirm against the classes present in y.
class_columns = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
selectorKNN = SelectKBest(chi2, k=8)
predictor = make_pipeline(selectorKNN, knn)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=class_columns)
# Name the index 'ID' and shift it so it starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
    
    Out[20]:
In [21]:
    
# Write the current submission table to CSV with the index column headed 'ID'.
submission_path = '../submission-Bagging-KNN.2.0.csv'
output.to_csv(submission_path, index_label='ID')
    
In [22]:
    
# Bagged Gaussian naive Bayes: each ensemble member is fit on a random draw of
# 50% of the samples and 50% of the features.
# random_state is pinned so the random draws are repeatable across runs.
gaussianNB = BaggingClassifier(
    GaussianNB(),
    max_samples=0.5,
    max_features=0.5,
    random_state=0,
)
# Baseline: cross-validated log-loss score using all feature columns.
cross_validation.cross_val_score(gaussianNB, X, y, scoring="log_loss")
    
    Out[22]:
In [23]:
    
# Cross-validated log-loss score with chi2 selection of the 4 best features.
# The selector sits inside the pipeline so it is re-fit on the training part
# of each fold. Fitting it on all of (X, y) before cross-validation would let
# the held-out labels influence which features are kept and bias the score.
cross_validation.cross_val_score(
    make_pipeline(SelectKBest(chi2, k=4), gaussianNB),
    X,
    y,
    scoring="log_loss",
)
    
    Out[23]:
In [24]:
    
# Fit "select 4 best features -> bagged Gaussian naive Bayes" on all training
# rows, then tabulate the predicted class probabilities for the test rows.
# NOTE(review): the column labels assume predict_proba's column order matches
# this list -- confirm against the classes present in y.
class_columns = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
selectorNB = SelectKBest(chi2, k=4)
predictor = make_pipeline(selectorNB, gaussianNB)
predictor.fit(X, y)
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=class_columns)
# Name the index 'ID' and shift it so it starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
    
    Out[24]:
In [25]:
    
# Write the current submission table to CSV with the index column headed 'ID'.
submission_path = '../submission-Bagging-Naive-Bayes.2.0.csv'
output.to_csv(submission_path, index_label='ID')