In [1]:
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import cross_validation
from sklearn.feature_selection import SelectKBest, chi2, RFECV
from sklearn.cross_validation import StratifiedKFold
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import make_pipeline
import pandas as pd
In [2]:
# Read the training and test tables from the parent directory.
train_path, test_path = '../Shelter_train.csv', '../Shelter_test.csv'
df_train = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)
In [3]:
# Split the training frame by position: every column except the last is a
# feature, the last column is the target.  `.iloc` is used because the `.ix`
# indexer was deprecated in pandas 0.20 and removed in pandas 1.0.
X = df_train.iloc[:, :-1]
y = df_train.iloc[:, -1]
# Remove the 'ID' column from the test frame before it is passed to a model.
# The axis is given by keyword (the positional form was removed in pandas 2.0)
# and errors='ignore' keeps this cell re-runnable once 'ID' is already gone.
df_test = df_test.drop('ID', axis=1, errors='ignore')
In [4]:
# Bagged logistic regression: every ensemble member is trained on a random
# draw of 50% of the rows and 50% of the features.  A fixed random_state makes
# that resampling -- and therefore the scores below -- repeatable on a fresh
# kernel.
clf = BaggingClassifier(LogisticRegression(), max_samples=0.5, max_features=0.5,
                        random_state=0)
# Baseline cross-validated score on the full feature set.
cross_validation.cross_val_score(clf, X, y, scoring="log_loss")
Out[4]:
In [5]:
# Score `clf` on the 7 best chi-squared features.  The selector lives inside
# the cross-validated pipeline so it is re-fit on each training fold only;
# fitting it on all of (X, y) beforehand lets the held-out labels influence
# which columns are kept and makes the score optimistically biased.
selected_clf = make_pipeline(SelectKBest(chi2, k=7), clf)
cross_validation.cross_val_score(selected_clf, X, y, scoring="log_loss")
Out[5]:
In [6]:
# Final bagged logistic-regression model: chi-squared selection of 7 features
# chained with `clf`, fitted on the complete training set.
selector = SelectKBest(chi2, k=7)
predictor = make_pipeline(selector, clf).fit(X, y)

# One probability column per outcome.  The labels are assumed to be listed in
# the same order as the fitted classifier's classes -- TODO confirm.
outcome_labels = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=outcome_labels)

# Name the row index 'ID' and shift it so numbering starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
Out[6]:
In [7]:
# Write the logistic-regression predictions, labelling the index column 'ID'.
submission_path = '../submission-Bagging-LogisticRegression.2.0.csv'
output.to_csv(submission_path, index_label='ID')
In [8]:
# Bagged support-vector classifiers, each trained on a random draw of 50% of
# the rows and 50% of the features.
# probability=True gives every SVC a predict_proba method.  Without it
# BaggingClassifier.predict_proba falls back to the fraction of members voting
# for each class, which is a coarse estimate for a log-loss metric.  Note that
# this makes each SVC fit noticeably slower.
# random_state fixes the resampling so the scores are repeatable.
svc = BaggingClassifier(SVC(probability=True), max_samples=0.5, max_features=0.5,
                        random_state=0)
# Baseline cross-validated score on the full feature set.
cross_validation.cross_val_score(svc, X, y, scoring="log_loss")
Out[8]:
In [9]:
# Score `svc` on the 7 best chi-squared features.  The selector lives inside
# the cross-validated pipeline so it is re-fit on each training fold only;
# fitting it on all of (X, y) beforehand lets the held-out labels influence
# which columns are kept and makes the score optimistically biased.
selected_svc = make_pipeline(SelectKBest(chi2, k=7), svc)
cross_validation.cross_val_score(selected_svc, X, y, scoring="log_loss")
Out[9]:
In [10]:
# Final bagged SVC model: chi-squared selection of 7 features chained with
# `svc`, fitted on the complete training set.
selectorSVC = SelectKBest(chi2, k=7)
predictor = make_pipeline(selectorSVC, svc).fit(X, y)

# One probability column per outcome.  The labels are assumed to be listed in
# the same order as the fitted classifier's classes -- TODO confirm.
outcome_labels = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=outcome_labels)

# Name the row index 'ID' and shift it so numbering starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
Out[10]:
In [11]:
# Write the SVC predictions, labelling the index column 'ID'.
submission_path = '../submission-Bagging-SVC.2.0.csv'
output.to_csv(submission_path, index_label='ID')
In [12]:
# Bagged decision trees, each trained on a random draw of 50% of the rows and
# 50% of the features.  A fixed random_state makes the resampling -- and
# therefore the scores below -- repeatable on a fresh kernel.
decisionTree = BaggingClassifier(DecisionTreeClassifier(), max_samples=0.5,
                                 max_features=0.5, random_state=0)
# Baseline cross-validated score on the full feature set.
cross_validation.cross_val_score(decisionTree, X, y, scoring="log_loss")
Out[12]:
In [13]:
# Score `decisionTree` on the 4 best chi-squared features.  The selector lives
# inside the cross-validated pipeline so it is re-fit on each training fold
# only; fitting it on all of (X, y) beforehand lets the held-out labels
# influence which columns are kept and makes the score optimistically biased.
selected_tree = make_pipeline(SelectKBest(chi2, k=4), decisionTree)
cross_validation.cross_val_score(selected_tree, X, y, scoring="log_loss")
Out[13]:
In [14]:
# Final bagged decision-tree model: chi-squared selection of 4 features
# chained with `decisionTree`, fitted on the complete training set.
selectorDTree = SelectKBest(chi2, k=4)
predictor = make_pipeline(selectorDTree, decisionTree).fit(X, y)

# One probability column per outcome.  The labels are assumed to be listed in
# the same order as the fitted classifier's classes -- TODO confirm.
outcome_labels = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=outcome_labels)

# Name the row index 'ID' and shift it so numbering starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
Out[14]:
In [15]:
# Write the decision-tree predictions, labelling the index column 'ID'.
submission_path = '../submission-Bagging-DecisionTree.2.0.csv'
output.to_csv(submission_path, index_label='ID')
In [18]:
# Bagged k-nearest-neighbour classifiers, each trained on a random draw of 50%
# of the rows and 50% of the features.  A fixed random_state makes the
# resampling -- and therefore the scores below -- repeatable on a fresh kernel.
knn = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5,
                        random_state=0)
# Baseline cross-validated score on the full feature set.
cross_validation.cross_val_score(knn, X, y, scoring="log_loss")
Out[18]:
In [19]:
# Score `knn` on the 8 best chi-squared features.  The selector lives inside
# the cross-validated pipeline so it is re-fit on each training fold only;
# fitting it on all of (X, y) beforehand lets the held-out labels influence
# which columns are kept and makes the score optimistically biased.
selected_knn = make_pipeline(SelectKBest(chi2, k=8), knn)
cross_validation.cross_val_score(selected_knn, X, y, scoring="log_loss")
Out[19]:
In [20]:
# Final bagged KNN model: chi-squared selection of 8 features chained with
# `knn`, fitted on the complete training set.
selectorKNN = SelectKBest(chi2, k=8)
predictor = make_pipeline(selectorKNN, knn).fit(X, y)

# One probability column per outcome.  The labels are assumed to be listed in
# the same order as the fitted classifier's classes -- TODO confirm.
outcome_labels = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=outcome_labels)

# Name the row index 'ID' and shift it so numbering starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
Out[20]:
In [21]:
# Write the KNN predictions, labelling the index column 'ID'.
submission_path = '../submission-Bagging-KNN.2.0.csv'
output.to_csv(submission_path, index_label='ID')
In [22]:
# Bagged Gaussian naive Bayes classifiers, each trained on a random draw of
# 50% of the rows and 50% of the features.  A fixed random_state makes the
# resampling -- and therefore the scores below -- repeatable on a fresh kernel.
gaussianNB = BaggingClassifier(GaussianNB(), max_samples=0.5, max_features=0.5,
                               random_state=0)
# Baseline cross-validated score on the full feature set.
cross_validation.cross_val_score(gaussianNB, X, y, scoring="log_loss")
Out[22]:
In [23]:
# Score `gaussianNB` on the 4 best chi-squared features.  The selector lives
# inside the cross-validated pipeline so it is re-fit on each training fold
# only; fitting it on all of (X, y) beforehand lets the held-out labels
# influence which columns are kept and makes the score optimistically biased.
selected_nb = make_pipeline(SelectKBest(chi2, k=4), gaussianNB)
cross_validation.cross_val_score(selected_nb, X, y, scoring="log_loss")
Out[23]:
In [24]:
# Final bagged naive-Bayes model: chi-squared selection of 4 features chained
# with `gaussianNB`, fitted on the complete training set.
selectorNB = SelectKBest(chi2, k=4)
predictor = make_pipeline(selectorNB, gaussianNB).fit(X, y)

# One probability column per outcome.  The labels are assumed to be listed in
# the same order as the fitted classifier's classes -- TODO confirm.
outcome_labels = ['Adoption', 'Died', 'Euthanasia', 'Return_to_owner', 'Transfer']
predictions = predictor.predict_proba(df_test)
output = pd.DataFrame(predictions, columns=outcome_labels)

# Name the row index 'ID' and shift it so numbering starts at 1 instead of 0.
output.index.name = 'ID'
output.index = output.index + 1
output.head()
Out[24]:
In [25]:
# Write the naive-Bayes predictions, labelling the index column 'ID'.
submission_path = '../submission-Bagging-Naive-Bayes.2.0.csv'
output.to_csv(submission_path, index_label='ID')