In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [2]:
# Load the training and test CSVs.
# The data directory defaults to the original Windows location but can be
# redirected without editing this cell by setting the ML_DATA_DIR environment
# variable, e.g. to the Mac checkout:
#   /Users/Harish/Documents/HK_Work/Python/Machine-Learning
import os

DATA_DIR = os.environ.get(
    'ML_DATA_DIR',
    r'C:\Users\hrao\Documents\Personal\HK\Python',
)
train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))
test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'))
In [3]:
# Peek at the first three rows of the freshly loaded training frame.
train.head(3)
Out[3]:
In [4]:
# Replace every missing value in both frames with 0.
# NOTE(review): this is a blanket fill, so text columns receive the integer 0
# as well; the later dictionary-based encodings turn any such 0 back into NaN.
train, test = train.fillna(0), test.fillna(0)
In [5]:
# Discard the columns that are not used as model features.
# Not idempotent: re-running this cell raises because the columns are gone.
train = train.drop(columns=['Name', 'SibSp', 'Parch', 'Cabin', 'Ticket'])
In [6]:
# Confirm the column drop by showing the first three remaining rows.
train.head(3)
Out[6]:
In [7]:
# Encode Sex numerically with one shared lookup for both frames.
# Values that are not keys of the lookup become NaN, so running this cell a
# second time (on already-encoded 1/0 values) would wipe the column.
sex_codes = {'female': 1, 'male': 0}
train['Sex'] = train['Sex'].map(sex_codes)
test['Sex'] = test['Sex'].map(sex_codes)
In [8]:
# Encode the Embarked letter codes as integers, using the same lookup for
# train and test. Anything that is not 'C', 'Q' or 'S' (including the 0
# placeholders written by the earlier blanket fillna) comes out as NaN here.
embarked_codes = {'C': 1, 'Q': 2, 'S': 3}
train['Embarked'] = train['Embarked'].map(embarked_codes)
test['Embarked'] = test['Embarked'].map(embarked_codes)
In [9]:
# Inspect the first three training rows after the categorical encoding.
train.head(3)
Out[9]:
In [10]:
# Remove the same unused columns from the test frame as were removed from train.
test = test.drop(columns=['Name', 'SibSp', 'Parch', 'Cabin', 'Ticket'])
In [11]:
# First three rows of the reduced test frame.
test.head(3)
Out[11]:
In [12]:
# Zero-fill any NaN left in Age and Embarked for both frames.
# For Embarked this catches values the dictionary encoding could not map;
# Age was already covered by the blanket fillna, so that pass is a no-op
# unless the cells were run in a different order.
for frame in (train, test):
    for column in ('Age', 'Embarked'):
        frame[column] = frame[column].fillna(0)
In [13]:
#train.to_csv(r'C:\Users\hrao\Documents\Personal\HK\Machine-Learning\train_clean_data.csv')
#test.to_csv(r'C:\Users\hrao\Documents\Personal\HK\Machine-Learning\test_clean_data.csv')
In [36]:
# Target vector: the Survived column of the training frame.
Y_train = train["Survived"]
# Feature matrices: everything except the target and the row identifier.
X_train = train.drop(columns=["Survived", "PassengerId"])
X_test = test.drop(columns="PassengerId").copy()
# Show the three shapes as a quick sanity check.
X_train.shape, Y_train.shape, X_test.shape
Out[36]:
In [37]:
# First five rows of the training feature matrix.
X_train.head(5)
Out[37]:
In [38]:
# First five target labels.
Y_train.head(5)
Out[38]:
In [39]:
# First five rows of the test feature matrix.
X_test.head(5)
Out[39]:
In [40]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with library-default hyperparameters, fitted on the
# full training set. The fit call is the last expression so the fitted
# estimator is displayed as the cell output.
lr = LogisticRegression()
lr.fit(X=X_train, y=Y_train)
Out[40]:
In [41]:
# Mean accuracy of the logistic regression on the data it was trained on.
# NOTE: this is training-set accuracy, not a held-out estimate.
accuracy_lr = lr.score(X_train, Y_train)
# Class predictions for the test feature matrix.
prediction_lr = lr.predict(X_test)
accuracy_lr
Out[41]:
In [42]:
# Pair each test PassengerId with the logistic-regression prediction for it.
prediction_passenger_lr = test[['PassengerId']].assign(Survived=prediction_lr)
In [43]:
from sklearn import svm

# Support-vector classifier with library-default settings.
# NOTE(review): despite the `lsvc` name this is svm.SVC, not LinearSVC.
lsvc = svm.SVC()
lsvc.fit(X=X_train, y=Y_train)
Out[43]:
In [44]:
# Class predictions of the fitted SVC for the test feature matrix.
prediction_svc = lsvc.predict(X=X_test)
In [45]:
# Mean accuracy of the SVC on its own training data (not a held-out estimate).
accuracy_svc = lsvc.score(X=X_train, y=Y_train)
accuracy_svc
Out[45]:
In [47]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
In [48]:
# 3-nearest-neighbours classifier, fitted on the training set.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, Y_train)
# Test-set predictions; Y_pred is reused (overwritten) by the later model cells.
Y_pred = knn.predict(X_test)
# Training-set accuracy expressed as a percentage with two decimals.
knn_train_score = knn.score(X_train, Y_train)
acc_knn = round(knn_train_score * 100, 2)
acc_knn
Out[48]:
In [49]:
# Gaussian naive Bayes with default settings, fitted on the training set.
gaussian = GaussianNB().fit(X_train, Y_train)
# Test-set predictions; overwrites the Y_pred left by the previous model cell.
Y_pred = gaussian.predict(X_test)
# Training-set accuracy expressed as a percentage with two decimals.
gaussian_train_score = gaussian.score(X_train, Y_train)
acc_gaussian = round(gaussian_train_score * 100, 2)
acc_gaussian
Out[49]:
In [50]:
# Perceptron with default settings, fitted on the training set.
perceptron = Perceptron().fit(X_train, Y_train)
# Test-set predictions; overwrites the Y_pred left by the previous model cell.
Y_pred = perceptron.predict(X_test)
# Training-set accuracy expressed as a percentage with two decimals.
perceptron_train_score = perceptron.score(X_train, Y_train)
acc_perceptron = round(perceptron_train_score * 100, 2)
acc_perceptron
Out[50]:
In [51]:
# Random forest with 100 trees.
# random_state is pinned so that Restart & Run All reproduces the same forest,
# predictions and score; without it every run trains a different forest.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(X_train, Y_train)
# Test-set predictions; overwrites the Y_pred left by the previous model cell.
Y_pred = random_forest.predict(X_test)
# Training-set accuracy as a percentage with two decimals. The score is
# computed once here (the earlier duplicate call whose result was discarded
# has been removed). NOTE: this is accuracy on the training data itself,
# not a held-out estimate.
acc_random_forest = round(random_forest.score(X_train, Y_train) * 100, 2)
acc_random_forest
Out[51]: