In [15]:
import pandas as pd
import numpy as np
In [9]:
# Load the raw passenger table; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='resources/titanic.csv')
In [10]:
# Quick look at the first ten rows of the raw table.
print(data.head(n=10))
In [11]:
# Columns used as model features, in the order they are fed to the classifier.
feature_columns = ["Pclass", "Fare", "Age", "Sex"]
prepared_data = data[feature_columns]
In [13]:
# Show the container type followed by the first ten feature rows, one per line.
print(type(prepared_data), prepared_data.head(n=10), sep="\n")
In [35]:
from sklearn.preprocessing import LabelEncoder

# Drop rows whose Age is missing/non-finite, then rows whose Sex is missing.
pdf1 = prepared_data[np.isfinite(prepared_data['Age'])]
# .copy() gives pdf its own data, so the column assignment below is an
# unambiguous write instead of a write into a slice of prepared_data
# (which raises SettingWithCopyWarning).
pdf = pdf1[pdf1['Sex'].notnull()].copy()

# Encode the Sex strings as integers; dicts['Sex'] keeps the original labels,
# where the position of a label in the list is its integer code.
label = LabelEncoder()
label.fit(pdf['Sex'].drop_duplicates())
dicts = {'Sex': list(label.classes_)}
pdf['Sex'] = label.transform(pdf['Sex'])

print(pdf.head(10))
print(len(pdf))
In [33]:
# Build the target frame using the same row filter as the feature frame
# (finite Age, non-null Sex) so that both keep the same rows.
surv = data[["Survived", "Age", "Sex"]]
surv1 = surv[np.isfinite(surv['Age'])]
# .copy() so that `survived` owns its data rather than being a slice of `data`.
survived = surv1[surv1['Sex'].notnull()].copy()
# NOTE: the former `survived.Sex = survived` assigned the whole DataFrame to a
# single column (an error in current pandas) and has been removed. Sex stays as
# the raw labels; the model-fitting cell reads only the Survived column.
print(survived.head(10))
print(len(survived))
In [37]:
from sklearn.tree import DecisionTreeClassifier

# Fixed random_state keeps the fitted tree reproducible between runs.
clf = DecisionTreeClassifier(random_state=241)
# Features come from the cleaned frame, labels from the matching Survived column.
clf.fit(X=pdf, y=survived["Survived"])
Out[37]:
In [40]:
# Print one feature row (to show the column order) and then the fitted
# importances, which are listed in that same column order.
print(pdf.head(n=1), clf.feature_importances_, sep="\n")
In [41]:
# Presumably the two features judged most important -- confirm against the
# importances printed by the previous cell.
print(*("Age", "Sex"))
In [ ]: