In [1]:
import pandas as pd
%matplotlib inline
from sklearn import tree
import matplotlib.pyplot as plt
import numpy as np
In [2]:
# Toy data set: nine characters, four values each, stored flat in the order
# (hair_length, weight, age, class).
sample_list = [
    0, 250, 36, 'M',
    10, 150, 34, 'F',
    2, 90, 10, 'M',
    6, 78, 8, 'F',
    4, 20, 1, 'F',
    1, 170, 70, 'M',
    8, 160, 41, 'F',
    10, 180, 38, 'M',
    6, 200, 45, 'M',
]

# Build the frame from 4-item row slices instead of np.asarray(...).reshape(9, 4):
# np.asarray on a mixed int/str list coerces EVERY element to a string, which
# silently turns hair_length / weight / age into text columns. Row lists let
# pandas infer an integer dtype for the numeric columns and keep 'class' as text.
df = pd.DataFrame(
    [sample_list[start:start + 4] for start in range(0, len(sample_list), 4)],
    index=['Homer', 'Marge', 'Bart', 'Lisa', 'Maggie', 'Abe', 'Selma', 'Otto', 'Krusty'],
    columns=['hair_length', 'weight', 'age', 'class'],
)
In [3]:
# Display the assembled table to check its rows and columns.
df
Out[3]:
In [4]:
# Feature matrix (the attributes): one row per character, columns in the order
# hair_length, weight, age.
# DataFrame.as_matrix() was deprecated and later removed from pandas; `.values`
# returns the same NumPy array and is available in old and new pandas alike.
x = df[['hair_length', 'weight', 'age']].values
x
Out[4]:
In [5]:
# Target vector: the 'class' label of each row (what the tree learns to predict).
# Series.as_matrix() was deprecated and later removed from pandas; `.values`
# returns the same NumPy array and is available in old and new pandas alike.
y = df['class'].values
y
Out[5]:
In [7]:
# Decision-tree classifier. The estimator permutes candidate features randomly
# when searching for splits, so ties can be broken differently between runs;
# pinning random_state makes the fitted tree and the scores computed from it
# reproducible on a fresh kernel.
dt = tree.DecisionTreeClassifier(random_state=0)
In [8]:
# Train the classifier on the feature matrix x and the labels y.
dt = dt.fit(X=x, y=y)
In [13]:
Out[13]:
In [15]:
# cross_val_score lives in sklearn.model_selection; the old
# sklearn.cross_validation module was deprecated and then removed from
# scikit-learn. Fall back to the legacy location only for very old installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
In [16]:
# Reference: http://scikit-learn.org/stable/modules/cross_validation.html#computing-cross-validated-metrics
# Cross-validate the tree on (x, y) with cv=5; the call hands back an array of
# scores, one entry per cross-validation run.
scores = cross_val_score(estimator=dt, X=x, y=y, cv=5)
In [17]:
# Average of the cross-validation scores.
scores.mean()
Out[17]:
Comic
In [23]:
# One unlabeled sample given as (hair_length, weight, age); these are the same
# three values as the first row of the training data.
x_test = np.array([0, 250, 36])
In [24]:
# predict() expects a 2-D array of shape (n_samples, n_features). Passing a
# bare 1-D sample is rejected by current scikit-learn ("Expected 2D array"),
# so promote the single sample to shape (1, 3) first. np.atleast_2d leaves an
# already 2-D input untouched.
predicted = dt.predict(np.atleast_2d(x_test))
In [25]:
# Show the result of dt.predict for x_test.
predicted
Out[25]:
In [ ]: