$\mathtt{Accuracy} = \frac{\text{correct predictions}}{\text{total samples}}$


In [24]:
from sklearn import datasets, neighbors, metrics
import pandas as pd
%matplotlib inline

In [6]:
# Load the iris dataset through scikit-learn's `datasets` module.
iris = datasets.load_iris()

In [8]:
# Wrap the feature matrix in a DataFrame, labelling columns with the dataset's feature names.
irisdf = pd.DataFrame(iris.data, columns=iris.feature_names)
# Show the first four rows as a quick sanity check.
irisdf.head(4)


Out[8]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2

In [9]:
# Attach each sample's label from the dataset as a `target` column (mutates irisdf in place).
irisdf["target"] = iris.target

In [10]:
# Preview the first rows to confirm the `target` column was added.
irisdf.head(4)


Out[10]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
0 5.1 3.5 1.4 0.2 0
1 4.9 3.0 1.4 0.2 0
2 4.7 3.2 1.3 0.2 0
3 4.6 3.1 1.5 0.2 0

In [12]:
# Lookup from a class label (as a string) to the single-letter colour
# used for that class in the scatter plot.
cmap = {
    "0": "r",
    "1": "g",
    "2": "b",
}

In [13]:
# Translate every label into its plot colour; labels are stringified
# because `cmap` is keyed by strings.
irisdf["ctarget"] = irisdf["target"].map(lambda label: cmap[str(label)])

In [14]:
# Preview the first rows to confirm the `ctarget` colour column.
irisdf.head(6)


Out[14]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target ctarget
0 5.1 3.5 1.4 0.2 0 r
1 4.9 3.0 1.4 0.2 0 r
2 4.7 3.2 1.3 0.2 0 r
3 4.6 3.1 1.5 0.2 0 r
4 5.0 3.6 1.4 0.2 0 r
5 5.4 3.9 1.7 0.4 0 r

In [17]:
# Petal length against petal width, one point per sample, coloured by class.
irisdf.plot.scatter(
    x='petal length (cm)',
    y='petal width (cm)',
    c=irisdf['ctarget'],
)


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x1139f0978>

In [19]:
# Summary statistics per column; the printed table leaves out the
# non-numeric `ctarget` column.
print(irisdf.describe())


       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         150.000000        150.000000         150.000000   
mean            5.843333          3.054000           3.758667   
std             0.828066          0.433594           1.764420   
min             4.300000          2.000000           1.000000   
25%             5.100000          2.800000           1.600000   
50%             5.800000          3.000000           4.350000   
75%             6.400000          3.300000           5.100000   
max             7.900000          4.400000           6.900000   

       petal width (cm)      target  
count        150.000000  150.000000  
mean           1.198667    1.000000  
std            0.763161    0.819232  
min            0.100000    0.000000  
25%            0.300000    0.000000  
50%            1.300000    1.000000  
75%            1.800000    2.000000  
max            2.500000    2.000000  

In [25]:
def my_classifier(row):
    """Classify one sample with a single petal-length cut-off.

    Parameters
    ----------
    row : mapping
        Any object indexable by the key "petal length (cm)",
        for example one row of the iris DataFrame.

    Returns
    -------
    int
        0 when the petal length is below 2, otherwise 1.
        The rule never returns 2, so it only separates class 0
        from everything else.
    """
    petal_length = row["petal length (cm)"]
    return 0 if petal_length < 2 else 1

# Run the rule on every row (axis=1) and keep the result next to the true labels.
irisdf['predictions_1'] = irisdf.apply(my_classifier, axis=1)

In [26]:
# Preview the first rows, now including the `predictions_1` column.
irisdf.head(6)


Out[26]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target ctarget predictions_1
0 5.1 3.5 1.4 0.2 0 r 0
1 4.9 3.0 1.4 0.2 0 r 0
2 4.7 3.2 1.3 0.2 0 r 0
3 4.6 3.1 1.5 0.2 0 r 0
4 5.0 3.6 1.4 0.2 0 r 0
5 5.4 3.9 1.7 0.4 0 r 0

Plot decision boundaries


In [ ]: