In [15]:
from sklearn.datasets import load_iris
from sklearn import tree
import pydotplus 
import pandas as pd
iris = load_iris()
clf = tree.DecisionTreeClassifier()
clf = clf.fit(iris.data, iris.target)

In [6]:
from IPython.display import Image  
dot_data = tree.export_graphviz(clf, out_file=None, 
                         feature_names=iris.feature_names,  
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)  
graph = pydotplus.graph_from_dot_data(dot_data)  
Image(graph.create_png())


Out[6]:

In [7]:
type(iris.data)


Out[7]:
numpy.ndarray

In [9]:
iris.data


Out[9]:
array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2],
       [ 5.4,  3.9,  1.7,  0.4],
       [ 4.6,  3.4,  1.4,  0.3],
       [ 5. ,  3.4,  1.5,  0.2],
       [ 4.4,  2.9,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5.4,  3.7,  1.5,  0.2],
       [ 4.8,  3.4,  1.6,  0.2],
       [ 4.8,  3. ,  1.4,  0.1],
       [ 4.3,  3. ,  1.1,  0.1],
       [ 5.8,  4. ,  1.2,  0.2],
       [ 5.7,  4.4,  1.5,  0.4],
       [ 5.4,  3.9,  1.3,  0.4],
       [ 5.1,  3.5,  1.4,  0.3],
       [ 5.7,  3.8,  1.7,  0.3],
       [ 5.1,  3.8,  1.5,  0.3],
       [ 5.4,  3.4,  1.7,  0.2],
       [ 5.1,  3.7,  1.5,  0.4],
       [ 4.6,  3.6,  1. ,  0.2],
       [ 5.1,  3.3,  1.7,  0.5],
       [ 4.8,  3.4,  1.9,  0.2],
       [ 5. ,  3. ,  1.6,  0.2],
       [ 5. ,  3.4,  1.6,  0.4],
       [ 5.2,  3.5,  1.5,  0.2],
       [ 5.2,  3.4,  1.4,  0.2],
       [ 4.7,  3.2,  1.6,  0.2],
       [ 4.8,  3.1,  1.6,  0.2],
       [ 5.4,  3.4,  1.5,  0.4],
       [ 5.2,  4.1,  1.5,  0.1],
       [ 5.5,  4.2,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5. ,  3.2,  1.2,  0.2],
       [ 5.5,  3.5,  1.3,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 4.4,  3. ,  1.3,  0.2],
       [ 5.1,  3.4,  1.5,  0.2],
       [ 5. ,  3.5,  1.3,  0.3],
       [ 4.5,  2.3,  1.3,  0.3],
       [ 4.4,  3.2,  1.3,  0.2],
       [ 5. ,  3.5,  1.6,  0.6],
       [ 5.1,  3.8,  1.9,  0.4],
       [ 4.8,  3. ,  1.4,  0.3],
       [ 5.1,  3.8,  1.6,  0.2],
       [ 4.6,  3.2,  1.4,  0.2],
       [ 5.3,  3.7,  1.5,  0.2],
       [ 5. ,  3.3,  1.4,  0.2],
       [ 7. ,  3.2,  4.7,  1.4],
       [ 6.4,  3.2,  4.5,  1.5],
       [ 6.9,  3.1,  4.9,  1.5],
       [ 5.5,  2.3,  4. ,  1.3],
       [ 6.5,  2.8,  4.6,  1.5],
       [ 5.7,  2.8,  4.5,  1.3],
       [ 6.3,  3.3,  4.7,  1.6],
       [ 4.9,  2.4,  3.3,  1. ],
       [ 6.6,  2.9,  4.6,  1.3],
       [ 5.2,  2.7,  3.9,  1.4],
       [ 5. ,  2. ,  3.5,  1. ],
       [ 5.9,  3. ,  4.2,  1.5],
       [ 6. ,  2.2,  4. ,  1. ],
       [ 6.1,  2.9,  4.7,  1.4],
       [ 5.6,  2.9,  3.6,  1.3],
       [ 6.7,  3.1,  4.4,  1.4],
       [ 5.6,  3. ,  4.5,  1.5],
       [ 5.8,  2.7,  4.1,  1. ],
       [ 6.2,  2.2,  4.5,  1.5],
       [ 5.6,  2.5,  3.9,  1.1],
       [ 5.9,  3.2,  4.8,  1.8],
       [ 6.1,  2.8,  4. ,  1.3],
       [ 6.3,  2.5,  4.9,  1.5],
       [ 6.1,  2.8,  4.7,  1.2],
       [ 6.4,  2.9,  4.3,  1.3],
       [ 6.6,  3. ,  4.4,  1.4],
       [ 6.8,  2.8,  4.8,  1.4],
       [ 6.7,  3. ,  5. ,  1.7],
       [ 6. ,  2.9,  4.5,  1.5],
       [ 5.7,  2.6,  3.5,  1. ],
       [ 5.5,  2.4,  3.8,  1.1],
       [ 5.5,  2.4,  3.7,  1. ],
       [ 5.8,  2.7,  3.9,  1.2],
       [ 6. ,  2.7,  5.1,  1.6],
       [ 5.4,  3. ,  4.5,  1.5],
       [ 6. ,  3.4,  4.5,  1.6],
       [ 6.7,  3.1,  4.7,  1.5],
       [ 6.3,  2.3,  4.4,  1.3],
       [ 5.6,  3. ,  4.1,  1.3],
       [ 5.5,  2.5,  4. ,  1.3],
       [ 5.5,  2.6,  4.4,  1.2],
       [ 6.1,  3. ,  4.6,  1.4],
       [ 5.8,  2.6,  4. ,  1.2],
       [ 5. ,  2.3,  3.3,  1. ],
       [ 5.6,  2.7,  4.2,  1.3],
       [ 5.7,  3. ,  4.2,  1.2],
       [ 5.7,  2.9,  4.2,  1.3],
       [ 6.2,  2.9,  4.3,  1.3],
       [ 5.1,  2.5,  3. ,  1.1],
       [ 5.7,  2.8,  4.1,  1.3],
       [ 6.3,  3.3,  6. ,  2.5],
       [ 5.8,  2.7,  5.1,  1.9],
       [ 7.1,  3. ,  5.9,  2.1],
       [ 6.3,  2.9,  5.6,  1.8],
       [ 6.5,  3. ,  5.8,  2.2],
       [ 7.6,  3. ,  6.6,  2.1],
       [ 4.9,  2.5,  4.5,  1.7],
       [ 7.3,  2.9,  6.3,  1.8],
       [ 6.7,  2.5,  5.8,  1.8],
       [ 7.2,  3.6,  6.1,  2.5],
       [ 6.5,  3.2,  5.1,  2. ],
       [ 6.4,  2.7,  5.3,  1.9],
       [ 6.8,  3. ,  5.5,  2.1],
       [ 5.7,  2.5,  5. ,  2. ],
       [ 5.8,  2.8,  5.1,  2.4],
       [ 6.4,  3.2,  5.3,  2.3],
       [ 6.5,  3. ,  5.5,  1.8],
       [ 7.7,  3.8,  6.7,  2.2],
       [ 7.7,  2.6,  6.9,  2.3],
       [ 6. ,  2.2,  5. ,  1.5],
       [ 6.9,  3.2,  5.7,  2.3],
       [ 5.6,  2.8,  4.9,  2. ],
       [ 7.7,  2.8,  6.7,  2. ],
       [ 6.3,  2.7,  4.9,  1.8],
       [ 6.7,  3.3,  5.7,  2.1],
       [ 7.2,  3.2,  6. ,  1.8],
       [ 6.2,  2.8,  4.8,  1.8],
       [ 6.1,  3. ,  4.9,  1.8],
       [ 6.4,  2.8,  5.6,  2.1],
       [ 7.2,  3. ,  5.8,  1.6],
       [ 7.4,  2.8,  6.1,  1.9],
       [ 7.9,  3.8,  6.4,  2. ],
       [ 6.4,  2.8,  5.6,  2.2],
       [ 6.3,  2.8,  5.1,  1.5],
       [ 6.1,  2.6,  5.6,  1.4],
       [ 7.7,  3. ,  6.1,  2.3],
       [ 6.3,  3.4,  5.6,  2.4],
       [ 6.4,  3.1,  5.5,  1.8],
       [ 6. ,  3. ,  4.8,  1.8],
       [ 6.9,  3.1,  5.4,  2.1],
       [ 6.7,  3.1,  5.6,  2.4],
       [ 6.9,  3.1,  5.1,  2.3],
       [ 5.8,  2.7,  5.1,  1.9],
       [ 6.8,  3.2,  5.9,  2.3],
       [ 6.7,  3.3,  5.7,  2.5],
       [ 6.7,  3. ,  5.2,  2.3],
       [ 6.3,  2.5,  5. ,  1.9],
       [ 6.5,  3. ,  5.2,  2. ],
       [ 6.2,  3.4,  5.4,  2.3],
       [ 5.9,  3. ,  5.1,  1.8]])

In [10]:
clf.predict([[ 5.1,  3.5,  1.4,  0.2]])


Out[10]:
array([0])

In [11]:
iris.target


Out[11]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [12]:
clf.predict([[5.9,  3. ,  5.1,  1.8]])


Out[12]:
array([2])

In [16]:
book = pd.read_csv("/Users/page/data/book/newbook.csv")

In [17]:
book.head()


Out[17]:
Unnamed: 0 CBS_Class CBS_MY_Order CBS_MY_Rate CBS_XS_MY CBS_DXPZ CBS_XSCS CBS_CRTAX Book_Class CBS_BK_Average_price ... FXS_Class User_Area User_Get_Price FXS_Discount FXS_BK_MY_Order FXS_BK_MY_Rate FXS_BK_MY FXS_BK_DXPZ FXS_BK_XSCS FXS_BK_Average_Price
0 0 1 11 1.0934 90678033.8 2921 2357742 0.07 1 34.83 ... 3 2 9.0 0.585 2 3.4196 15289978.1 1579 426206 35.87
1 1 1 11 1.0934 90678033.8 2921 2357742 0.07 6 17.75 ... 3 2 9.0 0.585 484 0.0000 36.0 1 2 18.00
2 2 1 11 1.0934 90678033.8 2921 2357742 0.08 5 32.51 ... 3 2 9.0 0.585 235 0.0099 15346.0 7 478 32.10
3 3 1 11 1.0934 90678033.8 2921 2357742 0.07 3 47.25 ... 3 2 9.0 0.585 3 3.1915 10890168.2 405 232732 46.79
4 4 1 11 1.0934 90678033.8 2921 2357742 0.08 7 60.70 ... 3 2 9.0 0.585 181 0.0240 59391.0 21 1022 58.11

5 rows × 25 columns


In [18]:
book.columns


Out[18]:
Index(['Unnamed: 0', 'CBS_Class', 'CBS_MY_Order', 'CBS_MY_Rate', 'CBS_XS_MY',
       'CBS_DXPZ', 'CBS_XSCS', 'CBS_CRTAX', 'Book_Class',
       'CBS_BK_Average_price', 'CBS_BK_MY_Order', 'CBS_BK_MY_Rate',
       'CBS_BK_XS_MY', 'CBS_BK_DXPZ', 'CBS_BK_XSCS', 'FXS_Class', 'User_Area',
       'User_Get_Price', 'FXS_Discount', 'FXS_BK_MY_Order', 'FXS_BK_MY_Rate',
       'FXS_BK_MY', 'FXS_BK_DXPZ', 'FXS_BK_XSCS', 'FXS_BK_Average_Price'],
      dtype='object')

In [19]:
x = book[['CBS_Class', 'CBS_MY_Order', 'CBS_MY_Rate',
       'CBS_XS_MY', 'CBS_DXPZ', 'CBS_XSCS', 'CBS_CRTAX', 'Book_Class',
       'CBS_BK_Average_price', 'CBS_BK_MY_Order', 'CBS_BK_MY_Rate',
       'CBS_BK_XS_MY', 'CBS_BK_DXPZ', 'FXS_Class', 'User_Area',
       'User_Get_Price', 'FXS_Discount', 'FXS_BK_MY_Order', 'FXS_BK_MY_Rate',
       'FXS_BK_MY', 'FXS_BK_DXPZ', 'FXS_BK_XSCS', 'FXS_BK_Average_Price']]

In [20]:
y = book[['CBS_BK_XSCS']]

In [21]:
clf = tree.DecisionTreeClassifier()
clf = clf.fit(x, y)

In [22]:
from IPython.display import Image

In [23]:
dot_data = tree.export_graphviz(clf, out_file=None, 
                         feature_names=x,
                         class_names=y,  
                         filled=True, rounded=True,
                         special_characters=True)


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2133             try:
-> 2134                 return self._engine.get_loc(key)
   2135             except KeyError:

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: 11

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-23-8de1aa33087e> in <module>()
      3                          class_names=y,
      4                          filled=True, rounded=True,
----> 5                          special_characters=True)

/usr/local/lib/python3.5/site-packages/sklearn/tree/export.py in export_graphviz(decision_tree, out_file, max_depth, feature_names, class_names, label, filled, leaves_parallel, impurity, node_ids, proportion, rotate, rounded, special_characters)
    431             recurse(decision_tree, 0, criterion="impurity")
    432         else:
--> 433             recurse(decision_tree.tree_, 0, criterion=decision_tree.criterion)
    434 
    435         # If required, draw leaf nodes at same depth as each other

/usr/local/lib/python3.5/site-packages/sklearn/tree/export.py in recurse(tree, node_id, criterion, parent, depth)
    319             out_file.write('%d [label=%s'
    320                            % (node_id,
--> 321                               node_to_str(tree, node_id, criterion)))
    322 
    323             if filled:

/usr/local/lib/python3.5/site-packages/sklearn/tree/export.py in node_to_str(tree, node_id, criterion)
    217             # Always write node decision criteria, except for leaves
    218             if feature_names is not None:
--> 219                 feature = feature_names[tree.feature[node_id]]
    220             else:
    221                 feature = "X%s%s%s" % (characters[1],

/usr/local/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2057             return self._getitem_multilevel(key)
   2058         else:
-> 2059             return self._getitem_column(key)
   2060 
   2061     def _getitem_column(self, key):

/usr/local/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2064         # get column
   2065         if self.columns.is_unique:
-> 2066             return self._get_item_cache(key)
   2067 
   2068         # duplicate columns & possible reduce dimensionality

/usr/local/lib/python3.5/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1384         res = cache.get(item)
   1385         if res is None:
-> 1386             values = self._data.get(item)
   1387             res = self._box_item_values(item, values)
   1388             cache[item] = res

/usr/local/lib/python3.5/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3541 
   3542             if not isnull(item):
-> 3543                 loc = self.items.get_loc(item)
   3544             else:
   3545                 indexer = np.arange(len(self.items))[isnull(self.items)]

/usr/local/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2134                 return self._engine.get_loc(key)
   2135             except KeyError:
-> 2136                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2137 
   2138         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: 11

In [24]:
print(book.dtypes)


Unnamed: 0                int64
CBS_Class                 int64
CBS_MY_Order              int64
CBS_MY_Rate             float64
CBS_XS_MY               float64
CBS_DXPZ                  int64
CBS_XSCS                  int64
CBS_CRTAX               float64
Book_Class                int64
CBS_BK_Average_price    float64
CBS_BK_MY_Order           int64
CBS_BK_MY_Rate          float64
CBS_BK_XS_MY            float64
CBS_BK_DXPZ               int64
CBS_BK_XSCS               int64
FXS_Class                 int64
User_Area                 int64
User_Get_Price          float64
FXS_Discount            float64
FXS_BK_MY_Order           int64
FXS_BK_MY_Rate          float64
FXS_BK_MY               float64
FXS_BK_DXPZ               int64
FXS_BK_XSCS               int64
FXS_BK_Average_Price    float64
dtype: object

In [25]:
print(x.dtypes)


CBS_Class                 int64
CBS_MY_Order              int64
CBS_MY_Rate             float64
CBS_XS_MY               float64
CBS_DXPZ                  int64
CBS_XSCS                  int64
CBS_CRTAX               float64
Book_Class                int64
CBS_BK_Average_price    float64
CBS_BK_MY_Order           int64
CBS_BK_MY_Rate          float64
CBS_BK_XS_MY            float64
CBS_BK_DXPZ               int64
FXS_Class                 int64
User_Area                 int64
User_Get_Price          float64
FXS_Discount            float64
FXS_BK_MY_Order           int64
FXS_BK_MY_Rate          float64
FXS_BK_MY               float64
FXS_BK_DXPZ               int64
FXS_BK_XSCS               int64
FXS_BK_Average_Price    float64
dtype: object

In [26]:
type(book)


Out[26]:
pandas.core.frame.DataFrame

In [27]:
type(x)


Out[27]:
pandas.core.frame.DataFrame

In [35]:
type(iris.feature_names)


Out[35]:
list

In [36]:
type(iris.target_names)


Out[36]:
numpy.ndarray

In [32]:
iris.data.shape


Out[32]:
(150, 4)

In [33]:
iris.target.shape


Out[33]:
(150,)

In [34]:
iris.feature_names.shape


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-34-98f658ef357a> in <module>()
----> 1 iris.feature_names.shape

AttributeError: 'list' object has no attribute 'shape'

In [38]:
iris.target_names.shape


Out[38]:
(3,)

In [1]:
import numpy as np
import pylab as pl
x = [1, 2, 3, 4, 5]# Make an array of x values
y = [1, 4, 9, 16, 25]# Make an array of y values for each x value
pl.plot(x, y)# use pylab to plot x and y
pl.show()# show the plot on the screen



In [ ]: