In [2]:
import pandas as pd

In [3]:
flowers = pd.read_csv("flowers_data.csv")

In [4]:
flowers


Out[4]:
petal length (cm) petal width (cm) sepal length (cm) sepal width (cm) target class_names
0 1.4 0.2 5.1 3.5 0 setosa
1 1.4 0.2 4.9 3.0 0 setosa
2 1.3 0.2 4.7 3.2 0 setosa
3 1.5 0.2 4.6 3.1 0 setosa
4 1.4 0.2 5.0 3.6 0 setosa
5 1.7 0.4 5.4 3.9 0 setosa
6 1.4 0.3 4.6 3.4 0 setosa
7 1.5 0.2 5.0 3.4 0 setosa
8 1.4 0.2 4.4 2.9 0 setosa
9 1.5 0.1 4.9 3.1 0 setosa
10 1.5 0.2 5.4 3.7 0 setosa
11 1.6 0.2 4.8 3.4 0 setosa
12 1.4 0.1 4.8 3.0 0 setosa
13 1.1 0.1 4.3 3.0 0 setosa
14 1.2 0.2 5.8 4.0 0 setosa
15 1.5 0.4 5.7 4.4 0 setosa
16 1.3 0.4 5.4 3.9 0 setosa
17 1.4 0.3 5.1 3.5 0 setosa
18 1.7 0.3 5.7 3.8 0 setosa
19 1.5 0.3 5.1 3.8 0 setosa
20 1.7 0.2 5.4 3.4 0 setosa
21 1.5 0.4 5.1 3.7 0 setosa
22 1.0 0.2 4.6 3.6 0 setosa
23 1.7 0.5 5.1 3.3 0 setosa
24 1.9 0.2 4.8 3.4 0 setosa
25 1.6 0.2 5.0 3.0 0 setosa
26 1.6 0.4 5.0 3.4 0 setosa
27 1.5 0.2 5.2 3.5 0 setosa
28 1.4 0.2 5.2 3.4 0 setosa
29 1.6 0.2 4.7 3.2 0 setosa
... ... ... ... ... ... ...
70 4.8 1.8 5.9 3.2 1 versicolor
71 4.0 1.3 6.1 2.8 1 versicolor
72 4.9 1.5 6.3 2.5 1 versicolor
73 4.7 1.2 6.1 2.8 1 versicolor
74 4.3 1.3 6.4 2.9 1 versicolor
75 4.4 1.4 6.6 3.0 1 versicolor
76 4.8 1.4 6.8 2.8 1 versicolor
77 5.0 1.7 6.7 3.0 1 versicolor
78 4.5 1.5 6.0 2.9 1 versicolor
79 3.5 1.0 5.7 2.6 1 versicolor
80 3.8 1.1 5.5 2.4 1 versicolor
81 3.7 1.0 5.5 2.4 1 versicolor
82 3.9 1.2 5.8 2.7 1 versicolor
83 5.1 1.6 6.0 2.7 1 versicolor
84 4.5 1.5 5.4 3.0 1 versicolor
85 4.5 1.6 6.0 3.4 1 versicolor
86 4.7 1.5 6.7 3.1 1 versicolor
87 4.4 1.3 6.3 2.3 1 versicolor
88 4.1 1.3 5.6 3.0 1 versicolor
89 4.0 1.3 5.5 2.5 1 versicolor
90 4.4 1.2 5.5 2.6 1 versicolor
91 4.6 1.4 6.1 3.0 1 versicolor
92 4.0 1.2 5.8 2.6 1 versicolor
93 3.3 1.0 5.0 2.3 1 versicolor
94 4.2 1.3 5.6 2.7 1 versicolor
95 4.2 1.2 5.7 3.0 1 versicolor
96 4.2 1.3 5.7 2.9 1 versicolor
97 4.3 1.3 6.2 2.9 1 versicolor
98 3.0 1.1 5.1 2.5 1 versicolor
99 4.1 1.3 5.7 2.8 1 versicolor

100 rows × 6 columns


In [7]:
flowers = flowers[(flowers.target == 1)|(flowers.target == 0)]

In [8]:
flowers


Out[8]:
petal length (cm) petal width (cm) sepal length (cm) sepal width (cm) target class_names
0 1.4 0.2 5.1 3.5 0 setosa
1 1.4 0.2 4.9 3.0 0 setosa
2 1.3 0.2 4.7 3.2 0 setosa
3 1.5 0.2 4.6 3.1 0 setosa
4 1.4 0.2 5.0 3.6 0 setosa
5 1.7 0.4 5.4 3.9 0 setosa
6 1.4 0.3 4.6 3.4 0 setosa
7 1.5 0.2 5.0 3.4 0 setosa
8 1.4 0.2 4.4 2.9 0 setosa
9 1.5 0.1 4.9 3.1 0 setosa
10 1.5 0.2 5.4 3.7 0 setosa
11 1.6 0.2 4.8 3.4 0 setosa
12 1.4 0.1 4.8 3.0 0 setosa
13 1.1 0.1 4.3 3.0 0 setosa
14 1.2 0.2 5.8 4.0 0 setosa
15 1.5 0.4 5.7 4.4 0 setosa
16 1.3 0.4 5.4 3.9 0 setosa
17 1.4 0.3 5.1 3.5 0 setosa
18 1.7 0.3 5.7 3.8 0 setosa
19 1.5 0.3 5.1 3.8 0 setosa
20 1.7 0.2 5.4 3.4 0 setosa
21 1.5 0.4 5.1 3.7 0 setosa
22 1.0 0.2 4.6 3.6 0 setosa
23 1.7 0.5 5.1 3.3 0 setosa
24 1.9 0.2 4.8 3.4 0 setosa
25 1.6 0.2 5.0 3.0 0 setosa
26 1.6 0.4 5.0 3.4 0 setosa
27 1.5 0.2 5.2 3.5 0 setosa
28 1.4 0.2 5.2 3.4 0 setosa
29 1.6 0.2 4.7 3.2 0 setosa
... ... ... ... ... ... ...
70 4.8 1.8 5.9 3.2 1 versicolor
71 4.0 1.3 6.1 2.8 1 versicolor
72 4.9 1.5 6.3 2.5 1 versicolor
73 4.7 1.2 6.1 2.8 1 versicolor
74 4.3 1.3 6.4 2.9 1 versicolor
75 4.4 1.4 6.6 3.0 1 versicolor
76 4.8 1.4 6.8 2.8 1 versicolor
77 5.0 1.7 6.7 3.0 1 versicolor
78 4.5 1.5 6.0 2.9 1 versicolor
79 3.5 1.0 5.7 2.6 1 versicolor
80 3.8 1.1 5.5 2.4 1 versicolor
81 3.7 1.0 5.5 2.4 1 versicolor
82 3.9 1.2 5.8 2.7 1 versicolor
83 5.1 1.6 6.0 2.7 1 versicolor
84 4.5 1.5 5.4 3.0 1 versicolor
85 4.5 1.6 6.0 3.4 1 versicolor
86 4.7 1.5 6.7 3.1 1 versicolor
87 4.4 1.3 6.3 2.3 1 versicolor
88 4.1 1.3 5.6 3.0 1 versicolor
89 4.0 1.3 5.5 2.5 1 versicolor
90 4.4 1.2 5.5 2.6 1 versicolor
91 4.6 1.4 6.1 3.0 1 versicolor
92 4.0 1.2 5.8 2.6 1 versicolor
93 3.3 1.0 5.0 2.3 1 versicolor
94 4.2 1.3 5.6 2.7 1 versicolor
95 4.2 1.2 5.7 3.0 1 versicolor
96 4.2 1.3 5.7 2.9 1 versicolor
97 4.3 1.3 6.2 2.9 1 versicolor
98 3.0 1.1 5.1 2.5 1 versicolor
99 4.1 1.3 5.7 2.8 1 versicolor

100 rows × 6 columns


In [11]:
set(flowers.target)


Out[11]:
{0, 1}

In [10]:
len(flowers.target)


Out[10]:
100

In [12]:
plt


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-775d6b1824e5> in <module>()
----> 1 plt

NameError: name 'plt' is not defined

In [16]:
import matplotlib.pyplot as plt

In [17]:
plt.scatter(flowers["sepal length (cm)"][flowers.target == 0], 
            flowers["sepal width (cm)"][flowers.target == 0], 
            color='gold')


Out[17]:
<matplotlib.collections.PathCollection at 0x7fb3764f8320>

In [18]:
plt.scatter(flowers["sepal length (cm)"][flowers.target == 1], 
            flowers["sepal width (cm)"][flowers.target == 1], 
            color='purple')


Out[18]:
<matplotlib.collections.PathCollection at 0x7fb33ff4f2b0>

In [19]:
plt.show()



In [20]:
flowers.index


Out[20]:
Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
            68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
            85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
           dtype='int64')

In [30]:
for label in flowers["label"][flowers["sepal length (cm)"]<5]:
    print(label)


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2133             try:
-> 2134                 return self._engine.get_loc(key)
   2135             except KeyError:

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: 'label'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-30-61a74510f838> in <module>()
----> 1 for label in flowers["label"][flowers["sepal length (cm)"]<5]:
      2     print(label)

/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2057             return self._getitem_multilevel(key)
   2058         else:
-> 2059             return self._getitem_column(key)
   2060 
   2061     def _getitem_column(self, key):

/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2064         # get column
   2065         if self.columns.is_unique:
-> 2066             return self._get_item_cache(key)
   2067 
   2068         # duplicate columns & possible reduce dimensionality

/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1384         res = cache.get(item)
   1385         if res is None:
-> 1386             values = self._data.get(item)
   1387             res = self._box_item_values(item, values)
   1388             cache[item] = res

/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3541 
   3542             if not isnull(item):
-> 3543                 loc = self.items.get_loc(item)
   3544             else:
   3545                 indexer = np.arange(len(self.items))[isnull(self.items)]

/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2134                 return self._engine.get_loc(key)
   2135             except KeyError:
-> 2136                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2137 
   2138         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()

KeyError: 'label'

In [32]:
flowers.ini[0]


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-32-6e6dde73c712> in <module>()
----> 1 flowers.ini[0]

/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/generic.py in __getattr__(self, name)
   2742             if name in self._info_axis:
   2743                 return self[name]
-> 2744             return object.__getattribute__(self, name)
   2745 
   2746     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'ini'

In [41]:
flowers_data = [[3,4,5,6,7,8,9],
                [8,5,4,7,2,3,1]]

In [42]:
labels =[1,1,1,0,0,0,1]

In [43]:
data_idx = [0,1,2,3,4,5,6]
left_idx = [0,1,2]
right_idx = [3,4,5,6]

In [46]:
val = 5
left_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] <= val]

In [47]:
left_list


Out[47]:
[1, 1, 1]

In [48]:
val = 5
right_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] > val]

In [49]:
right_list


Out[49]:
[0, 0, 0, 1]

In [60]:
for val in flowers_data[0]:
    left_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] <= val]
    right_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] > val]
    print(left_list, right_list, 
          left_list.count(1)/len(left_list) * right_list.count(0)/len(right_list), 
          left_list.count(0)/len(left_list) * right_list.count(1)/len(right_list),
         right_list.count(0),
         left_list.count(0),
         len(right_list),
         len(left_list))


[1] [1, 1, 0, 0, 0, 1] 0.5 0.0 3 0 6 1
[1, 1] [1, 0, 0, 0, 1] 0.6 0.0 3 0 5 2
[1, 1, 1] [0, 0, 0, 1] 0.75 0.0 3 0 4 3
[1, 1, 1, 0] [0, 0, 1] 0.5 0.08333333333333333 2 1 3 4
[1, 1, 1, 0, 0] [0, 1] 0.3 0.2 1 2 2 5
[1, 1, 1, 0, 0, 0] [1] 0.0 0.5 0 3 1 6
---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
<ipython-input-60-1d1c1387b568> in <module>()
      3     right_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] > val]
      4     print(left_list, right_list, 
----> 5           left_list.count(1)/len(left_list) * right_list.count(0)/len(right_list),
      6           left_list.count(0)/len(left_list) * right_list.count(1)/len(right_list),
      7          right_list.count(0),

ZeroDivisionError: float division by zero

In [ ]: