In [2]:
import pandas as pd
In [3]:
flowers = pd.read_csv("flowers_data.csv")
In [4]:
flowers
Out[4]:
petal length (cm)
petal width (cm)
sepal length (cm)
sepal width (cm)
target
class_names
0
1.4
0.2
5.1
3.5
0
setosa
1
1.4
0.2
4.9
3.0
0
setosa
2
1.3
0.2
4.7
3.2
0
setosa
3
1.5
0.2
4.6
3.1
0
setosa
4
1.4
0.2
5.0
3.6
0
setosa
5
1.7
0.4
5.4
3.9
0
setosa
6
1.4
0.3
4.6
3.4
0
setosa
7
1.5
0.2
5.0
3.4
0
setosa
8
1.4
0.2
4.4
2.9
0
setosa
9
1.5
0.1
4.9
3.1
0
setosa
10
1.5
0.2
5.4
3.7
0
setosa
11
1.6
0.2
4.8
3.4
0
setosa
12
1.4
0.1
4.8
3.0
0
setosa
13
1.1
0.1
4.3
3.0
0
setosa
14
1.2
0.2
5.8
4.0
0
setosa
15
1.5
0.4
5.7
4.4
0
setosa
16
1.3
0.4
5.4
3.9
0
setosa
17
1.4
0.3
5.1
3.5
0
setosa
18
1.7
0.3
5.7
3.8
0
setosa
19
1.5
0.3
5.1
3.8
0
setosa
20
1.7
0.2
5.4
3.4
0
setosa
21
1.5
0.4
5.1
3.7
0
setosa
22
1.0
0.2
4.6
3.6
0
setosa
23
1.7
0.5
5.1
3.3
0
setosa
24
1.9
0.2
4.8
3.4
0
setosa
25
1.6
0.2
5.0
3.0
0
setosa
26
1.6
0.4
5.0
3.4
0
setosa
27
1.5
0.2
5.2
3.5
0
setosa
28
1.4
0.2
5.2
3.4
0
setosa
29
1.6
0.2
4.7
3.2
0
setosa
...
...
...
...
...
...
...
70
4.8
1.8
5.9
3.2
1
versicolor
71
4.0
1.3
6.1
2.8
1
versicolor
72
4.9
1.5
6.3
2.5
1
versicolor
73
4.7
1.2
6.1
2.8
1
versicolor
74
4.3
1.3
6.4
2.9
1
versicolor
75
4.4
1.4
6.6
3.0
1
versicolor
76
4.8
1.4
6.8
2.8
1
versicolor
77
5.0
1.7
6.7
3.0
1
versicolor
78
4.5
1.5
6.0
2.9
1
versicolor
79
3.5
1.0
5.7
2.6
1
versicolor
80
3.8
1.1
5.5
2.4
1
versicolor
81
3.7
1.0
5.5
2.4
1
versicolor
82
3.9
1.2
5.8
2.7
1
versicolor
83
5.1
1.6
6.0
2.7
1
versicolor
84
4.5
1.5
5.4
3.0
1
versicolor
85
4.5
1.6
6.0
3.4
1
versicolor
86
4.7
1.5
6.7
3.1
1
versicolor
87
4.4
1.3
6.3
2.3
1
versicolor
88
4.1
1.3
5.6
3.0
1
versicolor
89
4.0
1.3
5.5
2.5
1
versicolor
90
4.4
1.2
5.5
2.6
1
versicolor
91
4.6
1.4
6.1
3.0
1
versicolor
92
4.0
1.2
5.8
2.6
1
versicolor
93
3.3
1.0
5.0
2.3
1
versicolor
94
4.2
1.3
5.6
2.7
1
versicolor
95
4.2
1.2
5.7
3.0
1
versicolor
96
4.2
1.3
5.7
2.9
1
versicolor
97
4.3
1.3
6.2
2.9
1
versicolor
98
3.0
1.1
5.1
2.5
1
versicolor
99
4.1
1.3
5.7
2.8
1
versicolor
100 rows × 6 columns
In [7]:
flowers = flowers[(flowers.target == 1)|(flowers.target == 0)]
In [8]:
flowers
Out[8]:
petal length (cm)
petal width (cm)
sepal length (cm)
sepal width (cm)
target
class_names
0
1.4
0.2
5.1
3.5
0
setosa
1
1.4
0.2
4.9
3.0
0
setosa
2
1.3
0.2
4.7
3.2
0
setosa
3
1.5
0.2
4.6
3.1
0
setosa
4
1.4
0.2
5.0
3.6
0
setosa
5
1.7
0.4
5.4
3.9
0
setosa
6
1.4
0.3
4.6
3.4
0
setosa
7
1.5
0.2
5.0
3.4
0
setosa
8
1.4
0.2
4.4
2.9
0
setosa
9
1.5
0.1
4.9
3.1
0
setosa
10
1.5
0.2
5.4
3.7
0
setosa
11
1.6
0.2
4.8
3.4
0
setosa
12
1.4
0.1
4.8
3.0
0
setosa
13
1.1
0.1
4.3
3.0
0
setosa
14
1.2
0.2
5.8
4.0
0
setosa
15
1.5
0.4
5.7
4.4
0
setosa
16
1.3
0.4
5.4
3.9
0
setosa
17
1.4
0.3
5.1
3.5
0
setosa
18
1.7
0.3
5.7
3.8
0
setosa
19
1.5
0.3
5.1
3.8
0
setosa
20
1.7
0.2
5.4
3.4
0
setosa
21
1.5
0.4
5.1
3.7
0
setosa
22
1.0
0.2
4.6
3.6
0
setosa
23
1.7
0.5
5.1
3.3
0
setosa
24
1.9
0.2
4.8
3.4
0
setosa
25
1.6
0.2
5.0
3.0
0
setosa
26
1.6
0.4
5.0
3.4
0
setosa
27
1.5
0.2
5.2
3.5
0
setosa
28
1.4
0.2
5.2
3.4
0
setosa
29
1.6
0.2
4.7
3.2
0
setosa
...
...
...
...
...
...
...
70
4.8
1.8
5.9
3.2
1
versicolor
71
4.0
1.3
6.1
2.8
1
versicolor
72
4.9
1.5
6.3
2.5
1
versicolor
73
4.7
1.2
6.1
2.8
1
versicolor
74
4.3
1.3
6.4
2.9
1
versicolor
75
4.4
1.4
6.6
3.0
1
versicolor
76
4.8
1.4
6.8
2.8
1
versicolor
77
5.0
1.7
6.7
3.0
1
versicolor
78
4.5
1.5
6.0
2.9
1
versicolor
79
3.5
1.0
5.7
2.6
1
versicolor
80
3.8
1.1
5.5
2.4
1
versicolor
81
3.7
1.0
5.5
2.4
1
versicolor
82
3.9
1.2
5.8
2.7
1
versicolor
83
5.1
1.6
6.0
2.7
1
versicolor
84
4.5
1.5
5.4
3.0
1
versicolor
85
4.5
1.6
6.0
3.4
1
versicolor
86
4.7
1.5
6.7
3.1
1
versicolor
87
4.4
1.3
6.3
2.3
1
versicolor
88
4.1
1.3
5.6
3.0
1
versicolor
89
4.0
1.3
5.5
2.5
1
versicolor
90
4.4
1.2
5.5
2.6
1
versicolor
91
4.6
1.4
6.1
3.0
1
versicolor
92
4.0
1.2
5.8
2.6
1
versicolor
93
3.3
1.0
5.0
2.3
1
versicolor
94
4.2
1.3
5.6
2.7
1
versicolor
95
4.2
1.2
5.7
3.0
1
versicolor
96
4.2
1.3
5.7
2.9
1
versicolor
97
4.3
1.3
6.2
2.9
1
versicolor
98
3.0
1.1
5.1
2.5
1
versicolor
99
4.1
1.3
5.7
2.8
1
versicolor
100 rows × 6 columns
In [11]:
set(flowers.target)
Out[11]:
{0, 1}
In [10]:
len(flowers.target)
Out[10]:
100
In [12]:
plt
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-12-775d6b1824e5> in <module>()
----> 1 plt
NameError: name 'plt' is not defined
In [16]:
import matplotlib.pyplot as plt
In [17]:
plt.scatter(flowers["sepal length (cm)"][flowers.target == 0],
flowers["sepal width (cm)"][flowers.target == 0],
color='gold')
Out[17]:
<matplotlib.collections.PathCollection at 0x7fb3764f8320>
In [18]:
plt.scatter(flowers["sepal length (cm)"][flowers.target == 1],
flowers["sepal width (cm)"][flowers.target == 1],
color='purple')
Out[18]:
<matplotlib.collections.PathCollection at 0x7fb33ff4f2b0>
In [19]:
plt.show()
In [20]:
flowers.index
Out[20]:
Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
dtype='int64')
In [30]:
for label in flowers["label"][flowers["sepal length (cm)"]<5]:
print(label)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2133 try:
-> 2134 return self._engine.get_loc(key)
2135 except KeyError:
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: 'label'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-30-61a74510f838> in <module>()
----> 1 for label in flowers["label"][flowers["sepal length (cm)"]<5]:
2 print(label)
/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
2057 return self._getitem_multilevel(key)
2058 else:
-> 2059 return self._getitem_column(key)
2060
2061 def _getitem_column(self, key):
/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2064 # get column
2065 if self.columns.is_unique:
-> 2066 return self._get_item_cache(key)
2067
2068 # duplicate columns & possible reduce dimensionality
/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3541
3542 if not isnull(item):
-> 3543 loc = self.items.get_loc(item)
3544 else:
3545 indexer = np.arange(len(self.items))[isnull(self.items)]
/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2134 return self._engine.get_loc(key)
2135 except KeyError:
-> 2136 return self._engine.get_loc(self._maybe_cast_indexer(key))
2137
2138 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4433)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4279)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13742)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13696)()
KeyError: 'label'
In [32]:
flowers.ini[0]
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-32-6e6dde73c712> in <module>()
----> 1 flowers.ini[0]
/home/julienawilson/Desktop/Programs/401py/week3/data-structures/lib/python3.5/site-packages/pandas/core/generic.py in __getattr__(self, name)
2742 if name in self._info_axis:
2743 return self[name]
-> 2744 return object.__getattribute__(self, name)
2745
2746 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'ini'
In [41]:
flowers_data = [[3,4,5,6,7,8,9],
[8,5,4,7,2,3,1]]
In [42]:
labels =[1,1,1,0,0,0,1]
In [43]:
data_idx = [0,1,2,3,4,5,6]
left_idx = [0,1,2]
right_idx = [3,4,5,6]
In [46]:
val = 5
left_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] <= val]
In [47]:
left_list
Out[47]:
[1, 1, 1]
In [48]:
val = 5
right_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] > val]
In [49]:
right_list
Out[49]:
[0, 0, 0, 1]
In [60]:
for val in flowers_data[0]:
left_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] <= val]
right_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] > val]
print(left_list, right_list,
left_list.count(1)/len(left_list) * right_list.count(0)/len(right_list),
left_list.count(0)/len(left_list) * right_list.count(1)/len(right_list),
right_list.count(0),
left_list.count(0),
len(right_list),
len(left_list))
[1] [1, 1, 0, 0, 0, 1] 0.5 0.0 3 0 6 1
[1, 1] [1, 0, 0, 0, 1] 0.6 0.0 3 0 5 2
[1, 1, 1] [0, 0, 0, 1] 0.75 0.0 3 0 4 3
[1, 1, 1, 0] [0, 0, 1] 0.5 0.08333333333333333 2 1 3 4
[1, 1, 1, 0, 0] [0, 1] 0.3 0.2 1 2 2 5
[1, 1, 1, 0, 0, 0] [1] 0.0 0.5 0 3 1 6
---------------------------------------------------------------------------
ZeroDivisionError Traceback (most recent call last)
<ipython-input-60-1d1c1387b568> in <module>()
3 right_list=[labels[idx] for idx in data_idx if flowers_data[0][idx] > val]
4 print(left_list, right_list,
----> 5 left_list.count(1)/len(left_list) * right_list.count(0)/len(right_list),
6 left_list.count(0)/len(left_list) * right_list.count(1)/len(right_list),
7 right_list.count(0),
ZeroDivisionError: float division by zero
In [ ]:
Content source: julienawilson/data-structures
Similar notebooks: