In [39]:
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
matplotlib.style.use("ggplot")

In [40]:
file_path = "/Users/szabolcs/dev/git/DAT210x/Module3/Datasets/"
file_name = "wheat.data"

df = pd.read_csv(file_path + file_name)
df.head()


Out[40]:
id area perimeter compactness length width asymmetry groove wheat_type
0 0 15.26 14.84 0.8710 5.763 3.312 2.221 5.220 kama
1 1 14.88 14.57 0.8811 5.554 3.333 1.018 4.956 kama
2 2 14.29 14.09 0.9050 5.291 3.337 2.699 4.825 kama
3 3 13.84 13.94 0.8955 5.324 3.379 2.259 4.805 kama
4 4 16.14 14.99 0.9034 5.658 3.562 1.355 5.175 kama

In [41]:
df.asymmetry.plot.hist(title="Asymmetry", bins=8)
plt.show()



In [42]:
wheat_types = df.wheat_type.unique()
print(wheat_types)
plt.figure()
for wtype in wheat_types:
    df[df.wheat_type == wtype].asymmetry.plot.hist(alpha=0.4)
plt.show()


['kama' 'canadian' 'rosa']

In [43]:
plt.figure()
wheat_df = df[["wheat_type", "perimeter"]]
wheat_df.plot.hist(alpha=0.5)
plt.show


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-43-eae48cbd3db8> in <module>()
      1 plt.figure()
----> 2 wheat_df = df[wheat_types]
      3 wheat_df.plot.hist(alpha=0.5)
      4 plt.show

/Users/szabolcs/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2051         if isinstance(key, (Series, np.ndarray, Index, list)):
   2052             # either boolean or fancy integer index
-> 2053             return self._getitem_array(key)
   2054         elif isinstance(key, DataFrame):
   2055             return self._getitem_frame(key)

/Users/szabolcs/anaconda/lib/python3.5/site-packages/pandas/core/frame.py in _getitem_array(self, key)
   2095             return self.take(indexer, axis=0, convert=False)
   2096         else:
-> 2097             indexer = self.ix._convert_to_indexer(key, axis=1)
   2098             return self.take(indexer, axis=1, convert=True)
   2099 

/Users/szabolcs/anaconda/lib/python3.5/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
   1228                 mask = check == -1
   1229                 if mask.any():
-> 1230                     raise KeyError('%s not in index' % objarr[mask])
   1231 
   1232                 return _values_from_object(indexer)

KeyError: "['kama' 'canadian' 'rosa'] not in index"
<matplotlib.figure.Figure at 0x11289ba90>

In [ ]: