In [39]:
import os

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
matplotlib.style.use("ggplot")

In [40]:
# Location of the wheat dataset. The directory defaults to the original
# author's checkout but can be redirected without editing the notebook by
# setting the WHEAT_DATA_DIR environment variable before starting the kernel.
# Joining with "" guarantees a trailing separator, so any code that still
# concatenates `file_path + name` keeps working.
file_path = os.path.join(
    os.environ.get(
        "WHEAT_DATA_DIR",
        "/Users/szabolcs/dev/git/DAT210x/Module3/Datasets/",
    ),
    "",
)
file_name = "wheat.data"

# Build the full path with os.path.join instead of string concatenation so a
# missing or duplicated separator cannot produce a wrong file name.
df = pd.read_csv(os.path.join(file_path, file_name))
df.head()


Out[40]:
id area perimeter compactness length width asymmetry groove wheat_type
0 0 15.26 14.84 0.8710 5.763 3.312 2.221 5.220 kama
1 1 14.88 14.57 0.8811 5.554 3.333 1.018 4.956 kama
2 2 14.29 14.09 0.9050 5.291 3.337 2.699 4.825 kama
3 3 13.84 13.94 0.8955 5.324 3.379 2.259 4.805 kama
4 4 16.14 14.99 0.9034 5.658 3.562 1.355 5.175 kama

In [41]:
# Histogram of the asymmetry column over all samples, using 8 bins.
asymmetry = df["asymmetry"]
asymmetry.plot(kind="hist", bins=8, title="Asymmetry")
plt.show()



In [42]:
# Overlay one asymmetry histogram per wheat type on a single set of axes.
wheat_types = df.wheat_type.unique()
print(wheat_types)

fig, ax = plt.subplots()

# Use the same value range for every group so all histograms share identical
# bin edges; otherwise each group picks its own bins and the overlaid bars
# are not comparable.
asymmetry_range = (df.asymmetry.min(), df.asymmetry.max())

for wtype in wheat_types:
    # label= names the group so the legend can tell the overlays apart.
    df.loc[df.wheat_type == wtype, "asymmetry"].plot.hist(
        ax=ax, alpha=0.4, range=asymmetry_range, label=wtype
    )

ax.set_title("Asymmetry by wheat type")
ax.set_xlabel("asymmetry")
ax.legend(title="wheat_type")
plt.show()


['kama' 'canadian' 'rosa']

In [46]:
# Compare the distributions of asymmetry and perimeter in one histogram.
wheat_df = df[["asymmetry", "perimeter"]]

# DataFrame.plot opens its own figure unless it is given an Axes, so a bare
# plt.figure() beforehand only leaves an empty extra figure in the output.
# Create the axes explicitly and hand them to pandas instead.
fig, ax = plt.subplots()
wheat_df.plot.hist(ax=ax, alpha=0.5, title="Asymmetry and perimeter")
ax.set_xlabel("measured value")
plt.show()


<matplotlib.figure.Figure at 0x10f5dbba8>

In [ ]: