notebook.community

Edit and run



In [39]:

    
import os

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
# Apply the "ggplot" style sheet to the figures drawn by the cells below.
plt.style.use("ggplot")



In [40]:

    
# Locate and load the wheat dataset.
# The directory can be overridden with the WHEAT_DATA_DIR environment variable so
# the notebook can run on a machine other than the one it was written on; when the
# variable is unset, the original location is used unchanged.
file_path = os.environ.get(
    "WHEAT_DATA_DIR", "/Users/szabolcs/dev/git/DAT210x/Module3/Datasets/"
)
file_name = "wheat.data"

# os.path.join accepts the directory with or without a trailing slash, unlike
# plain string concatenation.
df = pd.read_csv(os.path.join(file_path, file_name))

# Preview the first rows to check the columns that the later cells rely on.
df.head()









    Out[40]:






  
    
      
      id
      area
      perimeter
      compactness
      length
      width
      asymmetry
      groove
      wheat_type
    
  
  
    
      0
      0
      15.26
      14.84
      0.8710
      5.763
      3.312
      2.221
      5.220
      kama
    
    
      1
      1
      14.88
      14.57
      0.8811
      5.554
      3.333
      1.018
      4.956
      kama
    
    
      2
      2
      14.29
      14.09
      0.9050
      5.291
      3.337
      2.699
      4.825
      kama
    
    
      3
      3
      13.84
      13.94
      0.8955
      5.324
      3.379
      2.259
      4.805
      kama
    
    
      4
      4
      16.14
      14.99
      0.9034
      5.658
      3.562
      1.355
      5.175
      kama



In [41]:

    
# Histogram of the asymmetry column over all rows, split into 8 bins.
df["asymmetry"].plot(kind="hist", bins=8, title="Asymmetry")
plt.show()



In [42]:

    
# Overlay one asymmetry histogram per wheat variety on a single set of axes.
wheat_types = df.wheat_type.unique()
print(wheat_types)

# Use one value range for every variety so all histograms share the same bin
# edges; otherwise each subset gets its own bins and the bars are not comparable.
asymmetry_range = (df.asymmetry.min(), df.asymmetry.max())

fig, ax = plt.subplots()
for wtype in wheat_types:
    # label= feeds the legend, which is the only way to tell the
    # semi-transparent layers apart.
    df.loc[df.wheat_type == wtype, "asymmetry"].plot.hist(
        ax=ax, alpha=0.4, range=asymmetry_range, label=wtype
    )
ax.set(title="Asymmetry by wheat type", xlabel="asymmetry")
ax.legend(title="wheat_type")
plt.show()









    



['kama' 'canadian' 'rosa']



In [46]:

    
# Overlaid histograms of the asymmetry and perimeter columns.
# Create the axes explicitly and hand them to pandas: DataFrame.plot opens its own
# figure when no ax is given, which left the figure from a bare plt.figure() call
# empty and showed it as a stray "<matplotlib.figure.Figure ...>" output.
fig, ax = plt.subplots()
wheat_df = df[["asymmetry", "perimeter"]]
wheat_df.plot.hist(alpha=0.5, ax=ax)
ax.set(title="Asymmetry and perimeter", xlabel="value")
plt.show()









    





<matplotlib.figure.Figure at 0x10f5dbba8>



In [ ]:

	id	area	perimeter	compactness	length	width	asymmetry	groove	wheat_type
0	0	15.26	14.84	0.8710	5.763	3.312	2.221	5.220	kama
1	1	14.88	14.57	0.8811	5.554	3.333	1.018	4.956	kama
2	2	14.29	14.09	0.9050	5.291	3.337	2.699	4.825	kama
3	3	13.84	13.94	0.8955	5.324	3.379	2.259	4.805	kama
4	4	16.14	14.99	0.9034	5.658	3.562	1.355	5.175	kama