notebook.community

Edit and run



In [205]:

    
from sklearn.datasets import load_iris
from sklearn import preprocessing

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
# IPython magic: with no argument it enables matplotlib's interactive mode using
# the default GUI backend of the machine (reported as "MacOSX" in the saved output).
# It has to run before any figure is created.
%matplotlib
# Apply the "ggplot" style sheet to every figure drawn afterwards.
matplotlib.style.use("ggplot")









    



Using matplotlib backend: MacOSX



In [206]:

    
# Load the iris data set into a DataFrame with short snake_case column names.
columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "target_names"]
data = load_iris()

# Translate every integer class label into its name (one entry per sample).
target_names = list(data.target_names[data.target])

# Build the frame with its final header directly: the four measurement columns
# first, then the class-name column appended as the last one.
df = pd.DataFrame(data.data, columns=columns[:-1])
df[columns[-1]] = target_names

# Quick sanity check: first rows plus summary statistics of the numeric columns.
print(df.head())
print(df.describe())









    



   sepal_length  sepal_width  petal_length  petal_width target_names
0           5.1          3.5           1.4          0.2       setosa
1           4.9          3.0           1.4          0.2       setosa
2           4.7          3.2           1.3          0.2       setosa
3           4.6          3.1           1.5          0.2       setosa
4           5.0          3.6           1.4          0.2       setosa
       sepal_length  sepal_width  petal_length  petal_width
count    150.000000   150.000000    150.000000   150.000000
mean       5.843333     3.054000      3.758667     1.198667
std        0.828066     0.433594      1.764420     0.763161
min        4.300000     2.000000      1.000000     0.100000
25%        5.100000     2.800000      1.600000     0.300000
50%        5.800000     3.000000      4.350000     1.300000
75%        6.400000     3.300000      5.100000     1.800000
max        7.900000     4.400000      6.900000     2.500000



In [207]:

    
# Convert data
# One-hot encoding via pd.get_dummies(df, columns=["target_names"]) is left
# disabled: it would replace the "target_names" column, which the
# parallel_coordinates calls further down use as the class column.
# Only the column dtypes are inspected here.
column_dtypes = df.dtypes
print(column_dtypes)









    



sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
target_names     object
dtype: object



In [208]:

    
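# Clean data
# No cleaning is applied: describe() above reports count == 150 for every
# numeric column, so none of the measurements is missing. The disabled lines
# below show how missing values could be inspected and filled with 0.0.
#print(df[df.sepal_length.isnull()])
#df = df.fillna(0.0, axis=0).reindex()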



In [209]:

    
# Rescale the four measurement columns with MinMaxScaler so that each column
# spans [0, 1] (see the min/max rows of the summary below); the class-name
# column is not numeric and is re-attached unchanged afterwards.
feature_columns = columns[:4]

scaler = preprocessing.MinMaxScaler()
np_scaled = scaler.fit_transform(df[feature_columns])

# fit_transform returns a plain array, so the column names are supplied again.
df_norm = pd.DataFrame(np_scaled, columns=feature_columns)
df_norm[columns[-1]] = target_names
df_norm.describe()









    Out[209]:






  
    
      
      sepal_length
      sepal_width
      petal_length
      petal_width
    
  
  
    
      count
      150.000000
      150.000000
      150.000000
      150.000000
    
    
      mean
      0.428704
      0.439167
      0.467571
      0.457778
    
    
      std
      0.230018
      0.180664
      0.299054
      0.317984
    
    
      min
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      25%
      0.222222
      0.333333
      0.101695
      0.083333
    
    
      50%
      0.416667
      0.416667
      0.567797
      0.500000
    
    
      75%
      0.583333
      0.541667
      0.694915
      0.708333
    
    
      max
      1.000000
      1.000000
      1.000000
      1.000000



In [210]:

    
# Parallel-coordinates plot of the min-max scaled features: one line per sample,
# coloured by the value in the "target_names" column.
# - Uses the public pd.plotting namespace; the old pd.tools.plotting path was
#   deprecated and later removed from pandas.
# - Draws on a dedicated figure/axes. Without an explicit `ax` the plot goes to
#   the current axes, so a later plot would be painted on top of this one.
fig_norm, ax_norm = plt.subplots()
ax_norm.set_title("Iris features (min-max scaled)")
pd.plotting.parallel_coordinates(df_norm, "target_names", ax=ax_norm)









    Out[210]:





<matplotlib.axes._subplots.AxesSubplot at 0x119157160>



In [211]:

    
# Parallel-coordinates plot of the features in their original units: one line
# per sample, coloured by the value in the "target_names" column.
# - Uses the public pd.plotting namespace; the old pd.tools.plotting path was
#   deprecated and later removed from pandas.
# - Draws on a dedicated figure/axes. Without an explicit `ax` the plot goes to
#   the current axes and would be painted over whatever was plotted there before.
fig_raw, ax_raw = plt.subplots()
ax_raw.set_title("Iris features (original units)")
pd.plotting.parallel_coordinates(df, "target_names", ax=ax_raw)









    Out[211]:





<matplotlib.axes._subplots.AxesSubplot at 0x119157160>



In [ ]:

	sepal_length	sepal_width	petal_length	petal_width
count	150.000000	150.000000	150.000000	150.000000
mean	0.428704	0.439167	0.467571	0.457778
std	0.230018	0.180664	0.299054	0.317984
min	0.000000	0.000000	0.000000	0.000000
25%	0.222222	0.333333	0.101695	0.083333
50%	0.416667	0.416667	0.567797	0.500000
75%	0.583333	0.541667	0.694915	0.708333
max	1.000000	1.000000	1.000000	1.000000