In [205]:
from sklearn.datasets import load_iris
from sklearn import preprocessing
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
# NOTE(review): a bare `%matplotlib` leaves the backend choice to IPython; if the
# figures are meant to be embedded in the notebook, `%matplotlib inline` is
# presumably what is wanted — confirm.
%matplotlib
# Use matplotlib's "ggplot" style sheet for the plots drawn below.
matplotlib.style.use("ggplot")
In [206]:
# Build the iris DataFrame with snake_case feature headers plus a class-name column.
columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "target_names"]
data = load_iris()
# Translate every integer class id into its class name in one indexing step.
target_names = list(data.target_names[data.target])
# The feature headers are set directly at construction; the label column is appended last.
df = pd.DataFrame(data.data, columns=columns[:-1])
df["target_names"] = target_names
# Show the first rows followed by the summary statistics.
print(df.head(), df.describe(), sep="\n")
In [207]:
# Convert data
# One-hot encoding of the label column is left disabled, so "target_names" stays
# a single column, which is what the plotting cells further down pass as the
# class column.
#df = pd.get_dummies(df, columns=["target_names"])
# Print the dtype of every column.
print(df.dtypes)
In [208]:
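# Clean data (currently disabled: the null check and the fill below are commented out, so this cell does nothing)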
#print(df[df.sepal_length.isnull()])
#df = df.fillna(0.0, axis=0).reindex()
In [209]:
# Min-max scale the four feature columns, then re-attach the class-name labels
# so the scaled frame has the same layout as `df`.
scaler = preprocessing.MinMaxScaler()
np_scaled = scaler.fit_transform(df.iloc[:, :4])
# Name the scaled feature columns at construction time.
df_norm = pd.DataFrame(np_scaled, columns=columns[:4])
df_norm["target_names"] = target_names
# Summary statistics of the scaled features (displayed as the cell output).
df_norm.describe()
Out[209]:
In [210]:
# Parallel-coordinates plot of the min-max scaled features, grouped by the
# "target_names" class column.
# `pd.tools.plotting` was deprecated in pandas 0.20 and later removed, so it
# raises AttributeError on current pandas; `pd.plotting` is the supported namespace.
pd.plotting.parallel_coordinates(df_norm, "target_names")
Out[210]:
In [211]:
# Parallel-coordinates plot of the unscaled features, grouped by the
# "target_names" class column.
# `pd.tools.plotting` was deprecated in pandas 0.20 and later removed, so it
# raises AttributeError on current pandas; `pd.plotting` is the supported namespace.
pd.plotting.parallel_coordinates(df, "target_names")
Out[211]:
In [ ]: