Dimensionality reduction on the Iris dataset


In [1]:
import matplotlib.pyplot as plt
import matplotlib.ticker
import mpl_toolkits.mplot3d
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.datasets
import sklearn.decomposition
import sklearn.manifold
import sklearn.preprocessing
# Global seaborn look for every figure in the notebook: the "ticks" style first,
# then the "husl" colour palette on top of it.
sns.set(style='ticks')
sns.set_palette('husl')
# Bare attribute access: the statement has no effect of its own.
# NOTE(review): presumably kept so the `mpl_toolkits.mplot3d` import above is not
# reported as unused -- confirm; none of the cells visible here draws in 3-D.
mpl_toolkits.mplot3d.Axes3D
# NOTE(review): this import would conventionally sit with the other imports above.
import warnings
# Installs an "ignore" filter with no category or message restriction, so every
# warning raised after this point is hidden -- including deprecation notices from
# the plotting and scikit-learn calls in the cells below.
warnings.filterwarnings("ignore")

In [2]:
%matplotlib inline
plt.rcParams["figure.figsize"] = (17, 17)
#sns.set(context="paper", font="monospace")

In [3]:
# The CSV is read without a header row, so the column names are supplied here:
# four measurement columns followed by the species label.
iris_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

df = pd.read_csv('iris.csv', header=None, names=iris_columns)
df.head()


Out[3]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa

In [4]:
# Every column except the last one (the species label) is treated as a feature.
features = df.columns[:-1].tolist()
print(features)

# Feature matrix (one row per flower) and the matching species labels as arrays.
X = df.loc[:, features].values
y = df.loc[:, "species"].values


['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

In [5]:
# Distinct class labels present in the data, in order of first appearance.
df.loc[:, "species"].unique()


Out[5]:
array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [6]:
# Raw data: scatter of the first two feature columns of X, coloured by species.
#
# All drawing goes through one explicit Axes. Calling `plt.axes()` inside the loop
# (as this cell used to) creates a brand-new Axes on every iteration in current
# Matplotlib, stacking one axes per data point on top of each other.
species_colors = {
    'Iris-setosa': "red",
    'Iris-versicolor': "blue",
    'Iris-virginica': "green",
}

fig, ax = plt.subplots()

# One scatter call per species instead of one per point: same markers, far fewer
# artists, and a legend entry for free. Labels missing from `species_colors` are
# not drawn as markers (they still get their text annotation below).
for species, color in species_colors.items():
    mask = np.asarray(y == species, dtype=bool)
    ax.scatter(X[mask, 0], X[mask, 1], color=color, s=100, label=species)

# Write the species name on top of every point, centred horizontally.
for point, label in zip(X, y):
    ax.annotate(label, (point[0], point[1]), ha='center', size=15)

# X was built as df[features].values, so columns 0 and 1 are features[0] and features[1].
ax.set(xlabel=features[0], ylabel=features[1], title="Iris: raw features")
ax.legend(title="species")
plt.show()



In [7]:
# PCA: project the feature matrix onto its first two principal components.
# NOTE: `pca` holds the projected coordinates (one 2-D point per row of X), not the
# fitted estimator; the estimator is kept separately as `pca_model` so the share of
# variance captured by each component can be shown on the axes.
pca_model = sklearn.decomposition.PCA(n_components=2)
pca = pca_model.fit_transform(X)
explained = pca_model.explained_variance_ratio_

species_colors = {
    'Iris-setosa': "red",
    'Iris-versicolor': "blue",
    'Iris-virginica': "green",
}

# Draw on one explicit Axes. Calling `plt.axes()` inside the loop (as this cell
# used to) creates a new Axes on every iteration in current Matplotlib.
fig, ax = plt.subplots()

# One scatter call per species rather than one per point.
for species, color in species_colors.items():
    mask = np.asarray(y == species, dtype=bool)
    ax.scatter(pca[mask, 0], pca[mask, 1], color=color, s=100, label=species)

# Write the species name on top of every projected point, centred horizontally.
for point, label in zip(pca, y):
    ax.annotate(label, (point[0], point[1]), ha='center', size=15)

ax.set(
    xlabel="PC1 ({:.1%} of variance)".format(explained[0]),
    ylabel="PC2 ({:.1%} of variance)".format(explained[1]),
    title="Iris: PCA projection",
)
ax.legend(title="species")
plt.show()



In [9]:
# t-SNE: non-linear 2-D embedding of the feature matrix.
# `random_state` is fixed so that Restart & Run All reproduces the same picture;
# without it the embedding can change from run to run.
# NOTE: `Y` (upper case) is the embedding, distinct from the label array `y`.
# perplexity=13 is carried over unchanged from the original analysis.
Y = sklearn.manifold.TSNE(
    n_components = 2,
    init         = "pca",
    method       = "barnes_hut",
    perplexity   = 13,
    random_state = 0
).fit_transform(X)

species_colors = {
    'Iris-setosa': "red",
    'Iris-versicolor': "blue",
    'Iris-virginica': "green",
}

# Draw on one explicit Axes. Calling `plt.axes()` inside the loop (as this cell
# used to) creates a new Axes on every iteration in current Matplotlib.
fig, ax = plt.subplots()

# One scatter call per species rather than one per point.
for species, color in species_colors.items():
    mask = np.asarray(y == species, dtype=bool)
    ax.scatter(Y[mask, 0], Y[mask, 1], color=color, s=100, label=species)

# Write the species name on top of every embedded point, centred horizontally.
for point, label in zip(Y, y):
    ax.annotate(label, (point[0], point[1]), ha='center', size=15)

ax.set(
    xlabel="t-SNE dimension 1",
    ylabel="t-SNE dimension 2",
    title="Iris: t-SNE embedding (perplexity 13)",
)
ax.legend(title="species")
plt.show()



In [ ]: