Iris dataset

Imports

Import dependencies


In [1]:
from datetime import datetime as dt

import numpy as np
import pandas as pd

# viz libs
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.cm as cm
from matplotlib.colors import colorConverter

import plotly.graph_objs as go
from plotly import tools
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Switch plotly into offline notebook mode so that iplot() renders figures
# inline; connected=True is the plotly option for loading plotly.js from the
# CDN rather than embedding it in the notebook.
init_notebook_mode(connected=True)

# Fixed seed value -- presumably for stochastic steps (e.g. clustering); it is
# not referenced by any cell visible here.
random_state = 42

# Wall-clock start of the run; the final cell subtracts it to report elapsed time.
nb_start = dt.now()



In [2]:
from sklearn.cluster import KMeans


from sklearn.metrics import silhouette_samples, silhouette_score

In [ ]:


In [ ]:

Import data


In [3]:
%%bash

# Confirm the dataset file is present in the working directory.
ls -l | grep iris
# Peek at the first two rows and the last row without piping through cat.
head -n 2 iris.csv
tail -n 1 iris.csv


-rw-rw-r-- 1 1000 1000     2701 May 13 15:09 iris.csv
-rw-r--r-- 1 root root    76961 May 28 04:49 iris.ipynb
5.1,3.5,1.4,0.2,0
4.9,3.0,1.4,0.2,0


In [4]:
# The CSV has no header row (header=None), so column labels are supplied here:
# four measurement columns followed by the species column.
iris_columns = [
    'SepalLengthCm',
    'SepalWidthCm',
    'PetalLengthCm',
    'PetalWidthCm',
    'Species',
]

df = pd.read_csv('iris.csv', header=None, names=iris_columns)

# Print row count, dtypes and non-null counts as a quick sanity check.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
SepalLengthCm    150 non-null float64
SepalWidthCm     150 non-null float64
PetalLengthCm    150 non-null float64
PetalWidthCm     150 non-null float64
Species          150 non-null int64
dtypes: float64(4), int64(1)
memory usage: 5.9 KB

In [5]:
# Show the first five rows (5 is also pandas' default for head()).
df.head(n=5)


Out[5]:
SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
0 5.1 3.5 1.4 0.2 0
1 4.9 3.0 1.4 0.2 0
2 4.7 3.2 1.3 0.2 0
3 4.6 3.1 1.5 0.2 0
4 5.0 3.6 1.4 0.2 0

In [6]:
# Show the last five rows (5 is also pandas' default for tail()).
df.tail(n=5)


Out[6]:
SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
145 6.7 3.0 5.2 2.3 2
146 6.3 2.5 5.0 1.9 2
147 6.5 3.0 5.2 2.0 2
148 6.2 3.4 5.4 2.3 2
149 5.9 3.0 5.1 1.8 2

In [ ]:


Visualization


In [7]:
# Scatter plot of sepal length against sepal width, one marker per row of
# `df`, coloured by the integer Species label.
x = df.SepalLengthCm
y = df.SepalWidthCm
c = df.Species

trace = [go.Scatter(
    x=x,
    y=y,
    marker=dict(
        color=c,
        colorscale='Viridis',
        colorbar=dict(title='Labels'),
    ),
    name='data',
    mode='markers',
    # Only the custom text built below is shown on hover.
    hoverinfo='text',
    text=['x: %s<br>y: %s<br>cluster %i' % (x_i, y_i, c_i) for x_i, y_i, c_i in zip(x, y, c)],
)]

layout = go.Layout(
    # Label the axes with the plotted column names instead of bare 'x'/'y'
    # so the figure is readable on its own.
    xaxis=dict(title=x.name),
    yaxis=dict(title=y.name),
    hovermode='closest',
)

fig = go.Figure(data=trace, layout=layout)
# The layout already travels inside `fig`. iplot's second positional parameter
# is `show_link`, not a layout, so it is set explicitly by keyword here.
iplot(fig, show_link=False)



In [8]:
# Scatter plot of petal length against petal width, one marker per row of
# `df`, coloured by the integer Species label.
x = df.PetalLengthCm
y = df.PetalWidthCm
c = df.Species

trace = [go.Scatter(
    x=x,
    y=y,
    marker=dict(
        color=c,
        colorscale='Viridis',
        colorbar=dict(title='Labels'),
    ),
    name='data',
    mode='markers',
    # Only the custom text built below is shown on hover.
    hoverinfo='text',
    text=['x: %s<br>y: %s<br>cluster %i' % (x_i, y_i, c_i) for x_i, y_i, c_i in zip(x, y, c)],
)]

layout = go.Layout(
    # Label the axes with the plotted column names instead of bare 'x'/'y'
    # so the figure is readable on its own.
    xaxis=dict(title=x.name),
    yaxis=dict(title=y.name),
    hovermode='closest',
)

fig = go.Figure(data=trace, layout=layout)
# The layout already travels inside `fig`. iplot's second positional parameter
# is `show_link`, not a layout, so it is set explicitly by keyword here.
iplot(fig, show_link=False)



In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:

Notebook end


In [9]:
# Timestamp the end of the run and report how long the notebook took,
# measured from `nb_start` set in the first cell.
nb_end = dt.now()
elapsed = nb_end - nb_start

# Bare last expression so the notebook displays the string as the cell output.
'Time elapsed: %s' % (elapsed,)


Out[9]:
'Time elapsed: 0:00:01.158076'

Bibliography


In [ ]: