In [15]:
## make imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
## plotting defaults: "ticks" style with color codes switched on
sns.set(
    color_codes=True,
    style="ticks",
)

## load the iris example data, then draw the pairwise plot grid
iris = sns.load_dataset("iris")
g = sns.pairplot(data=iris)



In [19]:
## same pairwise grid, colored by the "species" column using the "husl" palette
g = sns.pairplot(
    data=iris,
    hue="species",
    palette="husl",
)



In [17]:
## summarize the frame: index range, per-column non-null counts, dtypes, memory usage
iris.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
sepal_length    150 non-null float64
sepal_width     150 non-null float64
petal_length    150 non-null float64
petal_width     150 non-null float64
species         150 non-null object
dtypes: float64(4), object(1)
memory usage: 5.9+ KB

In [18]:
## peek at the first five rows
iris.head(n=5)


Out[18]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

In [20]:
## per-species mean of every measurement column
(
    iris
    .groupby(by="species")
    .mean()
)


Out[20]:
sepal_length sepal_width petal_length petal_width
species
setosa 5.006 3.428 1.462 0.246
versicolor 5.936 2.770 4.260 1.326
virginica 6.588 2.974 5.552 2.026

In [26]:
## make a histogram the pandas way
## Series.plot hands back the matplotlib Axes, which is what we label below
ax = iris["sepal_length"].plot(kind="hist")
## descriptive title + x label so the figure can be read without the code;
## the trailing ';' keeps the returned Text object out of the cell output
ax.set_title("Distribution of sepal length (iris)")
ax.set_xlabel("sepal_length");



In [25]:
## make a histogram the matplotlib way
## plt.subplots creates the figure and its single axes in one call
fig, ax = plt.subplots(figsize=(10, 8))
ax.hist(iris["sepal_length"])
## label the figure so it can be read on its own;
## the trailing ';' keeps the return value of ax.set out of the cell output
ax.set(
    title="Distribution of sepal length (iris)",
    xlabel="sepal_length",
    ylabel="Frequency",
);



In [ ]:
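## clustering
## TODO: this section is not implemented yet; the lines below are a disabled draft.
## NOTE(review): `datasets` is not imported in any cell shown here (presumably
## sklearn.datasets -- confirm), and un-commenting the first line would rebind
## `iris`, replacing the DataFrame that the cells above rely on.

# import some data to play with
#iris = datasets.load_iris()
#X = iris.data[:, :2]  # we only take the first two features.
#Y = iris.target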