In [7]:
# seaborn is a powerful data visualization library
# updated Jul 28, 2017
import seaborn as sns
import matplotlib.pyplot as plt

In [8]:
# Load the iris example data set bundled with seaborn; the result is a pandas DataFrame.
iris = sns.load_dataset("iris")
# Preview the first rows. The parenthesised form is valid on both Python 2 and Python 3.
print(iris.head())


   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa

In [10]:
# Scatter-plot matrix over the columns of iris, with points coloured by species.
sns.pairplot(data=iris, hue="species")
plt.show()



In [13]:
# Histogram of the sepal_length column over all rows: 20 bins, no KDE curve.
sns.distplot(
    iris['sepal_length'],
    bins=20,
    kde=False,
    color="b",
)
plt.show()



In [18]:
# List the distinct species labels present in the data set.
# The parenthesised print is valid on both Python 2 and Python 3.
print(iris['species'].unique())

# Histogram of sepal_length restricted to a single species (setosa).
ax = sns.distplot(
    iris.loc[iris['species'] == 'setosa', 'sepal_length'],
    kde=False,
    color="b",
    bins=20,
)
plt.show()


['setosa' 'versicolor' 'virginica']

In [23]:
# Overlay the sepal_length histograms of the three species on a single axes.
fig, ax = plt.subplots()
species_colors = [('setosa', 'b'), ('versicolor', 'r'), ('virginica', 'c')]
for species_name, color in species_colors:
    sns.distplot(
        iris.loc[iris['species'] == species_name, 'sepal_length'],
        kde=False,
        color=color,
        bins=20,
        label=species_name,
        ax=ax,
    )
# Every histogram carries its own label, so the legend no longer depends on the
# order of iris['species'].unique() matching the plotting order.
ax.legend()
# With kde=False distplot does not normalise the histogram: bar heights are raw counts.
ax.set_ylabel('count')
plt.show()



In [26]:
# One sepal_length histogram per species, stacked vertically in one figure.
# Creating all three axes in a single subplots() call avoids the stray full-size
# axes that a bare plt.subplots() followed by plt.subplot(31x) leaves behind.
fig, (ax1, ax2, ax3) = plt.subplots(3, 1)
panels = [(ax1, 'setosa', 'b'), (ax2, 'versicolor', 'r'), (ax3, 'virginica', 'c')]
for panel_ax, species_name, color in panels:
    sns.distplot(
        iris.loc[iris['species'] == species_name, 'sepal_length'],
        kde=False,
        color=color,
        bins=20,
        label=species_name,
        ax=panel_ax,
    )
    # Legend and y-label are set per axes; pyplot-level calls would only touch
    # the last axes and would mislabel its histogram with the first species name.
    panel_ax.legend()
    # With kde=False distplot does not normalise the histogram: bar heights are raw counts.
    panel_ax.set_ylabel('count')
plt.show()



In [29]:
# Box plot of sepal_length, one box per species, drawn with the "PRGn" palette.
sns.boxplot(
    data=iris,
    x="species",
    y="sepal_length",
    palette="PRGn",
)
plt.show()



In [ ]:


In [61]:
from scipy import stats

# Linear least-squares regression of sepal_width on sepal_length using every row.
slope, intercept, r_value, p_value, std_err = stats.linregress(
    iris['sepal_length'], iris['sepal_width']
)
# %s applies str() to each value, which keeps the space-separated layout of the
# old print statement while also being valid syntax on Python 3.
print("%s %s %s %s %s" % (slope, intercept, r_value, p_value, std_err))

# Scatter plot with marginal distributions and a fitted regression line.
sns.jointplot(x="sepal_length", y="sepal_width", data=iris, kind="reg")
plt.show()


-0.0618847979641 3.4189468361 -0.117569784133 0.151898260711 0.0429669879299

In [62]:
# Linear regression of sepal_width on sepal_length for species == 'setosa' only.
# The row filter is evaluated once and reused for both columns.
setosa = iris.loc[iris['species'] == 'setosa']
slope, intercept, r_value, p_value, std_err = stats.linregress(
    setosa['sepal_length'], setosa['sepal_width']
)
# %s applies str() to each value, which keeps the space-separated layout of the
# old print statement while also being valid syntax on Python 3.
print("%s %s %s %s %s" % (slope, intercept, r_value, p_value, std_err))

# Show the results of a linear regression within each species
g = sns.lmplot(
    x="sepal_length",
    y="sepal_width",
    hue="species",
    data=iris,
    palette="muted",
    scatter_kws={"s": 50, "alpha": 1},
)

plt.show()


0.798528300647 -0.56943267304 0.742546685665 6.70984301766e-10 0.103965054285

In [ ]: