In [112]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sbn
import pandas as pd
import scipy.stats as stats
In [17]:
# Use seaborn's plain "white" theme so figures are drawn without grid lines
sbn.set_style(style="white")
In [18]:
# Remote CSV holding the iris measurements (Bio204 class datasets repository)
irisurl = "https://raw.githubusercontent.com/Bio204-class/bio204-datasets/master/iris.csv"
# Download the file and parse it into a DataFrame
iris = pd.read_csv(filepath_or_buffer=irisurl)
In [19]:
# Peek at the first three rows to check the columns as loaded
iris.head(n=3)
Out[19]:
In [20]:
# Rename columns for convenience: strip the dots so each column can be
# reached with attribute access (e.g. `iris.SepalLength`).
# `regex=False` makes the '.' a literal character; as a regular expression
# '.' would match *every* character, and the default for `regex` differs
# between pandas versions.
iris.columns = iris.columns.str.replace('.', '', regex=False)
iris.head(3)
Out[20]:
In [21]:
# Split the table into one DataFrame per species via boolean row selection
setosa = iris.loc[iris["Species"] == "setosa"]
versicolor = iris.loc[iris["Species"] == "versicolor"]
virginica = iris.loc[iris["Species"] == "virginica"]
In [38]:
# Scatter plot of setosa sepal dimensions using the matplotlib Axes API
fig, ax = plt.subplots()
ax.scatter(setosa.SepalLength, setosa.SepalWidth, marker='o', s=30, color='steelblue')
# NOTE(review): the standard iris measurements are recorded in centimetres,
# so the labels say cm rather than mm -- confirm against the CSV source.
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Sepal Width (cm)')
sbn.despine()
pass  # final no-op statement so the cell does not echo a return value
In [72]:
# Same scatter plot of setosa sepal dimensions, drawn with an equal aspect
# ratio so one unit on the x axis spans the same distance as one on the y axis
fig, ax = plt.subplots()
ax.scatter(setosa.SepalLength, setosa.SepalWidth, marker='o', s=30, color='steelblue')
# NOTE(review): the standard iris measurements are recorded in centimetres,
# so the labels say cm rather than mm -- confirm against the CSV source.
ax.set_xlabel('Sepal Length (cm)')
ax.set_ylabel('Sepal Width (cm)')
ax.set_aspect('equal') # <-- aspect ratio specified here
sbn.despine()
pass  # final no-op statement so the cell does not echo a return value
In [71]:
# using seaborn.jointplot
# Note that jointplot returns an object of type JointGrid.
# Since there are three sets of axes (the joint plot plus two marginals),
# the JointGrid object provides a method, `set_axis_labels`,
# to properly set the labels.
# https://seaborn.pydata.org/generated/seaborn.JointGrid.html
#
# The data vectors are passed by keyword: current seaborn releases accept
# `x` and `y` as keyword-only arguments.
g = sbn.jointplot(x=setosa.SepalLength, y=setosa.SepalWidth)
g.set_axis_labels("Sepal Length", "Sepal Width")
pass
In [68]:
# Bivariate kernel density estimate drawn as contour lines.
# `x`/`y` are given by keyword because current seaborn no longer accepts the
# two data vectors positionally.
sbn.kdeplot(x=setosa.SepalLength, y=setosa.SepalWidth)
sbn.despine()
pass
In [103]:
# Bivariate kernel density estimate with filled contours.
# `fill=True` replaces the deprecated `shade=True`, and the data vectors are
# passed by keyword as required by current seaborn.
sbn.kdeplot(x=setosa.SepalLength, y=setosa.SepalWidth, fill=True)
sbn.despine()
pass
In [83]:
# Density contours with the raw observations overlaid on the same Axes.
# Both layers are drawn on an explicit Axes, and the kdeplot vectors are
# passed by keyword as required by current seaborn.
fig, ax = plt.subplots()
sbn.kdeplot(x=setosa.SepalLength, y=setosa.SepalWidth, ax=ax)
# the zorder argument makes sure the points are drawn on top of the
# density contours
ax.scatter(setosa.SepalLength, setosa.SepalWidth, color='forestgreen', s=30, zorder=10)
sbn.despine()
pass
In [122]:
# Joint plot using kernel density estimates for the joint and marginal views.
# `x`/`y` are keyword-only in current seaborn.
sbn.jointplot(x=setosa.SepalLength, y=setosa.SepalWidth, kind='kde')
pass
Out[122]:
In [123]:
# Joint plot using hexagonal binning for the joint distribution.
# `x`/`y` are keyword-only in current seaborn.
sbn.jointplot(x=setosa.SepalLength, y=setosa.SepalWidth, kind='hex')
pass
In [120]:
# Grid of pairwise plots across the variables of the setosa subset
sbn.pairplot(data=setosa)
pass
In [ ]:
In the plot below there is a relationship between $x$ and $y$ but it is non-linear.
In [119]:
# Synthetic example of a non-linear (cubic) relationship with Gaussian noise.
# A seeded generator makes the figure identical on every run; x and y draw
# from the same generator in sequence, so their noise terms are independent.
rng = np.random.default_rng(204)
base = np.linspace(-1, 1, 100)
x = base + stats.norm.rvs(0, 0.1, size=100, random_state=rng)
y = base**3 + stats.norm.rvs(0, 0.1, size=100, random_state=rng)
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set_xlabel('x')
ax.set_ylabel('y')
pass
Out[119]:
In [100]:
# Sample covariance matrix of the two sepal measurements:
# ddof=1 divides by (n - 1) rather than n
np.cov(setosa["SepalLength"], setosa["SepalWidth"], ddof=1)
Out[100]:
In [98]:
# The same covariance computed with the DataFrame `cov` method,
# restricted to the two sepal columns
setosa.loc[:, ["SepalWidth", "SepalLength"]].cov()
Out[98]:
In [102]:
# Covariance matrix for all pairs of numeric variables.
# `setosa` still carries the string-valued `Species` column, so
# `numeric_only=True` is needed: pandas >= 2.0 no longer drops non-numeric
# columns silently and raises instead.
setosa.cov(numeric_only=True)
Out[102]:
In [125]:
# Pearson correlation matrix of the two sepal measurements.
# `ddof` is deprecated in np.corrcoef and has no effect on the result
# (the normalisation cancels out), so it is not passed.
np.corrcoef(setosa.SepalLength, setosa.SepalWidth)
Out[125]:
In [128]:
# Correlation matrix from the DataFrame `corr` method, for the sepal columns only
setosa.loc[:, ["SepalLength", "SepalWidth"]].corr()
Out[128]:
In [129]:
# Correlation matrix for all pairs of numeric variables.
# `numeric_only=True` skips the string-valued `Species` column, which would
# otherwise make `corr` raise on pandas >= 2.0.
setosa.corr(numeric_only=True)
Out[129]:
In [130]:
# Anscombe's synthetic data set is one of the example datasets
# that seaborn can load by name
anscombe = sbn.load_dataset(name='anscombe')
anscombe.head()
Out[130]:
The basic descriptive statistics for $x$ and $y$ are quite similar across the groups.
In [131]:
# Summary statistics computed separately for each of the datasets
anscombe.groupby(by='dataset').describe()
Out[131]:
In [132]:
# One DataFrame per Anscombe dataset, selected by the `dataset` label
groupI = anscombe.loc[anscombe["dataset"] == "I"]
groupII = anscombe.loc[anscombe["dataset"] == "II"]
groupIII = anscombe.loc[anscombe["dataset"] == "III"]
groupIV = anscombe.loc[anscombe["dataset"] == "IV"]
The pairwise correlations within the groups look very similar.
In [133]:
# Correlation between x and y in group I; `numeric_only=True` skips the
# string-valued `dataset` column, which makes `corr` raise on pandas >= 2.0
groupI.corr(numeric_only=True)
Out[133]:
In [134]:
# Correlation between x and y in group II; `numeric_only=True` skips the
# string-valued `dataset` column, which makes `corr` raise on pandas >= 2.0
groupII.corr(numeric_only=True)
Out[134]:
In [135]:
# Correlation between x and y in group III; `numeric_only=True` skips the
# string-valued `dataset` column, which makes `corr` raise on pandas >= 2.0
groupIII.corr(numeric_only=True)
Out[135]:
In [136]:
# Correlation between x and y in group IV; `numeric_only=True` skips the
# string-valued `dataset` column, which makes `corr` raise on pandas >= 2.0
groupIV.corr(numeric_only=True)
Out[136]:
In [147]:
# 2x2 grid of scatter plots, one panel per Anscombe group.
# The four axes keep their individual names (ax1..ax4).
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 10))

# (axes, data, marker colour, panel title) for each panel
panels = (
    (ax1, groupI, 'steelblue', "Group I"),
    (ax2, groupII, 'forestgreen', "Group II"),
    (ax3, groupIII, 'firebrick', "Group III"),
    (ax4, groupIV, 'purple', "Group IV"),
)

# Everything that is done per panel lives inside the loop body
for ax, group, color, title in panels:
    ax.scatter(group.x, group.y, color=color, s=30)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title(title)

fig.tight_layout()
pass
In [ ]: