This notebook uses code from the book Python Data Science Handbook, by Jake VanderPlas.
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Build six random walks that share a common x axis.
# A fixed seed keeps the figure identical on every Restart & Run All.
x = np.linspace(0, 10, 500)
rng = np.random.RandomState(0)
y = rng.randn(500, 6).cumsum(axis=0)  # cumulative sum down the rows -> one walk per column

# 1. Draw the walks using Matplotlib's default styling.
plt.plot(x, y)
plt.legend(list('ABCDEF'), loc='upper left', ncol=2);
In [3]:
# 2. Same figure again, this time with Seaborn's styling switched on.
import seaborn as sns

sns.set()  # installs Seaborn's default theme for all subsequent Matplotlib figures

# Re-plot the (x, y) walks created in the previous cell.
plt.plot(x, y)
plt.legend(list('ABCDEF'), loc='upper left', ncol=2);
In [4]:
# Draw 2000 points from a correlated two-dimensional Gaussian.
# The seeded `rng` from the first data cell is reused (instead of the global,
# unseeded np.random state) so the sample is reproducible on Restart & Run All.
samples = rng.multivariate_normal([0, 0], [[5, 2], [2, 2]], size=2000)
data = pd.DataFrame(samples, columns=['x', 'y'])

# Overlay one normalised histogram per column.
# `density=True` replaces the `normed=True` keyword, which was removed from
# Matplotlib (it raises an error on current releases).
for col in 'xy':
    plt.hist(data[col], density=True, alpha=0.5)
Instead of a histogram, we can get a smooth estimate of the distribution using kernel density estimation (KDE), which Seaborn provides through sns.kdeplot:
In [5]:
# One filled kernel-density curve per column of `data`.
# `fill=True` is the current spelling of the deprecated `shade=True` keyword.
for col in 'xy':
    sns.kdeplot(data[col], fill=True)
Histograms and KDE can be combined using distplot (deprecated since seaborn 0.11; histplot with kde=True is its replacement):
In [6]:
# Histogram plus KDE overlay for each column.
# `sns.distplot` is deprecated; `sns.histplot(..., kde=True)` is its replacement.
# `stat='density'`, `kde_kws={'cut': 3}` and `alpha=0.4` reproduce distplot's
# default look (normalised bars, KDE extended past the data range, translucent fill).
for col in 'xy':
    sns.histplot(data[col], kde=True, stat='density',
                 kde_kws={'cut': 3}, alpha=0.4)
In [7]:
# Load seaborn's "iris" example dataset and preview its first five rows.
iris = sns.load_dataset('iris')
iris.head(n=5)
Out[7]:
Now visualize the pairwise relationships between the variables with sns.pairplot:
In [8]:
# Scatter-plot matrix of every pair of iris columns, coloured by species.
# `height` is the current name of the per-facet size argument; the old `size`
# keyword was renamed in seaborn 0.9 and is no longer accepted.
sns.pairplot(iris, hue='species', height=2.5);
In [9]:
# "Tips" dataset
tips = sns.load_dataset("tips")
# Preview the first five rows.
tips.head(n=5)
Out[9]:
In [10]:
# Express each tip as a percentage of the total bill (stored as a new column).
tips['tip_pct'] = 100 * tips['tip'] / tips['total_bill']

# 15 bin edges evenly spaced over 0-40, shared by every facet.
tip_pct_bins = np.linspace(0, 40, 15)

# One histogram per (sex, time) combination: sex varies by row, time by column.
grid = sns.FacetGrid(data=tips, row="sex", col="time", margin_titles=True)
grid.map(plt.hist, "tip_pct", bins=tip_pct_bins);
In [11]:
# Load seaborn's "planets" example dataset and preview its first five rows.
planets = sns.load_dataset("planets")
planets.head(n=5)
Out[11]:
In [12]:
# Count of rows in `planets` per value of the "year" column.
# `sns.catplot` is the current name of the removed `sns.factorplot`, and the
# plotted variable must be passed by keyword (`x=`) because a bare first
# positional argument is interpreted as `data` in current seaborn.
with sns.axes_style('white'):  # plain white background for this figure only
    g = sns.catplot(x="year", data=planets, aspect=2,
                    kind="count", color='steelblue')
    g.set_xticklabels(step=5)  # label every fifth tick to keep the axis readable
More options:
In [13]:
# Yearly row counts for 2001-2014, split into one bar per "method" value.
# `sns.catplot` replaces the removed `sns.factorplot`; the plotted variable is
# passed as `x=` because current seaborn treats a first positional argument as `data`.
with sns.axes_style('white'):  # plain white background for this figure only
    g = sns.catplot(x="year", data=planets, aspect=4.0, kind='count',
                    hue='method', order=list(range(2001, 2015)))
    g.set_ylabels('Number of Planets Discovered')
A simple bar chart:
In [14]:
# Load seaborn's "titanic" example dataset for the bar chart below.
titanic = sns.load_dataset('titanic')
In [15]:
# Bar chart of how many rows fall in each "deck" category.
# NOTE(review): recent seaborn releases warn when `palette` is given without
# `hue`; the suggested form needs seaborn >= 0.13, so the call is left as is.
sns.countplot(data=titanic, x="deck", palette="Greens_d");
In [ ]: