The foundational package for most graphics in Python is matplotlib, and the seaborn package builds on this to provide more statistical graphing options. We will focus on these two packages, but there are many others if these don't meet your needs.
There are also several specialized packages that might come in useful:
In [1]:
import warnings
# Ignore every warning for the rest of the session (presumably to keep
# library deprecation notices out of the cell output).
# NOTE(review): later cells use `np`, `plt`, `sns` and `pd`, none of which is
# imported in the visible cells - confirm they are imported elsewhere.
warnings.filterwarnings("ignore")
In [2]:
# Histogram of 1000 standard-normal draws over 10 equal-width bins on [-4, 4].
hist_samples = np.random.randn(1000)
hist_edges = np.linspace(-4, 4, 11)
plt.hist(hist_samples, bins=hist_edges)
pass  # final no-op so the notebook does not echo the call's return value
In [3]:
# Box plots for a 6x10 array of uniform [0, 1) samples.
box_data = np.random.random((6, 10))
plt.boxplot(box_data)
pass  # final no-op so the notebook does not echo the call's return value
In [4]:
# Scatter of 100 random points with random marker sizes and colour values.
coords = np.random.uniform(0.1, 0.9, (2, 100))  # row 0 -> x, row 1 -> y
sizes = np.random.randint(10, 200, 100)
shades = np.random.random(100)
plt.scatter(coords[0], coords[1], s=sizes, c=shades)
pass  # final no-op so the notebook does not echo the call's return value
In [5]:
# Stem plot of 8 uniform random heights, with 5% padding around the data.
stem_heights = np.random.random(8)
plt.stem(stem_heights)
plt.margins(0.05)
pass  # final no-op so the notebook does not echo the call's return value
In [6]:
# One full period of the sine function sampled at 100 evenly spaced points;
# `x` and `y` are reused by the plotting cells that follow.
x = np.linspace(0, 2 * np.pi, num=100)
y = np.sin(x)
In [7]:
# Line plot of the sine curve with explicit axis limits
# [xmin, xmax, ymin, ymax].
axis_limits = [0, 2*np.pi, -1.05, 1.05]
plt.plot(x, y)
plt.axis(axis_limits)
pass  # final no-op so the notebook does not echo the call's return value
In [8]:
# Random scatter using the default colormap for the colour values.
coords = np.random.uniform(0.1, 0.9, (2, 100))  # row 0 -> x, row 1 -> y
sizes = np.random.randint(10, 200, 100)
shades = np.random.random(100)
plt.scatter(coords[0], coords[1], s=sizes, c=shades)
pass  # final no-op so the notebook does not echo the call's return value
In [9]:
# Same random scatter, coloured with the 'summer' colormap.
coords = np.random.uniform(0.1, 0.9, (2, 100))  # row 0 -> x, row 1 -> y
sizes = np.random.randint(10, 200, 100)
shades = np.random.random(100)
plt.scatter(coords[0], coords[1], s=sizes, c=shades, cmap='summer')
pass  # final no-op so the notebook does not echo the call's return value
In [10]:
# Same random scatter, coloured with the 'hsv' colormap.
coords = np.random.uniform(0.1, 0.9, (2, 100))  # row 0 -> x, row 1 -> y
sizes = np.random.randint(10, 200, 100)
shades = np.random.random(100)
plt.scatter(coords[0], coords[1], s=sizes, c=shades, cmap='hsv')
pass  # final no-op so the notebook does not echo the call's return value
In [11]:
# find the bottom, middle and top colors of the winter colormap
# Sample the 'winter' colormap at 0, 0.5 and 1 and display the result.
cmap_stops = np.linspace(0, 1, 3)
colors = plt.cm.winter(cmap_stops)
colors
Out[11]:
In [12]:
# Scatter 100 random points using only the three colours sampled from the
# 'winter' colormap in the previous cell.
# `colors` holds 3 rows but there are 100 points; current matplotlib rejects a
# colour array whose length differs from the number of points, so expand it
# explicitly by cycling through the rows (point i gets colors[i % 3]).
n_points = 100
point_colors = colors[np.arange(n_points) % len(colors)]
plt.scatter(*np.random.uniform(0.1, 0.9, (2, n_points)),
            s=np.random.randint(10, 200, n_points),
            c=point_colors)
pass  # final no-op so the notebook does not echo the call's return value
In [13]:
# Display the names of the style sheets matplotlib can apply.
plt.style.available
Out[13]:
In [14]:
# Draw the sine curve with the 'classic' style applied only inside this block.
with plt.style.context('classic'):
    plt.plot(x, y)
    plt.axis([0, 2*np.pi, -1.05, 1.05])
In [15]:
# Draw the sine curve with the 'fivethirtyeight' style applied only inside
# this block.
with plt.style.context('fivethirtyeight'):
    plt.plot(x, y)
    plt.axis([0, 2*np.pi, -1.05, 1.05])
In [16]:
# Draw the sine curve with the 'ggplot' style applied only inside this block.
with plt.style.context('ggplot'):
    plt.plot(x, y)
    plt.axis([0, 2*np.pi, -1.05, 1.05])
In [17]:
# Draw the sine curve inside matplotlib's xkcd (hand-drawn sketch) context.
with plt.xkcd():
    plt.plot(x, y)
    plt.axis([0, 2*np.pi, -1.05, 1.05])
Many, many options can be configured.
In [18]:
# Display the current matplotlib runtime configuration (rc) settings.
plt.rcParams
Out[18]:
In [19]:
%%file foo.mplstyle
axes.grid: True
axes.titlesize : 24
axes.labelsize : 20
lines.linewidth : 3
lines.markersize : 10
xtick.labelsize : 16
ytick.labelsize : 16
In [20]:
# Draw the sine curve using the custom style sheet written to foo.mplstyle
# by the previous cell; the style applies only inside this block.
with plt.style.context('foo.mplstyle'):
    plt.plot(x, y)
    plt.axis([0, 2*np.pi, -1.05, 1.05])
In [21]:
# Fully customised figure: sine and cosine on one set of axes with a legend,
# symbolic tick labels, a title and a text annotation.
plt.rcParams.update({'font.size': 22})  # changes the global rc settings
fig = plt.figure(figsize=(8, 6))
ax = plt.subplot(1, 1, 1)

plt.plot(x, y, label='sine curve', color='red', linestyle='dashed', linewidth=2)
plt.plot(x, np.cos(x), 'b-', label='cosine curve')
plt.legend(fontsize=14, loc='best')

plt.axis([0, 2*np.pi, -1.05, 1.05])
plt.xlabel('x')
plt.ylabel('sin(x)')

# Ticks at multiples of pi/2, labelled with LaTeX fractions.
tick_positions = [0, 0.5*np.pi, np.pi, 1.5*np.pi, 2*np.pi]
tick_labels = [0, r'$\frac{\pi}{2}$', r'$\pi$', r'$\frac{3\pi}{2}$', r'$2\pi$']
plt.xticks(tick_positions, tick_labels)

plt.title('Sine and Cosine Plots')
# transform=ax.transAxes places the text in axes-fraction coordinates
# (0-1 across the axes) rather than data coordinates.
plt.text(0.45, 0.9, 'Empty space', ha='left', va='top', transform=ax.transAxes)
pass  # final no-op so the notebook does not echo the call's return value
In [22]:
# 2x2 grid of subplots, each showing the sine curve in a different colour
# (red, green, blue, black in row-major order) with 5% padding.
fig, axes = plt.subplots(2, 2, figsize=(8, 8))
for ax, fmt in zip(axes.ravel(), 'rgbk'):
    ax.plot(x, y, fmt)
    ax.margins(0.05)
pass  # final no-op so the notebook does not echo a return value
In [23]:
# Irregular layout on a 3x3 grid: a full-width top row, a two-column panel in
# each of the lower two rows, and a two-row panel in the right-hand column.
ax1 = plt.subplot2grid((3, 3), (0, 0), colspan=3)
ax2 = plt.subplot2grid((3, 3), (1, 0), colspan=2)
ax3 = plt.subplot2grid((3, 3), (1, 2), rowspan=2)
ax4 = plt.subplot2grid((3, 3), (2, 0), colspan=2)
axes = [ax1, ax2, ax3, ax4]
colors = ['r', 'g', 'b', 'k']  # rebinds the `colors` array from the colormap cell
for ax, c in zip(axes, colors):
    ax.plot(x, y, c)
    ax.margins(0.05)
plt.tight_layout()
In [24]:
# Use seaborn's "notebook" context with fonts scaled by 1.5 and thicker lines.
context_overrides = {"lines.linewidth": 2.5}
sns.set_context("notebook", font_scale=1.5, rc=context_overrides)
In [25]:
import numpy.random as rng
In [26]:
# Two views of the same 100 standard-normal draws, side by side:
# left - density estimate with a rug; right - histogram with density estimate.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases in favour
# of histplot/kdeplot/rugplot - consider migrating once the target seaborn
# version is confirmed.
xs = rng.normal(0, 1, 100)
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
sns.distplot(xs, ax=axes[0], hist=False, rug=True)
sns.distplot(xs, ax=axes[1], hist=True)
pass  # final no-op so the notebook does not echo the call's return value
In [27]:
# Kernel density estimate of a two-component mixture:
# 50 draws from N(0, 1) followed by 100 draws from N(4, 0.8).
mixture = np.concatenate([rng.normal(0, 1, 50), rng.normal(4, 0.8, 100)])
sns.kdeplot(mixture)
pass  # final no-op so the notebook does not echo the call's return value
In [28]:
# Load seaborn's bundled 'iris' example dataset; later cells use its
# 'species', 'petal_length' and 'petal_width' columns.
iris = sns.load_dataset('iris')
In [29]:
# Preview the first rows of the iris data.
iris.head()
Out[29]:
In [30]:
# Joint and marginal density estimates of petal length vs. petal width.
# The documented name of this plot kind is 'kde'; current seaborn validates
# `kind` against a fixed list and rejects 'kdeplot'.
sns.jointplot(x='petal_length', y='petal_width', data=iris, kind='kde')
pass  # final no-op so the notebook does not echo the call's return value
In [31]:
# Petal length by species, shown as a box plot (left) and violin plot (right).
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
by_species = dict(x='species', y='petal_length', data=iris)
sns.boxplot(ax=axes[0], **by_species)
sns.violinplot(ax=axes[1], **by_species)
pass  # final no-op so the notebook does not echo the call's return value
In [32]:
# Download the Titanic CSV from the seaborn-data GitHub repository into a
# DataFrame (requires network access).
url = 'https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv'
titanic = pd.read_csv(url)
In [33]:
# Preview the first rows of the Titanic data.
titanic.head()
Out[33]:
In [34]:
# Re-apply the "notebook" context with 1.5x fonts, this time without the
# extra line-width override passed in the earlier set_context call.
sns.set_context('notebook', font_scale=1.5)
In [35]:
# Logistic regression of survival on fare, with one panel per combination of
# 'sex' (rows) and 'alone' (columns).
sns.lmplot(
    data=titanic,
    x='fare',
    y='survived',
    row='sex',
    col='alone',
    logistic=True,
)
pass  # final no-op so the notebook does not echo the call's return value
In [36]:
# Grid of jittered strip plots: fare and age (rows) against sex, class and
# embarkation town (columns).
# `height` is the current name of the per-facet size argument; the older
# `size` keyword was renamed and is no longer accepted by recent seaborn.
g = sns.PairGrid(titanic,
                 y_vars=['fare', 'age'],
                 x_vars=['sex', 'class', 'embark_town'],
                 aspect=1, height=5.5)
g.map(sns.stripplot, jitter=True, palette="bright")
pass  # final no-op so the notebook does not echo the call's return value
ggplot as an alternative to seaborn.

The ggplot module is a port of R's ggplot2 - usage is very similar except for the following minor differences:

- the data is passed in as a pandas dataframe
- a line-continuation character \ is needed to extend an expression over multiple lines

Only the most elementary examples are shown below. The ggplot module is extremely rich and sophisticated, with a steep learning curve if you're not already familiar with it from R. Please see the documentation for details.
In [37]:
from ggplot import *
In [38]:
# Load the rpy2 IPython extension, which provides the %R / %%R magics used below.
%load_ext rpy2.ipython
In [39]:
# Export R's `mtcars` object to the Python namespace (-o = output variable).
%R -o mtcars
In [40]:
# Preview the first rows of mtcars.
mtcars.head()
Out[40]:
In [41]:
# Scatter plot of mpg against wt.
ggplot(aes(x='wt', y='mpg'), data=mtcars) + geom_point()
Out[41]:
In [42]:
# Scatter plot of mpg against wt with a loess smoother added.
ggplot(aes(x='wt', y='mpg'), data=mtcars) + geom_point() + geom_smooth(method='loess')
Out[42]:
In [43]:
# Scatter plot of mpg against wt with a line layer added.
ggplot(aes(x='wt', y='mpg'), data=mtcars) + geom_point() + geom_line()
Out[43]:
In [44]:
# Histogram of mpg with bins 2 units wide.
ggplot(aes(x='mpg'), data=mtcars) + geom_histogram(binwidth=2)
Out[44]:
In [45]:
# Density curve of mpg, with the x-axis limited to [2.97, 41.33] and the
# title "Density plot". Backslashes continue the expression across lines.
ggplot(aes(x='mpg'), mtcars) + \
geom_line(stat="density") + \
xlim(2.97, 41.33) + \
labs(title="Density plot")
Out[45]:
In [46]:
# Bind the data to the name `cars`, which the %%R cell below imports with -i.
cars = mtcars
In [52]:
%%R -i cars
# R cell: `cars` is passed in from Python via -i.
# Jittered scatter of am against mpg, overlaid with a binomial GLM fit of
# am on mpg drawn in red.
library('ggplot2')
ggplot(cars, aes(x=mpg, y=am)) +
geom_point(position=position_jitter(width=.3, height=.08), shape=21, alpha=0.6, size=3) +
stat_smooth(method=glm, method.args=list(family="binomial"), color="red")