In [1]:
import prettyplotlib as ppl
import numpy as np
from scipy.stats import gaussian_kde
import brewer2mpl
# 'Set2' qualitative palette with 8 colours from brewer2mpl, in the form exposed
# by its `.mpl_colors` attribute; set2[0] is the default `facecolor` of violinplot().
set2 = brewer2mpl.get_map('Set2', 'qualitative', 8).mpl_colors
def violinplot(ax, x, ys, bp=False, cut=False, facecolor=set2[0],
               edgecolor=ppl.almost_black,
               alpha=0.3, bw_method=0.05, width=None):
    """Draw one violin per dataset in `ys`, centred on the matching entry of `x`.

    Each violin is a Gaussian kernel density estimate of the dataset, mirrored
    around its position and rescaled so that its widest point reaches `width`
    on either side.

    Parameters
    ----------
    ax : axes object
        Target axes; must provide `fill_betweenx` and (when `bp` is true)
        `boxplot`.
    x : sequence of numbers
        Position of each violin along the horizontal axis, one per dataset.
    ys : sequence of 1-D numpy arrays
        The datasets to plot.
    bp : bool
        If true, overlay a notched box plot of `ys` at the same positions.
    cut : bool
        If true, each violin stops at the minimum/maximum of its data.
        Otherwise it is extended by one standard deviation at both ends.
    facecolor : colour or list of colours
        Fill colour. When a `list` is given, violin `i` uses `facecolor[i]`.
    edgecolor : colour
        Outline colour of the violins.
    alpha : float
        Opacity of the fill.
    bw_method : str, scalar or callable
        Passed straight through to `scipy.stats.gaussian_kde`.
    width : float, optional
        Half-width of the widest point of each violin. Defaults to
        ``min(0.15 * max(span of x, 1.0), 0.4)``.

    Returns
    -------
    The `ax` that was passed in.

    Adapted by: Olga Botvinnik
    # Original Author: Teemu Ikonen <tpikonen@gmail.com>
    # Based on code by Flavio Codeco Coelho,
    # http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    """
    dist = np.max(x) - np.min(x)
    if width is None:
        width = min(0.15 * max(dist, 1.0), 0.4)

    # Only a real `list` is treated as "one colour per violin": a single colour
    # may itself be a tuple of channel values.
    per_violin_colors = isinstance(facecolor, list)

    for i, (d, p) in enumerate(zip(ys, x)):
        k = gaussian_kde(d, bw_method=bw_method)  # kernel density estimate

        # How far the violin extends beyond the data range.
        # FIXME: the extension of exactly one standard deviation is a magic constant.
        s = 0.0 if cut else np.std(d)
        m = k.dataset.min() - s  # lower bound of violin
        M = k.dataset.max() + s  # upper bound of violin

        # NOTE: this grid must not be called `x`; doing so would overwrite the
        # positions argument that the box plot below still needs.
        support = np.linspace(m, M, 100)
        v = k.evaluate(support)   # violin profile (density curve)
        v = width * v / v.max()   # scale the violin to the available space

        ax.fill_betweenx(support, -v + p, v + p,
                         facecolor=facecolor[i] if per_violin_colors else facecolor,
                         alpha=alpha, edgecolor=edgecolor)

    if bp:
        ax.boxplot(ys, notch=1, positions=x, vert=1)

    # Passes the 'top' and 'right' spine names to prettyplotlib's chart-junk
    # remover (presumably hiding those spines -- confirm against prettyplotlib).
    ppl.remove_chartjunk(ax, ['top', 'right'])
    return ax
In [4]:
[50:100::10]
In [11]:
# Demo: n standard-normal samples of random size (50-100 points, in steps of 10),
# the k-th sample shifted by +k, drawn as violins at positions 0..n-1.
# NOTE(review): `plt` is not imported in the cells shown here -- presumably it came
# from a pylab-style session; a fresh kernel needs `import matplotlib.pyplot as plt`.
np.random.seed(0)  # fixed seed so that Restart & Run All redraws the same figure
n = 5
x = range(n)
sample_sizes = range(50, 110, 10)
# A named loop variable instead of `_`, which IPython reserves for the last output.
ys = [np.random.randn(np.random.choice(sample_sizes)) + shift for shift in range(n)]
fig, ax = plt.subplots(1)
violinplot(ax, x, ys)
Out[11]:
Change the bandwidth size (bigger = smoother) with `bw_method`
(the default is `bw_method=0.05`, but you can pass any `bw_method` value accepted by
`scipy.stats.gaussian_kde`). From its documentation at
http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gaussian_kde.html#scipy.stats.gaussian_kde:
Parameters :
dataset : array_like
Datapoints to estimate from. In case of univariate data this is a 1-D array, otherwise a 2-D array with shape (# of dims, # of data).
bw_method : str, scalar or callable, optional
The method used to calculate the estimator bandwidth. This can be ‘scott’, ‘silverman’, a scalar constant or a callable. If a scalar, this will be used directly as kde.factor. If a callable, it should take a gaussian_kde instance as only parameter and return a scalar. If None (default), ‘scott’ is used. See Notes for more details.
In [12]:
# Same positions and datasets as the previous figure, with a larger KDE bandwidth
# (bw_method=0.5 instead of the 0.05 default) for comparison.
fig, ax = plt.subplots(nrows=1)
violinplot(ax=ax, x=x, ys=ys, bw_method=0.5)
Out[12]:
In [ ]: