In [50]:
%matplotlib inline
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
np.random.seed(sum(map(ord, "aesthetics")))

In [13]:
def sinplot(flip=1):
    x = np.linspace(0, 14, 100)
    for i in range(1, 7):
        plt.plot(x, np.sin(x + i * .5) * (7 - i) * flip)

In [41]:
sinplot()



In [42]:
sns.set_style("whitegrid")
data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
sns.boxplot(data);



In [43]:
sns.set_style("dark")
sinplot()



In [44]:
sns.set_style("ticks")
sinplot()
sns.despine()



In [47]:
f, ax = plt.subplots()
sns.violinplot(data)
sns.despine(trim=True);



In [48]:
with sns.axes_style("darkgrid"):
    plt.subplot(211)
    sinplot()
plt.subplot(212)
sinplot(-1)



In [49]:
sns.set_style("white")
sns.despine()
current_palette = sns.color_palette()
sns.palplot(current_palette)


<matplotlib.figure.Figure at 0x10bd57fd0>

In [50]:
sns.palplot(sns.color_palette("hls", 8))



In [51]:
sns.palplot(sns.hls_palette(8, l=.3, s=.8))



In [52]:
sns.palplot(sns.color_palette("Paired"))



In [53]:
flatui = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e", "#2ecc71"]
sns.palplot(sns.color_palette(flatui))



In [5]:
plt.plot([0, 1], [0, 1], sns.xkcd_rgb["pale red"], lw=3)
plt.plot([0, 1], [0, 2], sns.xkcd_rgb["medium green"], lw=3)
plt.plot([0, 1], [0, 3], sns.xkcd_rgb["denim blue"], lw=3);



In [8]:
x, y = np.random.multivariate_normal([0, 0], [[1, -.5], [-.5, 1]], size=300).T
cmap = sns.cubehelix_palette(8, start=.5, rot=-.75, as_cmap=True)
sns.kdeplot(x, y, cmap=cmap, shade=True);



In [11]:
pal = sns.dark_palette("green", as_cmap=True)
sns.kdeplot(x, y, cmap=pal);



In [14]:
with sns.color_palette("PuBuGn_d"):
    sinplot()



In [16]:
sns.set_palette("deep", desat=.6)
sns.set_context(rc={"figure.figsize": (8, 4)})
np.random.seed(9221999)

In [17]:
data = randn(75)
plt.hist(data);



In [30]:
plt.hist(data, 15, color=sns.desaturate("indianred", 1));



In [39]:
data1 = stats.poisson(1).rvs(1)
data2 = stats.poisson(2).rvs(10)
max_data = np.r_[data1, data2].max()
bins = np.linspace(0, max_data, max_data + 1)
plt.hist(data1, bins, normed=True, color="#6495ED", alpha=.5)
plt.hist(data2, bins, normed=True, color="#F08080", alpha=.5);



In [48]:
x = stats.gamma(3).rvs(5000)
plt.hist(x, 70, histtype="stepfilled", alpha=.7);



In [41]:
help(plt.hist)


Help on function hist in module matplotlib.pyplot:

hist(x, bins=10, range=None, normed=False, weights=None, cumulative=False, bottom=None, histtype=u'bar', align=u'mid', orientation=u'vertical', rwidth=None, log=False, color=None, label=None, stacked=False, hold=None, **kwargs)
    Plot a histogram.
    
    Compute and draw the histogram of *x*. The return value is a
    tuple (*n*, *bins*, *patches*) or ([*n0*, *n1*, ...], *bins*,
    [*patches0*, *patches1*,...]) if the input contains multiple
    data.
    
    Multiple data can be provided via *x* as a list of datasets
    of potentially different length ([*x0*, *x1*, ...]), or as
    a 2-D ndarray in which each column is a dataset.  Note that
    the ndarray form is transposed relative to the list form.
    
    Masked arrays are not supported at present.
    
    Parameters
    ----------
    x : (n,) array or sequence of (n,) arrays
        Input values, this takes either a single array or a sequency of
        arrays which are not required to be of the same length
    
    bins : integer or array_like, optional, default: 10
        If an integer is given, `bins + 1` bin edges are returned,
        consistently with :func:`numpy.histogram` for numpy version >=
        1.3.
    
        Unequally spaced bins are supported if `bins` is a sequence.
    
    range : tuple, optional, default: None
        The lower and upper range of the bins. Lower and upper outliers
        are ignored. If not provided, `range` is (x.min(), x.max()). Range
        has no effect if `bins` is a sequence.
    
        If `bins` is a sequence or `range` is specified, autoscaling
        is based on the specified bin range instead of the
        range of x.
    
    normed : boolean, optional, default: False
        If `True`, the first element of the return tuple will
        be the counts normalized to form a probability density, i.e.,
        ``n/(len(x)`dbin)``, ie the integral of the histogram will sum to
        1. If *stacked* is also *True*, the sum of the histograms is
        normalized to 1.
    
    weights : array_like, shape (n, ), optional, default: None
        An array of weights, of the same shape as `x`.  Each value in `x`
        only contributes its associated weight towards the bin count
        (instead of 1).  If `normed` is True, the weights are normalized,
        so that the integral of the density over the range remains 1.
    
    cumulative : boolean, optional, default : False
        If `True`, then a histogram is computed where each bin gives the
        counts in that bin plus all bins for smaller values. The last bin
        gives the total number of datapoints.  If `normed` is also `True`
        then the histogram is normalized such that the last bin equals 1.
        If `cumulative` evaluates to less than 0 (e.g., -1), the direction
        of accumulation is reversed.  In this case, if `normed` is also
        `True`, then the histogram is normalized such that the first bin
        equals 1.
    
    bottom : array_like, scalar, or None, default: None
        Location of the bottom baseline of each bin.  If a scalar,
        the base line for each bin is shifted by the same amount.
        If an array, each bin is shifted independently and the length
        of bottom must match the number of bins.  If None, defaults to 0.
    
    histtype : ['bar' | 'barstacked' | 'step' | 'stepfilled'], optional
        The type of histogram to draw.
    
        - 'bar' is a traditional bar-type histogram.  If multiple data
          are given the bars are aranged side by side.
    
        - 'barstacked' is a bar-type histogram where multiple
          data are stacked on top of each other.
    
        - 'step' generates a lineplot that is by default
          unfilled.
    
        - 'stepfilled' generates a lineplot that is by default
          filled.
    
    align : ['left' | 'mid' | 'right'], optional, default: 'mid'
        Controls how the histogram is plotted.
    
            - 'left': bars are centered on the left bin edges.
    
            - 'mid': bars are centered between the bin edges.
    
            - 'right': bars are centered on the right bin edges.
    
    orientation : ['horizontal' | 'vertical'], optional
        If 'horizontal', `~matplotlib.pyplot.barh` will be used for
        bar-type histograms and the *bottom* kwarg will be the left edges.
    
    rwidth : scalar, optional, default: None
        The relative width of the bars as a fraction of the bin width.  If
        `None`, automatically compute the width. Ignored if `histtype` =
        'step' or 'stepfilled'.
    
    log : boolean, optional, default : False
        If `True`, the histogram axis will be set to a log scale. If `log`
        is `True` and `x` is a 1D array, empty bins will be filtered out
        and only the non-empty (`n`, `bins`, `patches`) will be returned.
    
    color : color or array_like of colors, optional, default: None
        Color spec or sequence of color specs, one per dataset.  Default
        (`None`) uses the standard line color sequence.
    
    label : string, optional, default: ''
        String, or sequence of strings to match multiple datasets.  Bar
        charts yield multiple patches per dataset, but only the first gets
        the label, so that the legend command will work as expected.
    
    stacked : boolean, optional, default : False
        If `True`, multiple data are stacked on top of each other If
        `False` multiple data are aranged side by side if histtype is
        'bar' or on top of each other if histtype is 'step'
    
    Returns
    -------
    n : array or list of arrays
        The values of the histogram bins. See **normed** and **weights**
        for a description of the possible semantics. If input **x** is an
        array, then this is an array of length **nbins**. If input is a
        sequence arrays ``[data1, data2,..]``, then this is a list of
        arrays with the values of the histograms for each of the arrays
        in the same order.
    
    bins : array
        The edges of the bins. Length nbins + 1 (nbins left edges and right
        edge of last bin).  Always a single array even when multiple data
        sets are passed in.
    
    patches : list or list of lists
        Silent list of individual patches used to create the histogram
        or list of such list if multiple input datasets.
    
    Other Parameters
    ----------------
    kwargs : `~matplotlib.patches.Patch` properties
    
    See also
    --------
    hist2d : 2D histograms
    
    Notes
    -----
    Until numpy release 1.5, the underlying numpy histogram function was
    incorrect with `normed`=`True` if bin sizes were unequal.  MPL
    inherited that error.  It is now corrected within MPL when using
    earlier numpy versions.
    
    Examples
    --------
    .. plot:: mpl_examples/statistics/histogram_demo_features.py
    
    
    
    Additional kwargs: hold = [True|False] overrides default hold state


In [56]:
y = stats.gamma(5).rvs(5000)
with sns.axes_style("darkgrid"):
    sns.jointplot(x, y, kind="hex");



In [153]:
sns.set_palette("hls", 1)
data = randn(30)
sns.rugplot(data)
sns.kdeplot(data, shade=True);



In [154]:
kernels = ["biw", "cos", "epa", "gau", "tri", "triw"]
pal = sns.color_palette("hls", len(kernels))
for k, c in zip(kernels, pal):
    sns.kdeplot(data, kernel=k, color=c, label=k)
plt.legend();



In [155]:
with sns.color_palette("Set1"):
    sns.kdeplot(data, cumulative=True)



In [156]:
df = pd.DataFrame(zip(np.linspace(0,100,10), np.linspace(0,100,10)), columns = ['A','B'])
df.plot()
sns.lmplot('A','B',df)


Out[156]:
<seaborn.axisgrid.FacetGrid at 0x114ab93d0>

In [157]:
plt.figure(figsize=(4, 7))
data = stats.norm(0, 1).rvs((3, 100)) + np.arange(3)[:, None]

with sns.color_palette("Set2"):
    for d, label in zip(data, list("ABC")):
        sns.kdeplot(d, vertical=True, shade=True, label=label)



In [138]:
iris = sns.load_dataset('iris')
sns.jointplot('sepal_length','sepal_width',iris, kind='kde')
sns.jointplot('sepal_length','petal_length',iris, kind='kde')
iris


Out[138]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
5 5.4 3.9 1.7 0.4 setosa
6 4.6 3.4 1.4 0.3 setosa
7 5.0 3.4 1.5 0.2 setosa
8 4.4 2.9 1.4 0.2 setosa
9 4.9 3.1 1.5 0.1 setosa
10 5.4 3.7 1.5 0.2 setosa
11 4.8 3.4 1.6 0.2 setosa
12 4.8 3.0 1.4 0.1 setosa
13 4.3 3.0 1.1 0.1 setosa
14 5.8 4.0 1.2 0.2 setosa
15 5.7 4.4 1.5 0.4 setosa
16 5.4 3.9 1.3 0.4 setosa
17 5.1 3.5 1.4 0.3 setosa
18 5.7 3.8 1.7 0.3 setosa
19 5.1 3.8 1.5 0.3 setosa
20 5.4 3.4 1.7 0.2 setosa
21 5.1 3.7 1.5 0.4 setosa
22 4.6 3.6 1.0 0.2 setosa
23 5.1 3.3 1.7 0.5 setosa
24 4.8 3.4 1.9 0.2 setosa
25 5.0 3.0 1.6 0.2 setosa
26 5.0 3.4 1.6 0.4 setosa
27 5.2 3.5 1.5 0.2 setosa
28 5.2 3.4 1.4 0.2 setosa
29 4.7 3.2 1.6 0.2 setosa
... ... ... ... ... ...
120 6.9 3.2 5.7 2.3 virginica
121 5.6 2.8 4.9 2.0 virginica
122 7.7 2.8 6.7 2.0 virginica
123 6.3 2.7 4.9 1.8 virginica
124 6.7 3.3 5.7 2.1 virginica
125 7.2 3.2 6.0 1.8 virginica
126 6.2 2.8 4.8 1.8 virginica
127 6.1 3.0 4.9 1.8 virginica
128 6.4 2.8 5.6 2.1 virginica
129 7.2 3.0 5.8 1.6 virginica
130 7.4 2.8 6.1 1.9 virginica
131 7.9 3.8 6.4 2.0 virginica
132 6.4 2.8 5.6 2.2 virginica
133 6.3 2.8 5.1 1.5 virginica
134 6.1 2.6 5.6 1.4 virginica
135 7.7 3.0 6.1 2.3 virginica
136 6.3 3.4 5.6 2.4 virginica
137 6.4 3.1 5.5 1.8 virginica
138 6.0 3.0 4.8 1.8 virginica
139 6.9 3.1 5.4 2.1 virginica
140 6.7 3.1 5.6 2.4 virginica
141 6.9 3.1 5.1 2.3 virginica
142 5.8 2.7 5.1 1.9 virginica
143 6.8 3.2 5.9 2.3 virginica
144 6.7 3.3 5.7 2.5 virginica
145 6.7 3.0 5.2 2.3 virginica
146 6.3 2.5 5.0 1.9 virginica
147 6.5 3.0 5.2 2.0 virginica
148 6.2 3.4 5.4 2.3 virginica
149 5.9 3.0 5.1 1.8 virginica

150 rows × 5 columns


In [147]:
spec = iris.groupby('species')
spec.mean()


Out[147]:
sepal_length sepal_width petal_length petal_width
species
setosa 5.006 3.428 1.462 0.246
versicolor 5.936 2.770 4.260 1.326
virginica 6.588 2.974 5.552 2.026

In [148]:
spec.count()


Out[148]:
sepal_length sepal_width petal_length petal_width
species
setosa 50 50 50 50
versicolor 50 50 50 50
virginica 50 50 50 50

In [150]:
data = np.random.multivariate_normal([0, 0], [[1, 2], [2, 20]], size=1000)
data = pd.DataFrame(data, columns=["X", "Y"])
mpl.rc("figure", figsize=(6, 6))
with sns.axes_style("white"):
    sns.jointplot("X", "Y", data, kind="kde");



In [151]:
sns.kdeplot(data, bw="silverman", gridsize=50, cut=2, clip=(-11, 11), cmap="BuGn_d");



In [162]:
sns.set(rc={"figure.figsize": (6, 6)})
data2 = [randn(100), randn(100) + 1]
sns.boxplot(data2);



In [163]:
sns.boxplot(data2, names=["group1", "group1"], whis=np.inf, color="PaleGreen");



In [164]:
pre = randn(25)
post = pre + np.random.rand(25)
sns.boxplot([pre, post], names=["pre", "post"], color="coral", join_rm=True);



In [167]:
d1 = stats.norm(0, 5).rvs(100)
d2 = np.concatenate([stats.gamma(4).rvs(50),
                     -1 * stats.gamma(4).rvs(50)])
data = pd.DataFrame(dict(d1=d1, d2=d2))
sns.boxplot(data, color="pastel", widths=.5, whis=np.inf);



In [168]:
sns.violinplot(data, color="pastel");



In [169]:
y = np.random.randn(200)
g = np.random.choice(list("abcdef"), 200)
for i, l in enumerate("abcdef"):
    y[g == l] += i // 2
df = pd.DataFrame(dict(score=y, group=g))
sns.boxplot(df.score, df.group);



In [4]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

In [5]:
tips = sns.load_dataset("tips")

In [6]:
sns.lmplot("total_bill", "tip", tips);



In [9]:
sns.lmplot("total_bill", "tip", tips, ci=68);



In [8]:
tips.head()
sns.lmplot("total_bill", "size", tips)


Out[8]:
<seaborn.axisgrid.FacetGrid at 0x10ce1dc50>

In [15]:
sns.lmplot("size", "tip", tips, x_jitter=.15);



In [18]:
sns.lmplot("size", "tip", tips, x_estimator=np.mean);



In [19]:
sns.lmplot("total_bill", "tip", tips, hue="time", palette="Set1", fit_reg=False);



In [20]:
sns.lmplot("size", "total_bill", tips, order=2);



In [21]:
sns.lmplot("size", "total_bill", tips, order=3);



In [25]:
sns.lmplot("total_bill", "tip", tips, lowess=True, line_kws={"color": "0.2"});



In [31]:
tips["big_tip"] = (tips["tip"] / tips["total_bill"]) > .15
sns.lmplot("total_bill", "big_tip", tips);
sns.lmplot("total_bill", "big_tip", tips, y_jitter=.05);
sns.lmplot("total_bill", "big_tip", tips, y_jitter=.05, logistic=True);



In [32]:
sns.lmplot("total_bill", "tip", tips, robust=True, n_boot=500);



In [33]:
c = np.random.randn(50)
a = 2 + c + np.random.randn(50)
b = 3 + c + np.random.randn(50)
df = pd.DataFrame(dict(A=a, B=b, C=c), columns=list("ABC"))

In [34]:
sns.lmplot("A", "B", df);



In [35]:
sns.lmplot("A", "B", df, x_partial="C");



In [38]:
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
sns.regplot("total_bill", "tip", tips, ax=ax1)
sns.boxplot(tips["tip"], tips["size"], color="Blues_r", ax=ax2).set_ylabel("")
f.tight_layout()



In [51]:
x, y = np.random.multivariate_normal([1, 5], [(2, -.8), (-.8, 2)], 80).T
ax = sns.regplot(x, y, color="seagreen")
ax.set(xlabel="x variable", ylabel="y variable");



In [52]:
sns.residplot(x, y);



In [53]:
z = x + 1.5 * x ** 2 + np.random.randn(len(x))
sns.residplot(x, z, color="indianred");



In [54]:
z = x + 1.5 * x ** 3 + np.random.randn(len(x))
sns.residplot(x, z, color="indianred");



In [55]:
sns.residplot(x, z, color="indianred", lowess=True);



In [56]:
sns.residplot(x, y, color="indianred", order=2, lowess=True);



In [57]:
sns.jointplot("total_bill", "tip", tips);



In [58]:
sns.jointplot("total_bill", "tip", tips, kind="reg", color="seagreen");



In [59]:
sns.jointplot("total_bill", "tip", tips, kind="resid", color="#774499");



In [60]:
rs = np.random.RandomState(1)
x1, x2 = rs.normal(size=(2, 100))
y1 = .5 * x1 + 2 * x2 + 2 * x1 * x2 + rs.normal(size=100)
y2 = x1 + x2 + rs.normal(size=100)
p = 1 / (1 + np.exp(-y1))
y_flip = [rs.binomial(1, p_i) for p_i in p]
df = pd.DataFrame(dict(x1=x1, x2=x2, y1=y1, y2=y2, y_flip=y_flip))

In [61]:
bins = np.array([-1, 0, 1])
binned = np.digitize(x2, bins)
df["x2_bin"] = binned
sns.lmplot("x1", "y1", df, col="x2_bin", hue="x2_bin", palette="PuBuGn_d", size=3);



In [62]:
sns.interactplot(x1, x2, y1);



In [63]:
sns.interactplot("x1", "x2", "y1", df, cmap="coolwarm", filled=True, levels=25);



In [ ]: