学习绘制分布


In [ ]:
# https://plot.ly/python/distplot/

import plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff

import numpy as np

import warnings
# Ignore warnings whose text matches either of these two messages.
# NOTE(review): these filters are registered after the imports above; if the
# warnings are raised at import time they would have to be installed before
# the imports to take effect -- confirm.
for ignored_message in ("numpy.dtype size changed", "numpy.ufunc size changed"):
    warnings.filterwarnings("ignore", message=ignored_message)

# Initialise plotly's offline notebook mode before any figure is shown.
py.offline.init_notebook_mode(connected=True)

Basic distplot


In [ ]:
# Seed the generator so the sample, and every plot built from it, is the
# same on each Restart & Run All.
np.random.seed(0)
xs = np.random.randn(1000)  # ndarray of 1000 draws
xs[:5], xs[-5:]  # show only the first and last five values

In [ ]:
# create_distplot plots distributions for several datasets at once, so the
# data, labels and colors are all wrapped in lists even for a single group.
fig = ff.create_distplot(
    [xs],
    ['basic distplot'],
    colors=['#DAA520'],
    bin_size=0.08,
)

# The curve is not the exact normal density function, only an approximation.
py.offline.iplot(fig)

In [ ]:
# ff.create_xx vs. go.Figure: the ff.create_xx helpers are more convenient,
# while go.Figure is more flexible.
# The box plot here is assembled by hand with go.

# Figure-level settings: title, size and y-axis options.
layout = go.Layout(
    title='layout-title',
    width=600,
    height=500,
    yaxis={'title': 'yaxis-title', 'zeroline': True},
)

# A single box trace over the sample, with only the outlier points drawn.
box_0 = go.Box(
    x=xs,
    name='box-name',
    boxpoints='outliers',
    jitter=1,
    marker={'color': '#DAA520'},
)

type(box_0)  # go._box.Box

fig = go.Figure(data=[box_0], layout=layout)

py.offline.iplot(fig)

In [ ]:
# Violin plot: no go objects are needed here, and it gives a nice view of
# the normal distribution of the sample.
fig = ff.create_violin(
    xs,
    colors='#DAA520',
    title='violin-title',
)
py.offline.iplot(fig)

Plot multiple datasets


In [ ]:
# Seed the generator so the three groups are identical on every
# Restart & Run All.
np.random.seed(1)
# Three samples of 1000 draws each, shifted by -2, 0 and +2.
xs_1 = np.random.randn(1000) - 2
xs_2 = np.random.randn(1000)
xs_3 = np.random.randn(1000) + 2

In [ ]:
# One curve per group: three datasets, three labels, three colors.
# The histogram bars are switched off and curve_type is set to 'normal'.
fig = ff.create_distplot(
    [xs_1, xs_2, xs_3],
    ['g1', 'g2', 'g3'],
    colors=['#FF0000', '#00FF00', '#0000FF'],
    curve_type='normal',
    show_hist=False,
    bin_size=0.08,
)

py.offline.iplot(fig)

In [ ]: