In [ ]:
# Notebook setup: imports, warning filters and plotly offline mode.
# Reference tutorial: https://plot.ly/python/t-test/

# The warning filters are installed BEFORE the third-party imports on purpose:
# the "numpy.dtype/ufunc size changed" warnings are typically emitted while
# compiled extension modules are being imported, so a filter registered after
# those imports would come too late to silence them.
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

import plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff

import numpy as np
import pandas as pd
import scipy
# A bare `import scipy` is not guaranteed to load the `stats` subpackage.
# The cells below call `scipy.stats.*`, so import it explicitly instead of
# relying on another package having imported it as a side effect.
import scipy.stats

# Switch plotly into offline notebook mode so py.offline.iplot() can be used
# to display figures in the cells below.
py.offline.init_notebook_mode(connected=True)

In [ ]:
# Sample: plot the probability density curves of two normal distributions
# (both with scale=1, centred at 0 and at 2) evaluated on a shared x grid.
xs_nd = np.linspace(start=-4, stop=4, num=200)

norm_trace_0 = go.Scatter(
    x = xs_nd,
    y = scipy.stats.norm.pdf(x=xs_nd, loc=0, scale=1),
    name = 'Mean is 0',
    # Fixed: the original value was 'lines ' (trailing space), which is not one
    # of the documented flag values for Scatter.mode.
    mode = 'lines'
)

norm_trace_1 = go.Scatter(
    x = xs_nd,
    y = scipy.stats.norm.pdf(x=xs_nd, loc=2, scale=1),
    name = 'Mean is 2',
    # Drawn with markers as well, so the two curves are visually distinct.
    mode = 'lines+markers'
)

norm_fig = go.Figure(data=[norm_trace_0, norm_trace_1])

py.offline.iplot(norm_fig)

In [ ]:
# Generate two groups of normally distributed samples (1000 draws each,
# scale=1) centred at 0 and at 2.
#
# The global legacy RNG is seeded first so that "Restart Kernel -> Run All"
# reproduces the same samples -- and therefore the same plots and t-test
# results -- on every run. Later cells that call np.random.normal() draw from
# the same seeded stream when the notebook is executed top to bottom.
SEED = 42
np.random.seed(SEED)

data_nd_1 = np.random.normal(loc=0, scale=1, size=1000)
data_nd_2 = np.random.normal(loc=2, scale=1, size=1000)

In [ ]:
# Sample: overlay the frequency histograms of the two sample groups.

# First group: let plotly choose the bins automatically.
hist_trace_1 = go.Histogram(x=data_nd_1, autobinx=True, name='Mean of 0')

# Second group: explicit bins of width 0.1, running from the smallest value of
# group 1 up to the largest value of group 2.
hist_trace_2 = go.Histogram(
    x=data_nd_2,
    xbins={
        'start': np.min(data_nd_1),
        'size': 0.1,
        'end': np.max(data_nd_2),
    },
    name='Mean of 2',
)

# Author's observation: once hist_trace_2 sets xbins, the autobinx setting of
# hist_trace_1 no longer takes effect.
hist_fig = go.Figure(data=[hist_trace_1, hist_trace_2])
py.offline.iplot(hist_fig)

In [ ]:
# Sample: distribution plot of both groups. Author's guess: create_distplot
# wraps the histogram handling internally (here with histnorm='probability').
dist_trace_0 = ff.create_distplot(
    [data_nd_1, data_nd_2],          # hist_data: one array per group
    ['Mean of 0', 'Mean of 2'],      # group_labels, same order as hist_data
    bin_size=0.1,
    histnorm='probability',
    curve_type='kde',                # author's note: 'normal' is smoother than 'kde'
    show_hist=False,                 # toggles whether the histogram bars are drawn
    colors=['#0000FF', '#00FF00'],
)
py.offline.iplot(dist_trace_0)

In [ ]:
# One-sample t-test: compare the mean of data_nd_1 against a population mean of 0.
t_1sample_result = scipy.stats.ttest_1samp(data_nd_1, 0.0)
t_1sample_result

In [ ]:
# Two-sample t-test assuming equal variances ("ind" stands for independent).
t_ind_result = scipy.stats.ttest_ind(data_nd_1, data_nd_2, equal_var=True)
t_ind_result

In [ ]:
# Author's note: when loc is close to 0, the test is expected to report that
# the data are insufficient to reject the null hypothesis.
# A small third group (30 draws, centred at 0.1) is compared against data_nd_1.
data_nd_3 = np.random.normal(0.1, 1, 30)
t_ind_result = scipy.stats.ttest_ind(data_nd_1, data_nd_3, equal_var=True)
t_ind_result

In [ ]:
# Fourth group: 30 draws centred at 0 with a slightly larger spread (scale=1.1),
# compared against data_nd_1 without assuming equal variances
# (equal_var=False; per the SciPy docs this is Welch's t-test).
data_nd_4 = np.random.normal(0, 1.1, 30)
t_ind_result = scipy.stats.ttest_ind(data_nd_1, data_nd_4, equal_var=False)
t_ind_result