In [ ]:
# https://plot.ly/python/discrete-frequency/
import plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
import numpy as np
import pandas as pd
import warnings
# Ignore the two numpy "size changed" warnings (matched by message prefix).
for _ignored_message in ("numpy.dtype size changed", "numpy.ufunc size changed"):
    warnings.filterwarnings("ignore", message=_ignored_message)

# Switch plotly to offline notebook mode so iplot() can render inline.
py.offline.init_notebook_mode(connected=True)
# This notebook mainly exercises the histnorm parameter:
# 'density' | 'probability' | 'percent'
In [ ]:
# Show the distribution progressively with a Histogram by controlling the bin size
In [ ]:
# Load the per-country alcohol consumption table from the working directory.
df_data = pd.read_csv('./2010_alcohol_consumption_by_country.csv')
nd_data = df_data.values  # same data as a plain ndarray
type(df_data), type(nd_data)

# Preview the first ten rows as a plotly table.
preview_rows = df_data.head(10)
fig = ff.create_table(table_text=preview_rows)  # nd_data
py.offline.iplot(fig)

# The 'alcohol' column is the series every later cell plots.
alcohol_ss_data = df_data['alcohol']
type(alcohol_ss_data)
In [ ]:
# Histogram of the alcohol column with histnorm='density'.
# Bins are 0.5 wide and run from the column minimum to the column maximum.
trace_hist = go.Histogram(
    x=alcohol_ss_data,
    xbins={
        'start': alcohol_ss_data.min(),
        'size': .5,
        'end': alcohol_ss_data.max(),
    },
    histnorm='density',
    name='trace-name-t1',
    marker={'color': '#0000FF'},
)

# `trace_hist` and `layout` are reused by the following cells.
layout = go.Layout(title='layout-title-density')
fig = go.Figure(data=[trace_hist], layout=layout)
py.offline.iplot(fig)
In [ ]:
# Re-plot the shared histogram trace with histnorm='probability'.
# The trace object is mutated in place, so the change persists across cells.
trace_hist['histnorm'] = 'probability'
fig = go.Figure(data=[trace_hist], layout=layout)
fig['layout']['title'] = 'layout-title-probability'
py.offline.iplot(fig)
In [ ]:
# Re-plot the shared histogram trace with histnorm='percent'.
# The trace object is mutated in place, so the change persists across cells.
trace_hist['histnorm'] = 'percent'
fig = go.Figure(data=[trace_hist], layout=layout)
fig['layout']['title'] = 'layout-title-percent'
py.offline.iplot(fig)
In [ ]:
# Running total of the alcohol column, in the order the rows were loaded.
alcohol_ss_cum = alcohol_ss_data.cumsum()

# Quick sanity output: range, sizes, and the first few raw / cumulative values.
print(np.min(alcohol_ss_data), np.max(alcohol_ss_data), alcohol_ss_data.size, alcohol_ss_cum.size)
print(alcohol_ss_data.iloc[:5])
print("\n")
print(alcohol_ss_cum.iloc[:5])

# NOTE(review): this example looks questionable. The running total is taken in
# file order and plotted against row position, so the curve is not a CDF over
# consumption values even though the figure is titled "CDF".
row_positions = list(range(len(alcohol_ss_cum)))
normalised_total = alcohol_ss_cum / alcohol_ss_cum.iloc[-1]
trace = go.Scatter(
    x=row_positions,
    y=normalised_total,
    marker={'color': '#0000FF'},
)
fig = go.Figure(data=[trace], layout=layout)
fig.layout.title = "CDF"
py.offline.iplot(fig)
In [ ]:
# Empirical CDF of the alcohol column.
#
# The previous version plotted cumsum(sorted values) / sum(values). That is the
# cumulative share of the column total, not the fraction of observations that
# are <= x, so it was not a CDF. A single NaN in the column also made the
# normalising `.iloc[-1]` NaN and blanked the whole curve.
#
# Missing values are dropped first, so n counts only real observations.
alcohol_ss_sort_data = alcohol_ss_data.dropna().sort_values()
print(alcohol_ss_sort_data[-10:])

# Not used by the plot any more; kept in case a later cell reads it.
alcohol_ss_sort_cum = np.cumsum(alcohol_ss_sort_data)

# The i-th smallest value (1-based) has i of the n observations <= it.
sample_count = len(alcohol_ss_sort_data)
alcohol_ss_ecdf = np.arange(1, sample_count + 1) / max(sample_count, 1)

trace2 = go.Scatter(
    x=alcohol_ss_sort_data.tolist(),
    y=alcohol_ss_ecdf,
    marker=dict(
        color='#0000FF',
    ),
)
fig = go.Figure(data=[trace2], layout=layout)
fig.layout.title = "CDF2"
py.offline.iplot(fig)