In [1]:
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.palettes import Blues4
from bokeh.transform import factor_cmap
import bokeh.models as bm
import pandas as pd
import chartify
# Load BokehJS into the notebook so that later show() calls render inline.
output_notebook()


Loading BokehJS ...

In [2]:
# Read the pre-computed table (one row per similarity bucket and group, with
# absolute counts and their share) using the CSV's first column as the index.
df = pd.read_csv(
    filepath_or_buffer='tmp_df2_percentual_and_absolute.csv',
    index_col=0,
)
# Preview the first five rows.
df.head(n=5)


Out[2]:
similarity group absolute percentual
0 100% script_url 287460 0.348066
1 100% script_url_clean 363694 0.440373
2 100% script_url_plus_1 656672 0.795120
3 80%+ script_url 90798 0.109941
4 80%+ script_url_clean 140444 0.170054

In [3]:
# NOTE(review): this repeats the preview already shown right after the CSV is
# loaded and displays the same rows; candidate for removal.
df.head()


Out[3]:
similarity group absolute percentual
0 100% script_url 287460 0.348066
1 100% script_url_clean 363694 0.440373
2 100% script_url_plus_1 656672 0.795120
3 80%+ script_url 90798 0.109941
4 80%+ script_url_clean 140444 0.170054

In [4]:
# Compute, for every (group, similarity) row, the lower and upper edge of its
# segment in a stacked bar, plus a preformatted count label.

# Explicit stacking order of the similarity buckets (0 is drawn at the bottom).
lookup = {'100%': 0, '80%+': 1, '50%+': 2, '50%-': 3}
# An unknown bucket label raises KeyError here instead of being silently
# mis-ordered.
df['similarity_index'] = df.similarity.apply(lambda x: lookup[x])
df = df.sort_values(by=['group', 'similarity_index'])

# Running share within each group = top edge of the segment.
# Select the numeric column *before* accumulating: calling cumsum() on the
# whole grouped frame also tries to accumulate the string `similarity` column,
# which older pandas dropped silently and newer pandas rejects.
df['top'] = df.groupby('group')['percentual'].cumsum()
# The bottom edge is where the previous segment of the same group ended.
df['bottom'] = df.top - df.percentual
# Human-readable count with thousands separators, e.g. "n = 287,460".
df['absolute_text'] = df.absolute.apply(lambda x: f'n = {x:,}')
df


Out[4]:
similarity group absolute percentual similarity_index top bottom absolute_text
0 100% script_url 287460 0.348066 0 0.348066 0.000000 n = 287,460
3 80%+ script_url 90798 0.109941 1 0.458007 0.348066 n = 90,798
6 50%+ script_url 311461 0.377127 2 0.835134 0.458007 n = 311,461
9 50%- script_url 136159 0.164866 3 1.000000 0.835134 n = 136,159
1 100% script_url_clean 363694 0.440373 0 0.440373 0.000000 n = 363,694
4 80%+ script_url_clean 140444 0.170054 1 0.610427 0.440373 n = 140,444
7 50%+ script_url_clean 241902 0.292903 2 0.903330 0.610427 n = 241,902
10 50%- script_url_clean 79838 0.096670 3 1.000000 0.903330 n = 79,838
2 100% script_url_plus_1 656672 0.795120 0 0.795120 0.000000 n = 656,672
5 80%+ script_url_plus_1 81609 0.098815 1 0.893935 0.795120 n = 81,609
8 50%+ script_url_plus_1 64849 0.078521 2 0.972456 0.893935 n = 64,849
11 50%- script_url_plus_1 22748 0.027544 3 1.000000 0.972456 n = 22,748

In [5]:
# Stacked bar chart drawn directly with Bokeh: one bar per group, one segment
# per similarity bucket, using the precomputed `bottom`/`top` columns of `df`.
source = bm.ColumnDataSource(df)
# Bucket labels and group names in the order they appear in `df`; both are
# reused as categorical factors below.
similarities = list(df.similarity.unique())
groups = list(df.group.unique())

p = figure(x_range=groups, y_range=(0, 1), tools='')
p.vbar(
    x='group',
    bottom='bottom',
    top='top',
    width=0.9,
    color=factor_cmap('similarity', palette=Blues4, factors=similarities),
    source=source,
    # `legend=` is deprecated; `legend_field` is the explicit replacement for
    # a legend grouped by the values of a data-source column.
    legend_field='similarity',
)

# Label colours per bucket: the last Blues4 entry for the first two factors,
# the first Blues4 entry for the remaining two.
text_palette = [Blues4[-1]] * 2 + [Blues4[0]] * 2
p.text(
    x='group',
    y='top',
    text='absolute_text',
    source=source,
    text_color=factor_cmap('similarity', palette=text_palette, factors=similarities),
    text_align='center',
    text_font_size='8pt',
    # Screen-space offset from the segment's top edge.
    y_offset=15,
)

p.yaxis.minor_tick_out = None
# Legend position derived from the plot width (screen units).
p.legend.location = (p.plot_width - 150, 10)
#p.yaxis.axis_label = "%"
# Show the 0..1 shares as percentages on the y axis.
p.yaxis.formatter = bm.NumeralTickFormatter(format='1%')
#p.add_tools(bm.HoverTool(tooltips="<p>Absolute: @absolute{,}</p><p>Similarity: @similarity</p>"))
p.toolbar_location = None
show(p)


BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead

In [6]:
# Second rendering of the stacked bars, this time through chartify, using the
# `df` and `similarities` objects created in earlier cells.
# NOTE(review): the recorded run of this cell stops on the first line with
# "AttributeError: module 'chartify' has no attribute 'Chart'". Presumably a
# different module named `chartify` is being picked up (e.g. a local file or
# folder shadowing the installed package) — TODO confirm the environment.
# NOTE(review): `p` held the Bokeh figure in the previous cell and is rebound
# to a different kind of object here.
p = chartify.Chart(x_axis_type='categorical', blank_labels=True)
p.style.set_color_palette('sequential')
p.set_title("Chartified")
p.set_subtitle("Some additional stuff")
p.plot.bar_stacked(
    data_frame=df,
    categorical_columns=['group'],
    numeric_column='percentual',
    stack_column='similarity',
    # Bucket labels in the reverse of the order stored in `similarities`.
    stack_order=similarities[::-1],
)
# The statements below adjust the underlying figure exposed as `p.figure`.
p.figure.tools = []
# Upper y limit set above 1.0 — presumably headroom over the 100% stack; TODO confirm.
p.figure.y_range.end = 1.2
p.figure.yaxis.minor_tick_out = None
p.axes.set_yaxis_tick_format('1%')
p.show()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-6-b336124d0124> in <module>
----> 1 p = chartify.Chart(x_axis_type='categorical', blank_labels=True)
      2 p.style.set_color_palette('sequential')
      3 p.set_title("Chartified")
      4 p.set_subtitle("Some additional stuff")
      5 p.plot.bar_stacked(

AttributeError: module 'chartify' has no attribute 'Chart'

In [ ]: