notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np

import bokeh
import sys

# Record the library and interpreter versions this notebook was executed with,
# so readers can tell which Bokeh/pandas APIs the cells below rely on.
print("Pandas version: \n\t%s" % pd.__version__)
print("Bokeh version: \n\t%s" % bokeh.__version__)
print("Python version: \n\t%s" % sys.version)









    



Pandas version: 
	0.19.2
Bokeh version: 
	0.12.4
Python version: 
	3.4.5 |Anaconda custom (x86_64)| (default, Jul  2 2016, 17:47:57) 
[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]



In [2]:

    
from bokeh.plotting import figure, output_notebook, show
from bokeh.charts import Bar
# Configure Bokeh to render plots inline in this Jupyter notebook.
# The "Loading BokehJS ..." message shown below is the output of this call.
output_notebook()









    





    
        
        Loading BokehJS ...



In [3]:

    
#https://github.com/bokeh/bokeh/issues/1671
from bokeh.core.properties import Dict, Int, String
from bokeh.models import (
    ColorBar,
    LinearColorMapper,
    Plot,
    Range1d,
    LinearAxis,
    FixedTicker,
    TickFormatter,
)
from bokeh.util.compiler import CoffeeScript

class FixedTickFormatter(TickFormatter):
    """
    Tick formatter that replaces numeric axis ticks with fixed text labels.

    Python-side model extending ``bokeh.models.TickFormatter``. The
    browser-side behaviour is supplied by the CoffeeScript source held in
    ``COFFEESCRIPT``: its ``doFormat`` looks each tick up in ``labels`` and
    emits the matching string, or an empty string when the tick has no entry.

    Usage (as in the cells below): assign an instance to an axis, e.g.
    ``plot.xaxis[0].formatter = FixedTickFormatter(labels={0: 'a', 1: 'b'})``.
    """

    # BokehJS implementation, compiled by Bokeh when the model is used.
    # NOTE(review): the JS class extends Model (not a TickFormatter base) and
    # reads the property via @get("labels"); both are BokehJS-version
    # sensitive -- confirm before upgrading from the Bokeh 0.12.4 used here.
    COFFEESCRIPT =  """
        import {Model} from "model"
        import * as p from "core/properties"
        export class FixedTickFormatter extends Model
          type: 'FixedTickFormatter'
          doFormat: (ticks) ->
            labels = @get("labels")
            return (labels[tick] ? "" for tick in ticks)
          @define {
            labels: [ p.Any ]
          }
    """

    # Python-side property: {integer tick value -> label string}. It is
    # declared as `labels` on the JS side too (see the @define block above).
    labels = Dict(Int, String, help="""
    A mapping of integer ticks values to their labels.
    """)

    # Attach the CoffeeScript source above as this model's implementation.
    __implementation__ = CoffeeScript(COFFEESCRIPT)



In [4]:

    
skills_list = ['cheese making', 'squanching', 'leaving harsh criticisms']
pct_counts = [25, 40, 1]

# Map each row position (0, 1, 2, ...) to the skill name that should be
# shown in place of that numeric tick on the x axis.
label_dict = dict(enumerate(skills_list))

df = pd.DataFrame({
    'skill': skills_list,
    'pct jobs with skill': pct_counts,
})

# Bars are keyed on the dataframe index, so the x axis carries the integer
# positions that label_dict translates back into skill names.
p = Bar(
    df,
    'index',
    values='pct jobs with skill',
    title="Top skills for ___ jobs",
    legend=False,
)
x_axis = p.xaxis[0]
x_axis.formatter = FixedTickFormatter(labels=label_dict)

# show the results
# This will not render in github
# http://stackoverflow.com/questions/32518342/why-my-bokeh-plots-doesnt-work-on-github
show(p)

# workaround?
# http://stackoverflow.com/questions/32370281/how-to-include-image-or-picture-in-jupyter-notebook

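Workaround: because the interactive Bokeh plot does not render on GitHub, take a screenshot of it, store it in the `images/` directory, and display that static image in the next cell.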



In [5]:

    
# Show the saved screenshot of the chart above, so the result is visible
# where the live Bokeh plot cannot render (e.g. on GitHub).
# Import from the public IPython.display module: IPython.core.display is an
# internal location, and importing `display` from it is deprecated.
from IPython.display import Image, display
display(Image('images/bar-plot-custom-labels.png', width=600, unconfined=True))

A slightly more involved example: bin a numeric score into categories, then label each bin on the x axis.



In [6]:

    
# Applicant counts paired with a performance score; one score is missing.
# np.nan is used instead of np.NaN because the capitalised alias was removed
# in NumPy 2.0, while np.nan works on every NumPy version.
df2 = pd.DataFrame({
    'applicants': [20, 10, 1, 0, 3, 37],
    'performance': [0, np.nan, 0.5, 0.95, 0.15, 0.17],
})



In [7]:

    
# Display the raw frame; row 1 has a missing (NaN) performance score.
df2









    Out[7]:






  
    
      
      applicants
      performance
    
  
  
    
      0
      20
      0.00
    
    
      1
      10
      NaN
    
    
      2
      1
      0.50
    
    
      3
      0
      0.95
    
    
      4
      3
      0.15
    
    
      5
      37
      0.17



In [8]:

    
# Bin edges: a sentinel bin [-1, 0) that collects missing scores, followed by
# ten 0.1-wide bins covering 0 to 1.
# pd.cut(..., right=False) builds left-closed / right-open bins, so a top edge
# of exactly 1.0 would leave a perfect score of 1.0 outside every bin and give
# it a NaN category. Nudging the last edge to the next float above 1.0 makes
# the final bin effectively [0.9, 1.0]; every score below 1.0 is binned
# exactly as before.
bins = [-1.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, np.nextafter(1.0, 2.0)]

# One label per bin, in bin order; the position in this list equals the
# integer category code produced by pd.cut(labels=False) below.
my_labels = ['Missing', '[0, 0.1)', '[0.1, 0.2)', '[0.2, 0.3)', '[0.3, 0.4)', '[0.4, 0.5)',
             '[0.5, 0.6)', '[0.6, 0.7)', '[0.7, 0.8)', '[0.8, 0.9)', '[0.9, 1.0]']

# Replace missing scores with the lowest bin edge (-1.0) so that they fall
# into the sentinel 'Missing' bin.
df2['performance'] = df2['performance'].fillna(bins[0])

# labels=False returns the integer bin position rather than an interval label.
df2['category'] = pd.cut(df2['performance'], bins, right=False, labels=False,
                         include_lowest=True)

# Share of all applicants represented by each row, in percent.
df2['applicant_pct'] = 100*df2['applicants']/df2['applicants'].sum()
df2









    Out[8]:






  
    
      
      applicants
      performance
      category
      applicant_pct
    
  
  
    
      0
      20
      0.00
      1
      28.169014
    
    
      1
      10
      -1.00
      0
      14.084507
    
    
      2
      1
      0.50
      6
      1.408451
    
    
      3
      0
      0.95
      10
      0.000000
    
    
      4
      3
      0.15
      2
      4.225352
    
    
      5
      37
      0.17
      2
      52.112676



In [9]:

    
# The previous example keyed the bars on the dataframe index; this one keys
# them on the integer 'category' column and sums applicant_pct per category.
# Category code -> human-readable bin label for the x axis.
new_label_dict = dict(enumerate(my_labels))
p2 = Bar(
    df2,
    'category',
    values='applicant_pct',
    agg='sum',
    title="Applicant Performance Scores",
    color='red',
    legend=False,
)
p2.xaxis[0].formatter = FixedTickFormatter(labels=new_label_dict)

show(p2)



In [10]:

    
# Show the saved screenshot of the second chart for viewers where the live
# Bokeh plot cannot render (e.g. on GitHub).
# Import from the public IPython.display module: IPython.core.display is an
# internal location, and importing `display` from it is deprecated.
from IPython.display import Image, display
display(Image('images/bar-plot-custom-labels2.png', width=600, unconfined=True))



In [ ]:

	applicants	performance	category	applicant_pct
0	20	0.00	1	28.169014
1	10	-1.00	0	14.084507
2	1	0.50	6	1.408451
3	0	0.95	10	0.000000
4	3	0.15	2	4.225352
5	37	0.17	2	52.112676