In [1]:
import pandas as pd
from bokeh.plotting import *
from bokeh.objects import HoverTool
from collections import OrderedDict
# Set up Bokeh so that plots are displayed inside this notebook.
output_notebook()


Bokeh Plot

Configuring embedded BokehJS mode.


In [2]:
# Read the field -> bucket mapping (columns: Field, Bucket) from disk.
uk_buckets_csv = '../data/uk_buckets.csv'
data = pd.read_csv(uk_buckets_csv)

# Preview the first five rows as a sanity check on the load.
data.head(n=5)


Out[2]:
Field Bucket
0 noticeid SYSTEM
1 referencenumber TENDER TRACKING
2 datepublished SYSTEM
3 valuemin AMOUNT
4 valuemax AMOUNT

5 rows × 2 columns


In [3]:
# Every bucket a field may be assigned to.
# The order of this list is significant: it is the order in which the
# buckets are enumerated when the plot data is assembled.
all_buckets = [
    "SYSTEM", "DOCUMENT",
    "TENDER TRACKING", "TENDER FEATURES",
    "GOODS / SERVICES", "AMOUNT",
    "BUYER", "SUPPLIER",
    "AWARD TRACKING", "AWARD FEATURES",
    "CONTRACT TRACKING", "CONTRACT FEATURES",
    "ADD ON",
]

In [4]:
# The distinct buckets that occur in the data's Bucket column
buckets = data.Bucket.unique().tolist()

# Check all buckets in data are in all_buckets.
# check_list collects the offenders so that a failure says exactly which
# bucket names in the CSV are not recognised, instead of a bare AssertionError.
check_list = [bucket for bucket in buckets if bucket not in all_buckets]
assert check_list == [], 'Buckets in data not in all_buckets: %r' % (check_list,)

In [5]:
# Assemble the data for plotting

# Map every known bucket, in all_buckets order, to its own empty list.
# Buckets with no fields in the data keep an empty list.
bucket_dict = OrderedDict((bucket, []) for bucket in all_buckets)


def add_field_to_bucket(row):
    """Append the row's Field to the list stored under the row's Bucket.

    row: one row of `data`, exposing `Field` and `Bucket` attributes.
    Raises KeyError if the row's bucket is not a key of bucket_dict.
    """
    bucket_dict[row.Bucket].append(row.Field)


# Fill the buckets with an explicit loop rather than DataFrame.apply:
# the function exists only for its side effect, and apply may invoke it more
# than once on the first row in some pandas versions (which would file that
# field twice). A loop also avoids producing a throwaway Series of None.
for _, row in data.iterrows():
    add_field_to_bucket(row)

In [6]:
# Make the lists for the plot: one entry per bucket, in bucket_dict order.
# Written with .keys()/.values() and print(...) so the cell runs unchanged
# on both Python 2 and Python 3.

# Amount added to a bucket's 'radii' value for every field it contains.
size_per_field = 5

y = list(bucket_dict.keys())   # bucket names
x = ['UK'] * len(y)            # every bucket is placed in the single 'UK' column
radii = [len(fields) * size_per_field for fields in bucket_dict.values()]
# Comma-separated field names per bucket ('' for an empty bucket)
fields_in_bucket = [', '.join(fields) for fields in bucket_dict.values()]
print(fields_in_bucket)


['noticeid, datepublished, status, url, noticetype, notice_state, notice_state_change_date', 'num_docs', 'referencenumber', '', 'title, description, classification', 'valuemin, valuemax', 'org_name, org_contactemail', '', '', '', '', '', 'region']

In [7]:
# Build the plot source.
# Each key becomes a named column of the data source, holding the
# same-named list assembled for the plot.
source = ColumnDataSource(data={
    'x': x,
    'y': y,
    'radii': radii,
    'fields_in_bucket': fields_in_bucket,
})

In [8]:
# Switch on Bokeh's hold mode before drawing.
# NOTE(review): presumably this makes the rect and circle glyphs drawn in the
# plotting cell share one plot — confirm against the Bokeh version in use.
hold()

In [15]:
# draw it
figure()

# Keyword options passed to both glyph calls below.
plot_properties = {
    'title': None,
    'tools': "hover",
    'x_range': ['UK'],
    # Use the full bucket list, not just the buckets present in the data:
    # the 'y' column holds every entry of all_buckets, so each value needs a
    # matching factor on the axis, and the axis order stays the canonical one.
    'y_range': all_buckets,
}

# A 1x1 white rect per bucket, drawn first so it sits behind the circles.
# Columns are referenced by name, the same way the circle call does.
rect('x', 'y', 1, 1,
     source=source,
     color='white',  # put in background
     **plot_properties)

# One black circle per bucket, sized from the 'radii' column.
circle('x', 'y',
       source=source,
       size='radii',
       color='black',
       **plot_properties)

grid().grid_line_color = None

# Pick out the HoverTool that 'tools': "hover" added to the current plot.
hover = [t for t in curplot().tools if isinstance(t, HoverTool)][0]

# Tooltip rows; "@name" refers to the column of that name in the data source.
hover.tooltips = OrderedDict([
    ("Bucket", "@y"),
    ("Fields", "@fields_in_bucket"),
])

show()


Bokeh Plot
Plots

In [9]:


In [9]: