In [1]:
# Dependencies for loading the bucket table and drawing the Bokeh chart.
import pandas as pd
# NOTE(review): this star import is the only visible source of the unqualified
# plotting names used in this notebook (output_notebook, ColumnDataSource,
# hold, figure, rect, circle, grid, curplot, show).
from bokeh.plotting import *
from bokeh.objects import HoverTool
from collections import OrderedDict
# Presumably directs Bokeh output into the notebook -- confirm against the
# installed Bokeh version.
output_notebook()
In [2]:
# Load the UK bucket table; later cells read its `Field` and `Bucket` columns.
data = pd.read_csv('../data/uk_buckets.csv')
# Preview the first rows to sanity-check the load.
data.head()
Out[2]:
In [3]:
# Every bucket a field may be assigned to, in the order used for plotting.
all_buckets = [
    "SYSTEM",
    "DOCUMENT",
    "TENDER TRACKING",
    "TENDER FEATURES",
    "GOODS / SERVICES",
    "AMOUNT",
    "BUYER",
    "SUPPLIER",
    "AWARD TRACKING",
    "AWARD FEATURES",
    "CONTRACT TRACKING",
    "CONTRACT FEATURES",
    "ADD ON",
]
In [4]:
# Distinct bucket names that actually occur in the data.
buckets = data.Bucket.unique().tolist()
# Every bucket in the data must be a known bucket. Collect the offenders so
# that a failure reports exactly which names are unexpected.
check_list = [bucket for bucket in buckets if bucket not in all_buckets]
assert check_list == [], (
    "Buckets in data that are not in all_buckets: %r" % (check_list,)
)
In [5]:
# Assemble the data for plotting.
# One independent empty list per known bucket, keyed in all_buckets order so
# the plot lists built from this dict follow that order.
bucket_dict = OrderedDict((bucket, []) for bucket in all_buckets)


def add_field_to_bucket(row):
    """Append the row's ``Field`` to the list stored under the row's ``Bucket``.

    Mutates the module-level ``bucket_dict``. Raises ``KeyError`` if the
    row's bucket is not one of its keys.
    """
    bucket_dict[row.Bucket].append(row.Field)


# Walk the rows explicitly instead of using data.apply(..., axis=1).
# apply is meant for computing values, and older pandas versions call the
# function twice on the first row, which would record that field twice.
# A plain loop also avoids emitting a Series of None as cell output.
for row_index, row in data.iterrows():
    add_field_to_bucket(row)
Out[5]:
In [6]:
# Make the parallel lists for the plot: one entry per bucket, in bucket_dict
# order.
# .items() and print(...) behave the same on Python 2 and also run on
# Python 3, unlike .iteritems() and the print statement.
y = [bucket for bucket, fields in bucket_dict.items()]
# Every bucket sits in the single 'UK' column.
x = ['UK' for bucket in y]
# Circle size is the number of fields in the bucket scaled by 5.
radii = [len(fields) * 5 for bucket, fields in bucket_dict.items()]
# Comma-separated field names, shown in the hover tooltip.
fields_in_bucket = [', '.join(fields) for bucket, fields in bucket_dict.items()]
print(fields_in_bucket)
In [7]:
# Wrap the parallel plot lists in a ColumnDataSource so glyphs and tooltips
# can refer to them by column name.
source = ColumnDataSource(data={
    'x': x,
    'y': y,
    'radii': radii,
    'fields_in_bucket': fields_in_bucket,
})
In [8]:
# NOTE(review): presumably turns on Bokeh's plot "hold" so the rect and circle
# calls in the drawing cell are added to the same figure -- confirm against
# the installed Bokeh version.
hold()
In [15]:
# Draw it: one black circle per bucket in a single 'UK' column, sized by the
# bucket's field count.
figure()
plot_properties = {
    'title': None,
    'tools': "hover",
    'x_range': ['UK'],
    # Use the full, ordered bucket list. The glyph y-values come from
    # all_buckets (via bucket_dict), so a range built only from buckets
    # present in the data would omit empty buckets and lose that ordering.
    'y_range': all_buckets,
}
# White 1 x 1 rects at every (x, y), drawn before the circles.
rect(x, y, 1, 1,
     source=source,
     color='white',  # put in background
     **plot_properties)
# Circles read position and size from the data source columns.
circle('x', 'y',
       source=source,
       size='radii',
       color='black',
       **plot_properties)
grid().grid_line_color = None
# Pick the HoverTool instance requested via 'tools' above; raises IndexError
# if the current plot has none.
hover = [t for t in curplot().tools if isinstance(t, HoverTool)][0]
hover.tooltips = OrderedDict([
    ("Bucket", "@y"),
    ("Fields", "@fields_in_bucket"),
])
show()
In [9]:
In [9]: