Import a few dependencies.
In [1]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import requests
Retrieve the JSON data from the insights module using the query interface.
In [2]:
# Fetch all problem answers from the insights query endpoint.
# The timeout (30 s, an arbitrary but generous bound) keeps this cell from
# hanging forever when the service is unreachable, and raise_for_status()
# surfaces an HTTP error here instead of as a JSON-decoding failure in a
# later cell.
r = requests.get('http://localhost:8002/query/all_problem_answers', timeout=30)
r.raise_for_status()
The response contains JSON organized in a flat structure; we will group it by problem later.
In [3]:
# Display only the first three elements of the decoded JSON response.
r.json()[:3]
Out[3]:
Group the data by problem.
In [4]:
# Build a mapping of question -> {answer: count}, with one inner entry per
# bucket in the response.
problems = defaultdict(dict)
for bucket in r.json():
    question = bucket['question']
    answer = bucket['answer']
    # A list-valued answer is joined into one comma-separated string, since a
    # list cannot be used as a dict key.
    answer = ','.join(answer) if isinstance(answer, list) else answer
    problems[question][answer] = bucket['count']
Now the data is grouped by problem with one entry in a dict for each bucket.
In [5]:
# Show one (problem, {answer: count}) entry as a sample of the grouped data.
# next(iter(...)) runs on both Python 2 and 3, whereas iteritems().next() is
# Python 2 only; the None default avoids a StopIteration when no problems
# were returned.
next(iter(problems.items()), None)
Out[5]:
Render a small histogram for every problem on the same axes.
In [7]:
fig, ax = plt.subplots()

# Semi-arbitrary
bar_width = 0.25

# The maximum count of any response. Used to scale the y-axis.
max_value = 0

# Keeps track of the location along the x-axis where the next histogram should
# be rendered. Note this increases with each rendered problem.
problem_x_offset = 0

# Stores the tick positions and labels for the bars. Will appear below the
# x-axis.
x_tick_locations = []
x_tick_labels = []

# Seeded generator: the colors are still arbitrary, but identical on every
# run, so the figure is reproducible.
color_rng = np.random.RandomState(0)

# items() exists on both Python 2 and 3; iteritems() is Python 2 only.
for problem, values in problems.items():
    if not values:
        # Nothing to draw for a problem without any recorded answers.
        continue

    # Materialize labels and heights once as plain lists in matching order
    # (on Python 3 keys()/values() are views, not sequences).
    answers = list(values.keys())
    counts = list(values.values())

    # Calculate the left coordinates of each of the bars.
    # problem_x_offset is the left coordinate of the problem.
    # The resulting array looks like:
    # [problem_x_offset, problem_x_offset + bar_width, problem_x_offset + (bar_width*2), ...]
    # Scaling an integer range yields exactly len(counts) positions; a float
    # step passed to np.arange can yield one element too many through rounding.
    bar_index = np.arange(len(counts)) * bar_width + problem_x_offset

    # Track the maximum count seen across all problems; this determines the
    # scale of the y-axis.
    max_value = max(max(counts), max_value)

    # Generate the plot. align='edge' pins bar_index to the LEFT edge of each
    # bar; Matplotlib 2.0 changed the default to 'center', which would shift
    # every bar half a width away from its tick label.
    ax.bar(bar_index, counts, bar_width,
           align='edge',
           label=problem,
           color=color_rng.rand(3))

    # Gather labels for the x-axis with the various responses seen, centered
    # under each bar.
    x_tick_locations.extend(bar_index + (bar_width/2))
    x_tick_labels.extend(answers)

    # Put an empty bar_width of space between histograms for each problem:
    # last left edge + one width (the bar itself) + one width (the gap).
    problem_x_offset = bar_index[-1] + (bar_width*2)

# Render the x-axis labels at an angle with the right most letter centered
# under the bar.
ax.set_xticks(x_tick_locations)
ax.set_xticklabels(x_tick_labels, rotation=50, ha='right')

# Render some empty space above the top of the highest bar. Skipped when
# nothing was plotted, to avoid a degenerate (0, 0) y-range.
if max_value > 0:
    ax.set_ylim(0, max_value*1.25)

ax.set_xlabel('Answer')
ax.set_ylabel('Count')

# Increase the size of the plot
fig.set_size_inches(16, 14)
ax.legend()
plt.show()