In [8]:
import networkx as nx
import csv
import collections
%matplotlib inline
import matplotlib.pyplot as plt
In [14]:
# Build an ingredient co-occurrence graph from data.csv.
#
# Each data row is read as (dish code, dish name, ingredient); the dish name
# column is not used.  The first row is treated as a header and skipped.
# Names left in the notebook namespace for later cells:
#   g       -- undirected graph with one node per ingredient; two different
#              ingredients are joined by an edge when they appear in the same
#              dish, and the edge's "code" attribute holds that dish's code
#              (if several dishes share the pair, the last one iterated wins)
#   dishmap -- dish code -> list of that dish's ingredients, in file order
g = nx.Graph()
dishmap = collections.defaultdict(list)

# The csv module wants a binary-mode file on Python 2 but a text-mode file
# opened with newline="" on Python 3.  `str is bytes` is true only on Python 2.
if str is bytes:
    datafile = open("data.csv", "rb")
else:
    datafile = open("data.csv", newline="")

with datafile:
    dr = csv.reader(datafile)
    next(dr, None)  # skip the header row; tolerate a completely empty file
    for r in dr:
        if not r:
            continue  # csv.reader yields [] for blank lines
        code = r[0]
        ingredient = r[2]
        dishmap[code].append(ingredient)
        g.add_node(ingredient)

# Connect every pair of distinct ingredients that share a dish.  Walking each
# dish's own ingredient list visits only pairs that actually co-occur, instead
# of testing every node pair against every dish.
for dish_code, ingredients in dishmap.items():
    for i, first in enumerate(ingredients):
        for second in ingredients[i + 1:]:
            if first != second:  # ingredient listed twice: no self-loop
                # Keyword form of the attribute works on networkx 1.x and 2.x+.
                g.add_edge(first, second, code=dish_code)
In [15]:
# Compute one layout and reuse it for both drawing calls so the edge labels
# line up with the edges they describe.
pos = nx.spring_layout(g)
# Map each edge, keyed by its (u, v) endpoint pair, to the dish code stored on it.
labels = {(a, b): attrs["code"] for a, b, attrs in g.edges(data=True)}
# `labels` is rebound here to whatever draw_networkx_edge_labels returns.
labels = nx.draw_networkx_edge_labels(g, pos, edge_labels=labels)
nx.draw(g, pos)
In [45]:
# Count, for every ingredient that is a node of `g`, how many dishes in
# `dishmap` contain it, then flatten the counts into parallel lists.
#   ingredientSet   -- set of the graph's nodes
#   ingredientCount -- ingredient -> number of dishes whose list includes it
#   labels / values -- ingredient names (sorted) and their matching counts
ingredientSet = set(g.nodes())
ingredientCount = collections.defaultdict(int)
for ingredients in dishmap.values():
    # The intersection yields each ingredient at most once per dish, even if
    # the dish lists it several times, and ignores anything not in the graph.
    for ing in ingredientSet.intersection(ingredients):
        ingredientCount[ing] += 1

# Sort by name so the order of the lists does not depend on set/dict
# iteration order, which can change from one kernel start to the next.
labels = sorted(ingredientCount)
values = [ingredientCount[name] for name in labels]
In [48]:
import numpy as np
import matplotlib.pyplot as plt
# Bar chart with one bar per entry of `labels`, whose height is the matching
# entry of `values` (both lists come from an earlier cell).
N = len(labels)
ind = np.arange(N)  # one x position per bar
width = 0.35  # bar width in x-axis units
# Centre each bar on its x position explicitly and put the tick there too.
# The default alignment of plt.bar changed between matplotlib releases, so
# offsetting the ticks by width/2 only lines up on older versions.
p1 = plt.bar(ind, values, width, color='b', align='center')
plt.ylabel('Number of things contained in')
plt.title('Ingredients by number of meals they are in')
plt.xticks(ind, labels)
# One y tick per integer up to one above the tallest bar.  max() raises on an
# empty list, so fall back to 0 when there is nothing to plot.
tallest = max(values) if values else 0
plt.yticks(np.arange(0, tallest + 2, 1))
plt.show()
In [ ]: