Analysis of recipes

In the following, we take a look at some of the recipe data that was collected at the Melb OSW 2014.



In [8]:

    
import networkx as nx
import csv
import collections

%matplotlib inline
import matplotlib.pyplot as plt



In [14]:

    
# Build an undirected graph with one node per ingredient and an edge between
# every pair of distinct ingredients that occur together in a dish.
g = nx.Graph()

# Maps each dish code (column 0) to the list of its ingredients (column 2).
dishmap = collections.defaultdict(list)

# NOTE: "rb" is the mode the Python 2 csv module expects; on Python 3 the file
# must be opened in text mode with newline="" instead.
with open("data.csv", "rb") as datafile:
    dr = csv.reader(datafile)
    next(dr, None)  # disregard the header row; the default tolerates an empty file

    for r in dr:
        if not r:
            continue  # csv.reader yields an empty list for a blank line
        code = r[0]
        ingredient = r[2]  # r[1] is the dish name, which is not needed here
        dishmap[code].append(ingredient)
        # Added explicitly so an ingredient with no co-ingredients still gets a node.
        g.add_node(ingredient)

# Draw an edge between two ingredients if they are used in the same item.
#
# Walking each dish once visits exactly the pairs that co-occur, so there is
# no need to test every pair of nodes against every dish. Deduplicating first
# keeps an ingredient that is listed twice from getting an edge to itself.
# When a pair shares several dishes, the edge keeps the code of the last dish
# visited.
for dish_code, ingredients in dishmap.items():
    unique = sorted(set(ingredients))
    for i, n1 in enumerate(unique):
        for n2 in unique[i + 1:]:
            # The keyword form is accepted by both networkx 1.x and 2.x.
            g.add_edge(n1, n2, code=dish_code)



In [15]:

    
# Plot the ingredient graph, annotating each edge with the dish code stored
# in its "code" attribute.
pos = nx.spring_layout(g)

edge_codes = {}
for u, v, d in g.edges(data=True):
    edge_codes[(u, v)] = d["code"]

labels = nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_codes)

nx.draw(g, pos)



In [45]:

    
# Count, for every ingredient in the graph, the number of dishes that use it.

ingredientSet = set(g.nodes())

ingredientCount = collections.defaultdict(int)

# One pass over the dishes. The set intersection yields each ingredient at
# most once per dish, so an ingredient listed twice in the same dish still
# counts as a single use.
for ingredients in dishmap.values():
    for ing in ingredientSet.intersection(ingredients):
        ingredientCount[ing] += 1

# Parallel lists for plotting: labels[i] is an ingredient, values[i] its count.
# keys() and values() of an unmodified dict iterate in matching order.
labels = list(ingredientCount.keys())
values = list(ingredientCount.values())



In [48]:

    
import numpy as np
import matplotlib.pyplot as plt


# Bar chart of how many dishes each ingredient appears in.
N = len(labels)
ind = np.arange(N)
width = 0.35

# Centre the bars explicitly. The default alignment of plt.bar differs between
# matplotlib versions, so pinning it lets the ticks sit at `ind` and line up
# with the middle of each bar everywhere.
p1 = plt.bar(ind, values, width, color='b', align='center')

plt.ylabel('Number of things contained in')
plt.title('Ingredients by number of meals they are in')
plt.xticks(ind, labels)

# One tick per integer count plus one tick of headroom. The guard keeps an
# empty ingredient list from making max() raise.
top = max(values) if values else 0
plt.yticks(np.arange(0, top + 2, 1))

plt.show()



In [ ]: