In [1]:
# These commands control inline plotting
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import numpy as np # Useful numeric package
import scipy as sp # Useful statistics package
import matplotlib.pyplot as plt # Plotting package
In [25]:
import pandas as pd # Dataframe package
# Location of the burrito review data, relative to the notebook's working directory.
filename = './burrito_bootcamp.csv'

# Read the whole CSV into one DataFrame; the cells below refer to it as `df`.
df = pd.read_csv(filepath_or_buffer=filename)
In [21]:
# Preview only the first rows; rendering the entire frame floods the cell output.
df.head()
Out[21]:
In [24]:
# Quick summary of the loaded reviews.
# Each print receives one pre-formatted argument, so the output is identical
# whether the kernel treats print as a statement (Python 2) or a function (Python 3).
print('Number of burritos: {}'.format(df.shape[0]))
# NOTE(review): assumes 'overall' is the overall-rating column (it is plotted as a
# rating by the histogram cell) -- confirm. Series.mean() skips missing values by default.
print('Average burrito rating: {:.2f}'.format(df['overall'].mean()))
print('Reviewers: ')
print(np.array(df['Reviewer']))
In [10]:
def burritotypes(x, types=None):
    """Count how many burrito names fall into each burrito type.

    Parameters
    ----------
    x : iterable
        Burrito names, e.g. a DataFrame column. Entries that are not text
        (such as NaN or None from empty CSV cells) are counted as 'other'
        instead of raising a TypeError inside the regex engine.
    types : dict, optional
        Maps a type label to the pattern that identifies it. The pattern is
        used as a regular expression (it is not escaped) and is matched
        case-insensitively anywhere in the name. Defaults to the five types
        California, Carnitas, Carne asada, Soyrizo and Shredded chicken.

    Returns
    -------
    dict
        Maps each label that matched at least once to its count. Names that
        match no type are tallied under the key 'other'. A name is counted
        once only: the first matching type, in the iteration order of
        `types`, wins.
    """
    import re

    # Build the default inside the call so no dict object is shared between calls.
    if types is None:
        types = {'California':'cali', 'Carnitas':'carnita', 'Carne asada':'carne asada',
                 'Soyrizo':'soyrizo', 'Shredded chicken':'chicken'}

    # str on Python 3; str and unicode on Python 2.
    text_types = (str, type(u''))

    # Compile every pattern once up front rather than once per (name, type) pair.
    matchers = [(label, re.compile('.*' + pattern + '.*', re.IGNORECASE))
                for label, pattern in types.items()]

    Nmatches = {}
    for b in x:
        label = 'other'
        if isinstance(b, text_types):
            for candidate, matcher in matchers:
                if matcher.match(b) is not None:
                    label = candidate
                    break  # first matching type wins; do not count the name twice
        Nmatches[label] = Nmatches.get(label, 0) + 1
    return Nmatches
# Tally the burrito-type counts over the 'Burrito' column of the review table.
typecounts = burritotypes(df['Burrito'])
In [12]:
# Pie chart showing how many burritos fall into each type.
plt.figure(figsize=(6,6))
ax = plt.axes([0.1, 0.1, 0.65, 0.65])

# On Python 3, dict.keys()/values() are view objects rather than lists, so
# they are converted to plain lists before being handed to NumPy/matplotlib.
labels = list(typecounts.keys())
fracs = list(typecounts.values())
explode = [.1] * len(typecounts)
# Computed once here instead of once per wedge inside the label callback.
total_burritos = np.sum(fracs)

# autopct receives each wedge's percentage; scale it back to the raw count so
# the wedge labels show counts. `lambda p:` (no parentheses around the
# parameter) is valid on both Python 2 and Python 3.
patches, texts, autotexts = plt.pie(
    fracs, explode=explode, labels=labels,
    autopct=lambda p: '{:.0f}'.format(p * total_burritos / 100),
    shadow=False, startangle=0)
# With startangle=0 the first wedge starts on the positive x-axis and the
# remaining wedges are laid out counter-clockwise from there.
plt.title('Types of burritos',size=30)
for t in texts:
    t.set_size(20)
for t in autotexts:
    t.set_size(20)
# Draw the first wedge's count label in white.
autotexts[0].set_color('w')
In [ ]:
#California burritos vs. Carnitas burritos
# TODO: analysis not yet written.
In [ ]:
# Don Carlos 1 vs. Don Carlos 2
# TODO: analysis not yet written.
In [ ]:
# Bonferroni correction
# TODO: analysis not yet written.
In [18]:
import math
def metrichist(metricname, data=None):
    """Draw a histogram of one review metric in a new 5x5-inch figure.

    Parameters
    ----------
    metricname : str
        Column to plot. 'Volume' gets its own bins and an x-axis of 0.4 to
        1.2; every other metric is binned in half-point steps on an x-axis
        of -0.25 to 5.25.
    data : DataFrame-like, optional
        Table to read the column from. Defaults to the notebook-level `df`,
        so existing calls `metrichist(name)` behave as before.

    Returns
    -------
    None
        The figure is left as the current matplotlib figure.
    """
    # Fall back to the notebook's global review table when no table is passed in.
    frame = df if data is None else data

    if metricname == 'Volume':
        # NOTE(review): np.arange with a fractional step does not guarantee
        # whether the end value is included -- confirm the last bin edge
        # covers the largest volume in the data.
        bins = np.arange(.375,1.225,.05)
        xticks = np.arange(.4,1.2,.1)
        xlim = (.4,1.2)
    else:
        # Bin edges at -0.25, 0.25, ... put each half-point value at a bin centre.
        bins = np.arange(-.25,5.5,.5)
        xticks = np.arange(0,5.5,.5)
        xlim = (-.25,5.25)

    plt.figure(figsize=(5,5))
    # Missing ratings are dropped before counting.
    n, _, _ = plt.hist(frame[metricname].dropna(),bins,color='k')
    plt.xlabel(metricname + ' rating',size=20)
    plt.xticks(xticks,size=15)
    plt.xlim(xlim)
    plt.ylabel('Count',size=20)
    # Only two y ticks: zero and the tallest bar rounded up to a multiple of 5.
    plt.yticks((0,int(math.ceil(np.max(n) / 5.)) * 5),size=15)
    plt.tight_layout()
In [19]:
# Columns to plot, in the order their figures should appear.
m_Hist = [
    'Hunger', 'Volume', 'Tortilla', 'Temp',
    'Meat', 'Fillings', 'Meat:filling', 'Uniformity',
    'Salsa', 'Synergy', 'Wrap', 'overall',
]

# One histogram figure per metric.
for m in m_Hist:
    metrichist(m)
In [ ]:
# TODO: not yet written.