In [11]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Select seaborn's "white" style preset for the plots drawn in this notebook.
sns.set_style("white")
In [12]:
# Read the burrito review table from disk; N is the number of rows loaded.
filename = "burrito_current.csv"
df = pd.read_csv(filename)
N = len(df)
In [14]:
# Lower-case the shop names so differently capitalised spellings of the same
# shop are pooled together. Leading articles ("the ...") are not normalised yet.
df.Location = df.Location.str.lower()

# Column layout of the per-shop summary table: shop name, number of reviews
# (N), then the per-shop mean of each remaining column.
m_Location = ['Location','N','Yelp','Google','Hunger','Cost','Volume','Tortilla','Temp','Meat','Fillings','Meat:filling',
              'Uniformity','Salsa','Synergy','Wrap','overall']
tacoshops = df.Location.unique()
TS = len(tacoshops)

# Average every summary column that exists in df in one grouped pass instead
# of filtering df once per shop. numeric_only=True skips text columns, which a
# plain .mean() rejects on pandas >= 2.0.
mean_cols = [c for c in m_Location if c != 'Location' and c in df.columns]
shop_means = df.groupby('Location', sort=False)[mean_cols].mean(numeric_only=True)
shop_counts = df.Location.value_counts()

# Line the result up with tacoshops (row i <-> tacoshops[i]) and with the
# m_Location layout; columns that could not be averaged stay NaN, as does the
# row for a missing (NaN) Location.
dfmean = (shop_means
          .reindex(index=tacoshops, columns=m_Location)
          .reset_index(drop=True))

# Assign whole columns directly: chained indexing such as
# dfmean['N'][ts] = ... may write to a temporary copy and be silently lost.
# N is kept as float to match the rest of the table; shops with no matching
# rows (a NaN Location) get N = 0.
dfmean['N'] = shop_counts.reindex(tacoshops, fill_value=0).to_numpy(dtype=float)
dfmean['Location'] = tacoshops
In [53]:
# Keep only the shops with at least Ncutoff reviews for the ranking below.
Ncutoff = 8
dfToRank = dfmean[dfmean['N'] >= Ncutoff]
In [54]:
# Display the per-shop means for the shops that met the review-count cutoff.
dfToRank
Out[54]:
In [55]:
# Rank the shops that passed the cutoff on each rated dimension.
m_Rank = ['Location','Cost','Volume','Tortilla','Temp','Meat','Fillings','Meat:filling', 'Uniformity','Salsa','Synergy','Wrap','overall']
TS = len(dfToRank)

# Collect each column as a plain array so values are placed by position:
# dfToRank keeps the row labels it had in dfmean, while dfRanked is indexed
# 0..TS-1. Building the frame in one go also avoids chained assignment
# (dfRanked[m][:] = ...), which may write to a temporary copy and be lost.
ranked_cols = {'Location': dfToRank['Location'].to_numpy()}
for m in m_Rank[1:]:
    # Cost is ranked ascending (lowest value gets rank 1); every other column
    # is ranked descending (highest value gets rank 1).
    ranked_cols[m] = dfToRank[m].rank(ascending=(m == 'Cost')).to_numpy()

dfRanked = pd.DataFrame(ranked_cols, index=range(TS), columns=m_Rank)
In [56]:
# Display the per-dimension ranks of the shops that met the cutoff.
dfRanked
Out[56]:
In [ ]:
#TODO