San Diego Burrito Analytics: Rankings

Scott Cole

21 May 2016

This notebook ranks each taco shop along each dimension

imports


In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd

import seaborn as sns
sns.set_style("white")

Load data


In [2]:
import util
df = util.load_burritos()
N = df.shape[0]

Average each metric over each Location


In [3]:
m_Location = ['Location','N','Yelp','Google','Hunger','Cost','Volume','Tortilla','Temp','Meat','Fillings','Meat:filling',
               'Uniformity','Salsa','Synergy','Wrap','overall']

# Calculate the mean of each of the metrics above for each taco shop
tacoshops = df.Location.unique()
TS = len(tacoshops)
dfmean = pd.DataFrame(np.nan, index=range(TS), columns=m_Location)
for ts in range(TS):
    dfmean.loc[ts] = df.loc[df.Location==tacoshops[ts]].mean()
    dfmean['N'][ts] = sum(df.Location == tacoshops[ts])
dfmean.Location = tacoshops

In [12]:
Ncutoff = 5
dfToRank = dfmean.loc[dfmean.N>=Ncutoff]

In [13]:
dfToRank


Out[13]:
Location N Yelp Google Hunger Cost Volume Tortilla Temp Meat Fillings Meat:filling Uniformity Salsa Synergy Wrap overall
4 los primos mexican food 11.0 3.0 3.7 3.318182 7.077273 0.761429 3.363636 3.500000 2.863636 3.090909 1.909091 2.750000 3.000000 2.545455 3.454545 2.645455
7 taco stand 21.0 4.5 4.4 3.171429 7.542857 0.768889 3.714286 3.380952 4.261905 3.928571 3.938095 3.857143 3.700000 4.190476 4.095238 4.114286
10 lolita's taco shop 12.0 4.0 4.4 3.141667 7.225000 0.747778 2.983333 3.275000 3.363636 3.641667 3.354545 2.991667 2.854167 3.437500 3.916667 3.283333
13 rigoberto's taco shop 17.0 4.0 4.4 3.633333 6.791176 0.899000 3.676471 3.858824 3.750000 3.676471 3.843750 3.800000 3.380769 3.779412 3.911765 3.747059
15 el zarape 8.0 4.0 4.4 3.156250 6.656250 0.596667 3.250000 4.285714 3.607143 3.281250 3.843750 3.600000 3.125000 3.300000 4.687500 3.366666
17 cancun mexican & seafood 6.0 4.5 4.1 3.716667 6.733333 0.735000 4.050000 4.500000 3.833333 3.900000 3.916667 3.416667 3.300000 3.800000 3.916667 4.100000
18 vallarta express 10.0 3.5 4.0 3.550000 6.990000 0.880000 2.925000 4.357143 3.350000 3.500000 3.550000 3.350000 3.444444 3.100000 3.750000 3.560000
23 california burritos 27.0 4.5 4.4 3.955926 6.312963 0.823333 3.881481 3.437037 4.037037 3.934615 3.951923 3.925926 3.532000 4.022222 4.307407 4.152000
35 los tacos 11.0 4.0 4.2 3.263636 8.009091 0.829000 3.654545 4.227273 4.190909 3.754545 3.563636 3.445455 3.818182 4.109091 4.136364 4.036364
46 lucha libre north park 28.0 3.5 4.3 3.489286 7.587500 0.827083 3.678571 3.200000 3.612000 3.364286 3.480000 3.185714 3.925926 3.314286 4.014286 3.264286
52 taco villa 14.0 3.0 NaN 3.485714 5.982000 0.633333 3.121429 4.092857 3.438462 3.342857 3.584286 3.214286 2.800000 3.100000 4.528571 3.421429
53 valentines mexican food 7.0 4.0 4.0 3.642857 7.735714 0.730000 4.071429 4.314286 4.100000 4.142857 4.428571 3.928571 3.642857 4.071429 3.071429 4.371429
57 california burrito company 5.0 3.5 4.4 3.200000 5.900000 0.686000 3.100000 4.200000 3.300000 2.900000 2.600000 3.000000 2.840000 3.400000 4.600000 3.200000

In [14]:
m_Rank = ['Location','Cost','Volume','Tortilla','Temp','Meat','Fillings','Meat:filling', 'Uniformity','Salsa','Synergy','Wrap','overall']
TS = len(dfToRank)
dfRanked = pd.DataFrame(np.nan, index=range(TS), columns=m_Rank)
dfRanked.Location[:] = dfToRank.Location
for m in m_Rank[1:]:
    if m == 'Cost':
        dfRanked[m][:] = dfToRank[m].rank(ascending=1)
    else:
        dfRanked[m][:] = dfToRank[m].rank(ascending=0)


/Users/scott/anaconda/lib/python3.6/site-packages/ipykernel/__main__.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/Users/scott/anaconda/lib/python3.6/site-packages/ipykernel/__main__.py:9: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [15]:
dfRanked


Out[15]:
Location Cost Volume Tortilla Temp Meat Fillings Meat:filling Uniformity Salsa Synergy Wrap overall
0 los primos mexican food 8.0 7.0 8.0 9.0 13.0 12.0 13.0 13.0 10.0 13.0 12.0 13.0
1 taco stand 10.0 6.0 4.0 11.0 1.0 3.0 3.0 3.0 3.0 1.0 6.0 3.0
2 lolita's taco shop 9.0 8.0 12.0 12.0 10.0 7.0 11.0 12.0 11.0 7.0 8.5 10.0
3 rigoberto's taco shop 6.0 1.0 6.0 8.0 6.0 6.0 5.5 4.0 7.0 6.0 10.0 6.0
4 el zarape 4.0 13.0 9.0 4.0 8.0 11.0 5.5 5.0 9.0 10.0 1.0 9.0
5 cancun mexican & seafood 5.0 9.0 2.0 1.0 5.0 4.0 4.0 7.0 8.0 5.0 8.5 4.0
6 vallarta express 7.0 2.0 13.0 2.0 11.0 8.0 9.0 8.0 6.0 11.5 11.0 7.0
7 california burritos 3.0 5.0 3.0 10.0 4.0 2.0 2.0 2.0 5.0 4.0 4.0 2.0
8 los tacos 13.0 3.0 7.0 5.0 2.0 5.0 8.0 6.0 2.0 2.0 5.0 5.0
9 lucha libre north park 11.0 4.0 5.0 13.0 7.0 9.0 10.0 10.0 1.0 9.0 7.0 11.0
10 taco villa 2.0 12.0 10.0 7.0 9.0 10.0 7.0 9.0 13.0 11.5 3.0 8.0
11 valentines mexican food 12.0 10.0 1.0 3.0 3.0 1.0 1.0 1.0 4.0 3.0 13.0 1.0
12 california burrito company 1.0 11.0 11.0 6.0 12.0 13.0 12.0 11.0 12.0 8.0 2.0 12.0

In [ ]: