Scott Cole
27 August 2016
The data set is formatted so that it can be read into Tableau to make a visualization.
Analyze the different kinds of California burritos served at the different locations (with guac?)
In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set_style("white")
In [2]:
# Load the three tables returned by the project helper util2.load_burritos()
# (presumably per-burrito ratings, restaurant info and ingredient flags --
# TODO confirm against util2).
import util2
burritoTables = util2.load_burritos()
df, dfRestaurants, dfIngredients = burritoTables
# Number of rows in the main burrito table
N = len(df)
In [3]:
# Select the burritos whose name contains "Cali"/"cali".
# The mask is computed once and reused for the ingredients table, which is
# filtered with the mask built from df (so dfIngredients is assumed to share
# df's row index -- TODO confirm against util2.load_burritos).
# na=False treats a missing burrito name as "not a California burrito";
# without it a NaN in the mask makes the .loc lookup raise.
isCali = df.Burrito.str.contains('.*[Cc]ali.*', na=False)
dfCali = df.loc[isCali]
caliIngredientCols = ['Beef','Pico','Guac','Cheese','Fries','Sour cream','Chicken']
dfCaliIngredients = dfIngredients.loc[isCali, caliIngredientCols]

# Renumber the restaurants 0..n-1. drop=True discards the old index directly
# instead of materialising it as an 'index' column and dropping it again
# (which fails when the old index has a name).
dfRestaurants = dfRestaurants.reset_index(drop=True)

# Keep only restaurants that serve at least one of the selected burritos.
# isin() replaces a Python loop that recomputed unique() for every row.
# dropna() keeps a missing Location from ever matching, as in the original
# membership test. copy() makes the result an independent frame so columns can
# be added to it without pandas' SettingWithCopy warning.
caliLocations = dfCali.Location.dropna().unique()
dfCaliRestaurants = dfRestaurants.loc[dfRestaurants.Location.isin(caliLocations)].copy()
In [4]:
# Average every rating dimension per location for the selected burritos, and
# count how many burritos each location contributed.
ratingCols = ['Cost', 'Volume', 'Hunger',
              'Tortilla', 'Temp', 'Meat',
              'Fillings', 'Meat:filling', 'Uniformity',
              'Salsa', 'Synergy', 'Wrap',
              'overall']
aggSpec = dict((col, np.mean) for col in ratingCols)
# Size of each group, computed on the grouping column itself; renamed to N below
aggSpec['Location'] = np.size
dfCaliAvg = dfCali.groupby('Location').agg(aggSpec)
dfCaliAvg = dfCaliAvg.rename(columns={'Location': 'N'})
# Expose the group label (currently the index) as an ordinary column as well
dfCaliAvg['Location'] = list(dfCaliAvg.index)
In [5]:
# Calculate latitude and longitude for each restaurant address
import time
import geocoder

MAX_GEOCODE_TRIES = 5      # total lookups per address before giving up
GEOCODE_RETRY_PAUSE = 1.0  # seconds to wait before repeating a failed lookup

addresses = dfCaliRestaurants['Address'] + ', San Diego, CA'
lats = np.zeros(len(addresses))
longs = np.zeros(len(addresses))
for i, address in enumerate(addresses):
    for attempt in range(MAX_GEOCODE_TRIES):
        if attempt > 0:
            print('try again: ' + address)
            # Brief pause so repeated lookups are not fired back-to-back
            time.sleep(GEOCODE_RETRY_PAUSE)
        g = geocoder.google(address)
        # Truthiness covers both an empty list and None as the "no result"
        # value (which one is returned may depend on the geocoder version --
        # TODO confirm for the installed release).
        if g.latlng:
            break
    else:
        # Only reached when every attempt failed. The result of the final
        # attempt is now honoured instead of being thrown away.
        raise ValueError('Address not found: ' + address)
    lats[i], longs[i] = g.latlng

# Check for nonsense lats and longs: every point must fall inside the
# bounding box 32 <= lat <= 34, -118 <= long <= -117.
latOutOfRange = np.logical_or(lats > 34, lats < 32)
longOutOfRange = np.logical_or(longs < -118, longs > -117)
if latOutOfRange.any() or longOutOfRange.any():
    raise ValueError('Address not in san diego')
In [6]:
# Incorporate lats and longs into restaurants data.
# assign() returns a new frame rather than writing columns into what may be a
# filtered slice of dfRestaurants, which avoids pandas' SettingWithCopy warning
# and keeps the cell safe to re-run.
dfCaliRestaurants = dfCaliRestaurants.assign(Latitude=lats, Longitude=longs)

# Merge restaurant data with burrito data.
# dfCaliAvg carries 'Location' both as the name of its index (from the groupby)
# and as a regular column; recent pandas versions reject that as ambiguous when
# merging on='Location'. The index is not needed for a column merge, so it is
# dropped first; the merged rows and columns are unchanged.
dfCaliAvgByColumn = dfCaliAvg.reset_index(drop=True)
dfCaliTableau = pd.merge(dfCaliRestaurants, dfCaliAvgByColumn, on='Location')
In [7]:
# Export the merged per-location table as a CSV for Tableau, using pandas'
# default to_csv settings (relative path, resolved against the working directory).
caliCsvPath = 'cali_now.csv'
dfCaliTableau.to_csv(caliCsvPath)