In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import numpy as np
import scipy as sp
import scipy.stats  # make sure the `sp.stats` submodule is actually loaded
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set_style("white")
In [180]:
# Read the burrito ratings CSV into a DataFrame and record its row count.
filename = "burrito_current.csv"
df = pd.read_csv(filename)
N = len(df)
In [106]:
# Identify california burritos
def caliburritoidx(x):
    """Return the positions of the entries of ``x`` whose text contains "cali".

    Parameters
    ----------
    x : sequence of str
        Burrito names (a list, a pandas Series, ...). Entries that cannot be
        matched as text (e.g. NaN/None for a missing name) are skipped.

    Returns
    -------
    list of int
        Zero-based positions of the matching entries, in ascending order.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    # Compile once rather than on every iteration; matching ignores case.
    re4str = re.compile('.*cali.*', re.IGNORECASE)
    idx = []
    # enumerate() walks the values by position, so a Series whose index labels
    # are not 0..n-1 is handled the same way as a plain list.
    for b, name in enumerate(x):
        try:
            matched = re4str.match(name) is not None
        except TypeError:
            # Not a string (missing value, number, ...): cannot be a match.
            matched = False
        if matched:
            idx.append(b)
    return idx
# Positions of the California burritos, and the complementary positions of all other rows.
caliidx = caliburritoidx(df.Burrito)
Ncaliidx = np.delete(np.arange(len(df)), caliidx)
In [127]:
# Two-sample t-test of every rating metric: California burritos vs. all other burritos.
met_Cali = ['Hunger','Volume','Cost','Tortilla','Temp','Meat','Fillings','Meat:filling',
            'Uniformity','Salsa','Synergy','Wrap','overall']
for k in met_Cali:
    # caliidx / Ncaliidx hold row positions, so select with .iloc;
    # rows with a missing rating for this metric are dropped before testing.
    Mcali = df[k].iloc[caliidx].dropna()
    MNcali = df[k].iloc[Ncaliidx].dropna()
    # Single-argument print(...) behaves identically on Python 2 and Python 3.
    print(k)
    print(sp.stats.ttest_ind(Mcali, MNcali))
In [146]:
# Split the ratings into Scott's reviews and everyone else's.
df_Scott = df[df.Reviewer=='Scott']
# Index labels of Scott's rows. (Previously read from `df2`, a name that is not
# defined anywhere in this notebook and therefore fails on a fresh kernel.)
idx_Scott = df_Scott.index.values
# Complement of idx_Scott. np.delete interprets idx_Scott as positions, so this
# relies on df carrying the default 0..N-1 index produced by pd.read_csv.
idx_NScott = np.delete(np.arange(len(df)), idx_Scott)
# Names of the burritos Scott reviewed.
burritos_Scott = df.loc[idx_Scott, 'Burrito']
In [144]:
def _pearson_pvalue(r, n):
    """Two-sided p-value of a Pearson correlation ``r`` estimated from ``n`` pairs.

    Uses the standard result that r * sqrt((n - 2) / (1 - r**2)) follows a
    Student t distribution with n - 2 degrees of freedom under the null
    hypothesis of no correlation. Returns NaN when the p-value is undefined
    (fewer than 3 pairs, or r is NaN) and 0.0 for a perfect correlation.
    """
    dof = n - 2
    if dof <= 0 or np.isnan(r):
        return np.nan
    if abs(r) >= 1:
        return 0.0
    t_stat = r * np.sqrt(dof / (1.0 - r * r))
    return 2 * sp.stats.t.sf(abs(t_stat), dof)


# Pairwise Pearson correlations between rating metrics, restricted to Scott's reviews.
# Only numeric columns are correlated; text columns cannot be and make .corr()
# fail on recent pandas versions.
dfScorr = df_Scott.select_dtypes(include=[np.number]).corr()
metricscorr = ['Yelp','Google','Hunger','Cost','Volume','Tortilla','Temp','Meat','Fillings','Meat:filling',
               'Uniformity','Salsa','Synergy','Wrap','overall']
M = len(metricscorr)
Mcorrmat = np.zeros((M,M))  # correlation coefficients; the diagonal is left at 0
Mpmat = np.zeros((M,M))     # matching p-values; the diagonal is left at 0
for m1 in range(M):
    for m2 in range(M):
        if m1 != m2:
            # Read from dfScorr, the matrix computed above (this loop used to
            # reference `dfcorr`, which is not defined in this notebook).
            Mcorrmat[m1,m2] = dfScorr[metricscorr[m1]][metricscorr[m2]]
            # .corr() uses pairwise-complete observations, so the sample size
            # behind each coefficient is the number of Scott's rows in which
            # both metrics are present, not the size of the whole dataset.
            n_pair = len(df_Scott[[metricscorr[m1], metricscorr[m2]]].dropna())
            Mpmat[m1,m2] = _pearson_pvalue(Mcorrmat[m1,m2], n_pair)

# Heat map of the correlation matrix on a fixed [-1, 1] colour scale.
clim1 = (-1,1)
plt.figure(figsize=(10,10))
# The colormap is given by name, so no extra `matplotlib.cm` import is needed.
cax = plt.pcolor(range(M+1), range(M+1), Mcorrmat, cmap='bwr')
cbar = plt.colorbar(cax, ticks=(-1,-.5,0,.5,1))
cbar.ax.set_ylabel('Pearson correlation (r)', size=30)
plt.clim(*clim1)
cbar.ax.set_yticklabels((-1,-.5,0,.5,1),size=20)
ax = plt.gca()
# Put each tick in the middle of its cell and label it with the metric name.
ax.set_yticks(np.arange(M)+.5)
ax.set_yticklabels(metricscorr,size=25)
ax.set_xticks(np.arange(M)+.5)
ax.set_xticklabels(metricscorr,size=9)
plt.tight_layout()
In [ ]:
# Goal: argue that my sampling many burritos is equivalent to many different people sampling burritos,
# i.e. you would not be able to tell whether a given rating came from me or from someone else.
# Tests:
# 1. Means of each metric are the same
# 2. Metric correlations are the same (between each quality and overall)
# 3. Do I like Cali burritos more than other people?
In [147]:
# 1. Metric means are the same: I give my meat and meat:filling lower ratings
# Two-sample t-test of every rating metric: Scott's reviews vs. everyone else's.
met_Scott = ['Hunger','Volume','Cost','Tortilla','Temp','Meat','Fillings','Meat:filling',
             'Uniformity','Salsa','Synergy','Wrap','overall']
for k in met_Scott:
    # Select rows and column in one .loc call (no chained indexing);
    # rows with a missing rating for this metric are dropped before testing.
    Msc = df.loc[idx_Scott, k].dropna()
    MNsc = df.loc[idx_NScott, k].dropna()
    # Single-argument print(...) behaves identically on Python 2 and Python 3.
    print(k)
    print(sp.stats.ttest_ind(Msc, MNsc))