In [2]:
import plotly.plotly as py
import cufflinks as cf
import pandas as pd
import numpy as np
import json
# Read the local Plotly credentials file and sign in to the Plotly service.
# NOTE(review): the path is an absolute, user-specific location; consider
# deriving it from the current user's home directory.
with open("/Users/Alan/.plotly/.credentials", 'rb') as fi:
    # json.load() no longer accepts an `encoding` keyword (removed in
    # Python 3.9); it decodes UTF-8 bytes by itself.
    cred = json.load(fi)
myapi = cred['api_key']
username = cred['username']
py.sign_in(username, myapi)
In [6]:
# Load the pickled cuisine label tables produced by the data-processing step.
j_cuisine = pd.read_pickle('../yelp-challenge/data_processeing/Yelp_Cuisine_Japanese.pkl')
c_cuisine = pd.read_pickle('../yelp-challenge/data_processeing/Yelp_Cuisine_Chinese.pkl')

# Keep only the business_id column of the rows whose cuisine flag equals 2.
# NOTE(review): what flag value 2 stands for is not visible here -- confirm
# against the labelling step.
ALL_JPN_BID = j_cuisine.loc[j_cuisine['cuisine_Japanese'] == 2, ['business_id']]
ALL_CHN_BID = c_cuisine.loc[c_cuisine['cuisine_Chinese'] == 2, ['business_id']]

# Split the spatially labelled businesses into the three target areas.
# NOTE(review): the label -> area mapping (3, 2, 7) is taken as given;
# presumably Toronto, Phoenix and Las Vegas -- confirm.
area = pd.read_pickle("../yelp-challenge/data_processeing/spatial_labels.pkl")
TOR = area.loc[area['spatial_label'] == 3]
PHO = area.loc[area['spatial_label'] == 2]
LAS = area.loc[area['spatial_label'] == 7]
In [75]:
# Accumulator filled by the following cells: area name -> [others, chinese, japanese].
cities = dict()
In [76]:
# Count the businesses in the TOR area by cuisine. DataFrame.join aligns on
# the index, so each inner join keeps only the TOR rows whose index also
# appears in the cuisine table.
c = len(TOR.join(ALL_CHN_BID, how='inner'))
j = len(TOR.join(ALL_JPN_BID, how='inner'))
# Reuse the two counts instead of running both joins a second time.
# NOTE(review): a business present in both cuisine tables is subtracted
# twice here -- confirm the two label sets do not overlap.
o = len(TOR) - c - j
# Value order is [others, chinese, japanese]; the key is the label shown in
# the chart (spelling corrected from 'Torronto').
cities['Toronto'] = [o, c, j]
In [77]:
# Count the businesses in the PHO area by cuisine. DataFrame.join aligns on
# the index, so each inner join keeps only the PHO rows whose index also
# appears in the cuisine table.
c = len(PHO.join(ALL_CHN_BID, how='inner'))
j = len(PHO.join(ALL_JPN_BID, how='inner'))
# Reuse the two counts instead of running both joins a second time.
# NOTE(review): a business present in both cuisine tables is subtracted
# twice here -- confirm the two label sets do not overlap.
o = len(PHO) - c - j
# Value order is [others, chinese, japanese].
cities['Phoenix'] = [o, c, j]
In [78]:
# Count the businesses in the LAS area by cuisine. DataFrame.join aligns on
# the index, so each inner join keeps only the LAS rows whose index also
# appears in the cuisine table.
c = len(LAS.join(ALL_CHN_BID, how='inner'))
j = len(LAS.join(ALL_JPN_BID, how='inner'))
# Reuse the two counts instead of running both joins a second time.
# NOTE(review): a business present in both cuisine tables is subtracted
# twice here -- confirm the two label sets do not overlap.
o = len(LAS) - c - j
# Value order is [others, chinese, japanese].
cities['Las Vegas'] = [o, c, j]
In [79]:
# Turn the per-area counts into a table: one row per area, one column per
# category, in the same order the count lists were built.
column_labels = ['Others', 'Chinese Cuisine', 'Japanese Cuisine']
df = pd.DataFrame.from_dict(data=cities, orient='index')
df.columns = column_labels
df
Out[79]:
In [81]:
# Configure cufflinks (online mode, world-readable plots, ggplot theme), then
# draw one horizontal stacked bar per area from the counts table.
cf.set_config_file(offline=False, world_readable=True, theme='ggplot')
df.iplot(
    kind='barh',
    barmode='stack',
    bargap=.1,
    title='Distribution of Chinese and Japanese Cuisine Types in Target Areas',
    xTitle='The Number of Restaurants',
    yTitle='The Greater City Area'
)
Out[81]: