In [4]:
import sys

import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import fcluster, linkage
from scipy.spatial import distance
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.pyplot as plt
%matplotlib inline

In [14]:
# Read the business table once: `d` keeps every column (the basic business
# fields are taken from it later), while `df` keeps only the attribute columns
# from 'AcceptsInsurance' through 'cuisine_Japanese'.
d = pd.read_pickle('../data_processeing/Yelp_Cuisine_Japanese.pkl')
df = d.loc[:,'AcceptsInsurance':'cuisine_Japanese']

In [15]:
# Keep the attribute columns that are non-null for more than 70% of the rows.
non_null_counts = df.count().sort_values()
if_col_keep = 1.0 * non_null_counts / len(df) > 0.7
col_keep = non_null_counts[if_col_keep]

# Collect the prefix (text before the first '_') of every kept column. The
# label column 'cuisine_Japanese' is skipped so it does not contribute a prefix.
col_prefix = []
for c in col_keep.keys().values:
    if c == 'cuisine_Japanese':
        continue
    col_prefix.append(c.split('_')[0])
col_prefix = list(set(col_prefix))

# Prefixes that are dropped explicitly; a prefix that is absent is ignored.
delete_col = ['HairSpecializesIn']
for c in delete_col:
    if c in col_prefix:
        col_prefix.remove(c)

# Every column (except the last one, the label) whose prefix survived.
col_with_prefix = []
for c in df.columns[:-1]:
    if c.split('_')[0] in col_prefix:
        col_with_prefix.append(c)

# Flag businesses whose review count is above the median review count.
df1 = df.copy()
review_count = df1.join(d.review_count).review_count
md = review_count.median()
df1['review_count_greater_median'] = review_count > md

# Basic business fields plus the selected attributes, the flag and the label.
df_basic = d[[u'categories', u'city', u'hours', u'is_open', u'latitude', u'longitude', u'name', u'neighborhood', u'postal_code', u'review_count', u'stars', u'state']]
df_final = df_basic.join(df1[col_with_prefix+['review_count_greater_median','cuisine_Japanese']])

In [16]:
# Load the spatial labels that are attached to the business table in the next step.
spatial_label = pd.read_pickle('../data_processeing/spatial_labels.pkl')

In [19]:
# Append the spatial labels column-wise; axis=1 aligns the rows on the index.
df_new = pd.concat([df_final,spatial_label], axis=1)

In [20]:
# Keep the rows with cuisine_Japanese == 2 and spatial_label == 3 that are
# rated at least 4 stars.
df_select = df_new.loc[
    (df_new['cuisine_Japanese'] == 2)
    & (df_new['spatial_label'] == 3)
    & (df_new['stars'] >= 4)
]

In [21]:
# Inspect the selected businesses.
df_select


Out[21]:
categories city hours is_open latitude longitude name neighborhood postal_code review_count ... Music_live Music_no_music Music_video OutdoorSeating RestaurantsDelivery RestaurantsGoodForGroups Smoking review_count_greater_median cuisine_Japanese spatial_label
LPCFfrQow0kl7EABuM7VDA [Seafood, Japanese, Sushi Bars, Restaurants] Richmond Hill [Monday 11:30-14:30, Monday 17:30-22:30, Wedne... 1 43.849523 -79.381647 Inatei Japanese Cuisine L4B 0B2 113 ... NaN True False True False False True True 2 3
abO4gvGKB0aVjsrXHlgeFg [Japanese, Restaurants, Fish & Chips, Ramen] Toronto [Monday 17:30-22:0, Tuesday 17:30-22:0, Wednes... 1 43.614481 -79.497516 Nobuya Etobicoke M8V 2W1 77 ... NaN True False True False True True True 2 3
-4bPFENRdTqjML8aKEL6ow [Sushi Bars, Restaurants, Japanese] King City [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.928036 -79.527723 Paper Crane L7B 1G2 22 ... NaN False False False False False True False 2 3
CAhoEZPcAEvBSoCxn_fAjQ [Ramen, Cafes, Sushi Bars, Japanese, Restaurants] Markham [Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda... 1 43.854806 -79.337010 Tim Sushi and Cafe Brown's Corners L3R 0G6 7 ... NaN NaN NaN False NaN NaN NaN False 2 3
96DHi-xkBB3CtsQRTPUjxQ [Restaurants, Sushi Bars, Japanese] Toronto [Tuesday 11:30-22:0, Wednesday 11:30-22:0, Thu... 1 43.662553 -79.364269 Qi Sushi Cabbagetown M5A 2H4 75 ... NaN False False False False False True True 2 3
bNHeKmkBx5emT9xLfdWyjw [Restaurants, Sushi Bars, Japanese] Toronto [Monday 10:30-22:30, Tuesday 10:30-22:30, Wedn... 1 43.665419 -79.387132 Tokyo Sushi House Downtown Core M4Y 1J8 75 ... NaN False False True False False True True 2 3
L82O1ZFFQfjJxF0_PYWPnA [Restaurants, Japanese, Tapas Bars, Izakaya] Toronto [Monday 17:30-23:30, Tuesday 17:30-23:30, Wedn... 1 43.641867 -79.431090 Guu Izakaya Toronto Parkdale M6K 1L4 28 ... NaN False False True False False False False 2 3
jniApOOS8ppUHhESL7OzTg [Korean, Restaurants, Seafood, Japanese] Toronto [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.755243 -79.359851 Hamaru Sushi M3B 2M3 12 ... NaN False False False False False False False 2 3
Qmwfg-PtYuCo5Q_IwcA_HQ [Japanese, Restaurants, Sushi Bars] Toronto [Monday 11:30-14:30, Monday 17:0-23:0, Tuesday... 1 43.649707 -79.388118 JaBistro Entertainment District M5V 1W4 288 ... NaN False False True False False False True 2 3
H5poGfDHCDMip7VYi7u8Rg [Sushi Bars, Restaurants, Japanese] Toronto [Monday 11:0-23:0, Tuesday 11:0-23:0, Wednesda... 1 43.711768 -79.375816 Rollian Sushi M4G 2K2 31 ... NaN False False True False False False True 2 3
8I5U8OQ06nSxX2y4PPOWzQ [Restaurants, Sushi Bars] Toronto [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn... 1 43.655114 -79.385083 Japango Downtown Core M5G 1P5 491 ... NaN False False True False False True True 2 3
PUneCUDGvdgnay3uvI89fA [Japanese, Sushi Bars, Restaurants] North York [Monday 12:0-23:0, Tuesday 12:0-23:0, Wednesda... 1 43.774992 -79.413470 Sushi Bong Willowdale M2N 7A1 197 ... NaN False False True False False True True 2 3
eQRopptwTiGRGOqo5RWxbg [Event Planning & Services, Japanese, Restaura... Oakville [Tuesday 12:0-14:30, Tuesday 17:30-22:0, Wedne... 1 43.444968 -79.670603 Mye Japanese Restaurant L6J 1N1 38 ... NaN False False True False False True True 2 3
G9jwU3O0HOXTHt43eJi2mg [Restaurants, Japanese] Toronto [Monday 17:30-23:0, Wednesday 17:30-23:0, Thur... 1 43.649596 -79.426544 Imanishi Japanese Kitchen Dufferin Grove M6J 1Y1 34 ... NaN False False False False False False True 2 3
KBWLCfWyfoXMvvBOXlABlw [Japanese, Sushi Bars, Seafood, Restaurants] Thornhil [Monday 11:0-23:0, Tuesday 11:0-23:0, Wednesda... 1 43.805987 -79.422070 Marado Sushi Langstaff L3T 2B2 10 ... NaN NaN NaN False NaN NaN NaN False 2 3
fB-v-caPf0o3eUkp_2ua2Q [Japanese, Specialty Food, Seafood Markets, Re... Toronto [Monday 9:0-18:0, Tuesday 9:0-18:0, Wednesday ... 1 43.824650 -79.249080 Sakana-Ya Scarborough M1X 1E6 4 ... NaN True False NaN NaN False NaN False 2 3
TAAlaXAk9sWOUXZW1-wfYQ [Japanese, Restaurants, Bars, Lounges, Steakho... Toronto [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.643139 -79.402051 Hibachi Teppanyaki & Bar M5V 2V5 21 ... NaN False False False False False False False 2 3
rjJM4xHdCKJRUpovQ-msAQ [Japanese, Sushi Bars, Restaurants] Richmond Hill [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn... 1 43.891618 -79.441192 Mi Ne Japanese Restaurant L4C 3E3 159 ... NaN True False True False False True True 2 3
9GLN1xfck07CKfNfejKCwg [Restaurants, Food Delivery Services, Sushi Ba... Toronto [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.644745 -79.390892 T-Sushi Entertainment District M5V 3E6 7 ... NaN NaN NaN NaN NaN NaN NaN False 2 3
MNIIRlWXotubOauSksv9AQ [Restaurants, Sushi Bars, Japanese] Toronto [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn... 1 43.662642 -79.404883 Akai Sushi M5S 2L3 49 ... NaN False False True False False False True 2 3
XS80QAi1aYKK5v1zmKlkwA [Sushi Bars, Restaurants] Toronto [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn... 1 43.682747 -79.392255 Kibo Summerhill Summer Hill M4V 5 ... NaN NaN NaN NaN NaN NaN NaN False 2 3
zRxKrysFBrKH76yhyVdrRA [Restaurants, Japanese] Vaughan [Monday 11:30-15:0, Monday 17:0-22:30, Tuesday... 1 43.827992 -79.539444 Yang's Teppanyaki & Sushi L6A 55 ... NaN True False True False False False True 2 3
AVwE6GQmlCTAjaDbli4IKg [Sushi Bars, Japanese, Restaurants] Toronto [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn... 1 43.636936 -79.396641 Iruka Sushi M5V 3M8 28 ... NaN False False False False False True False 2 3
PgxjXf7enBAyzjR9ftLZ2A [Food, Japanese, Food Delivery Services, Sushi... Toronto [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.644901 -79.521863 Sushi Run Etobicoke M8X 1E9 28 ... NaN False False False False False True False 2 3
PS5ghm09F2km76m4sQNJAw [Grocery, Food, Restaurants, Japanese] Markham [Tuesday 10:0-19:0, Wednesday 10:0-19:0, Thurs... 1 43.815543 -79.344707 Heisei Mart L3R 8T3 8 ... NaN False False False False False False False 2 3
2lo7N8RZOCS0DDxO6LPQhw [Japanese, Restaurants] Toronto [Monday 12:0-23:0, Wednesday 12:0-23:0, Thursd... 1 43.776593 -79.414183 Naniwa-Taro Willowdale M2N 5R6 63 ... NaN False False True False False True True 2 3
5kpaX40N0Mj0lx0awd9PmQ [Steakhouses, Restaurants, Teppanyaki, Japanes... Woodbridge [Tuesday 17:0-21:0, Wednesday 17:0-21:0, Thurs... 1 43.828842 -79.549051 Teppan Cave L4H 3T8 11 ... NaN True False False NaN False NaN False 2 3
4JxFc9_0epj0YJPcJy3Xsg [Restaurants, Sushi Bars, Local Services, Dry ... Toronto [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.649332 -79.387045 Roll & Roll by JC Mart Entertainment District M5H 3C6 53 ... NaN False False False False False True True 2 3
A6bnXx1see4yZSaVVCVDyw [Fast Food, Restaurants, Salad, Japanese, Sush... Toronto [Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda... 1 43.654756 -79.387492 Rolltation Downtown Core M5G 1C8 96 ... NaN False True True False False True True 2 3
s4IsetnGL_dUPb_7kgCfDQ [Restaurants, Food, Sushi Bars, Japanese] Newmarket [Tuesday 11:30-21:0, Wednesday 11:30-21:0, Thu... 1 44.051962 -79.456493 Sushi Jun L3Y 6M8 8 ... NaN False False NaN NaN False NaN False 2 3
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
sKIAHIGW0viQGFAMGgQ3Rg [Sushi Bars, Japanese, Restaurants] Toronto [Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda... 1 43.662033 -79.335608 Sushi Mugen Leslieville M4M 1K7 3 ... NaN NaN NaN False NaN NaN NaN False 2 3
aeJojBE7k6_vSGNec3Z0OQ [Restaurants, Japanese] Mississauga [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn... 1 43.516553 -79.625075 Momiji Japanese Restaurant Clarkson L5J 1J6 23 ... NaN True False False False False False False 2 3
1b7Ma0CBJ0oDPQrFOicOqQ [Restaurants, Japanese, Sushi Bars] Aurora [Tuesday 11:30-21:30, Wednesday 11:30-21:30, T... 1 44.001801 -79.457390 Sushi Den Teppanyaki L4G 1J5 40 ... NaN True False True False False False True 2 3
Otncrh8MiVM4WV3-xJqQbA [Korean, Restaurants, Sushi Bars] Mississauga [Tuesday 11:30-21:30, Wednesday 11:30-21:30, T... 1 43.525689 -79.683544 Dai Ichi Sushi Restaurant Erin Mills L5L 5S3 36 ... NaN True False True False False True True 2 3
1ORcqQT1DUHkKEZr1NpbrA [Soup, Japanese, Sandwiches, Caterers, Event P... Toronto [Monday 9:0-19:0, Tuesday 9:0-19:0, Wednesday ... 1 43.675135 -79.406547 Abokichi The Annex M5R 1V7 6 ... NaN NaN NaN NaN NaN NaN NaN False 2 3
8Lpoyo7RHWZrHlQW2JM2wQ [Asian Fusion, Restaurants, Japanese, Sushi Ba... Mississauga [Monday 10:30-22:0, Tuesday 10:30-22:0, Wednes... 1 43.633710 -79.668259 Kura Sushi L4Z 3E6 53 ... NaN True False True False False True True 2 3
B9AHWTeEuC2s3jOawidKkA [Sushi Bars, Restaurants] Toronto [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.706283 -79.375641 Fukui Sushi Mount Pleasant and Davisville M4G 3B7 16 ... NaN False False False False False False False 2 3
TZOiMROeIalOC_3Atb8a8A [Japanese, Restaurants, Korean, Tapas/Small Pl... Markham [Monday 17:0-3:0, Tuesday 17:0-3:0, Wednesday ... 1 43.799668 -79.420247 Anzenchitai Izakaya L3T 2A6 10 ... NaN NaN NaN False False NaN False False 2 3
a93TpkEV5oyEuyiI0CqAGw [Japanese, Restaurants, Sushi Bars] Toronto [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.682697 -79.420880 Moong Japanese Fusion Restaurant Wychwood M6C 1A5 28 ... NaN False False True False False True False 2 3
omwPSPE8j7jEP4mecQM8KA [Restaurants, Japanese] North York [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn... 1 43.781918 -79.492436 Genji Sushi M3J 3A1 48 ... NaN False False False False False True True 2 3
BlWPjh2WuvQSlmYmgFsylg [Restaurants, Japanese, Sushi Bars] Scarborough [Monday 11:0-21:30, Tuesday 11:0-21:30, Wednes... 1 43.779231 -79.138224 Azumi Sushi Scarborough M1C 3B2 18 ... NaN False False False False False False False 2 3
w5axMYWy5bcY0tiqAnYeRg [Japanese, Restaurants, Sushi Bars] Etobicoke [Wednesday 18:0-21:0, Thursday 18:0-21:0, Frid... 1 43.624938 -79.509964 Sushi Kaji Restaurant Etobicoke M8Z 1N7 112 ... NaN False False True False False False True 2 3
zaAh8JjNeDyrJXf6xNkdUQ [Sushi Bars, Restaurants, Japanese] Toronto [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn... 1 43.655818 -79.393757 Sushi Tanoshii Downtown Core M5T 1L1 12 ... NaN False False False False False False False 2 3
aT9jGDqTCvVjGjbYC969Rg [Japanese, Sushi Bars, Food Court, Restaurants... Toronto [Monday 11:0-21:0, Tuesday 11:0-21:0, Wednesda... 1 43.659154 -79.382143 Sushi & BBbop Ryerson M5B 1S8 43 ... NaN False False True False False True True 2 3
-BbnAc9YEO6pjvJGEtFbVQ [Ramen, Restaurants, Japanese, Noodles] Toronto [Monday 11:30-22:30, Wednesday 11:30-22:30, Th... 1 43.666819 -79.385469 Sansotei Downtown Core M4Y 2A6 154 ... NaN False False True False False True True 2 3
iAxsnFs3qq0Qx1EJ1S9SoQ [Restaurants, Nightlife, Japanese, Bars] Toronto [Tuesday 17:0-0:0, Wednesday 17:0-0:0, Thursda... 1 43.666679 -79.385468 Koyoi Restaurant & Bar Downtown Core M4Y 1K9 96 ... NaN False False True False False True True 2 3
v9DncDynhn7f-RBURtcSFw [Restaurants, Sushi Bars] Toronto [Monday 10:0-21:0, Tuesday 10:0-21:0, Wednesda... 1 43.726132 -79.231139 Sushi & Tea Scarborough M1M 15 ... NaN False False NaN NaN False NaN False 2 3
cAj-wwzw8HhwpHDtF_zLhA [Restaurants, Japanese, Korean] Toronto [Monday 11:30-22:0, Wednesday 11:30-22:0, Thur... 1 43.595438 -79.528637 Dani Sushi Japanese & Korean Restaurant Etobicoke M8W 1N2 22 ... NaN False False False False False False False 2 3
ZL0L7IIfCRhBwRFFSs1Big [Sushi Bars, Restaurants, Japanese] Pickering [Monday 11:30-21:30, Tuesday 11:30-21:30, Wedn... 1 43.843929 -79.081407 Sakura Sushi Japanese Restaurant L1V 1C3 18 ... NaN True False True False False True False 2 3
W4Dmj9Wxfq5gEALW6RUKTA [Restaurants, Sushi Bars, Japanese] North York [Monday 11:30-23:0, Tuesday 11:30-23:0, Wednes... 1 43.794269 -79.420142 Ichiban Sushi House M2M 3X4 14 ... NaN False False False False False False False 2 3
HKfW6AZ-lW7jwLJwNlcLLQ [Restaurants, Japanese] Toronto [Tuesday 11:30-22:0, Wednesday 11:30-22:0, Thu... 1 43.711488 -79.364093 Kintako Japanese Restaurant M4G 3W4 80 ... NaN False False True False False True True 2 3
bzMO8LJmJtgo3KuyWGKUxA [Japanese, Chinese, Ramen, Noodles, Restaurants] Toronto [Monday 11:0-22:30, Tuesday 11:0-22:30, Wednes... 1 43.652390 -79.397391 Homemade Ramen Chinatown M5T 2E3 104 ... NaN False False True False False True True 2 3
aLTpnF4xG3KA5trMilgChQ [Sushi Bars, Restaurants, Japanese] Toronto [Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes... 1 43.767601 -79.271453 Ni Ji Sushi Scarborough M1P 2W9 140 ... NaN True False True False False True True 2 3
TR0-w6VoZDAdvFQiq7P2Ug [Food, Desserts, Restaurants, Creperies, Japan... Toronto [Monday 11:30-20:30, Tuesday 11:30-20:30, Wedn... 1 43.654949 -79.399671 Millie Creperie Chinatown M5T 1L8 281 ... NaN False True False False False True True 2 3
qGL7Z0yVXET7GTq-mNyQmQ [Sushi Bars, Restaurants, Japanese] Toronto [Tuesday 11:30-22:0, Wednesday 11:30-22:0, Thu... 1 43.731713 -79.403930 The Sushi Bar M4N 2M4 30 ... NaN False False False False False False True 2 3
rA1WJvN6IULGbM0MTbbaaA [Japanese, Restaurants, Thai] Toronto [Monday 11:30-22:0, Tuesday 17:0-22:0, Wednesd... 1 43.682309 -79.423830 Calrose Asian Cuisine M6C 1A9 4 ... NaN False False NaN NaN False NaN False 2 3
R5ZvZ97FDp2bD93CNKg-zQ [Korean, Sushi Bars, Restaurants, Japanese] Toronto [Monday 11:30-23:0, Tuesday 11:30-23:0, Wednes... 1 43.701945 -79.397365 Lola Sushi Mount Pleasant and Davisville M4S 2A3 50 ... NaN False False False False False True True 2 3
bhoJYB0ArSqRe-Q82BI4vA [Ramen, Restaurants, Japanese] Toronto [Monday 12:0-14:0, Monday 17:30-20:30, Wednesd... 1 43.672683 -79.288133 Kyouka Ramen The Beach M4E 1E9 51 ... NaN False False True False False True True 2 3
GTwQgfi8DN23D9fMURYUdw [Sushi Bars, Nightlife, Restaurants, Bars, Win... North York [Monday 11:30-0:0, Tuesday 11:30-0:0, Wednesda... 1 43.763983 -79.411380 Sushi Moto Willowdale M2N 5N4 238 ... False False False True False False True True 2 3
LC7sXugc_BkKuZa2ghUhkQ [Restaurants, Japanese] Toronto [Tuesday 17:0-22:0, Wednesday 17:0-22:0, Thurs... 1 43.676999 -79.358788 Kobo Nobu M4K 2P7 16 ... NaN False False True False False False False 2 3

212 rows × 67 columns

Spectral Clustering


In [22]:
# Feature matrix: the attribute columns from 'AgesAllowed' through
# 'review_count_greater_median' plus the star rating. The rating is turned
# into a string so that get_dummies one-hot encodes it like the other
# categorical attributes.
X = pd.concat(
    [df_select.loc[:, 'AgesAllowed': 'review_count_greater_median'], df_select[['stars']]],
    axis=1,
)
X['stars'] = X['stars'].map(str)
X = pd.get_dummies(X, dummy_na=False, drop_first=True)
def true_false(x):
    """Return 1 when ``x`` compares equal to ``True``, otherwise 0."""
    return 1 if x == True else 0
# Convert the flag to 1/0; any value that is not equal to True becomes 0.
X['review_count_greater_median'] = X['review_count_greater_median'].apply(true_false)

In [23]:
# Try 2, 3 and 4 spectral clusters. For each setting print the silhouette
# score followed by the number of rows carrying each label 0..4.
for n_clusters in range(2,5):
    spectural_clustering = SpectralClustering(n_clusters=n_clusters, random_state=100, affinity='sigmoid').fit(X)
    labels = spectural_clustering.labels_
    print(silhouette_score(X, labels, metric='cityblock'))
    # Labels that were not assigned to any row are reported with a count of 0.
    for label in range(5):
        print(list(labels).count(label))


0.171551473534
186
26
0
0
0
0.0846245831412
150
49
13
0
0
0.0464318132094
10
49
13
140
0

In [24]:
# Final spectral clustering with 2 clusters.
# NOTE(review): this fit uses random_state=999 while the sweep over n_clusters
# used random_state=100, so these labels may differ from the cluster sizes
# printed by the sweep -- confirm which seed is intended.
spectural_clustering = SpectralClustering(n_clusters=2, random_state=999, affinity='sigmoid').fit(X)
labels_spectural = spectural_clustering.labels_

In [25]:
# Number of selected rows and columns.
df_select.shape


Out[25]:
(212, 67)

KMeans


In [26]:
## function that gets silhouette scores for a clustering method
def get_silhouette_score(X, cluster_method, method='complete'):
    """Print the average silhouette score for 2 to 9 clusters.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to the clustering model and to
        ``silhouette_score``.
    cluster_method : str
        One of ``'kmeans'``, ``'gaussian_mix'`` or ``'hierarchical'``.
    method : str, default 'complete'
        Linkage method handed to ``scipy.cluster.hierarchy.linkage``; only
        used when ``cluster_method == 'hierarchical'``.

    Raises
    ------
    ValueError
        If ``cluster_method`` is not one of the supported names.

    Returns
    -------
    None
        One line per number of clusters is printed.
    """
    supported_methods = ('kmeans', 'gaussian_mix', 'hierarchical')
    if cluster_method not in supported_methods:
        raise ValueError("cluster_method must be one of {}, got {!r}".format(
            supported_methods, cluster_method))

    #Choose a range(list) of clusters I would like to try:
    range_n_clusters = range(2,10)

    # The linkage matrix does not depend on the number of clusters, so it is
    # computed once and cut at different levels inside the loop.
    Z = linkage(X, method) if cluster_method == 'hierarchical' else None

    for n_clusters in range_n_clusters:
        if cluster_method == 'kmeans':
            cluster_labels = KMeans(n_clusters=n_clusters, random_state=22).fit(X).labels_
        elif cluster_method == 'gaussian_mix':
            gm = GaussianMixture(n_components=n_clusters, random_state=22).fit(X)
            cluster_labels = gm.predict(X)
        else:
            cluster_labels = fcluster(Z, n_clusters, criterion='maxclust')

        silhouette_avg = silhouette_score(X, cluster_labels, random_state=22)
        print("For n_clusters ={},".format(n_clusters)
              + "{} - average silhouette_score :{}".format(cluster_method, silhouette_avg))

In [27]:
## choose the number of clusters using silhouette method
# Prints the average silhouette score of k-means for 2 to 9 clusters.
get_silhouette_score(X, 'kmeans')


For n_clusters =2,kmeans - average silhouette_score :0.139646702691
For n_clusters =3,kmeans - average silhouette_score :0.129650660088
For n_clusters =4,kmeans - average silhouette_score :0.114923490278
For n_clusters =5,kmeans - average silhouette_score :0.115386171136
For n_clusters =6,kmeans - average silhouette_score :0.118219736853
For n_clusters =7,kmeans - average silhouette_score :0.112944680135
For n_clusters =8,kmeans - average silhouette_score :0.10640220379
For n_clusters =9,kmeans - average silhouette_score :0.0815172312234

In [28]:
n_clusters = 2  # number of clusters

#train the model.
km=KMeans(n_clusters=n_clusters, random_state=22).fit(X)
labels_km = km.labels_

# Size of each of the two clusters.
print(list(labels_km).count(0))
print(list(labels_km).count(1))


73
139

In [29]:
## Distance
# fit_predict refits the model on every call, so call it once and reuse the
# returned labels for the "cluster" column.
scor=km.fit_predict(X)

res_p=pd.DataFrame(km.transform(X))  ## distances of each data point to each cluster center!!!
res_p=pd.concat((res_p, pd.DataFrame(scor)),axis=1)

res_p.columns=list(range(n_clusters))+["cluster"]
# "score" is the distance of each row to the center of its own cluster.
res_p.loc[:,"score"]=res_p.apply(lambda x: x[int(x["cluster"])],axis=1)
res_p.sort_values("score",ascending=False)[:5]


Out[29]:
0 1 cluster score
181 3.414780 3.048289 1 3.048289
128 3.034579 2.816443 1 2.816443
175 2.959158 2.726894 1 2.726894
56 2.799787 2.658767 1 2.658767
28 2.777682 2.632936 1 2.632936

In [30]:
# Work on a copy so that the columns added next do not end up in X.
X_df = X.copy()

In [31]:
# k-means label of each row (assigned positionally, in the row order of X).
X_df['km'] = labels_km

In [32]:
# Distance of each row to its own cluster center. `.values` drops res_p's
# 0..n-1 index so the assignment is positional rather than index-aligned.
X_df['distance_KM'] = res_p.score.values

In [33]:
## Get Anomalies
def get_anomaly(df, label_col, dist_or_likelihood, thres):
    """Return the rows whose score is far from the mean score of their cluster.

    For every cluster in ``df[label_col]`` the mean and standard deviation of
    ``df[dist_or_likelihood]`` are computed. A row is returned when
    ``abs(value - cluster_mean) / cluster_std > thres``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding at least ``label_col`` and ``dist_or_likelihood``.
    label_col : str
        Column with the cluster label of each row.
    dist_or_likelihood : str
        Column with the numeric score that is compared within each cluster.
    thres : float
        Threshold on the absolute deviation, in cluster standard deviations.

    Returns
    -------
    pandas.DataFrame
        The selected rows with the columns and dtypes of ``df``, grouped by
        cluster in order of first appearance of each label. Empty when no row
        exceeds the threshold.
    """
    grouped = df.groupby(label_col)[dist_or_likelihood]
    clustermean = grouped.mean()
    clusterstd = grouped.std()

    # Collect the per-cluster selections and concatenate once at the end;
    # this keeps the original dtypes and avoids re-copying on every iteration.
    pieces = []
    # groupby leaves out missing labels, so they are skipped here as well.
    for label in df[label_col].dropna().unique():
        in_cluster = df[label_col] == label
        deviation = abs(df[dist_or_likelihood] - clustermean[label]) / clusterstd[label]
        pieces.append(df[in_cluster & (deviation > thres)])

    if not pieces:
        # No clusters at all: return an empty frame with the same columns.
        return df.iloc[:0].copy()
    return pd.concat(pieces, axis=0)

In [34]:
## get anomalies based on kmeans distance
# Flag rows whose distance deviates from the mean distance of their cluster
# by more than 2 standard deviations.
km_anomalies = get_anomaly(X_df, 'km', 'distance_KM', 2)

In [35]:
# Distance values of the flagged rows.
km_anomalies['distance_KM']


Out[35]:
bGuxRBRKv7i1BKKfGbjxEw    2.816443
B5JCODx4728Ce2Qca1NFHw    2.726894
SjgeuBlgKER9yegpoxT99w    3.048289
L82O1ZFFQfjJxF0_PYWPnA    2.551478
QhAmKLX_0AgQe5HLxode9Q    2.586140
1pttL4MkpxOL6Mj2azOjVQ    2.625567
rA1WJvN6IULGbM0MTbbaaA    2.551478
Name: distance_KM, dtype: float64

In [ ]:


In [ ]:

Gaussian Mixture


In [36]:
##### Gaussian Mixture #########

## choose the number of clusters using silhouette method
# Prints the average silhouette score of the Gaussian mixture for 2 to 9 components.
get_silhouette_score(X, 'gaussian_mix')


For n_clusters =2,gaussian_mix - average silhouette_score :0.0654529016065
For n_clusters =3,gaussian_mix - average silhouette_score :0.0780903740302
For n_clusters =4,gaussian_mix - average silhouette_score :0.113687282489
For n_clusters =5,gaussian_mix - average silhouette_score :0.103280578979
For n_clusters =6,gaussian_mix - average silhouette_score :0.100083297708
For n_clusters =7,gaussian_mix - average silhouette_score :0.0998526673056
For n_clusters =8,gaussian_mix - average silhouette_score :0.0878005301017
For n_clusters =9,gaussian_mix - average silhouette_score :0.082884277143

In [37]:
# Fit a Gaussian mixture with k components and label every row with the
# component predicted for it.
k = 4
GM = GaussianMixture(n_components=k, random_state=22)
label_gm = GM.fit(X).predict(X)

In [38]:
# Number of rows carrying each label 0..4 (a label that was not assigned to
# any row is reported as 0).
for label in range(5):
    print(list(label_gm).count(label))


34
55
99
24
0

Isolation Forest


In [39]:
### 3) Isolation Forest
from sklearn.ensemble import IsolationForest

# fit the model (max_samples=100 and a fixed random_state are passed to the estimator)
ISF = IsolationForest(max_samples=100, random_state=22)
ISF.fit(X)

## compute anomaly score of the input. The lower, the more abnormal.
score_isf = ISF.decision_function(X)

In [40]:
# Position of the row with the lowest (most abnormal) isolation-forest score.
score_isf.argmin()


Out[40]:
128

KNN Distance


In [41]:
def dist2knn(x, nn, k):
    """Sum of the ``k + 1`` smallest Euclidean distances from ``x`` to the rows of ``nn``.

    Parameters
    ----------
    x : array-like, shape (n_features,)
        The query point.
    nn : array-like, shape (n_rows, n_features)
        Candidate neighbours, one per row.
    k : int
        Number of neighbours to sum over, not counting ``x`` itself.

    Returns
    -------
    float
        The sum of the ``k + 1`` smallest distances. When ``x`` is itself a
        row of ``nn`` the smallest distance is 0, so this equals the summed
        distance to its ``k`` nearest other rows.
    """
    # Cast to float first: subtracting unsigned or boolean columns directly
    # would wrap around or fail.
    point = np.asarray(x, dtype=float)
    candidates = np.asarray(nn, dtype=float)
    # All distances in one vectorised step instead of one call per row.
    dist_ = np.sqrt(((candidates - point) ** 2).sum(axis=1))
    dist_.sort()
    return float(dist_[:k+1].sum()) # +1: the first entry is the 0 distance of x to itself

In [42]:
# Summed distance to the 5 nearest neighbours, computed for every row of X.
dist_sum_knn = []
for i in range(len(X)):
    # '\r' returns to the start of the line so the percentage is overwritten in place.
    sys.stdout.write('\r{}%'.format(100.0*(i+1)/len(X)))
    dist_sum_knn.append(dist2knn(X.iloc[i,:], X, 5))


100.0%

In [ ]:

Output


In [46]:
# Empty results table that shares the index of the feature matrix X.
result = pd.DataFrame(index=X.index)

In [47]:
# The label and score arrays were computed from X and are assigned
# positionally, so they line up with result's index (X.index).
result['clusters_sp'] = labels_spectural
result['clusters_km'] = labels_km
# km_anomalies only contains the flagged rows, so index alignment leaves NaN
# for every other row.
# NOTE(review): if the distance is wanted for all rows, X_df['distance_KM']
# holds it -- confirm the intent.
result['distance_km'] = km_anomalies['distance_KM']
result['clusters_gm'] = label_gm
result['scores_isf'] = score_isf
result['distance_knn'] = dist_sum_knn

In [48]:
# Preview the combined results table.
result


Out[48]:
clusters_sp clusters_km distance_km clusters_gm scores_isf distance_knn
LPCFfrQow0kl7EABuM7VDA 1 1 NaN 2 0.094898 6.656854
abO4gvGKB0aVjsrXHlgeFg 1 1 NaN 2 -0.001748 11.820605
-4bPFENRdTqjML8aKEL6ow 1 1 NaN 2 0.071132 9.464102
CAhoEZPcAEvBSoCxn_fAjQ 0 0 NaN 1 0.069511 5.560478
96DHi-xkBB3CtsQRTPUjxQ 1 1 NaN 2 0.100907 5.828427
bNHeKmkBx5emT9xLfdWyjw 1 1 NaN 2 0.109392 5.242641
L82O1ZFFQfjJxF0_PYWPnA 1 0 2.551478 3 -0.023985 11.797959
jniApOOS8ppUHhESL7OzTg 1 0 NaN 1 0.064788 9.146264
Qmwfg-PtYuCo5Q_IwcA_HQ 1 1 NaN 3 0.014985 10.000000
H5poGfDHCDMip7VYi7u8Rg 1 1 NaN 2 0.040747 9.732051
8I5U8OQ06nSxX2y4PPOWzQ 1 1 NaN 2 0.090771 6.656854
PUneCUDGvdgnay3uvI89fA 1 1 NaN 0 -0.003049 10.921626
eQRopptwTiGRGOqo5RWxbg 1 1 NaN 2 0.097232 8.342417
G9jwU3O0HOXTHt43eJi2mg 1 1 NaN 3 0.014115 10.944272
KBWLCfWyfoXMvvBOXlABlw 0 0 NaN 1 0.054576 9.464102
fB-v-caPf0o3eUkp_2ua2Q 1 0 NaN 1 0.039315 10.472136
TAAlaXAk9sWOUXZW1-wfYQ 1 0 NaN 3 0.007494 11.371115
rjJM4xHdCKJRUpovQ-msAQ 1 1 NaN 2 0.079771 5.828427
9GLN1xfck07CKfNfejKCwg 1 0 NaN 1 0.007018 7.292529
MNIIRlWXotubOauSksv9AQ 1 1 NaN 2 0.044073 9.936238
XS80QAi1aYKK5v1zmKlkwA 0 0 NaN 1 0.033787 8.024580
zRxKrysFBrKH76yhyVdrRA 1 1 NaN 2 0.067171 8.660254
AVwE6GQmlCTAjaDbli4IKg 1 1 NaN 2 0.101551 8.342417
PgxjXf7enBAyzjR9ftLZ2A 1 1 NaN 2 0.019858 9.472136
PS5ghm09F2km76m4sQNJAw 1 0 NaN 1 0.065361 9.464102
2lo7N8RZOCS0DDxO6LPQhw 1 1 NaN 0 0.034740 9.936238
5kpaX40N0Mj0lx0awd9PmQ 1 0 NaN 3 0.040445 10.708204
4JxFc9_0epj0YJPcJy3Xsg 1 1 NaN 0 0.032380 10.000000
A6bnXx1see4yZSaVVCVDyw 1 1 NaN 0 -0.022717 12.213128
s4IsetnGL_dUPb_7kgCfDQ 1 0 NaN 1 0.046769 9.732051
... ... ... ... ... ... ...
sKIAHIGW0viQGFAMGgQ3Rg 0 0 NaN 1 0.023686 6.656854
aeJojBE7k6_vSGNec3Z0OQ 1 1 NaN 2 0.042291 10.472136
1b7Ma0CBJ0oDPQrFOicOqQ 1 1 NaN 2 0.036343 9.968119
Otncrh8MiVM4WV3-xJqQbA 1 1 NaN 2 0.058472 7.610366
1ORcqQT1DUHkKEZr1NpbrA 1 0 NaN 1 0.008982 9.146264
8Lpoyo7RHWZrHlQW2JM2wQ 1 1 NaN 2 0.034571 10.204187
B9AHWTeEuC2s3jOawidKkA 1 1 NaN 2 0.060456 8.878315
TZOiMROeIalOC_3Atb8a8A 0 0 NaN 1 0.080296 7.388905
a93TpkEV5oyEuyiI0CqAGw 1 1 NaN 2 0.049685 9.464102
omwPSPE8j7jEP4mecQM8KA 1 1 NaN 2 0.053676 9.464102
BlWPjh2WuvQSlmYmgFsylg 1 0 NaN 1 0.087444 9.146264
w5axMYWy5bcY0tiqAnYeRg 1 1 NaN 3 0.038118 9.236068
zaAh8JjNeDyrJXf6xNkdUQ 1 0 NaN 1 0.069445 8.024580
aT9jGDqTCvVjGjbYC969Rg 1 1 NaN 0 0.038144 11.180340
-BbnAc9YEO6pjvJGEtFbVQ 1 1 NaN 0 0.075078 5.000000
iAxsnFs3qq0Qx1EJ1S9SoQ 1 1 NaN 3 0.066779 8.928203
v9DncDynhn7f-RBURtcSFw 1 0 NaN 1 0.040701 9.886350
cAj-wwzw8HhwpHDtF_zLhA 1 0 NaN 1 0.068536 8.610366
ZL0L7IIfCRhBwRFFSs1Big 1 1 NaN 2 0.068036 9.196152
W4Dmj9Wxfq5gEALW6RUKTA 1 0 NaN 1 0.070740 8.828427
HKfW6AZ-lW7jwLJwNlcLLQ 1 1 NaN 2 0.030925 9.464102
bzMO8LJmJtgo3KuyWGKUxA 1 1 NaN 0 0.024122 9.196152
aLTpnF4xG3KA5trMilgChQ 1 1 NaN 2 0.094041 4.828427
TR0-w6VoZDAdvFQiq7P2Ug 1 1 NaN 0 -0.026775 11.797959
qGL7Z0yVXET7GTq-mNyQmQ 1 1 NaN 2 0.078541 8.610366
rA1WJvN6IULGbM0MTbbaaA 1 0 2.551478 1 -0.033943 11.820605
R5ZvZ97FDp2bD93CNKg-zQ 1 1 NaN 2 0.077506 6.974691
bhoJYB0ArSqRe-Q82BI4vA 1 1 NaN 0 0.050493 8.660254
GTwQgfi8DN23D9fMURYUdw 1 1 NaN 3 0.033322 9.064495
LC7sXugc_BkKuZa2ghUhkQ 1 1 NaN 2 0.034359 9.732051

212 rows × 6 columns


In [49]:
# Write the results table to a UTF-8 CSV file in the working directory.
# NOTE(review): the file name is spelled 'torronto'; left unchanged in case
# other code reads this file by name.
result.to_csv('torronto_jap_results.csv', index_label=False, encoding='utf-8' )

In [51]:
# Put the clustering / anomaly columns next to the selected business rows
# (axis=1 aligns the rows on the index).
anomaly_merged = pd.concat([df_select, result], axis=1)

In [52]:
# Spectral clustering: per-feature difference between the mean of cluster 1
# and the mean of cluster 0.
X[anomaly_merged['clusters_sp'] == 1].mean() - X[anomaly_merged['clusters_sp'] == 0].mean()


Out[52]:
review_count_greater_median          0.437552
AgesAllowed_True                     0.021505
Ambience_classy_full_bar             0.311828
Ambience_classy_none                -0.267163
Ambience_divey_True                  0.181141
Ambience_hipster_loud               -0.016956
Ambience_hipster_quiet               0.182382
Ambience_hipster_very_loud           0.016129
Ambience_romantic_dressy             0.010753
Ambience_upscale_True                0.597188
BYOB_True                            0.461952
BYOBCorkage_True                     0.053763
BestNights_saturday_True             0.005376
BestNights_sunday_True               0.291150
BestNights_tuesday_True              0.010753
BestNights_wednesday_True            0.048387
BusinessAcceptsCreditCards_True      0.160050
BusinessParking_garage_True          0.301489
BusinessParking_lot_free             0.103391
BusinessParking_valet_True          -0.033085
Corkage_True                         0.581886
DietaryRestrictions_halal_True      -0.014475
DietaryRestrictions_soy-free_True    0.185691
GoodForMeal_breakfast_True           0.010753
GoodForMeal_brunch_True              0.010753
GoodForMeal_dessert_True             0.870968
GoodForMeal_dinner_True              0.026882
GoodForMeal_latenight_True           0.021505
GoodForMeal_lunch_True               0.375517
HappyHour_True                       0.112490
Music_background_music_True          0.016129
Music_jukebox_True                   0.005376
Music_no_music_True                  0.098842
Music_video_True                    -0.000827
OutdoorSeating_True                  0.503722
RestaurantsDelivery_True             0.010753
RestaurantsGoodForGroups_True        0.010753
Smoking_True                         0.349876
stars_4.5                           -0.350289
stars_5.0                            0.043011
dtype: float64

In [53]:
# Sort by the k-means anomaly distance; rows without a value are NaN.
anomaly_merged['distance_km'].sort_values()


Out[53]:
L82O1ZFFQfjJxF0_PYWPnA    2.551478
rA1WJvN6IULGbM0MTbbaaA    2.551478
QhAmKLX_0AgQe5HLxode9Q    2.586140
1pttL4MkpxOL6Mj2azOjVQ    2.625567
B5JCODx4728Ce2Qca1NFHw    2.726894
bGuxRBRKv7i1BKKfGbjxEw    2.816443
SjgeuBlgKER9yegpoxT99w    3.048289
LPCFfrQow0kl7EABuM7VDA         NaN
abO4gvGKB0aVjsrXHlgeFg         NaN
-4bPFENRdTqjML8aKEL6ow         NaN
CAhoEZPcAEvBSoCxn_fAjQ         NaN
96DHi-xkBB3CtsQRTPUjxQ         NaN
bNHeKmkBx5emT9xLfdWyjw         NaN
jniApOOS8ppUHhESL7OzTg         NaN
Qmwfg-PtYuCo5Q_IwcA_HQ         NaN
H5poGfDHCDMip7VYi7u8Rg         NaN
8I5U8OQ06nSxX2y4PPOWzQ         NaN
PUneCUDGvdgnay3uvI89fA         NaN
eQRopptwTiGRGOqo5RWxbg         NaN
G9jwU3O0HOXTHt43eJi2mg         NaN
KBWLCfWyfoXMvvBOXlABlw         NaN
fB-v-caPf0o3eUkp_2ua2Q         NaN
TAAlaXAk9sWOUXZW1-wfYQ         NaN
rjJM4xHdCKJRUpovQ-msAQ         NaN
9GLN1xfck07CKfNfejKCwg         NaN
MNIIRlWXotubOauSksv9AQ         NaN
XS80QAi1aYKK5v1zmKlkwA         NaN
zRxKrysFBrKH76yhyVdrRA         NaN
AVwE6GQmlCTAjaDbli4IKg         NaN
PgxjXf7enBAyzjR9ftLZ2A         NaN
                            ...   
doj3kl6iJTrKJqEFp4nYZg         NaN
sKIAHIGW0viQGFAMGgQ3Rg         NaN
aeJojBE7k6_vSGNec3Z0OQ         NaN
1b7Ma0CBJ0oDPQrFOicOqQ         NaN
Otncrh8MiVM4WV3-xJqQbA         NaN
1ORcqQT1DUHkKEZr1NpbrA         NaN
8Lpoyo7RHWZrHlQW2JM2wQ         NaN
B9AHWTeEuC2s3jOawidKkA         NaN
TZOiMROeIalOC_3Atb8a8A         NaN
a93TpkEV5oyEuyiI0CqAGw         NaN
omwPSPE8j7jEP4mecQM8KA         NaN
BlWPjh2WuvQSlmYmgFsylg         NaN
w5axMYWy5bcY0tiqAnYeRg         NaN
zaAh8JjNeDyrJXf6xNkdUQ         NaN
aT9jGDqTCvVjGjbYC969Rg         NaN
-BbnAc9YEO6pjvJGEtFbVQ         NaN
iAxsnFs3qq0Qx1EJ1S9SoQ         NaN
v9DncDynhn7f-RBURtcSFw         NaN
cAj-wwzw8HhwpHDtF_zLhA         NaN
ZL0L7IIfCRhBwRFFSs1Big         NaN
W4Dmj9Wxfq5gEALW6RUKTA         NaN
HKfW6AZ-lW7jwLJwNlcLLQ         NaN
bzMO8LJmJtgo3KuyWGKUxA         NaN
aLTpnF4xG3KA5trMilgChQ         NaN
TR0-w6VoZDAdvFQiq7P2Ug         NaN
qGL7Z0yVXET7GTq-mNyQmQ         NaN
R5ZvZ97FDp2bD93CNKg-zQ         NaN
bhoJYB0ArSqRe-Q82BI4vA         NaN
GTwQgfi8DN23D9fMURYUdw         NaN
LC7sXugc_BkKuZa2ghUhkQ         NaN
Name: distance_km, dtype: float64

In [54]:
# Full record of one flagged business, looked up by its index label.
anomaly_merged.loc['SjgeuBlgKER9yegpoxT99w', :]


Out[54]:
categories                        [Lounges, Nightlife, Restaurants, Japanese, Ba...
city                                                                        Toronto
hours                             [Monday 17:0-2:30, Tuesday 17:0-2:30, Wednesda...
is_open                                                                           1
latitude                                                                    43.7626
longitude                                                                  -79.4115
name                                                                   Nomé Izakaya
neighborhood                                                             Willowdale
postal_code                                                                 M2N 5N2
review_count                                                                    322
stars                                                                             4
state                                                                            ON
AgesAllowed                                                                   False
Ambience_casual                                                                 NaN
Ambience_classy                                                            full_bar
Ambience_divey                                                                 True
Ambience_hipster                                                               loud
Ambience_intimate                                                             False
Ambience_romantic                                                            casual
Ambience_touristy                                                             False
Ambience_trendy                                                                 NaN
Ambience_upscale                                                               True
BYOB                                                                           True
BYOBCorkage                                                                    True
BestNights_friday                                                             False
BestNights_monday                                                               NaN
BestNights_saturday                                                            True
BestNights_sunday                                                              True
BestNights_thursday                                                             NaN
BestNights_tuesday                                                            False
                                                        ...                        
DietaryRestrictions_soy-free                                                  False
DietaryRestrictions_vegan                                                       NaN
DietaryRestrictions_vegetarian                                                  NaN
GoodForMeal_breakfast                                                         False
GoodForMeal_brunch                                                              NaN
GoodForMeal_dessert                                                            True
GoodForMeal_dinner                                                             True
GoodForMeal_latenight                                                         False
GoodForMeal_lunch                                                              True
HappyHour                                                                     False
Music_background_music                                                        False
Music_dj                                                                        NaN
Music_jukebox                                                                  True
Music_karaoke                                                                   NaN
Music_live                                                                    False
Music_no_music                                                                False
Music_video                                                                   False
OutdoorSeating                                                                 True
RestaurantsDelivery                                                           False
RestaurantsGoodForGroups                                                      False
Smoking                                                                       False
review_count_greater_median                                                    True
cuisine_Japanese                                                                  2
spatial_label                                                                     3
clusters_sp                                                                       1
clusters_km                                                                       1
distance_km                                                                 3.04829
clusters_gm                                                                       3
scores_isf                                                               -0.0410919
distance_knn                                                                13.9595
Name: SjgeuBlgKER9yegpoxT99w, dtype: object

In [55]:
# Order every business by its `scores_isf` value, smallest first
# (ascending order is the pandas default; spelled out here for clarity).
# Presumably these are Isolation Forest scores where lower means more
# anomalous -- TODO confirm against the cell that creates `scores_isf`.
anomaly_merged.loc[:, 'scores_isf'].sort_values(ascending=True)


Out[55]:
bGuxRBRKv7i1BKKfGbjxEw   -0.054159
SjgeuBlgKER9yegpoxT99w   -0.041092
rA1WJvN6IULGbM0MTbbaaA   -0.033943
KRWa12MkvE7sgaEnNhSWtQ   -0.030220
oOGLDf2rzeCPS7UQ8hhPlQ   -0.028507
TR0-w6VoZDAdvFQiq7P2Ug   -0.026775
wT1mRdUeZRNgMB81s3dJDw   -0.024783
L82O1ZFFQfjJxF0_PYWPnA   -0.023985
1pttL4MkpxOL6Mj2azOjVQ   -0.022951
A6bnXx1see4yZSaVVCVDyw   -0.022717
26Kt6gQRF7S6m7crwR0tjw   -0.022695
6K_9C9qQ5sDrmbtEQKjU_g   -0.012906
JLTxzBEW4IrhraaonG-vUw   -0.012484
hPXR-Bi8U-uz6TUViqmGpg   -0.012152
QhAmKLX_0AgQe5HLxode9Q   -0.011374
B5JCODx4728Ce2Qca1NFHw   -0.009351
UOHwhOqB4cIKZ79xpJxTJw   -0.008662
2gL0aUIPqe4rw7bJqfAA_g   -0.007210
twirfhS-LvbQjAoZjoxWOQ   -0.005672
GMrFxo9zw2xoTfPV-jQlYg   -0.004949
zlsiSoZaNtQeMkdKl5K_hg   -0.004795
PUneCUDGvdgnay3uvI89fA   -0.003049
vcxzcp7D9IiyvyLlwpgvVg   -0.002204
CN5nuUQod0f8g3oh99qq0w   -0.001947
abO4gvGKB0aVjsrXHlgeFg   -0.001748
TQp5cls3bgIUZZRSC4g1pQ   -0.000013
0a2O150ytxrDjDzXNfRWkA    0.000815
zO2558leuUH9jeEs_8hfOQ    0.001514
RZv0iTHrFbw87sZg_45WHQ    0.003116
AuKKWh7jc1G7G_P_l8fd-g    0.004820
                            ...   
34OwRFftkT7t3iBquazJcg    0.093011
c-Jxth7pZfPqsWwBQ7sn1Q    0.093102
thzyiQZb16zD8wMliaEfRQ    0.093171
ik9VvawL-BeAqlxTI1leew    0.094041
aLTpnF4xG3KA5trMilgChQ    0.094041
5GCaHoHo547U7wkcoJDrmg    0.094439
XCxxPZ3Lu5mwmIo7IQRf1g    0.094647
LPCFfrQow0kl7EABuM7VDA    0.094898
dYJ7uBteyJiNHCLyK1ZOoQ    0.095010
jTZYireK6pnPb6Rni0jygw    0.095645
-w6VX94tbpA7joO748wN2w    0.096415
eQRopptwTiGRGOqo5RWxbg    0.097232
UVKi839mvCc9BrCmeJbitA    0.097527
sXLdPB6WWaAN_ZsEAmCF0w    0.099131
96DHi-xkBB3CtsQRTPUjxQ    0.100907
AVwE6GQmlCTAjaDbli4IKg    0.101551
qhagbvK-Pv4cGGghJ7gXBA    0.106802
5P2cIEeY8YJ0xDIfSgeXWg    0.106892
RQ6W7OLneDMrAiqNkRN6Rw    0.106910
HUYEadSbGSQNHXFmT2Ujjw    0.108899
RqOgBMpKYdsEOmBVmFXe3g    0.108899
crstB-H5rOfbXhV8pX0e6g    0.108899
PPDW3gtt0JojgXNYcSH1GA    0.109392
bNHeKmkBx5emT9xLfdWyjw    0.109392
n-5dPbGyziS0SOkVDTquFQ    0.109611
RvYjKtzdIPA2VBUpJJbzCw    0.109716
GEvolb_U74NXgfgdxrVtyQ    0.111893
Zrr8wJQOtTgdgh6i87Qrkw    0.114976
MMGnDeqMUNQsLC5V6BINjQ    0.123808
LCb2moBNkcBto_32rmZJ5A    0.123808
Name: scores_isf, dtype: float64

In [56]:
# Pull the complete record (all columns) for the business id that appears
# first in the ascending `scores_isf` ranking shown in Out[55].
anomaly_merged.loc['bGuxRBRKv7i1BKKfGbjxEw']


Out[56]:
categories                        [Asian Fusion, Japanese, Restaurants, Sushi Bars]
city                                                                        Toronto
hours                             [Monday 11:30-22:30, Tuesday 11:30-22:30, Wedn...
is_open                                                                           1
latitude                                                                    43.6503
longitude                                                                  -79.3805
name                                                                  Katana on Bay
neighborhood                                                          Downtown Core
postal_code                                                                 M5H 2R2
review_count                                                                     65
stars                                                                             4
state                                                                            ON
AgesAllowed                                                                   False
Ambience_casual                                                                 NaN
Ambience_classy                                                            full_bar
Ambience_divey                                                                 True
Ambience_hipster                                                            average
Ambience_intimate                                                               NaN
Ambience_romantic                                                            casual
Ambience_touristy                                                               NaN
Ambience_trendy                                                                 NaN
Ambience_upscale                                                               True
BYOB                                                                           True
BYOBCorkage                                                                    True
BestNights_friday                                                               NaN
BestNights_monday                                                               NaN
BestNights_saturday                                                             NaN
BestNights_sunday                                                              True
BestNights_thursday                                                             NaN
BestNights_tuesday                                                             True
                                                        ...                        
DietaryRestrictions_soy-free                                                  False
DietaryRestrictions_vegan                                                       NaN
DietaryRestrictions_vegetarian                                                  NaN
GoodForMeal_breakfast                                                           NaN
GoodForMeal_brunch                                                              NaN
GoodForMeal_dessert                                                            True
GoodForMeal_dinner                                                            False
GoodForMeal_latenight                                                         False
GoodForMeal_lunch                                                             False
HappyHour                                                                     False
Music_background_music                                                          NaN
Music_dj                                                                        NaN
Music_jukebox                                                                   NaN
Music_karaoke                                                                   NaN
Music_live                                                                      NaN
Music_no_music                                                                False
Music_video                                                                    True
OutdoorSeating                                                                 True
RestaurantsDelivery                                                           False
RestaurantsGoodForGroups                                                      False
Smoking                                                                       False
review_count_greater_median                                                    True
cuisine_Japanese                                                                  2
spatial_label                                                                     3
clusters_sp                                                                       1
clusters_km                                                                       1
distance_km                                                                 2.81644
clusters_gm                                                                       3
scores_isf                                                                -0.054159
distance_knn                                                                 12.583
Name: bGuxRBRKv7i1BKKfGbjxEw, dtype: object

In [ ]: