In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Load the pickled DataFrame stored next to this notebook.
# NOTE(review): unpickling can execute arbitrary code; only load files produced by this project.
df = pd.read_pickle('df_1518.pkl')
In [4]:
# Load the pickled spatial labels; the result is concatenated column-wise onto df in a later cell.
# NOTE(review): the directory name in the path must match the on-disk spelling exactly; same pickle trust caveat as any unpickle.
spatial_label = pd.read_pickle('../data_processeing/spatial_labels.pkl')
In [8]:
# Put the spatial label column(s) beside the columns of df (axis=1); pandas matches rows by index label.
# NOTE(review): presumably both objects share the same index (business ids) — rows missing from either side would show up as NaN; confirm.
df_new = pd.concat([df,spatial_label], axis=1)
In [10]:
# Inspect the available column labels of the combined frame.
df_new.columns
Out[10]:
Index([ u'categories', u'city',
u'hours', u'is_open',
u'latitude', u'longitude',
u'name', u'neighborhood',
u'postal_code', u'review_count',
u'stars', u'state',
u'AgesAllowed', u'Ambience_casual',
u'Ambience_classy', u'Ambience_divey',
u'Ambience_hipster', u'Ambience_intimate',
u'Ambience_romantic', u'Ambience_touristy',
u'Ambience_trendy', u'Ambience_upscale',
u'BYOB', u'BYOBCorkage',
u'BestNights_friday', u'BestNights_monday',
u'BestNights_saturday', u'BestNights_sunday',
u'BestNights_thursday', u'BestNights_tuesday',
u'BestNights_wednesday', u'BikeParking',
u'BusinessAcceptsCreditCards', u'BusinessParking_garage',
u'BusinessParking_lot', u'BusinessParking_street',
u'BusinessParking_valet', u'BusinessParking_validated',
u'Corkage', u'DietaryRestrictions_dairy-free',
u'DietaryRestrictions_gluten-free', u'DietaryRestrictions_halal',
u'DietaryRestrictions_kosher', u'DietaryRestrictions_soy-free',
u'DietaryRestrictions_vegan', u'DietaryRestrictions_vegetarian',
u'GoodForMeal_breakfast', u'GoodForMeal_brunch',
u'GoodForMeal_dessert', u'GoodForMeal_dinner',
u'GoodForMeal_latenight', u'GoodForMeal_lunch',
u'HappyHour', u'Music_background_music',
u'Music_dj', u'Music_jukebox',
u'Music_karaoke', u'Music_live',
u'Music_no_music', u'Music_video',
u'OutdoorSeating', u'RestaurantsDelivery',
u'RestaurantsGoodForGroups', u'Smoking',
u'review_count_greater_median', u'cuisine_Chinese',
u'spatial_label'],
dtype='object')
In [162]:
# Keep only the rows that pass all three filters: rating of at least 4 stars,
# spatial_label equal to 3, and cuisine_Chinese equal to 2.
# NOTE(review): what the codes 3 and 2 stand for is not visible in this notebook — confirm
# against the step that produced those columns.
rating_mask = df_new['stars'] >= 4
area_mask = df_new['spatial_label'] == 3
cuisine_mask = df_new['cuisine_Chinese'] == 2
df_select = df_new[rating_mask & area_mask & cuisine_mask]
In [163]:
# Display the filtered frame (the recorded output reports 235 rows x 67 columns).
df_select
Out[163]:
categories
city
hours
is_open
latitude
longitude
name
neighborhood
postal_code
review_count
...
Music_live
Music_no_music
Music_video
OutdoorSeating
RestaurantsDelivery
RestaurantsGoodForGroups
Smoking
review_count_greater_median
cuisine_Chinese
spatial_label
KeQ1cK564cL5C_hBTFrqnA
[Delis, Chinese, Indian, Seafood, Restaurants,...
Mississauga
[Monday 11:30-0:0, Tuesday 17:30-0:0, Wednesda...
1
43.616083
-79.617576
Desi Bar & Grill
Mississauga Valley
L4W 4C3
51
...
False
True
False
True
False
False
False
True
2
3
f5xm2RiwLv0gbmXU4BkrGA
[Hot Pot, Restaurants, Chinese]
Markham
[Monday 12:0-23:0, Tuesday 12:0-23:0, Wednesda...
1
43.860726
-79.304713
Lion Pavilion Hot Pot
Unionville
L3R 0W4
4
...
NaN
NaN
NaN
True
NaN
NaN
NaN
False
2
3
q0oPX1DXW86QytTvvrD9MA
[Southern, Specialty Food, Sri Lankan, Indian,...
Toronto
[Tuesday 11:30-22:0, Wednesday 11:30-22:0, Thu...
1
43.836384
-79.251047
Ceylon Flavor
Scarborough
M1X 0A5
12
...
NaN
True
False
True
False
False
True
False
2
3
0C6uuZBD7xKS-uucgwoyQw
[Local Flavor, Asian Fusion, Restaurants, Cafe...
Mississauga
[Monday 7:0-16:0, Tuesday 7:0-16:0, Wednesday ...
1
43.687343
-79.607775
The Mission Eatery
L4V 1T1
33
...
NaN
False
True
False
False
False
False
True
2
3
mevOvXMnlTLbSl4w8fcG2A
[Chinese, Restaurants]
Vaughan
[Monday 12:0-23:0, Tuesday 12:0-23:0, Wednesda...
1
43.776459
-79.610612
Choice of the Orient
Woodbridge
L4L 9S2
8
...
NaN
True
False
False
False
False
False
False
2
3
nTQhgQZa7HhIpyM0ICC1mw
[Chinese, Cambodian, Restaurants, Vietnamese]
Vaughan
[Monday 10:0-21:30, Wednesday 10:0-21:30, Thur...
1
43.828880
-79.537802
Hu Tieu Nam Vang Hong Phat
L4K 5Y6
16
...
NaN
True
False
False
False
False
True
False
2
3
TBzgzTFSa7pJXiLD7emYaQ
[Indian, Chinese, Restaurants]
Toronto
[Monday 12:0-22:0, Wednesday 12:0-22:0, Thursd...
1
43.694272
-79.276040
Lotus Garden Hakka Indian Style Chinese
Scarborough
M1L 1E3
64
...
NaN
True
False
True
False
False
True
True
2
3
0XlbdDkYzC14SZF_bUGeXA
[Food, Specialty Food, Imported Food, Ethnic F...
Markham
[Monday 14:0-22:0, Wednesday 14:0-22:0, Thursd...
1
43.883528
-79.261944
Chef Chan
L3P 1Y6
10
...
NaN
False
False
NaN
NaN
False
NaN
False
2
3
o5t33JVLpZWf269AEPB95Q
[Chinese, Vegan, Vegetarian, Restaurants]
Toronto
[Monday 11:30-21:0, Tuesday 11:30-21:0, Thursd...
1
43.651965
-79.402650
Greens Vegetarian Restaurant
Alexandra Park
M5T 1H8
83
...
NaN
False
False
True
False
False
True
True
2
3
Ih7a0qOMLFnYAec8qyq2Gg
[Chinese, Dim Sum, Restaurants]
Toronto
[Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes...
1
43.651263
-79.474637
Delights of China
Swansea
M6S 1N2
5
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
False
2
3
WpK-mZ7w-304c78MOEYH6A
[Asian Fusion, Latin American, Tapas/Small Pla...
Toronto
[Monday 17:0-22:30, Tuesday 17:0-22:30, Wednes...
1
43.649003
-79.528554
Mai Bistro
Etobicoke
M9A 1B4
46
...
NaN
True
False
True
False
False
True
True
2
3
E_mRnwhHLuz6tgMBScw1mw
[Asian Fusion, Chinese, Restaurants]
Toronto
[Monday 11:30-21:30, Tuesday 11:30-21:30, Wedn...
1
43.745538
-79.209955
Phoenix
Scarborough
M1J 3H3
29
...
NaN
True
False
True
False
False
True
False
2
3
2BuiyGL0JbNjnc88fYH39Q
[Chinese, Restaurants, Hot Pot, Noodles]
North York
[Monday 11:0-23:0, Tuesday 11:0-23:0, Wednesda...
1
43.777027
-79.414539
Hot Pot on Yonge
Willowdale
M2N 5S1
26
...
NaN
False
False
True
False
False
True
False
2
3
yPR8Bh55-9NXmooZS7lrLA
[Restaurants, Chinese, Food, Food Delivery Ser...
Toronto
[Monday 11:0-23:0, Tuesday 11:0-23:0, Wednesda...
1
43.724452
-79.252395
Royal Congee Chinese Cuisine
Scarborough
M1K 5J2
9
...
NaN
False
False
False
False
False
False
False
2
3
Q62lGVKqoKJ09lZNt2qpfw
[Chinese, Restaurants]
Scarborough
[Monday 12:0-22:0, Tuesday 12:0-22:0, Wednesda...
1
43.752733
-79.264041
Lucky Chinese Restaurant
Scarborough
M1P 2S1
54
...
NaN
True
False
True
False
False
True
True
2
3
IithJLAUG3gWcHw9EKLLFA
[Asian Fusion, Restaurants, Halal]
Scarborough
[Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes...
1
43.753508
-79.276481
Kim Kim Indian Hakka Chinese Restaurant
Scarborough
M1P 2L1
46
...
NaN
True
False
True
False
False
True
True
2
3
OXW2erFOPT-Oov6wChxWYA
[Dim Sum, Asian Fusion, Restaurants, Chinese]
Toronto
[Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes...
1
43.647369
-79.386763
Lee Chen Asian Bistro
Entertainment District
M5H 3S6
10
...
NaN
NaN
NaN
False
False
NaN
False
False
2
3
PlJ5Tf9T4QlFQoNZf99aTg
[Asian Fusion, Chinese, Thai, Restaurants, Ind...
Brampton
[Monday 11:0-23:0, Tuesday 11:0-23:0, Wednesda...
1
43.710523
-79.683989
Hakka No.1
L6T 4K9
7
...
NaN
NaN
NaN
False
False
NaN
False
False
2
3
MDInVOKvIetk_OIpKd2Ofw
[Chinese, Restaurants]
Mississauga
[Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda...
1
43.588358
-79.606632
Szechuan Noodle Bowl
Cooksville
L5A 1X5
14
...
NaN
NaN
NaN
False
NaN
NaN
NaN
False
2
3
mBHYTLynxk-Ryq56QIx6cw
[Restaurants, Chinese]
Mississauga
[Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes...
1
43.619793
-79.572541
Dragon Palace
Summerville
L4X 1L9
15
...
NaN
True
False
False
False
False
True
False
2
3
QaxDKkqYTtVYZJcqBNTnvQ
[Noodles, Pizza, Chinese, Restaurants]
Toronto
[Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda...
1
43.653810
-79.396020
Yummy Yummy Dumplings
Downtown Core
M5T 2A8
123
...
NaN
False
False
True
False
False
True
True
2
3
RlNmDWJ0kkEWXW7yORJZVw
[Chinese, Restaurants]
North York
[Monday 10:0-1:0, Tuesday 10:0-1:0, Wednesday ...
1
43.790632
-79.366067
Congee Wong
M2H 1S8
129
...
NaN
True
False
True
False
False
True
True
2
3
sePTDv1dGHPID7kwuNzXaA
[Restaurants, Chinese, Dim Sum]
Markham
[Monday 10:0-20:0, Tuesday 10:0-20:0, Wednesda...
1
43.828469
-79.305981
Sam's Congee Delight
Milliken
L3R 9V7
102
...
NaN
True
False
False
False
False
True
True
2
3
A6bnXx1see4yZSaVVCVDyw
[Fast Food, Restaurants, Salad, Japanese, Sush...
Toronto
[Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda...
1
43.654756
-79.387492
Rolltation
Downtown Core
M5G 1C8
96
...
NaN
False
True
True
False
False
True
True
2
3
sqHRwTOpFMsdoN_3xsTFNQ
[Coffee & Tea, Vietnamese, Cafes, Juice Bars &...
Vaughan
[Monday 11:0-15:0, Tuesday 11:0-21:0, Wednesda...
1
43.828074
-79.537355
Ox Noodle
L4K 5Y6
52
...
NaN
True
False
True
False
False
True
True
2
3
aarBX0VyJbjMACGCcMrfEQ
[Asian Fusion, African, Barbeque, Restaurants,...
Brampton
[Tuesday 11:0-21:0, Wednesday 11:0-21:0, Thurs...
1
43.663580
-79.732279
Get Grill
L6Y 0P6
21
...
NaN
True
False
False
False
False
True
False
2
3
cE7ArG7tPXsLSBbgHGHYZA
[Chinese, Restaurants]
Etobicoke
[Monday 11:0-22:30, Tuesday 11:0-22:30, Wednes...
1
43.693314
-79.557283
Gourmet Express
Etobicoke
M9R 2Y8
19
...
NaN
True
False
False
False
False
False
False
2
3
RcEaxi1h3qMIzQTDQyqNFg
[Food, Desserts, Asian Fusion, Restaurants, Fi...
Toronto
[Monday 10:0-21:0, Tuesday 10:0-21:0, Wednesda...
1
43.795685
-79.422284
Pinoy Hub
M2M 3X4
5
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
False
2
3
vuVyosC2e3CQ5GRdr9sh6A
[Tea Rooms, Restaurants, Taiwanese, Coffee & T...
Newmarket
[Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda...
1
44.035145
-79.473013
Tea Hotea
L3X 1V6
11
...
NaN
False
False
NaN
NaN
False
NaN
False
2
3
8xPmlVJy2o6x0J04CBpEMQ
[Food Stands, Food, Chinese, Bakeries, Ice Cre...
Markham
[Monday 11:0-20:0, Wednesday 11:0-20:0, Thursd...
1
43.857214
-79.303344
Toast Delight
Unionville
L3R 5T5
92
...
NaN
False
True
False
False
False
False
True
2
3
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
snw9iNNLpFYZeHotW00uVA
[Restaurants, Asian Fusion]
Toronto
[Monday 17:30-22:30, Tuesday 17:30-22:30, Wedn...
1
43.644253
-79.400508
Lee Restaurant
M5V 1M5
397
...
NaN
False
False
True
False
False
False
True
2
3
8Lpoyo7RHWZrHlQW2JM2wQ
[Asian Fusion, Restaurants, Japanese, Sushi Ba...
Mississauga
[Monday 10:30-22:0, Tuesday 10:30-22:0, Wednes...
1
43.633710
-79.668259
Kura Sushi
L4Z 3E6
53
...
NaN
True
False
True
False
False
True
True
2
3
1kqLZp0_FEMHGx-W4TjTLQ
[Fast Food, Food Court, Chinese, Restaurants]
Toronto
[Monday 9:0-19:0, Tuesday 9:0-19:0, Wednesday ...
1
43.797708
-79.272237
Hong Kong Style Tea & Fast Food
Scarborough
M1S
3
...
NaN
False
False
False
False
False
False
False
2
3
gopA-Q9KWwC8BLIRDhusgg
[Restaurants, Chinese]
Brampton
[Tuesday 12:0-22:0, Wednesday 12:0-22:0, Thurs...
1
43.695403
-79.750700
Singapore Garden Chinese Restaurant
L6V 1B8
10
...
NaN
True
False
True
False
False
True
False
2
3
ZDDzKXN_JXe7694zePRuTQ
[Food, Restaurants, Chinese, Coffee & Tea, Tai...
Toronto
[Monday 11:0-0:0, Tuesday 11:0-0:0, Wednesday ...
1
43.655657
-79.384737
Chatime - Dundas
Discovery District
M5G 1C3
263
...
NaN
False
True
False
False
False
True
True
2
3
u3q_fVU-qxgoGWLPVy4nMw
[Caribbean, Chinese, Restaurants]
Vaughan
[Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda...
1
43.829573
-79.536329
Tommy's Restaurant
L4K 0A3
18
...
NaN
NaN
NaN
False
NaN
NaN
NaN
False
2
3
UBKAE_miJ-lVmUqOTlI13g
[Chinese, Caribbean, Restaurants]
Mississuaga
[Monday 12:0-23:0, Tuesday 12:0-23:0, Wednesda...
1
43.582525
-79.619314
Benab Family Restaurant
Cooksville
L5A 4E4
9
...
NaN
False
False
NaN
False
False
False
False
2
3
S0Z8fKAUYTd2MKUuez-evA
[Food, Chinese, Restaurants]
Brampton
[Monday 11:30-23:0, Tuesday 11:30-23:0, Wednes...
1
43.735154
-79.823489
Noodle To Go Chinese Food
L6Z 0E3
9
...
NaN
True
False
NaN
False
False
False
False
2
3
2SyYW8GiDZsqtPb-aCbosg
[Chinese, Vietnamese, Restaurants]
Toronto
[Monday 10:0-18:30, Tuesday 10:0-18:30, Wednes...
1
43.651181
-79.384465
Noodle King
Downtown Core
M5H 3M9
48
...
NaN
False
False
False
False
False
True
True
2
3
jhKKwn9FSwFbZsbbYi5WfQ
[Vietnamese, Juice Bars & Smoothies, Food, Des...
Toronto
[Monday 9:0-21:0, Tuesday 10:0-21:0, Wednesday...
1
43.759988
-79.412225
Holy Snack
M2N 5M6
4
...
NaN
False
True
False
False
False
False
False
2
3
F4cIFBf8u81hKlGn0KgGCg
[Asian Fusion, Restaurants]
Aurora
[Monday 11:0-21:0, Tuesday 11:0-21:0, Wednesda...
1
43.997820
-79.466982
Annalisa Asian Fusion Cuisine & Lounge
L4G 1M1
11
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
False
2
3
FpFRtA1tagxqh_rLFs4XFQ
[Restaurants, Chinese, Seafood]
Richmond Hill
[Monday 10:30-0:0, Tuesday 10:30-0:0, Wednesda...
1
43.848329
-79.382966
Sichuan Kungfu Fish
L4B 0B2
5
...
NaN
True
False
False
False
False
False
False
2
3
33IS-0QBNwRC8cXnHABpbg
[Canadian (New), Restaurants, Asian Fusion]
Toronto
[Monday 17:30-22:30, Tuesday 17:30-22:30, Wedn...
1
43.651941
-79.407317
Bent
Little Italy
M6J 1V1
167
...
NaN
False
False
True
False
False
False
True
2
3
jNiZgCwZj4CNw1noxiC8PQ
[Restaurants, Buffets, Chinese]
Markham
[Monday 11:30-15:0, Monday 17:0-21:30, Tuesday...
1
43.866984
-79.228414
Mandarin - Markham East
Box Grove
L6B 0P2
43
...
NaN
True
False
True
False
False
False
True
2
3
K2CdjES-IZDDEgetOnf0Vw
[Asian Fusion, Fast Food, Restaurants, Chinese...
Toronto
[Thursday 11:0-20:0, Friday 11:0-20:0, Saturda...
1
43.787190
-79.276623
One2 Snacks
Scarborough
M1S 2C1
101
...
NaN
True
False
True
False
False
True
True
2
3
I8cCdp3q_5oCEykzZgPkQw
[Restaurants, Taiwanese, Juice Bars & Smoothie...
Markham
[Monday 12:0-23:0, Tuesday 12:0-23:0, Wednesda...
1
43.842575
-79.386548
CoCo Fresh Tea & Juice Richmond Hill
L3T 7T1
26
...
NaN
False
True
False
False
False
True
False
2
3
bzMO8LJmJtgo3KuyWGKUxA
[Japanese, Chinese, Ramen, Noodles, Restaurants]
Toronto
[Monday 11:0-22:30, Tuesday 11:0-22:30, Wednes...
1
43.652390
-79.397391
Homemade Ramen
Chinatown
M5T 2E3
104
...
NaN
False
False
True
False
False
True
True
2
3
mNRAFxAqL31c2Y96LEKggA
[Restaurants, Chinese]
Oakville
[Monday 11:30-21:0, Tuesday 11:30-21:0, Wednes...
1
43.463397
-79.674470
King Szechuan Chinese Restaurant
L6J 7S8
6
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
False
2
3
gtcsOodbmk4E0TulYHnlHA
[Cafes, Coffee & Tea, Nightlife, Restaurants, ...
Toronto
[Monday 12:0-2:0, Tuesday 12:0-2:0, Wednesday ...
1
43.662726
-79.422167
Northwood
Bickford Park
M6G 1M1
89
...
False
False
False
False
False
False
False
True
2
3
0v8icS8wOOgEDiHDCOQkZQ
[Restaurants, Chinese, Indian]
Mississauga
[Monday 11:30-22:0, Tuesday 11:30-22:0, Wednes...
1
43.568452
-79.666129
Chilli Chicken House
L5C
230
...
NaN
True
False
True
False
False
True
True
2
3
nqnVKM55nIZn8fNH1fIJPA
[Restaurants, Chinese, Vegetarian]
Toronto
[Monday 12:0-21:0, Tuesday 12:0-21:0, Wednesda...
1
43.666863
-79.345552
Simon's Wok
Leslieville
M4M 1Y5
31
...
NaN
False
False
True
False
False
True
True
2
3
ZuZxDPtDsh-nmbLebRC8iA
[Restaurants, Asian Fusion, Sandwiches]
Toronto
[Monday 11:0-21:0, Tuesday 11:0-21:0, Wednesda...
1
43.645199
-79.414396
Nam Sandwich
West Queen West
M6J 1G3
21
...
NaN
False
False
False
False
False
True
False
2
3
FfmH-oEWN7K1za-o8KZQDA
[Food, Chinese, Restaurants, Bakeries, Coffee ...
Richmond Hill
[Tuesday 8:30-18:30, Wednesday 8:30-18:30, Thu...
1
43.851122
-79.406816
Palace Bakery and Cafe
L4B 1Y3
7
...
NaN
False
False
False
False
False
False
False
2
3
zBd5JvFWuMGlnCYLaGsXZg
[Noodles, Comfort Food, Asian Fusion, Restaura...
Markham
[Monday 9:30-16:0, Tuesday 9:30-21:0, Wednesda...
1
43.889130
-79.314860
Pho Metro
L6C 0A2
83
...
NaN
False
False
False
False
False
True
True
2
3
XIX4xuzrt3IzvjX9MtHr9g
[Restaurants, Chinese]
Etobicoke
[Monday 16:0-23:30, Tuesday 16:0-0:0, Wednesda...
1
43.691946
-79.531593
Mayflower Chinese Food
Etobicoke
M9P 3B6
22
...
NaN
True
False
True
False
False
False
False
2
3
u6JJojkbqVlVI4NqKsL54Q
[Korean, Cocktail Bars, Restaurants, Bars, Asi...
Toronto
[Tuesday 17:0-23:0, Wednesday 17:0-23:0, Thurs...
1
43.655478
-79.414277
DOMA
Bickford Park
M6G 2Y3
11
...
False
False
False
False
False
False
False
False
2
3
_5XClj4E5VCIsEscbrrPKg
[Cocktail Bars, Chinese, Restaurants, Bars, As...
Toronto
[Tuesday 18:0-2:0, Wednesday 18:0-2:0, Thursda...
1
43.655939
-79.409460
LoPan
Little Italy
M6G 1A5
9
...
False
False
False
NaN
False
False
False
False
2
3
-yzBFD877La-RP739VxF7w
[Coffee & Tea, Cafes, Tea Rooms, Taiwanese, Fo...
Toronto
[Monday 13:0-0:0, Tuesday 13:0-0:0, Wednesday ...
1
43.775339
-79.413432
Sharetea
Willowdale
M2N 7A2
43
...
NaN
False
False
False
False
False
True
True
2
3
9iJMGMsTK-q6W5MB1_Ny3Q
[Chinese, Restaurants]
Richmond Hill
[Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda...
1
43.867072
-79.457974
Dynasty Chinese Restaurant
L4C 9W9
6
...
NaN
NaN
NaN
False
False
NaN
False
False
2
3
Q3UkgxNNInsPcUFhsQFcrg
[Chinese, Barbeque, Restaurants]
Richmond Hill
[Monday 11:30-20:30, Tuesday 11:30-20:30, Wedn...
1
43.843598
-79.391201
Kum Hong BBQ Restaurant
L4B 3K2
52
...
NaN
True
False
False
False
False
True
True
2
3
235 rows × 67 columns
In [19]:
# Preview the attribute columns; a label-based .loc slice includes both end labels.
# NOTE(review): in the recorded output several values do not look like they belong to their
# column label (e.g. 'full_bar' under Ambience_classy, 'average'/'quiet' under Ambience_hipster) —
# the labels may be shifted relative to the data upstream; confirm.
df_select.loc[:, 'AgesAllowed': 'review_count_greater_median']
Out[19]:
AgesAllowed
Ambience_casual
Ambience_classy
Ambience_divey
Ambience_hipster
Ambience_intimate
Ambience_romantic
Ambience_touristy
Ambience_trendy
Ambience_upscale
...
Music_jukebox
Music_karaoke
Music_live
Music_no_music
Music_video
OutdoorSeating
RestaurantsDelivery
RestaurantsGoodForGroups
Smoking
review_count_greater_median
KeQ1cK564cL5C_hBTFrqnA
False
NaN
full_bar
True
average
False
casual
True
NaN
True
...
NaN
NaN
False
True
False
True
False
False
False
True
f5xm2RiwLv0gbmXU4BkrGA
False
NaN
beer_and_wine
True
average
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
True
NaN
NaN
NaN
False
q0oPX1DXW86QytTvvrD9MA
False
NaN
full_bar
True
quiet
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
False
0C6uuZBD7xKS-uucgwoyQw
False
NaN
none
False
quiet
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
True
False
False
False
False
True
mevOvXMnlTLbSl4w8fcG2A
False
NaN
full_bar
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
False
False
False
False
False
nTQhgQZa7HhIpyM0ICC1mw
True
NaN
full_bar
True
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
False
False
False
True
False
TBzgzTFSa7pJXiLD7emYaQ
False
NaN
none
False
quiet
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
True
False
True
False
False
True
True
0XlbdDkYzC14SZF_bUGeXA
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
True
...
NaN
NaN
NaN
False
False
NaN
NaN
False
NaN
False
o5t33JVLpZWf269AEPB95Q
False
NaN
none
True
quiet
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
True
False
False
True
True
Ih7a0qOMLFnYAec8qyq2Gg
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
False
WpK-mZ7w-304c78MOEYH6A
False
NaN
full_bar
False
quiet
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
True
E_mRnwhHLuz6tgMBScw1mw
False
NaN
none
False
quiet
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
False
2BuiyGL0JbNjnc88fYH39Q
False
NaN
none
False
loud
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
True
False
False
True
False
yPR8Bh55-9NXmooZS7lrLA
False
NaN
none
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
False
False
False
False
False
Q62lGVKqoKJ09lZNt2qpfw
False
NaN
beer_and_wine
True
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
True
IithJLAUG3gWcHw9EKLLFA
False
NaN
beer_and_wine
True
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
True
OXW2erFOPT-Oov6wChxWYA
False
NaN
NaN
NaN
NaN
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
False
False
NaN
False
False
PlJ5Tf9T4QlFQoNZf99aTg
False
NaN
full_bar
True
average
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
False
False
NaN
False
False
MDInVOKvIetk_OIpKd2Ofw
False
NaN
NaN
False
NaN
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
False
mBHYTLynxk-Ryq56QIx6cw
False
NaN
beer_and_wine
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
False
False
False
True
False
QaxDKkqYTtVYZJcqBNTnvQ
False
NaN
none
False
quiet
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
False
False
True
False
False
True
True
RlNmDWJ0kkEWXW7yORJZVw
False
NaN
none
False
loud
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
True
sePTDv1dGHPID7kwuNzXaA
True
NaN
none
False
average
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
True
False
False
False
False
True
True
A6bnXx1see4yZSaVVCVDyw
False
NaN
none
False
average
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
False
True
True
False
False
True
True
sqHRwTOpFMsdoN_3xsTFNQ
True
NaN
none
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
True
aarBX0VyJbjMACGCcMrfEQ
False
NaN
none
True
quiet
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
False
False
False
True
False
cE7ArG7tPXsLSBbgHGHYZA
False
NaN
none
True
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
False
False
False
False
False
RcEaxi1h3qMIzQTDQyqNFg
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
True
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
False
vuVyosC2e3CQ5GRdr9sh6A
NaN
NaN
NaN
NaN
NaN
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
False
False
NaN
NaN
False
NaN
False
8xPmlVJy2o6x0J04CBpEMQ
False
NaN
none
False
quiet
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
False
True
False
False
False
False
True
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
snw9iNNLpFYZeHotW00uVA
False
NaN
full_bar
False
average
NaN
dressy
NaN
NaN
True
...
NaN
NaN
NaN
False
False
True
False
False
False
True
8Lpoyo7RHWZrHlQW2JM2wQ
False
NaN
beer_and_wine
True
quiet
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
True
1kqLZp0_FEMHGx-W4TjTLQ
False
NaN
none
True
average
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
False
False
False
False
False
False
False
gopA-Q9KWwC8BLIRDhusgg
False
NaN
none
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
True
False
ZDDzKXN_JXe7694zePRuTQ
False
NaN
none
False
loud
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
True
False
False
False
True
True
u3q_fVU-qxgoGWLPVy4nMw
False
NaN
NaN
NaN
NaN
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
False
NaN
NaN
NaN
False
UBKAE_miJ-lVmUqOTlI13g
NaN
NaN
beer_and_wine
True
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
NaN
False
False
False
False
S0Z8fKAUYTd2MKUuez-evA
NaN
NaN
none
False
NaN
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
NaN
False
False
False
False
2SyYW8GiDZsqtPb-aCbosg
False
NaN
none
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
False
False
False
True
True
jhKKwn9FSwFbZsbbYi5WfQ
False
NaN
none
False
NaN
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
False
True
False
False
False
False
False
F4cIFBf8u81hKlGn0KgGCg
NaN
NaN
NaN
NaN
quiet
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
False
FpFRtA1tagxqh_rLFs4XFQ
False
NaN
beer_and_wine
True
NaN
NaN
casual
NaN
NaN
NaN
...
NaN
NaN
NaN
True
False
False
False
False
False
False
33IS-0QBNwRC8cXnHABpbg
False
NaN
full_bar
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
True
False
False
False
True
jNiZgCwZj4CNw1noxiC8PQ
False
NaN
full_bar
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
False
True
K2CdjES-IZDDEgetOnf0Vw
False
NaN
none
False
average
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
True
False
True
False
False
True
True
I8cCdp3q_5oCEykzZgPkQw
False
NaN
none
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
True
False
False
False
True
False
bzMO8LJmJtgo3KuyWGKUxA
False
NaN
none
True
loud
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
True
False
False
True
True
mNRAFxAqL31c2Y96LEKggA
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
False
gtcsOodbmk4E0TulYHnlHA
False
NaN
full_bar
False
loud
False
casual
False
NaN
True
...
False
NaN
False
False
False
False
False
False
False
True
0v8icS8wOOgEDiHDCOQkZQ
False
NaN
none
False
average
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
True
False
True
False
False
True
True
nqnVKM55nIZn8fNH1fIJPA
False
NaN
none
False
quiet
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
False
False
True
False
False
True
True
ZuZxDPtDsh-nmbLebRC8iA
False
NaN
none
False
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
False
False
False
True
False
FfmH-oEWN7K1za-o8KZQDA
False
NaN
none
False
average
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
False
False
False
False
False
False
False
zBd5JvFWuMGlnCYLaGsXZg
False
NaN
none
True
average
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
False
False
False
False
False
True
True
XIX4xuzrt3IzvjX9MtHr9g
False
NaN
none
False
quiet
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
True
False
True
False
False
False
False
u6JJojkbqVlVI4NqKsL54Q
False
NaN
full_bar
NaN
loud
False
dressy
False
NaN
NaN
...
NaN
NaN
False
False
False
False
False
False
False
False
_5XClj4E5VCIsEscbrrPKg
NaN
NaN
full_bar
False
quiet
False
NaN
False
NaN
True
...
False
NaN
False
False
False
NaN
False
False
False
False
-yzBFD877La-RP739VxF7w
False
NaN
none
True
average
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
False
False
False
False
False
True
True
9iJMGMsTK-q6W5MB1_Ny3Q
False
NaN
NaN
True
NaN
NaN
casual
NaN
NaN
True
...
NaN
NaN
NaN
NaN
NaN
False
False
NaN
False
False
Q3UkgxNNInsPcUFhsQFcrg
False
NaN
none
False
quiet
NaN
casual
NaN
NaN
False
...
NaN
NaN
NaN
True
False
False
False
False
True
True
235 rows × 53 columns
In [164]:
# Start the feature matrix from the attribute columns (both end labels are included by .loc).
X = df_select.loc[:, 'AgesAllowed': 'review_count_greater_median']
In [165]:
# Feature matrix = attribute columns + the star rating.
# Built directly from df_select rather than from the previous value of X, so running this
# cell more than once cannot append a second 'stars' column.
X = pd.concat(
    [
        df_select.loc[:, 'AgesAllowed':'review_count_greater_median'],
        df_select[['stars']],
    ],
    axis=1,
)
In [166]:
# Turn the star rating into text so that get_dummies treats it as a categorical column.
stars_as_text = X['stars'].map(str)
X['stars'] = stars_as_text
In [167]:
# One-hot encode the categorical columns.
# dummy_na=False: no separate indicator is created for NaN, so a missing value is encoded as all zeros.
# drop_first=True: the first level of each encoded column is dropped, so it is also encoded as all zeros.
X = pd.get_dummies(X, dummy_na=False, drop_first=True)
In [168]:
def true_false(x):
    """Map a truth value to an integer flag.

    Parameters
    ----------
    x : object
        Any value; typically a Python or NumPy boolean.

    Returns
    -------
    int
        1 when ``x == True`` holds, otherwise 0 (this includes False, NaN and None).
    """
    # Equality rather than identity on purpose: NumPy booleans compare equal to True
    # but are not the same object as the built-in True.
    return 1 if x == True else 0
# Recode the review-count flag element by element to 1/0 so it is numeric like the dummy columns.
review_flag = X['review_count_greater_median'].map(true_false)
X['review_count_greater_median'] = review_flag
In [169]:
# Column totals: for the 0/1 indicator columns this is the number of rows with the flag set.
# NOTE(review): generated names such as 'Ambience_classy_full_bar', 'Ambience_hipster_loud' and
# 'Ambience_romantic_dressy' suggest the source column labels do not match their values
# (possibly shifted upstream) — confirm before interpreting individual features.
X.sum()
Out[169]:
review_count_greater_median 100
AgesAllowed_True 14
Ambience_classy_full_bar 54
Ambience_classy_none 97
Ambience_divey_True 81
Ambience_hipster_loud 13
Ambience_hipster_quiet 49
Ambience_hipster_very_loud 1
Ambience_romantic_dressy 4
Ambience_romantic_formal 1
Ambience_touristy_True 2
Ambience_upscale_True 136
BYOB_True 122
BYOBCorkage_True 11
BestNights_saturday_True 1
BestNights_sunday_True 192
BestNights_tuesday_True 3
BestNights_wednesday_True 1
BikeParking_True 3
BusinessAcceptsCreditCards_True 101
BusinessParking_garage_True 92
BusinessParking_lot_free 77
BusinessParking_lot_paid 2
BusinessParking_valet_True 6
Corkage_True 130
DietaryRestrictions_halal_True 213
DietaryRestrictions_soy-free_True 192
GoodForMeal_breakfast_True 1
GoodForMeal_brunch_True 3
GoodForMeal_dessert_True 143
GoodForMeal_dinner_True 4
GoodForMeal_latenight_True 6
GoodForMeal_lunch_True 56
HappyHour_True 83
Music_background_music_True 3
Music_no_music_True 83
Music_video_True 15
OutdoorSeating_True 87
RestaurantsDelivery_True 3
RestaurantsGoodForGroups_True 1
Smoking_True 106
stars_4.5 40
stars_5.0 11
dtype: int64
In [155]:
# Drop four indicator columns from the feature matrix.
# X is rebound instead of mutated in place, and errors='ignore' skips labels that are
# already gone, so the cell can be re-run without raising KeyError.
# NOTE(review): this cell's execution count (155) is lower than the cells that rebuild X
# (164-169), so the recorded clustering outputs may have been produced without this drop — confirm.
X = X.drop(
    ['AgesAllowed_True', 'Ambience_classy_none', 'BusinessParking_lot_paid', 'Ambience_hipster_very_loud'],
    axis=1,
    errors='ignore',
)
In [ ]:
In [ ]:
In [ ]:
In [44]:
from sklearn import metrics
from sklearn.cluster import DBSCAN, SpectralClustering
In [170]:
# Sweep the number of clusters from 2 to 9 and print the Euclidean silhouette score of each
# spectral clustering (one score per line, in order of n_clusters).
# print(...) with a single argument produces the same output on Python 2 and Python 3.
for n_clusters in range(2, 10):
    spectural_clustering = SpectralClustering(
        n_clusters=n_clusters,
        random_state=2,
        affinity='nearest_neighbors',
    ).fit(X)
    labels = spectural_clustering.labels_
    print(metrics.silhouette_score(X, labels, metric='euclidean'))
0.137912589443
0.125983452632
0.119376850964
0.0951945455224
0.085683194616
0.0666698492817
0.068397636597
0.074395420056
In [203]:
# Fit the two-cluster spectral model whose labels are exported further down.
# NOTE(review): no affinity is passed here, so the estimator's default is used, whereas the
# silhouette sweeps in this notebook pass affinity='nearest_neighbors' (and random_state=2) —
# confirm the difference is intended.
spectural_clustering = SpectralClustering(n_clusters=2, random_state=999)
spectural_clustering.fit(X)
labels = spectural_clustering.labels_
In [204]:
# Number of rows assigned to cluster 1.
np.count_nonzero(labels == 1)
Out[204]:
30
In [205]:
# Number of rows assigned to cluster 2 (0 in the recorded output: only two clusters were requested).
np.count_nonzero(labels == 2)
Out[205]:
0
In [206]:
# Number of rows assigned to cluster 0.
np.count_nonzero(labels == 0)
Out[206]:
205
In [188]:
# Number of rows assigned to cluster 3.
# NOTE(review): this cell's execution count (188) is older than the fit above (203), so the
# recorded output may refer to an earlier `labels` — confirm.
np.count_nonzero(labels == 3)
Out[188]:
0
In [ ]:
In [201]:
# Work on a copy so that adding the cluster column below leaves df_select unchanged.
df_label = df_select.copy()
In [207]:
# Attach the cluster labels as a new column; the array is assigned by position, so `labels`
# must be in the same row order as df_select (X is derived from df_select without reordering).
df_label = df_label.assign(spectural_cluster_=labels)
In [211]:
# Persist only the cluster-label column (as a one-column DataFrame indexed like df_label).
# The column is selected by name instead of by position, so the export stays correct even if
# another column is appended to df_label after the label column.
df_label[['spectural_cluster_']].to_pickle('../Anomaly/spectural_cluster.pkl')
In [ ]:
In [154]:
# Same sweep over n_clusters = 2..9, scored with the Manhattan ('cityblock') silhouette.
# print(...) with a single argument produces the same output on Python 2 and Python 3.
# NOTE(review): this cell's execution count (154) predates the cells that rebuild X (164-169),
# so the recorded scores may come from an earlier version of the feature matrix — confirm.
for n_clusters in range(2, 10):
    spectural_clustering = SpectralClustering(
        n_clusters=n_clusters,
        random_state=2,
        affinity='nearest_neighbors',
    ).fit(X)
    labels = spectural_clustering.labels_
    print(metrics.silhouette_score(X, labels, metric='cityblock'))
0.224169918235
0.208667504098
0.180335106359
0.164940155102
0.12280095451
0.129421723101
0.0977548646697
0.111062080759
In [117]:
# Density-based alternative: fit DBSCAN with a neighbourhood radius of 1.8 and keep its labels.
# This overwrites `labels` from the spectral clustering cells.
dbscan = DBSCAN(eps=1.8)
dbscan.fit(X)
labels = dbscan.labels_
# Silhouette scoring was left disabled by the author:
# print metrics.silhouette_score(X, labels, metric='euclidean')
In [118]:
# Number of rows DBSCAN placed in cluster 0.
np.count_nonzero(labels == 0)
Out[118]:
69
In [119]:
# Number of rows labelled -1, the label DBSCAN gives to noise points.
np.count_nonzero(labels == -1)
Out[119]:
88
In [ ]:
Content source: djfan/yelp-challenge
Similar notebooks: