In [2]:
import numpy as np
import pandas as pd
from scipy.spatial import distance
from sklearn.cluster import SpectralClustering
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the main DataFrame from a pickle in the working directory.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
df = pd.read_pickle('df_1518.pkl')

In [4]:
# Load the pickled spatial labels (later used through the 'spatial_label' column).
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
spatial_label = pd.read_pickle('../data_processeing/spatial_labels.pkl')

In [5]:
# Put the spatial label columns next to the columns of df (axis=1 aligns rows on the index).
df_new = pd.concat([df,spatial_label], axis=1)

In [6]:
# Keep only rows with stars >= 4, spatial_label == 7 and cuisine_Chinese == 2.
# NOTE(review): the meaning of the codes 7 and 2 is not visible in this notebook — confirm against the preprocessing step.
df_select = df_new[(df_new['stars'] >= 4) & (df_new['spatial_label'] == 7) & (df_new['cuisine_Chinese'] == 2)]

Spectral Clustering


In [7]:
# Feature matrix: every column from 'AgesAllowed' through
# 'review_count_greater_median' plus the star rating. The rating is converted
# to text first so that get_dummies one-hot encodes it like the other
# categorical columns.
attribute_cols = df_select.loc[:, 'AgesAllowed': 'review_count_greater_median']
stars_text = df_select[['stars']].copy()
stars_text['stars'] = stars_text['stars'].apply(str)
X = pd.get_dummies(
    pd.concat([attribute_cols, stars_text], axis=1),
    dummy_na=False,
    drop_first=True
)
def true_false(x):
    """Map a truthy flag to an integer: 1 when ``x == True``, otherwise 0."""
    return 1 if x == True else 0
# Recode the review-count flag as 1/0 integers via true_false.
X['review_count_greater_median'] = X['review_count_greater_median'].apply(true_false)

In [8]:
# Try 2 to 4 spectral clusters. For each setting print the silhouette score
# (Manhattan distance) followed by the number of samples carrying labels 0..4.
for n_clusters in range(2,5):
    spectural_clustering = SpectralClustering(n_clusters=n_clusters, random_state=100, affinity='sigmoid').fit(X)
    labels = spectural_clustering.labels_
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(silhouette_score(X, labels, metric='cityblock'))
    # Labels >= n_clusters never occur, so those counts are always 0; they are
    # still printed to keep a fixed five-line layout per setting.
    for cluster_id in range(5):
        print(list(labels).count(cluster_id))


0.0604747245331
161
8
0
0
0
0.0179897292672
160
8
1
0
0
0.043740098015
7
153
8
1
0

In [9]:
# Final spectral model with 2 clusters; its labels are stored for the result table.
# NOTE(review): random_state=999 here differs from the 100 used in the search loop — confirm this is intended.
spectural_clustering = SpectralClustering(n_clusters=2, random_state=999, affinity='sigmoid').fit(X)
labels_spectural = spectural_clustering.labels_

In [10]:
# (rows, columns) of the filtered subset.
df_select.shape


Out[10]:
(169, 67)

KMeans


In [11]:
## function that gets silhouette scores for a clustering method
def get_silhouette_score(X, cluster_method, method='complete'):
    """Print the average silhouette score for 2 to 9 clusters.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to the clustering model and to silhouette_score.
    cluster_method : {'kmeans', 'gaussian_mix', 'hierarchical'}
        Which clustering algorithm to evaluate.
    method : str, default 'complete'
        Linkage method; only used when ``cluster_method == 'hierarchical'``.

    Returns
    -------
    None
        One line per candidate cluster count is printed.

    Raises
    ------
    ValueError
        If ``cluster_method`` is not one of the supported names.
    """
    # Candidate numbers of clusters to try.
    range_n_clusters = range(2,10)

    # Pick a function that maps a cluster count to one label per row of X.
    if cluster_method == 'kmeans':
        def assign_labels(n_clusters):
            return KMeans(n_clusters=n_clusters, random_state=22).fit(X).labels_

    elif cluster_method == 'gaussian_mix':
        def assign_labels(n_clusters):
            gm = GaussianMixture(n_components=n_clusters, random_state=22).fit(X)
            return gm.predict(X)

    elif cluster_method == 'hierarchical':
        # Imported here because the notebook's import cell does not provide them.
        from scipy.cluster.hierarchy import fcluster, linkage

        # The linkage matrix does not depend on the cluster count: build it once.
        Z = linkage(X, method)

        def assign_labels(n_clusters):
            return fcluster(Z, n_clusters, criterion='maxclust')

    else:
        raise ValueError(
            "Unknown cluster_method {!r}; expected 'kmeans', 'gaussian_mix' "
            "or 'hierarchical'".format(cluster_method))

    for n_clusters in range_n_clusters:
        cluster_labels = assign_labels(n_clusters)
        silhouette_avg = silhouette_score(X, cluster_labels, random_state=22)
        print("For n_clusters ={},".format(n_clusters)+"{} - average silhouette_score :{}".\
              format(cluster_method, silhouette_avg))

In [12]:
## Choose the number of KMeans clusters with the silhouette method (prints one score per candidate count)
get_silhouette_score(X, 'kmeans')


For n_clusters =2,kmeans - average silhouette_score :0.153933490359
For n_clusters =3,kmeans - average silhouette_score :0.0895060167385
For n_clusters =4,kmeans - average silhouette_score :0.0960596183143
For n_clusters =5,kmeans - average silhouette_score :0.0941486644697
For n_clusters =6,kmeans - average silhouette_score :0.0916804580175
For n_clusters =7,kmeans - average silhouette_score :0.0956989968796
For n_clusters =8,kmeans - average silhouette_score :0.0840999060067
For n_clusters =9,kmeans - average silhouette_score :0.0849166946159

In [13]:
n_clusters = 3  # number of clusters

# Train the model; the fixed random_state keeps the fit reproducible.
km = KMeans(n_clusters=n_clusters, random_state=22).fit(X)
labels_km = km.labels_

# Cluster sizes, one line per label 0..n_clusters-1.
# print(...) with a single argument behaves the same on Python 2 and 3.
for cluster_id in range(n_clusters):
    print(list(labels_km).count(cluster_id))


77
33
59

In [14]:
## Distance
# `km` is already fitted on X; calling fit_predict again would only refit the
# same model (same data, same random_state), so the stored labels are reused.
scor = km.labels_

# Column j holds the distance of each sample to cluster center j.
center_distances = km.transform(X)
res_p = pd.DataFrame(center_distances)
res_p.columns = list(range(n_clusters))
res_p["cluster"] = scor

# "score" = distance of each sample to the center of its own cluster,
# picked per row without a Python-level apply.
res_p["score"] = center_distances[np.arange(len(scor)), scor]
res_p.sort_values("score",ascending=False)[:5]


Out[14]:
0 1 2 cluster score
69 3.523839 2.965984 3.215116 1 2.965984
83 3.382812 3.061506 2.758289 2 2.758289
92 3.593181 2.731982 3.215116 1 2.731982
166 3.162571 2.720868 3.183329 1 2.720868
46 3.066668 2.653203 2.869717 1 2.653203

In [15]:
# Work on a copy so the clustering features in X stay untouched.
X_df = X.copy()

In [16]:
# Attach the KMeans cluster label of every row.
X_df['km'] = labels_km

In [17]:
# Attach each row's distance to its own cluster center; .values assigns by position, not by index.
X_df['distance_KM'] = res_p.score.values

In [18]:
## Get Anomalies
def get_anomaly(df, label_col, dist_or_likelihood, thres):
    """Return the rows whose score deviates strongly from their cluster's mean.

    For every cluster (distinct value of ``label_col``) the mean and the
    standard deviation of ``dist_or_likelihood`` are computed. A row is
    reported when ``abs(value - cluster_mean) / cluster_std > thres``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both the cluster label and the score column.
    label_col : str
        Name of the column with the cluster labels.
    dist_or_likelihood : str
        Name of the numeric column to test (e.g. a distance or likelihood).
    thres : float
        Threshold on the absolute standardized deviation.

    Returns
    -------
    pandas.DataFrame
        The anomalous rows of ``df`` with the same columns, grouped by cluster
        in order of first appearance of each label. Empty (with the columns
        and dtypes of ``df``) when no row exceeds the threshold.
    """
    scores = df[dist_or_likelihood]
    per_cluster = df.groupby(label_col)[dist_or_likelihood]
    clustermean = per_cluster.mean()
    clusterstd = per_cluster.std()

    # Collect the per-cluster hits and concatenate once at the end, instead of
    # growing a frame inside the loop starting from an empty object-dtype one.
    anomalies = []
    for label in df[label_col].unique():
        # A NaN std (single-member cluster) or a zero std (constant cluster)
        # yields NaN here, which never compares greater than the threshold.
        deviation = (scores - clustermean[label]).abs() / clusterstd[label]
        anomaly = df[(df[label_col] == label) & (deviation > thres)]
        if not anomaly.empty:
            anomalies.append(anomaly)

    if not anomalies:
        return df.iloc[0:0].copy()
    return pd.concat(anomalies, axis=0)

In [19]:
## Get anomalies based on KMeans distance: rows more than 2 standard deviations from their cluster's mean distance
km_anomalies = get_anomaly(X_df, 'km', 'distance_KM', 2)

In [20]:
# Distances of the flagged rows only.
km_anomalies['distance_KM']


Out[20]:
y8d90Pt16Nip-B5UXWBP-w    2.593608
UTXNr62dGSK-tet8OeXUcQ    2.758289
IZivKqtHyz4-ts8KsnvMrA    2.576474
-Uix-n4Jqo4W7ERagC5qAA    2.568902
2xbxXWeu3tpZEJdGTTGbLg    1.663358
zjvnqTjBp56NhMp1GrlO5g    2.965984
Name: distance_KM, dtype: float64

In [ ]:


In [ ]:

Gaussian Mixture


In [21]:
##### Gaussian Mixture #########

## Choose the number of mixture components with the silhouette method (prints one score per candidate count)
get_silhouette_score(X, 'gaussian_mix')


For n_clusters =2,gaussian_mix - average silhouette_score :0.177347637193
For n_clusters =3,gaussian_mix - average silhouette_score :0.102471274593
For n_clusters =4,gaussian_mix - average silhouette_score :0.0660608778246
For n_clusters =5,gaussian_mix - average silhouette_score :0.0610811965445
For n_clusters =6,gaussian_mix - average silhouette_score :0.0764446920948
For n_clusters =7,gaussian_mix - average silhouette_score :0.0604045046468
For n_clusters =8,gaussian_mix - average silhouette_score :0.0647091305484
For n_clusters =9,gaussian_mix - average silhouette_score :0.0650270024766

In [24]:
# Fit a two-component Gaussian mixture and assign every row to a component.
k = 2
GM = GaussianMixture(n_components=k, random_state=22).fit(X)
label_gm = GM.predict(X)

In [25]:
# Number of samples assigned to component labels 0..4, one count per line.
# print(...) with a single argument behaves the same on Python 2 and 3.
for component_id in range(5):
    print(list(label_gm).count(component_id))


134
35
0
0
0

Isolation Forest


In [26]:
### 3) Isolation Forest
from sklearn.ensemble import IsolationForest

# Fit the model (max_samples=100, seeded with random_state=22).
ISF = IsolationForest(max_samples=100, random_state=22)
ISF.fit(X)

## Compute the anomaly score of every input row. The lower, the more abnormal.
score_isf = ISF.decision_function(X)

In [27]:
# Positional index of the row with the lowest (most abnormal) isolation-forest score.
score_isf.argmin()


Out[27]:
69

KNN Distance


In [28]:
def dist2knn(x, nn, k):
    """Sum of the k+1 smallest Euclidean distances from ``x`` to the rows of ``nn``.

    ``x`` is a single observation and ``nn`` a DataFrame of candidate
    neighbours. One extra distance is included because ``x`` is expected to be
    a row of ``nn`` itself, contributing a distance of 0.
    """
    ranked = sorted(
        distance.euclidean(x, nn.iloc[row, :]) for row in range(len(nn))
    )
    return sum(ranked[:k + 1])  # +1: the leading 0 is x's distance to itself

In [29]:
# For every row of X, sum the distances to its 5 nearest neighbours within X.
dist_sum_knn = []
for i in range(len(X)):
    # Progress indicator: '\r' returns to the start of the line so the percentage overwrites itself.
    # This is a Python 2 print statement; the trailing comma suppresses the newline.
    print '\r{}%'.format(100.0*(i+1)/len(X)),
    dist_sum_knn.append(dist2knn(X.iloc[i,:], X, 5))


100.0%

In [ ]:

Output


In [30]:
# Empty result table that shares the index of X.
result = pd.DataFrame(index=X.index)

In [31]:
# Collect the output of every method, one column each.
result['clusters_sp'] = labels_spectural
result['clusters_km'] = labels_km
# km_anomalies only holds the flagged rows; the assignment aligns on the index,
# so every other row gets NaN in this column.
result['distance_km'] = km_anomalies['distance_KM']
result['clusters_gm'] = label_gm
result['scores_isf'] = score_isf
result['distance_knn'] = dist_sum_knn

In [32]:
# Display the combined result table.
result


Out[32]:
clusters_sp clusters_km distance_km clusters_gm scores_isf distance_knn
01aNlDhbMObjc9OdAHuNpQ 0 2 NaN 0 0.089663 9.196152
VzUo-RURV3VnfNItAYM8yg 0 0 NaN 0 0.013683 10.944272
3GfdCuI0YCc5U3rLLLPHUw 0 2 NaN 0 0.045325 10.236068
H_eO04NZAQIDcbtFQ4BUag 1 0 NaN 0 0.087450 8.660254
2xbxXWeu3tpZEJdGTTGbLg 0 1 1.663358 0 0.086864 9.668288
N3zuaqGESF5iZsi_md9c1Q 0 2 NaN 0 0.072284 9.732051
BKg8YIGX_5YyUczmBAyyCQ 0 0 NaN 0 0.105013 8.928203
kABF0hYfAEnl166mn1zR1A 0 2 NaN 0 0.141218 7.071068
5OqrwhtZ3mcmUSwLINZTWQ 0 1 NaN 1 -0.036444 11.391274
ky5L-EfUwU9chSPcIeXM5w 0 0 NaN 0 0.127891 6.656854
WSGHEQdcdbBWXDpna99EiQ 0 0 NaN 0 0.108063 7.610366
W8xPHUqL088jdcmNZyH-eA 0 0 NaN 0 0.105383 9.414214
wpnLPAnkMEb1NpC3xiD_qg 0 0 NaN 0 0.017324 11.157694
A6lKCuTrDSJ_eFKyumZCJQ 0 0 NaN 0 0.073273 10.708204
YkOCo5ipV2he2WXIAlZb-A 0 0 NaN 0 0.103053 7.706742
HyoPFfBHLK0lKVHWEdFpvQ 0 0 NaN 0 0.067762 10.204187
eBxQzBFLEPwRCxuiL50SQA 0 1 NaN 1 0.052726 11.820605
MpT8KheTjs7TSDWIoMUFxg 0 0 NaN 0 0.059467 8.700170
aTbz9GdpAf2M5MeAHVNhWw 0 2 NaN 0 0.067115 10.676323
hf761YTEXm0B1GdtGHGGYQ 0 0 NaN 0 0.094983 10.204187
1ZnVfS-qP19upP_fwOhZsA 0 0 NaN 0 0.130772 7.071068
eV-5b1Bjtnn-QePN3t3taQ 0 2 NaN 0 0.120631 6.656854
HPLB1Tr8ZB5UkD-2umKPdA 0 2 NaN 0 0.132974 6.656854
ABlNn6tyCTfjiKteYnp53g 0 0 NaN 0 0.082698 8.878315
2PS9kBbuJcmBhcNp-D62uA 0 0 NaN 0 0.081908 9.732051
-sN_og_84rLW22CaE3cjVw 0 2 NaN 0 0.065244 10.236068
w6zW6gIyg1sI5V6Wag_SYg 0 2 NaN 0 0.119115 7.706742
rIeXMt92PLNjZbemWKRXcA 0 2 NaN 0 0.113818 8.024580
B_WggEKFq-ZFNui8CHPYvA 0 0 NaN 0 0.116683 6.656854
XIDhO9YAhsCa0upSkMhGuw 0 1 NaN 1 0.020693 11.080520
... ... ... ... ... ... ...
A5Rkh7UymKm0_Rxm9K2PJw 0 2 NaN 0 0.020272 12.426550
WRQ-9LluyivReFiQZFUujw 0 0 NaN 0 0.109342 6.242641
O3OH5IEFMPtz7mPKakPZ3Q 0 2 NaN 0 0.063558 9.732051
xxAGaqYnboeR0SilS7DbAg 0 0 NaN 1 0.047351 12.426550
eJPckkV1k7gn0V2awA1pzA 0 0 NaN 0 0.099795 7.706742
JdoBiBRgeS_KKaNaU1sofA 0 2 NaN 0 0.128551 7.071068
lmxA0dJM0XsPCIHPXhEQ-g 0 1 NaN 1 -0.018781 12.362570
Cl-xl1vTUwHeaGgBxzdTRA 0 2 NaN 0 0.111801 8.928203
PglC8rgguMIlT621p1BLdQ 0 2 NaN 0 0.097091 9.146264
Zua91nLoti4rx1VCkuIe6g 0 1 NaN 1 -0.018843 12.497732
pQ6e4fjq6kqRqLE6w8CfWQ 0 2 NaN 0 0.139344 7.928203
beuVp5CZxCdNvQIIPBS2rw 0 2 NaN 0 0.062357 10.440255
FPAOrUsP7GuIyqmK9UoPUQ 1 0 NaN 0 0.078042 8.560478
-Uix-n4Jqo4W7ERagC5qAA 1 0 2.568902 0 -0.013621 13.032495
4JkGjRRc3XucG9U8XwthGg 0 0 NaN 0 0.135239 5.828427
XKo3y41cF6euqS7I6AX_1Q 0 0 NaN 0 0.080824 10.000000
0xvdC8F0HmFpAFkJk6nXLg 0 2 NaN 0 0.051095 10.000000
ywtDqnrLeWzG7Q-9ORmSsA 0 0 NaN 0 0.121607 6.656854
czyhvf93Txsfxdxqcm16mA 0 0 NaN 0 0.147601 4.000000
D6K3VT6S8FwFm4u5uN8T8g 0 2 NaN 0 0.105825 6.974691
H1NqpoM2hT_A6M98PSrZvw 0 0 NaN 0 0.087609 8.560478
01VPJkcU0m8WzlA7dCEpVQ 0 0 NaN 0 0.084024 9.196152
Vs7gc9EE3k9wARuUcN9piA 0 2 NaN 0 0.142358 4.828427
Gh1BoQNMGkh91pSHqvDRAA 0 2 NaN 0 0.146998 4.414214
oyPvqoQhsdkfUeHtdPD7BQ 0 2 NaN 0 0.134607 7.388905
DIxgItsSI9QwX9H8lVptYg 0 0 NaN 0 0.081862 9.196152
2reHgsuAWofHfocfJ4Xs5w 0 0 NaN 0 0.073752 9.146264
bM-rAr7gxsxxjD-FlRcIJA 0 1 NaN 1 -0.017160 14.656854
JPfi__QJAaRzmfh5aOyFEw 0 0 NaN 0 0.084706 9.968119
w5CSi-An5meLnxjKSFn0wQ 0 2 NaN 0 0.106556 8.660254

169 rows × 6 columns


In [33]:
# Write the result table to a UTF-8 CSV; index_label=False writes no header field for the index column.
result.to_csv('LasVegas_chi_results.csv', index_label=False, encoding='utf-8' )

In [34]:
# Put the result columns next to the selected rows' original columns (axis=1 aligns on the index).
anomaly_merged = pd.concat([df_select, result], axis=1)

In [35]:
# KMeans distances in ascending order; rows that were not flagged hold NaN and sort last.
anomaly_merged['distance_km'].sort_values()


Out[35]:
2xbxXWeu3tpZEJdGTTGbLg    1.663358
-Uix-n4Jqo4W7ERagC5qAA    2.568902
IZivKqtHyz4-ts8KsnvMrA    2.576474
y8d90Pt16Nip-B5UXWBP-w    2.593608
UTXNr62dGSK-tet8OeXUcQ    2.758289
zjvnqTjBp56NhMp1GrlO5g    2.965984
01aNlDhbMObjc9OdAHuNpQ         NaN
VzUo-RURV3VnfNItAYM8yg         NaN
3GfdCuI0YCc5U3rLLLPHUw         NaN
H_eO04NZAQIDcbtFQ4BUag         NaN
N3zuaqGESF5iZsi_md9c1Q         NaN
BKg8YIGX_5YyUczmBAyyCQ         NaN
kABF0hYfAEnl166mn1zR1A         NaN
5OqrwhtZ3mcmUSwLINZTWQ         NaN
ky5L-EfUwU9chSPcIeXM5w         NaN
WSGHEQdcdbBWXDpna99EiQ         NaN
W8xPHUqL088jdcmNZyH-eA         NaN
wpnLPAnkMEb1NpC3xiD_qg         NaN
A6lKCuTrDSJ_eFKyumZCJQ         NaN
YkOCo5ipV2he2WXIAlZb-A         NaN
HyoPFfBHLK0lKVHWEdFpvQ         NaN
eBxQzBFLEPwRCxuiL50SQA         NaN
MpT8KheTjs7TSDWIoMUFxg         NaN
aTbz9GdpAf2M5MeAHVNhWw         NaN
hf761YTEXm0B1GdtGHGGYQ         NaN
1ZnVfS-qP19upP_fwOhZsA         NaN
eV-5b1Bjtnn-QePN3t3taQ         NaN
HPLB1Tr8ZB5UkD-2umKPdA         NaN
ABlNn6tyCTfjiKteYnp53g         NaN
2PS9kBbuJcmBhcNp-D62uA         NaN
                            ...   
yYMTMtguT90BfMJVxasFuw         NaN
A5Rkh7UymKm0_Rxm9K2PJw         NaN
WRQ-9LluyivReFiQZFUujw         NaN
O3OH5IEFMPtz7mPKakPZ3Q         NaN
xxAGaqYnboeR0SilS7DbAg         NaN
eJPckkV1k7gn0V2awA1pzA         NaN
JdoBiBRgeS_KKaNaU1sofA         NaN
lmxA0dJM0XsPCIHPXhEQ-g         NaN
Cl-xl1vTUwHeaGgBxzdTRA         NaN
PglC8rgguMIlT621p1BLdQ         NaN
Zua91nLoti4rx1VCkuIe6g         NaN
pQ6e4fjq6kqRqLE6w8CfWQ         NaN
beuVp5CZxCdNvQIIPBS2rw         NaN
FPAOrUsP7GuIyqmK9UoPUQ         NaN
4JkGjRRc3XucG9U8XwthGg         NaN
XKo3y41cF6euqS7I6AX_1Q         NaN
0xvdC8F0HmFpAFkJk6nXLg         NaN
ywtDqnrLeWzG7Q-9ORmSsA         NaN
czyhvf93Txsfxdxqcm16mA         NaN
D6K3VT6S8FwFm4u5uN8T8g         NaN
H1NqpoM2hT_A6M98PSrZvw         NaN
01VPJkcU0m8WzlA7dCEpVQ         NaN
Vs7gc9EE3k9wARuUcN9piA         NaN
Gh1BoQNMGkh91pSHqvDRAA         NaN
oyPvqoQhsdkfUeHtdPD7BQ         NaN
DIxgItsSI9QwX9H8lVptYg         NaN
2reHgsuAWofHfocfJ4Xs5w         NaN
bM-rAr7gxsxxjD-FlRcIJA         NaN
JPfi__QJAaRzmfh5aOyFEw         NaN
w5CSi-An5meLnxjKSFn0wQ         NaN
Name: distance_km, dtype: float64

In [43]:
# Summed nearest-neighbour distances in ascending order; the largest values are at the end.
anomaly_merged['distance_knn'].sort_values()


Out[43]:
yYMTMtguT90BfMJVxasFuw     4.000000
czyhvf93Txsfxdxqcm16mA     4.000000
Gh1BoQNMGkh91pSHqvDRAA     4.414214
8FUzCm4Q13MJMX7N6ftmQw     4.414214
arv3T_av9OoVo3SvxV4TXw     4.828427
Vs7gc9EE3k9wARuUcN9piA     4.828427
YI08egmS9lWWB6N6MmrePw     5.242641
gWeYW0E5Tfmmj_9fugfLNw     5.242641
zdE82PiD6wquvjYLyhOJNA     5.414214
oAngC9V69vyzBVYwKEJQaw     5.414214
MyTsAeShB9pcWP84GSTAdw     5.828427
Os1n1_idfw9vv9kwULGJnQ     5.828427
4JkGjRRc3XucG9U8XwthGg     5.828427
IS_IWMsFRJii__17LsP1Og     6.242641
WRQ-9LluyivReFiQZFUujw     6.242641
9tLCuUmw3Tlco-tKPGkIWQ     6.242641
ZuNJelvkJD4wsXNBRg6t5w     6.242641
V-0qRzBHKixmQgon_fW_AA     6.242641
eiTWTVThkHr-DKXxIysbgA     6.560478
ky5L-EfUwU9chSPcIeXM5w     6.656854
ywtDqnrLeWzG7Q-9ORmSsA     6.656854
t6ZIBNrQjvtwor8W-u3sUg     6.656854
eV-5b1Bjtnn-QePN3t3taQ     6.656854
HPLB1Tr8ZB5UkD-2umKPdA     6.656854
_8VkSb_Mryb6bHQxSWB7Pg     6.656854
IkaII0DYh1OPGRnqx2ccvA     6.656854
B_WggEKFq-ZFNui8CHPYvA     6.656854
JJAwT9R6Fp2yyWHtWFA8uQ     6.878315
D6K3VT6S8FwFm4u5uN8T8g     6.974691
JdoBiBRgeS_KKaNaU1sofA     7.071068
                            ...    
9mQGAVvNsUULaEPQpG9LuA    11.567377
jScBTQtdAt-8RshaiBEHgw    11.584537
OOHDPE3tilNC4md8Lcx_4Q    11.584537
xEH00X_VoRepWKXQrragmw    11.820605
eBxQzBFLEPwRCxuiL50SQA    11.820605
YTfXZ9tbTBJkWsDI7cMDWQ    11.820605
Er5XAY2UFGGqBHdqpeCFkA    11.922533
w0ArxcJ0XsyK1ZwrIf3gpA    12.016867
0i9S0BejjRv0ZDwdO9XymA    12.034027
Ec9CBmL3285XkeHaNp-bSQ    12.247449
lmxA0dJM0XsPCIHPXhEQ-g    12.362570
A5Rkh7UymKm0_Rxm9K2PJw    12.426550
xxAGaqYnboeR0SilS7DbAg    12.426550
Zua91nLoti4rx1VCkuIe6g    12.497732
PYIk6aWfw8B8bO1mTn2rpQ    12.622812
y8d90Pt16Nip-B5UXWBP-w    12.622812
l3joBBpkq0ib11dKUpKMAw    12.622812
Fanu7vgveGy1LyLV8R9aMA    12.774742
SLx-vB9LnRtC83oAeVdAuA    12.988163
1Df5WnLX3DqN6ymlhqznaQ    13.001749
-Uix-n4Jqo4W7ERagC5qAA    13.032495
r05kbKriZ3Z56Nre1WQIDg    13.228757
-CQokjildrY7UZezXCdEBw    13.538674
T419Y3fiJW9EuFDkFougVA    13.580522
9cw870n3gsGikVoOT5YOYg    13.776784
IQnUxhO7oieEP7-5ZVJ4uA    14.142136
UTXNr62dGSK-tet8OeXUcQ    14.302606
bM-rAr7gxsxxjD-FlRcIJA    14.656854
3pSUr_cdrphurO6m1HMP9A    14.828427
zjvnqTjBp56NhMp1GrlO5g    16.274430
Name: distance_knn, dtype: float64

In [42]:
# Isolation-forest scores in ascending order; the lowest (most abnormal) come first.
anomaly_merged['scores_isf'].sort_values()


Out[42]:
zjvnqTjBp56NhMp1GrlO5g   -0.066326
IRbgMt0cVuzK1KSppU16_Q   -0.051790
kICgNgVRkIh4doz0atyMMA   -0.049993
3pSUr_cdrphurO6m1HMP9A   -0.048296
5OqrwhtZ3mcmUSwLINZTWQ   -0.036444
-CQokjildrY7UZezXCdEBw   -0.034522
UTXNr62dGSK-tet8OeXUcQ   -0.030198
9cw870n3gsGikVoOT5YOYg   -0.024481
T419Y3fiJW9EuFDkFougVA   -0.021470
9mQGAVvNsUULaEPQpG9LuA   -0.019341
Zua91nLoti4rx1VCkuIe6g   -0.018843
lmxA0dJM0XsPCIHPXhEQ-g   -0.018781
PYIk6aWfw8B8bO1mTn2rpQ   -0.017927
bM-rAr7gxsxxjD-FlRcIJA   -0.017160
-Uix-n4Jqo4W7ERagC5qAA   -0.013621
xEH00X_VoRepWKXQrragmw   -0.010544
Fanu7vgveGy1LyLV8R9aMA   -0.009628
IZivKqtHyz4-ts8KsnvMrA   -0.005598
y8d90Pt16Nip-B5UXWBP-w   -0.004788
1Df5WnLX3DqN6ymlhqznaQ    0.003512
rbQOcy2n8dBPpm5ua3VU_Q    0.011956
VzUo-RURV3VnfNItAYM8yg    0.013683
IQnUxhO7oieEP7-5ZVJ4uA    0.014040
SLx-vB9LnRtC83oAeVdAuA    0.014722
Ec9CBmL3285XkeHaNp-bSQ    0.017006
wpnLPAnkMEb1NpC3xiD_qg    0.017324
w0ArxcJ0XsyK1ZwrIf3gpA    0.017484
A5Rkh7UymKm0_Rxm9K2PJw    0.020272
XIDhO9YAhsCa0upSkMhGuw    0.020693
pH0BLkL4cbxKzu471VZnuA    0.020922
                            ...   
gWeYW0E5Tfmmj_9fugfLNw    0.127180
ky5L-EfUwU9chSPcIeXM5w    0.127891
JdoBiBRgeS_KKaNaU1sofA    0.128551
vbVJzKDhHlhMnKRpES5QzQ    0.128685
TrN8HBHBL4-Tu7cXMDoopQ    0.129543
zdE82PiD6wquvjYLyhOJNA    0.129897
1ZnVfS-qP19upP_fwOhZsA    0.130772
C5azlhXSTOlqVAM3nl95nw    0.131628
Rv1IiQaIIDunqzS8dSvHCw    0.132203
0VjHFdczi6Nln_nn8bucJQ    0.132924
HPLB1Tr8ZB5UkD-2umKPdA    0.132974
vAh6WNioOEw7G9tGhd_JQw    0.133524
yBxzqKXHhvNVaFnEOd4f7Q    0.133972
MyTsAeShB9pcWP84GSTAdw    0.134241
oyPvqoQhsdkfUeHtdPD7BQ    0.134607
oAngC9V69vyzBVYwKEJQaw    0.134890
4JkGjRRc3XucG9U8XwthGg    0.135239
ZibmYdOPKLlqDM9oR6xzOA    0.135968
IS_IWMsFRJii__17LsP1Og    0.136437
_8VkSb_Mryb6bHQxSWB7Pg    0.136814
pQ6e4fjq6kqRqLE6w8CfWQ    0.139344
Os1n1_idfw9vv9kwULGJnQ    0.139575
ZuNJelvkJD4wsXNBRg6t5w    0.140763
kABF0hYfAEnl166mn1zR1A    0.141218
arv3T_av9OoVo3SvxV4TXw    0.142358
Vs7gc9EE3k9wARuUcN9piA    0.142358
Gh1BoQNMGkh91pSHqvDRAA    0.146998
8FUzCm4Q13MJMX7N6ftmQw    0.146998
yYMTMtguT90BfMJVxasFuw    0.147601
czyhvf93Txsfxdxqcm16mA    0.147601
Name: scores_isf, dtype: float64

In [46]:
# Inspect every column of one business, selected by its index label.
anomaly_merged.loc['zjvnqTjBp56NhMp1GrlO5g', :]


Out[46]:
categories                        [Nightlife, Lounges, Bars, Restaurants, Mexica...
city                                                                      Las Vegas
hours                             [Monday 11:0-22:0, Tuesday 11:0-22:0, Wednesda...
is_open                                                                           1
latitude                                                                    36.1599
longitude                                                                  -115.152
name                                                         Mingo Kitchen & Lounge
neighborhood                                                               Downtown
postal_code                                                                   89101
review_count                                                                    169
stars                                                                             4
state                                                                            NV
AgesAllowed                                                                    True
Ambience_casual                                                                 NaN
Ambience_classy                                                            full_bar
Ambience_divey                                                                 True
Ambience_hipster                                                            average
Ambience_intimate                                                             False
Ambience_romantic                                                            casual
Ambience_touristy                                                              True
Ambience_trendy                                                                 NaN
Ambience_upscale                                                               True
BYOB                                                                           True
BYOBCorkage                                                                    True
BestNights_friday                                                             False
BestNights_monday                                                               NaN
BestNights_saturday                                                           False
BestNights_sunday                                                              True
BestNights_thursday                                                             NaN
BestNights_tuesday                                                            False
                                                        ...                        
DietaryRestrictions_soy-free                                                  False
DietaryRestrictions_vegan                                                       NaN
DietaryRestrictions_vegetarian                                                  NaN
GoodForMeal_breakfast                                                          True
GoodForMeal_brunch                                                             True
GoodForMeal_dessert                                                            True
GoodForMeal_dinner                                                            False
GoodForMeal_latenight                                                         False
GoodForMeal_lunch                                                             False
HappyHour                                                                      True
Music_background_music                                                         True
Music_dj                                                                        NaN
Music_jukebox                                                                 False
Music_karaoke                                                                   NaN
Music_live                                                                    False
Music_no_music                                                                 True
Music_video                                                                   False
OutdoorSeating                                                                False
RestaurantsDelivery                                                           False
RestaurantsGoodForGroups                                                      False
Smoking                                                                       False
review_count_greater_median                                                    True
cuisine_Chinese                                                                   2
spatial_label                                                                     7
clusters_sp                                                                       0
clusters_km                                                                       1
distance_km                                                                 2.96598
clusters_gm                                                                       1
scores_isf                                                               -0.0663256
distance_knn                                                                16.2744
Name: zjvnqTjBp56NhMp1GrlO5g, dtype: object

In [ ]: