In [3]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
# Load the input DataFrame from a pickle file; the path is relative to the
# notebook's working directory. Later cells read its 'latitude', 'longitude'
# and 'cuisine_Chinese' columns.
# NOTE(review): unpickling can execute arbitrary code, so only load files that
# were produced by a trusted step of this project.
# NOTE(review): 'data_processeing' looks like a misspelling, but it must match
# the directory name on disk, so it is left untouched.
df = pd.read_pickle('../data_processeing/Yelp_Cuisine_Chinese.pkl')
In [41]:
# Work on an independent frame holding only the coordinates and the
# 'cuisine_Chinese' column. The explicit .copy() matters: a later cell adds a
# 'label' column to df_spatial, and assigning into a frame that pandas still
# tracks as a slice of df raises SettingWithCopyWarning.
df_spatial = df[['latitude', 'longitude', 'cuisine_Chinese']].copy()
In [ ]:
# Scratch note: 11 is the cluster count passed to KMeans in this notebook, and
# the comment line below lists 11 hex colour codes -- presumably one colour per
# cluster (TODO confirm; only '#5F4690' is actually used by a plotting cell).
11
#5F4690, #1D6996, #38A6A5, #0F8554, #73AF48, #EDAD08, #E17C05, #CC503E, #94346E, #6F4070, #666666
In [46]:
# Fit k-means with 11 clusters on the latitude/longitude columns only.
# random_state=5 seeds the estimator so that repeated runs give the same fit.
kmeans = KMeans(n_clusters=11, random_state=5).fit(df[['latitude', 'longitude']])
In [ ]:
# NOTE(review): abandoned scratch cell. Its only content was a bare `for`
# keyword, which is a SyntaxError and would stop "Restart & Run All"; the
# incomplete statement is dropped so the notebook can run from top to bottom.
In [47]:
# Attach the fitted cluster index of every row as a 'label' column.
# .assign() returns a new frame instead of writing into the existing one, so
# the cell is idempotent and cannot raise SettingWithCopyWarning even when
# df_spatial was created by slicing df.
df_spatial = df_spatial.assign(label=kmeans.labels_)
In [48]:
# Scatter plot (longitude on x, latitude on y) of the rows whose 'label' is 0.
df_spatial.loc[df_spatial['label'] == 0].plot.scatter(x='longitude', y='latitude')
Out[48]:
In [35]:
# Scratch check of the cluster indices (0..10) that the loops in this notebook
# iterate over.
range(11)
Out[35]:
In [49]:
# For each of the 11 cluster labels, print how many rows have
# 'cuisine_Chinese' equal to 2.
# NOTE(review): the meaning of the value 2 is not visible in this notebook --
# confirm against the preprocessing step that produced the pickle.
# The filter does not depend on i, so it is evaluated once before the loop.
temp = df_spatial[df_spatial['cuisine_Chinese'] == 2]
for i in range(11):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(len(temp[temp['label'] == i]))
In [45]:
# NOTE(review): this cell repeats the per-cluster tally of the preceding cell;
# its execution count (In [45]) shows it was run out of order, before the
# k-means fit that is currently in the notebook.
# For each of the 11 cluster labels, print how many rows have
# 'cuisine_Chinese' equal to 2. The filter does not depend on i, so it is
# evaluated once before the loop.
temp = df_spatial[df_spatial['cuisine_Chinese'] == 2]
for i in range(11):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(len(temp[temp['label'] == i]))
In [39]:
# Draw one scatter plot (longitude on x, latitude on y) per cluster label.
# Each figure is titled with its cluster index; without a title the eleven
# figures cannot be told apart when the notebook is skimmed.
for i in range(11):
    df_spatial[df_spatial['label'] == i].plot(
        x='longitude', y='latitude', kind='scatter', title='Cluster %d' % i)
In [29]:
# Scatter plot of every row in a single colour (longitude on x, latitude on y).
df_spatial.plot.scatter(x='longitude', y='latitude', c='#5F4690')
Out[29]:
In [26]:
# Inspect the cluster index that the fitted model assigned to each input row.
kmeans.labels_
Out[26]:
In [50]:
Out[50]:
In [52]:
# Take an independent copy of df_spatial so that the export steps below do not
# modify the frame used for plotting.
df_ = df_spatial.copy()
In [ ]:
In [54]:
# Keep only the cluster-label column for export. Selecting from df_spatial
# (rather than from df_ itself) makes the cell safe to re-run: once the column
# of df_ has been renamed by a later cell, df_[['label']] raises KeyError.
# .copy() yields an independent frame, so changing its column names afterwards
# cannot affect df_spatial.
df_ = df_spatial[['label']].copy()
In [57]:
# Rename the column to 'spatial_label' before saving. Assigning a one-element
# list requires df_ to have exactly one column at this point.
df_.columns = ['spatial_label']
In [58]:
# Write the single-column label frame to a pickle file one directory above the
# notebook's working directory.
df_.to_pickle('../spatial_labels.pkl')
Out[58]:
In [ ]: