In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import MiniBatchKMeans

In [2]:
train = pd.io.parsers.read_csv('data/train.csv')

In [3]:
train['created_time'] = pd.to_datetime(train['created_time'])

In [4]:
k = 4
kmeans = MiniBatchKMeans(n_clusters=k)
kmeans.fit(train[['longitude', 'latitude']])


Out[4]:
MiniBatchKMeans(batch_size=100, compute_labels=True, init='k-means++',
        init_size=None, max_iter=100, max_no_improvement=10, n_clusters=4,
        n_init=3, random_state=None, reassignment_ratio=0.01, tol=0.0,
        verbose=0)

In [5]:
preds = kmeans.predict(train[['longitude', 'latitude']])

In [10]:
plt.figure()
plt.scatter(train['longitude'], train['latitude'], c=preds)


Out[10]:
<matplotlib.collections.PathCollection at 0x10f12afd0>

In [11]:
plt.show()

In [ ]: