In [25]:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import MiniBatchKMeans
%pylab osx


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Load the training set from a path relative to the notebook's working
# directory, using the documented top-level reader rather than reaching into
# the pandas.io.parsers module path.
train = pd.read_csv('data/train.csv')

In [3]:
# Parse the created_time column with pandas' datetime parser and store the
# parsed values back under the same column name.
created_parsed = pd.to_datetime(train['created_time'])
train['created_time'] = created_parsed

In [54]:
# Fit a 4-cluster mini-batch k-means model on the num_votes, num_comments and
# num_views columns.
# The estimator is stochastic, so the seed is pinned: without it the cluster
# labels (and therefore the plot colours) change on every run.
kmeans = MiniBatchKMeans(n_clusters=4, random_state=0)
kmeans.fit(train[['num_votes', 'num_comments', 'num_views']])

In [55]:
# Label every row with the index of the cluster the fitted model assigns it to;
# these labels drive the point colours in all four panels below.
preds = kmeans.predict(train[['num_votes', 'num_comments', 'num_views']])

fig = plt.figure()

# Top-left panel: all three features together in a 3D scatter.
ax = fig.add_subplot(221, projection='3d')
# NOTE(review): the original call was cut off after the lower bound
# (`ax.set_xlim(0, `), which made the whole cell a SyntaxError. The upper bound
# used here is the largest value in num_votes -- confirm this matches the
# range that was originally intended.
ax.set_xlim(0, train['num_votes'].max())
ax.set_xlabel('votes')
ax.set_ylabel('comments')
ax.set_zlabel('views')
ax.scatter(train['num_votes'], train['num_comments'], train['num_views'], c=preds)

# Remaining panels: one 2D scatter per pair of features.
# Each entry is (subplot position, x column, y column, x label, y label).
pairwise_panels = [
    (222, 'num_votes', 'num_comments', 'votes', 'comments'),
    (223, 'num_votes', 'num_views', 'votes', 'num_views'),
    (224, 'num_comments', 'num_views', 'num_comments', 'num_views'),
]
for position, x_col, y_col, x_label, y_label in pairwise_panels:
    ax = fig.add_subplot(position)
    ax.scatter(train[x_col], train[y_col], c=preds)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)


Out[55]:
<matplotlib.text.Text at 0x160f384d0>

In [ ]: