In [25]:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import MiniBatchKMeans
%pylab osx
In [2]:
# Load the training table from the CSV file under the local data/ directory.
train = pd.read_csv('data/train.csv')
In [3]:
# Replace the raw 'created_time' column with its pandas datetime conversion.
train['created_time'] = train['created_time'].pipe(pd.to_datetime)
In [54]:
# Cluster the rows into four groups using the three engagement counts
# (votes, comments, views) as features.
# random_state is pinned because MiniBatchKMeans initialises its centres and
# draws its mini-batches randomly; without a seed the cluster labels (and the
# colours in any plot built from them) change on every run.
kmeans = MiniBatchKMeans(n_clusters=4, random_state=0)
kmeans.fit(train[['num_votes', 'num_comments', 'num_views']])
In [55]:
# Assign every row of `train` to one of the fitted clusters; the resulting
# integer labels are used as the colour of each point in all four panels.
preds = kmeans.predict(train[['num_votes', 'num_comments', 'num_views']])
fig = plt.figure()

# Panel 221: all three features together in a 3-D scatter.
ax = fig.add_subplot(221, projection='3d')
ax.scatter(train['num_votes'], train['num_comments'], train['num_views'], c=preds)
# The original cell contained a truncated `ax.set_xlim(0,` here, which left a
# parenthesis open and made the whole cell a SyntaxError. Only the lower bound
# (0) is recoverable from that fragment, so pin it and keep the autoscaled
# upper bound. This is done after scatter() so the upper bound reflects the
# data rather than the default empty-axes range.
# NOTE(review): the intended upper limit is unknown - confirm or set explicitly.
ax.set_xlim(left=0)
ax.set_xlabel('votes')
ax.set_ylabel('comments')
ax.set_zlabel('views')

# Panel 222: votes vs. comments.
ax = fig.add_subplot(222)
ax.scatter(train['num_votes'], train['num_comments'], c=preds)
ax.set_xlabel('votes')
ax.set_ylabel('comments')

# Panel 223: votes vs. views.
ax = fig.add_subplot(223)
ax.scatter(train['num_votes'], train['num_views'], c=preds)
ax.set_xlabel('votes')
ax.set_ylabel('num_views')

# Panel 224: comments vs. views.
ax = fig.add_subplot(224)
ax.scatter(train['num_comments'], train['num_views'], c=preds)
ax.set_xlabel('num_comments')
ax.set_ylabel('num_views')
Out[55]:
In [ ]: