In [119]:
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
%load_ext sql
%pylab inline
from sklearn.preprocessing import scale


The sql extension is already loaded. To reload it, use:
  %reload_ext sql
Populating the interactive namespace from numpy and matplotlib

In [26]:
%sql postgresql://sharknado:sharknado@sharknado-dse.ceg3hdkdq8l0.us-east-1.rds.amazonaws.com/sharknado


Out[26]:
u'Connected: sharknado@sharknado'

In [121]:
result = %sql select * from panel_demos_11


4171 rows affected.

In [122]:
panelist_df = result.DataFrame()

In [ ]:


In [ ]:


In [ ]:


In [123]:
panelist_df.set_index(['panelist_id'], inplace=True)
panelist_df.fillna(panelist_df.mean(), inplace=True)
panelist_df = scale(panelist_df)

In [ ]:


In [ ]:

PCA


In [126]:
from sklearn.decomposition import PCA
pca = PCA(n_components=None)
pca.fit(panelist_df)


Out[126]:
PCA(copy=True, n_components=None, whiten=False)

In [127]:
cumulative_explained = np.cumsum(pca.explained_variance_ratio_) 
plt.plot(cumulative_explained);
plt.grid()



In [ ]:


In [128]:
plt.figure(1, figsize=(8, 6))
plt.clf()
plt.axes([.2, .2, .7, .7])
plt.plot(pca.explained_variance_ratio_, linewidth=2)
plt.axis('tight')
plt.xlabel('n_components')
plt.ylabel('explained_variance_')
plt.grid()



In [129]:
plt.plot(pca.components_[0,:])
plt.plot(pca.components_[1,:])
plt.plot(pca.components_[2,:])


Out[129]:
[<matplotlib.lines.Line2D at 0x111b18e10>]

In [154]:
reduced_data = PCA(n_components=3).fit_transform(panelist_df)
reduced_data.shape


Out[154]:
(4171, 3)

KMeans


In [160]:
kmeans_model = KMeans(n_clusters=1, random_state=1, init='k-means++')
kmeans_model.fit(reduced_data)


Out[160]:
KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=1, n_init=10,
    n_jobs=1, precompute_distances='auto', random_state=1, tol=0.0001,
    verbose=0)

In [161]:
pd.Series(kmeans_model.labels_).value_counts()


Out[161]:
0    4171
dtype: int64

In [165]:
fig = plt.figure(1, figsize=(8, 6))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
plt.cla()
labels = kmeans_model.labels_
ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], c=labels.astype(np.float), marker='x')
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('z')


Out[165]:
<matplotlib.text.Text at 0x114855250>

In [ ]:


In [ ]: