In [3]:
import os
import csv
import numpy as np
import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D
from time import time
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale

In [7]:
filename = '../csv/Fear199localeq.csv'

data = np.genfromtxt(
    filename,                              # file name
    skip_header=0,                         # lines to skip at the top
    skip_footer=0,                         # lines to skip at the bottom
    delimiter=',',                         # column delimiter
    dtype='int32',                         # data type
    filling_values=0,                      # fill missing values with 0
    usecols = (0,1,2),                     # columns to read (omit brightness)
    names=['X', 'Y', 'Z'])                 # column names

In [8]:
minimum_x = np.amin(data['X'])
maximum_x = np.amax(data['X'])

minimum_y = np.amin(data['Y'])
maximum_y = np.amax(data['Y'])

minimum_z = np.amin(data['Z'])
maximum_z = np.amax(data['Z'])

print minimum_x
print minimum_y
print minimum_z

print len(data)


0
0
27
9651

In [9]:
# I followed this tutorial: http://scikit-learn.org/stable/tutorial/basic/tutorial.html

In [10]:
# We have 9651 (len(data)) samples, and 3 features (X, Y, Z)
# Thus, data should be a (9651 x 3) array

In [11]:
xcoord = np.array([0])
ycoord = np.array([0])
zcoord = np.array([0])

for i in range(0, len(data)):
    xcoord = np.insert(xcoord, i, data['X'][i])
    ycoord = np.insert(ycoord, i, data['Y'][i])
    zcoord = np.insert(zcoord, i, data['Z'][i])
    i = i + 1
    
print xcoord
print len(xcoord)

print ycoord
print len(ycoord)

print zcoord
print len(zcoord)

xcoordT = np.transpose(xcoord)
ycoordT = np.transpose(ycoord)
zcoordT = np.transpose(zcoord)
print xcoordT
print np.shape(xcoordT)
print np.shape(ycoordT)
print np.shape(zcoordT)
print type(xcoordT)


[  0  82  85 ..., 503 523   0]
9652
[  0  91 114 ..., 143 119   0]
9652
[1216  209   60 ..., 1214  644    0]
9652
[  0  82  85 ..., 503 523   0]
(9652,)
(9652,)
(9652,)
<type 'numpy.ndarray'>

In [12]:
final = np.column_stack((xcoordT,ycoordT))
final = np.column_stack((final,zcoordT))
print final


[[   0    0 1216]
 [  82   91  209]
 [  85  114   60]
 ..., 
 [ 503  143 1214]
 [ 523  119  644]
 [   0    0    0]]

In [13]:
print len(final)
dataSAMP = scale(final)
n_samples, n_features = dataSAMP.shape

#Correct number of n_samples (9652) and n_features (3)
print dataSAMP
print n_samples
print n_features


9652
[[-4.85167244 -1.47636443  3.18641629]
 [-3.56174336 -0.81134188 -3.36715879]
 [-3.51455084 -0.64325925 -4.33685361]
 ..., 
 [ 3.06094127 -0.43132899  3.17340026]
 [ 3.37555812 -0.60671955 -0.53617054]
 [-4.85167244 -1.47636443 -4.72733475]]
9652
3
/Users/Tony/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.py:420: DataConversionWarning: Data with input dtype int64 was converted to float64 by the scale function.
  warnings.warn(msg, DataConversionWarning)

In [14]:
def bench_k_means(estimator, name, data):
    t0 = time()
    estimator.fit(dataSAMP)

In [15]:
bench_k_means(KMeans(init='k-means++', n_clusters=2, n_init=10),
              name="k-means++", data=dataSAMP)

bench_k_means(KMeans(init='random', n_clusters=2, n_init=10),
              name="random", data=dataSAMP)
print(79 * '_')


_______________________________________________________________________________

In [16]:
kmeans = KMeans(init ='k-means++', n_clusters = 3, n_init=10)
fitted = kmeans.fit(dataSAMP)

In [28]:
centroids = kmeans.cluster_centers_
print type(centroids)

#In 3D:
ax = Axes3D(plt.gcf())
ax.scatter(dataSAMP[:, 0], dataSAMP[:, 1], dataSAMP[:, 2],
          s = 5, marker = 'x', color = 'white')
ax.scatter(centroids[:, 0], centroids[:, 1], centroids[:, 2],
            marker='o', s=30, linewidths=3,
            color='orange', zorder=10)
plt.savefig(filename + 'kmeans.png', bbox_inches='tight')

#In 2D Scatter:
#plt.plot(dataSAMP[:, 0], dataSAMP[:, 1], 'k.', markersize=2)
#plt.scatter(centroids[:, 0], centroids[:, 1],
#            marker='x', s=169, linewidths=3,
#            color='blue', zorder=10)
#plt.savefig(filename + 'kmeans.png', bbox_inches='tight')


<type 'numpy.ndarray'>

In [ ]:


In [ ]: