In [3]:
import os
import csv
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from time import time
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
In [7]:
# Location of the CSV file holding the point coordinates.
filename = '../csv/Fear199localeq.csv'

# Load the first three columns as int32 into a structured array with the
# fields X, Y and Z. Missing entries are filled with 0, no header or footer
# lines are skipped, and the remaining column (brightness) is not read.
data = np.genfromtxt(filename,
                     delimiter=',',
                     usecols=(0, 1, 2),
                     names=['X', 'Y', 'Z'],
                     dtype='int32',
                     filling_values=0,
                     skip_header=0,
                     skip_footer=0)
In [8]:
# Smallest and largest value along each axis of the loaded points.
minimum_x, maximum_x = data['X'].min(), data['X'].max()
minimum_y, maximum_y = data['Y'].min(), data['Y'].max()
minimum_z, maximum_z = data['Z'].min(), data['Z'].max()

# Show the lower bound of every axis, then the number of loaded rows.
print(minimum_x)
print(minimum_y)
print(minimum_z)
print(len(data))
In [9]:
# I followed this tutorial: http://scikit-learn.org/stable/tutorial/basic/tutorial.html
In [10]:
# We have 9651 (len(data)) samples, and 3 features (X, Y, Z)
# Thus, data should be a (9651 x 3) array
In [11]:
# Pull each coordinate out of the structured array as its own 1-D array.
#
# The earlier version seeded every array with a single 0 and then inserted
# the values one by one in front of it. That left a trailing 0 in each array,
# i.e. one extra (0, 0, 0) sample, and re-allocated the array on every
# iteration. Copying the fields directly yields exactly len(data) values.
xcoord = np.array(data['X'])
ycoord = np.array(data['Y'])
zcoord = np.array(data['Z'])

print(xcoord)
print(len(xcoord))
print(ycoord)
print(len(ycoord))
print(zcoord)
print(len(zcoord))

# Transposing a 1-D array returns it unchanged, so the *T names simply refer
# to the same coordinate arrays; they are kept because later cells use them.
xcoordT, ycoordT, zcoordT = xcoord, ycoord, zcoord

print(xcoordT)
print(np.shape(xcoordT))
print(np.shape(ycoordT))
print(np.shape(zcoordT))
print(type(xcoordT))
In [12]:
# Build the sample matrix: one row per point, columns ordered X, Y, Z.
final = np.column_stack((xcoordT, ycoordT, zcoordT))
print(final)
In [13]:
print(len(final))

# Standardise the coordinate columns with sklearn's scale() before clustering.
dataSAMP = scale(final)
n_samples = dataSAMP.shape[0]
n_features = dataSAMP.shape[1]

# n_samples matches len(final); n_features is the number of columns (X, Y, Z).
print(dataSAMP)
print(n_samples)
print(n_features)
In [14]:
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print a one-line timing report.

    Parameters
    ----------
    estimator : object
        Clustering estimator exposing ``fit(data)`` and, once fitted, an
        ``inertia_`` attribute (as scikit-learn's ``KMeans`` does).
    name : str
        Label printed at the start of the report line.
    data : array-like
        Samples to fit on. The estimator is fitted on this argument rather
        than on a notebook-level variable.

    Returns
    -------
    None
        The estimator is fitted in place; the report goes to stdout.
    """
    t0 = time()
    estimator.fit(data)
    elapsed = time() - t0
    # Columns: label, fit time in seconds, final inertia (truncated to int).
    print('%-9s\t%.2fs\t%i' % (name, elapsed, estimator.inertia_))
In [15]:
# Benchmark both centroid-initialisation strategies with otherwise identical
# settings (2 clusters, 10 initialisations), then print a closing rule.
for init_method in ('k-means++', 'random'):
    bench_k_means(KMeans(init=init_method, n_clusters=2, n_init=10),
                  name=init_method, data=dataSAMP)
print('_' * 79)
In [16]:
# Model used for the plot below: 3 clusters, k-means++ seeding, n_init=10.
kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10)

# Fit on the scaled samples; `fitted` holds whatever fit() returns.
fitted = kmeans.fit(dataSAMP)
In [28]:
# Cluster centres found by the fitted model, one row per cluster.
centroids = kmeans.cluster_centers_
print(type(centroids))

#In 3D:
# Create the 3D axes through the figure so that they are registered with it.
# Constructing Axes3D(fig) directly relied on the axes attaching itself to
# the figure, which newer Matplotlib releases no longer do; the saved image
# would then contain no axes at all.
fig = plt.gcf()
ax = fig.add_subplot(111, projection='3d')

# Scaled samples as small 'x' markers.
# NOTE(review): white markers may be hard to see on a light background -
# confirm the intended colour.
ax.scatter(dataSAMP[:, 0], dataSAMP[:, 1], dataSAMP[:, 2],
           s=5, marker='x', color='white')

# Centroids drawn on top (high zorder) as larger orange circles.
ax.scatter(centroids[:, 0], centroids[:, 1], centroids[:, 2],
           marker='o', s=30, linewidths=3,
           color='orange', zorder=10)

# The image name is the CSV path with 'kmeans.png' appended, so the file is
# written next to the input data.
plt.savefig(filename + 'kmeans.png', bbox_inches='tight')

#In 2D Scatter:
#plt.plot(dataSAMP[:, 0], dataSAMP[:, 1], 'k.', markersize=2)
#plt.scatter(centroids[:, 0], centroids[:, 1],
#            marker='x', s=169, linewidths=3,
#            color='blue', zorder=10)
#plt.savefig(filename + 'kmeans.png', bbox_inches='tight')
In [ ]:
In [ ]: