In [51]:
import csv
import sys

import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

%matplotlib inline
In [52]:
# Accumulators for the two numeric CSV columns that are plotted and clustered:
# x receives column 5 and y receives column 7 of each data row.
x, y = [], []
In [53]:
# Load columns 5 and 7 of userdata.csv into the x and y lists.
#
# The csv module needs a binary handle on Python 2 but a text handle opened
# with newline='' on Python 3, so choose the open() arguments by interpreter.
if sys.version_info[0] < 3:
    open_kwargs = {'mode': 'rb'}
else:
    open_kwargs = {'mode': 'r', 'newline': ''}

with open('userdata.csv', **open_kwargs) as csvf:
    reader = csv.reader(csvf, delimiter=',')
    # First row is the header; the default keeps an empty file from raising StopIteration.
    headers = next(reader, None)
    for row in reader:
        try:
            # Parse BOTH fields before appending either one, so a bad value in
            # column 7 can never leave x one element longer than y.
            x_value = float(row[5])
            y_value = float(row[7])
        except (ValueError, IndexError) as e:
            # Non-numeric or too-short rows are reported and skipped.
            print("error {} on line {}".format(e, row))
            continue
        x.append(x_value)
        y.append(y_value)
In [55]:
# Build the [x, y] feature rows passed to KMeans.
# zip() pairs every loaded point, instead of assuming exactly 34 rows exist:
# the fixed range raised IndexError on shorter input and ignored extra rows.
data = [[x_value, y_value] for x_value, y_value in zip(x, y)]
In [60]:
# Plot of the raw (x, y) points before any clustering is applied.
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_xlabel("cost", fontsize=14)
ax.set_ylabel("duration", fontsize=14)
ax.set_title("Before Clustering ", fontsize=20)
# Marker-only format string: the previous 'k.' also requested black, which the
# explicit color= keyword overrode (matplotlib warns about the redundancy).
ax.plot(x, y, '.', color='#0080ff', markersize=30, alpha=0.6)
plt.show()
In [61]:
# Fit a 3-cluster K-Means model on the [x, y] rows in `data`.
# A fixed seed makes the centroid initialisation, and so the fitted clusters,
# reproducible when the notebook is re-run on the same data.
RANDOM_STATE = 42
kmeans = KMeans(init='k-means++', n_clusters=3, n_init=10,
                random_state=RANDOM_STATE)
# Alternative initialisation strategy:
# kmeans = KMeans(init='random', n_clusters=3, n_init=10, random_state=RANDOM_STATE)
kmeans.fit(data)
Out[61]:
In [63]:
# Plot of the same (x, y) points with the fitted K-Means centroids overlaid.
fig, ax = plt.subplots(figsize=(6, 6))
ax.set_xlabel("cost", fontsize=14)
ax.set_ylabel("duration", fontsize=14)
ax.set_title("After K-Means Clustering", fontsize=20)
# Marker-only format string: the previous 'k.' also requested black, which the
# explicit color= keyword overrode (matplotlib warns about the redundancy).
ax.plot(x, y, '.', color='#ffaaaa', markersize=45, alpha=0.6)
# Plot the centroids as a blue X. Column 0 / column 1 of cluster_centers_
# follow the [x, y] order of the rows the model was fitted on.
centroids = kmeans.cluster_centers_
ax.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200,
           linewidths=3, color='b', zorder=10)
plt.show()