notebook.community

Edit and run



In [51]:

    
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import csv

%matplotlib inline



In [52]:

    
x=[]
y=[]



In [53]:

    
with open('userdata.csv', 'rb') as csvf:
    reader = csv.reader(csvf, delimiter=',')
    headers = next(reader)
    for row in reader:
        try:
            x.append(float(row[5]))
            y.append(float(row[7]))
        except ValueError,e:
            print "error",e,"on line",row



In [55]:

    
data=[]
for i in range(0,34):
  data.append([x[i],y[i]])



In [60]:

    
plt.figure(figsize=(6,6))

plt.xlabel("cost",fontsize=14)
plt.ylabel("duration", fontsize=14)

plt.title("Before Clustering ", fontsize=20)

plt.plot(x, y, 'k.', color='#0080ff', markersize=30, alpha=0.6)

plt.show()



In [61]:

    
kmeans = KMeans(init='k-means++', n_clusters=3, n_init=10)

# kmeans = KMeans(init='random', n_clusters=3, n_init=10)

kmeans.fit(data)









    Out[61]:





KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,
    n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,
    verbose=0)



In [63]:

    
plt.figure(figsize=(6,6))

plt.xlabel("cost",fontsize=14)
plt.ylabel("duration", fontsize=14)

plt.title("After K-Means Clustering", fontsize=20)

plt.plot(x, y, 'k.', color='#ffaaaa', markersize=45, alpha=0.6)

# Plot the centroids as a blue X
centroids = kmeans.cluster_centers_

plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200,
  linewidths=3, color='b', zorder=10)
plt.show()