In [5]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import csv
%matplotlib inline
In [6]:
# Accumulator for the numeric values parsed from the CSV file.
x = list()
In [7]:
# Load column index 8 of userdata.csv into the list `x` as floats.
#
# `x` is rebuilt from scratch here so that re-running this cell does not
# append the same rows a second time.
x = []

# The csv module needs a binary-mode file on Python 2 but a text-mode file
# opened with newline='' on Python 3. `str is bytes` is only true on Python 2.
if str is bytes:
    csvf = open('userdata.csv', 'rb')
else:
    csvf = open('userdata.csv', newline='')

with csvf:
    reader = csv.reader(csvf, delimiter=',')
    # First row is the header; the default avoids StopIteration on an empty file.
    headers = next(reader, None)
    for row in reader:
        try:
            x.append(float(row[8]))
        except (ValueError, IndexError) as e:
            # Report and skip rows whose column 8 is missing (blank/short row)
            # or not parseable as a float, instead of aborting the whole load.
            print("error {0} on line {1}".format(e, row))
In [8]:
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
In [10]:
# Cluster the values collected in `x` with MeanShift and print the members of
# each resulting cluster.

# Pair every value with a constant 0 so the estimator receives a 2-D
# (n_samples, 2) array. np.column_stack works the same on Python 2 and 3
# (np.array(zip(...)) does not: zip is lazy on Python 3), and the builtin `int`
# replaces the removed `np.int` alias. The cast truncates the parsed floats
# toward zero, exactly as the original dtype did.
X = np.column_stack((x, np.zeros(len(x)))).astype(int)

# Derive the kernel bandwidth from the data, then fit MeanShift with it.
bandwidth = estimate_bandwidth(X, quantile=0.2)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)

labels = ms.labels_
cluster_centers = ms.cluster_centers_
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)

# Iterate over the labels actually present rather than assuming they form
# the contiguous range 0..n_clusters_-1.
for k in labels_unique:
    my_members = labels == k  # boolean mask selecting the rows of cluster k
    # Column 0 holds the real values; column 1 is the constant-zero padding.
    print("cluster {0}: {1}".format(k, X[my_members, 0]))
In [ ]: