In [5]:
import csv
import sys

import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

%matplotlib inline

In [6]:
# Empty list that will hold the float values parsed from the CSV file.
x = list()

In [7]:
# Load field index 8 (the ninth column) of userdata.csv into `x` as floats.
#
# `x` is rebuilt from scratch here so that re-running this cell does not
# append a second copy of the data to the list.
x = []

# The csv module wants a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so pick the open() arguments for the running interpreter.
if sys.version_info[0] < 3:
    open_kwargs = {'mode': 'rb'}
else:
    open_kwargs = {'mode': 'r', 'newline': ''}

with open('userdata.csv', **open_kwargs) as csvf:
    reader = csv.reader(csvf, delimiter=',')
    # The first row is consumed as the header; the default of None keeps an
    # empty file from raising StopIteration.
    headers = next(reader, None)
    for row in reader:
        try:
            x.append(float(row[8]))
        except (ValueError, IndexError) as e:
            # Non-numeric value (ValueError) or a row with fewer than nine
            # fields (IndexError): report the row and carry on with the rest.
            print("error {0} on line {1}".format(e, row))

In [8]:
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth

In [10]:
# Cluster the values collected in `x` with mean shift and print the members
# of every cluster that was found.

# Each value is paired with a constant 0 so the estimator receives a 2-D
# (n_samples, 2) array. The zip() is wrapped in list() because on Python 3
# zip() is lazy and np.array() would otherwise build a 0-d object array.
# The builtin `int` replaces the `np.int` alias, which newer NumPy releases
# removed; as before, the cast truncates any fractional part of the values.
X = np.array(list(zip(x, np.zeros(len(x)))), dtype=int)

# Kernel bandwidth is estimated from the data itself (quantile=0.2).
bandwidth = estimate_bandwidth(X, quantile=0.2)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
labels = ms.labels_
cluster_centers = ms.cluster_centers_

labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)

# Iterate over the labels that actually occur rather than assuming they are
# exactly 0..n_clusters_-1.
for k in labels_unique:
    my_members = labels == k  # boolean mask selecting the rows of cluster k
    print("cluster {0}: {1}".format(k, X[my_members, 0]))


cluster 0: [17 17 17 17 17 17 17 15 18 21 17 22 20 20 20 20]
cluster 1: [31 31 31 31 29 31 31 29 31 27 27 30 27 27 29 27]
cluster 2: [60 63]

In [ ]: