In [21]:
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
import csv
%matplotlib inline
In [22]:
# Load two numeric columns (CSV fields 5 and 7) from userdata.csv into the
# parallel lists x and y; element i of each list comes from the same row.
x = []
y = []
# The Python 3 csv module needs a text-mode file opened with newline=''
# (binary mode makes csv.reader fail on the first row).
with open('userdata.csv', newline='') as csvf:
    reader = csv.reader(csvf, delimiter=',')
    # Consume the header row; the default keeps an empty file from raising
    # StopIteration (headers is then None).
    headers = next(reader, None)
    for row in reader:
        try:
            # Parse both fields before storing either one, so a bad value in
            # one column can never leave x and y with different lengths.
            x_val = float(row[5])
            y_val = float(row[7])
        except (ValueError, IndexError) as e:
            # Non-numeric field, or a row with too few columns (e.g. a blank
            # line): report it and skip the whole row.
            print("error", e, "on line", row)
        else:
            x.append(x_val)
            y.append(y_val)
In [23]:
# Combine the parallel x / y lists into a list of [x, y] samples, the
# 2-D layout passed to dbscan.fit() below.
# zip() pairs every parsed row instead of assuming exactly 34 of them, so the
# cell neither raises IndexError on a shorter file nor drops rows of a longer one.
data = [[x_val, y_val] for x_val, y_val in zip(x, y)]
In [41]:
# Create the DBSCAN estimator with the library's default settings.
# The former random_state argument is gone: scikit-learn deprecated it and
# later removed it from DBSCAN, so passing it raises TypeError on current
# releases.
dbscan = DBSCAN()
In [42]:
# Echo the estimator created in the previous cell so its repr is shown as the cell output.
dbscan
Out[42]:
In [43]:
# Run the clustering on the [x, y] samples collected in `data`.
dbscan.fit(data)
Out[43]:
In [55]:
# Show the per-sample labels from the fit; the plotting cell below treats
# 0, 1, 2 as clusters and -1 as noise.
dbscan.labels_
Out[55]:
In [78]:
# Scatter-plot the clustered samples, one series per DBSCAN label.
# Grouping by label (rather than one scatter call per point bound to c1..c4)
# means the legend only lists labels that actually occur, so a missing
# cluster or the absence of noise no longer raises NameError.
labels = [int(label) for label in dbscan.labels_]

# Colour / marker for the labels the figure has always styled explicitly.
known_styles = {
    0: ('r', '+'),
    1: ('g', 'o'),
    2: ('y', 'x'),
    -1: ('b', '*'),
}

# Clusters in ascending order with noise (-1) last, which keeps the legend
# order Cluster 1, Cluster 2, ..., Noise.
ordered_labels = sorted(set(labels), key=lambda label: (label == -1, label))

fig, ax = plt.subplots()
for label in ordered_labels:
    # zip() walks every fitted sample instead of a hard-coded count of 34.
    members = [point for point, assigned in zip(data, labels) if assigned == label]
    # Labels beyond the styled ones fall back to matplotlib's default colour
    # and a round marker instead of being silently left off the figure.
    color, marker = known_styles.get(label, (None, 'o'))
    ax.scatter(
        [point[0] for point in members],
        [point[1] for point in members],
        c=color,
        marker=marker,
        s=200,
        label='Noise' if label == -1 else 'Cluster %d' % (label + 1),
    )
ax.legend()

# Build the title from the actual result; with three clusters plus noise this
# is exactly 'DBSCAN finds 3 clusters and noise'.
has_noise = -1 in ordered_labels
n_clusters = len(ordered_labels) - (1 if has_noise else 0)
title = 'DBSCAN finds %d clusters' % n_clusters
if has_noise:
    title += ' and noise'
ax.set_title(title)
plt.show()
In [ ]: