In [1]:
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import pandas as pd
import numpy as np
import random
In [2]:
# Seed the generator so that a fresh "Restart & Run All" reproduces exactly
# the same synthetic points (and therefore the same clustering result).
random.seed(42)

# Coordinates are kept in two parallel lists: point j is (z1[j], z2[j]).
z1 = []  # x-coordinates
z2 = []  # y-coordinates

# Initial centroid guesses, each stored as [x, y].
k1 = [55, 15]
k2 = [15, 35]

# Every pass adds two points, so the lists end up with 200 entries each:
#   even index -> x in [40, 70], y in [25, 45]
#   odd index  -> x in [0, 30],  y in [0, 20]
for i in range(100):
    z1.append(random.randint(40, 70))
    z1.append(random.randint(0, 30))
    z2.append(random.randint(25, 45))
    z2.append(random.randint(0, 20))
In [3]:
# Draw every data point in green, then overlay the two starting centroids
# as 'x' markers (k1 in red, k2 in cyan) before any clustering has run.
plt.scatter(z1, z2, color='g')
for centroid, colour in ((k1, 'r'), (k2, 'c')):
    plt.scatter(centroid[0], centroid[1], color=colour, marker='x')
plt.show()
In [4]:
def kmeanscluster(z1, z2, k1, k2, iterate):
    """Refine two centroids by repeatedly applying ``clusteralgo``.

    Parameters
    ----------
    z1, z2 : sequences
        Parallel x- and y-coordinates; they are passed through unchanged to
        ``clusteralgo`` on every iteration.
    k1, k2 : sequences
        Starting centroids, each indexable as ``[x, y]``. They are copied
        up front and are never modified by this function.
    iterate : int
        Number of times ``clusteralgo`` is applied.

    Returns
    -------
    tuple
        ``(new_k1, new_k2)`` - the centroids after the last iteration, or
        copies of the starting centroids when ``iterate`` is 0.
    """
    # Work on copies: the per-step routine may write into the lists it is
    # given, and without the copy that would overwrite the caller's k1/k2
    # and make re-running the calling cell give different results.
    new_k1 = list(k1)
    new_k2 = list(k2)
    for _ in range(iterate):
        new_k1, new_k2 = clusteralgo(z1, z2, new_k1, new_k2)
    return (new_k1, new_k2)
In [5]:
def clusteralgo(z1, z2, new_k1, new_k2):
    """Perform one assignment/update step of 2-centroid k-means and plot it.

    Each point ``(z1[j], z2[j])`` is assigned to the first centroid when it
    is strictly closer (Euclidean distance) to ``new_k1`` than to ``new_k2``;
    ties and all other points go to the second centroid. Each centroid is
    then moved to the mean of the points assigned to it.

    Parameters
    ----------
    z1, z2 : sequences of numbers
        Parallel x- and y-coordinates of the data points.
    new_k1, new_k2 : sequences
        Current centroids, each indexable as ``[x, y]``. They are read only;
        the caller's objects are not modified.

    Returns
    -------
    list
        ``[updated_k1, updated_k2]`` where each entry is a new ``[x, y]``
        list of floats. A centroid whose cluster received no points keeps
        its current position instead of triggering a division by zero.

    Side effects
    ------------
    Draws the incoming centroids as 'x' markers and the points coloured by
    their assignment (red for the first cluster, cyan for the second), then
    calls ``plt.show()``.
    """
    points = np.column_stack((z1, z2)).astype(float)
    centre_1 = np.asarray(new_k1, dtype=float)
    centre_2 = np.asarray(new_k2, dtype=float)

    # Assignment step, computed for all points at once. The strict "<" means
    # a point that is equidistant from both centroids joins the second cluster.
    dist_1 = np.linalg.norm(points - centre_1, axis=1)
    dist_2 = np.linalg.norm(points - centre_2, axis=1)
    in_first = dist_1 < dist_2
    cluster_1 = points[in_first]
    cluster_2 = points[~in_first]

    # The markers show the centroids that were used for this assignment,
    # i.e. their positions before the update below.
    plt.scatter(centre_1[0], centre_1[1], color='r', marker='x')
    plt.scatter(centre_2[0], centre_2[1], color='c', marker='x')
    plt.scatter(cluster_1[:, 0], cluster_1[:, 1], color='r')
    plt.scatter(cluster_2[:, 0], cluster_2[:, 1], color='c')
    plt.show()

    # Update step: build fresh lists so the caller's centroids stay intact,
    # and leave a centroid where it is when no point was assigned to it.
    if len(cluster_1):
        updated_k1 = cluster_1.mean(axis=0).tolist()
    else:
        updated_k1 = centre_1.tolist()
    if len(cluster_2):
        updated_k2 = cluster_2.mean(axis=0).tolist()
    else:
        updated_k2 = centre_2.tolist()
    return [updated_k1, updated_k2]
In [6]:
# Run five refinement iterations from the initial guesses k1/k2; b and m
# receive the first and second centroid returned by kmeanscluster.
b, m = kmeanscluster(z1, z2, k1, k2, 5)
In [ ]: