In [1]:
%load_ext watermark
In [18]:
# Record the execution environment for reproducibility; -p lists the packages
# whose versions are reported (see the watermark docs for the -v/-u/-d flags).
%watermark -v -u -d -p numpy,scipy,scikit-learn,matplotlib
In [25]:
import numpy as np
import sklearn as sk
In [35]:
## Load the data matrix and the three cluster-membership matrices.
## The first line of every CSV is skipped (presumably a header row).
df = np.loadtxt("sample.data/df.csv", skiprows=1, delimiter=',')
n = df.shape[0]  # number of rows in the data matrix, i.e. sample points
p1, p2, p3 = [
    np.loadtxt(memb_path, skiprows=1, delimiter=',')
    for memb_path in (
        "sample.data/p1_memb.csv",
        "sample.data/p2_memb.csv",
        "sample.data/p3_memb.csv",
    )
]
In [50]:
## Similarity of two points i, j, computed from their membership vectors
def similarity_memb(wi, wj):
    """Return the dot product of the membership vectors ``wi`` and ``wj``.

    The result is the raw inner product; it is not normalised by the
    norms of the two vectors.
    """
    return np.dot(wi, wj)
# Spot check: similarity between rows 1 and 2 of p1 (displayed as the cell output).
similarity_memb(p1[1,:], p1[2,:])
Out[50]:
In [55]:
## Group sample points into clusters: every pair of points whose combined
## membership similarity (summed over p1, p2, p3) exceeds a threshold is
## linked, and linked points end up sharing one label.
SIM_THRESHOLD = 2.1  # a pair is linked when s1 + s2 + s3 is strictly above this
UNASSIGNED = -1      # label of points never linked to any other point

pt_memb = np.repeat(UNASSIGNED, n)
nclust = 0       # number of distinct clusters currently present in pt_memb
next_label = 0   # last label handed out; only ever increases

for i in range(n):
    for j in range(i + 1, n):
        stot = (similarity_memb(p1[i, :], p1[j, :])
                + similarity_memb(p2[i, :], p2[j, :])
                + similarity_memb(p3[i, :], p3[j, :]))
        if stot > SIM_THRESHOLD:
            label_i, label_j = pt_memb[i], pt_memb[j]
            if label_i == UNASSIGNED and label_j == UNASSIGNED:
                # Start a new cluster. The label comes from next_label, not
                # from nclust: nclust shrinks when clusters merge, so reusing
                # it could hand out a label that is still in use and silently
                # fuse two unrelated clusters.
                next_label += 1
                nclust += 1
                pt_memb[i] = pt_memb[j] = next_label
            elif label_i == UNASSIGNED:
                pt_memb[i] = label_j
            elif label_j == UNASSIGNED:
                pt_memb[j] = label_i
            elif label_i != label_j:
                # Both points already belong to different clusters: merge
                # them by relabelling every member of j's cluster to i's.
                pt_memb[pt_memb == label_j] = label_i
                nclust -= 1

# Invariant: nclust equals the number of distinct labels in use.
# Labels are unique per cluster but may have gaps after merges.
assert np.unique(pt_memb[pt_memb != UNASSIGNED]).size == nclust

print(n)
print(nclust)
print(pt_memb[:])
In [ ]: