In [1]:
import itertools
import csv
import numpy as np
from scipy import linalg
from scipy.stats import cumfreq
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn import mixture
from sklearn import cluster
from rpy2 import robjects as ro
from rpy2.robjects.packages import importr
# Load the R package 'MVN' through rpy2. The Python-side handle `mvn` is not
# referenced in the visible cells; the embedded R code calls hzTest() by name
# (presumably provided by this package -- confirm against the installed MVN version).
mvn = importr('MVN')
%matplotlib inline
# Seed numpy's global random number generator so that steps drawing from it
# repeat from run to run.
np.random.seed(1)
In [2]:
# Read the feature array from disk. Later cells use X.shape[0] as the row
# count, so X is treated as a 2-D (rows x features) array.
X = np.load('SynapseFeatures.npy')
# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print('data loaded')
In [3]:
# Number of k-means clusters used throughout the analysis.
N_CLUSTERS = 17

# Pin random_state so this cell yields the same clustering when it is re-run
# on its own, instead of depending on how much of numpy's global random
# stream earlier cells have already consumed.
kmeans = cluster.MiniBatchKMeans(n_clusters=N_CLUSTERS, random_state=1)

# One integer label per row of X; labels are 0-based (0 .. N_CLUSTERS - 1).
ClusterIdx = kmeans.fit_predict(X)
# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print('k-means complete')
In [4]:
# NOTE(review): this cell repeats the `importr` import and the MVN load that
# the first cell already performs. It looks like a leftover from interactive
# use -- confirm it can be dropped so all imports live in the first cell.
from rpy2.robjects.packages import importr
mvn = importr('MVN')
In [ ]:
# --- Per-cluster normality diagnostics, executed in R through rpy2 ----------
#
# For every k-means cluster this cell
#   1. writes a normal Q-Q plot of the cluster's (pooled) feature values to
#      "qq_plot_<label>.pdf", and
#   2. runs the Henze-Zirkler multivariate normality test on the cluster's rows
#      and prints the result.
# The whole-data test result `hzTestResult` is computed in a later cell; the
# per-cluster result uses its own R name so it does not clobber it.

# Hand the cluster labels to R. unlist() flattens whatever container rpy2
# produces for a Python list (presumably an R list) into a plain vector.
ro.r.assign('ClusterIdx', ClusterIdx.tolist())
ro.r('ClusterIdx <- unlist(ClusterIdx)')

# Hand the feature matrix to R as a flat vector and rebuild it there.
# np.ravel flattens row by row, hence byrow = TRUE when reshaping.
ro.r.assign('Xr', ro.FloatVector(np.ravel(X)))
nr = X.shape[0]
ro.r.assign('nr', nr)
ro.r('Xr <- matrix(Xr, nrow = nr, byrow = TRUE)')

ro.r('''
# Iterate over the labels that actually occur. The labels are 0-based, so a
# loop starting at 1 would silently skip the first cluster.
for (i in sort(unique(ClusterIdx)))
{
  # drop = FALSE keeps a matrix even when a cluster holds a single row.
  clusterRows <- Xr[ClusterIdx == i, , drop = FALSE]

  # paste0 builds "qq_plot_<i>.pdf" without the spaces paste() would insert.
  pdf(paste0("qq_plot_", i, ".pdf"))
  qqnorm(clusterRows, main = "Normal Q-Q Plot",
         xlab = "Theoretical Quantiles", ylab = "Sample Quantiles",
         plot.it = TRUE)
  qqline(clusterRows)
  dev.off()

  # Test this cluster only, not the full data set.
  hzClusterResult <- hzTest(clusterRows, qqplot = FALSE)
  print(hzClusterResult)
}
''')
In [8]:
# Run the Henze-Zirkler test on the complete matrix Xr inside R and keep the
# outcome in the R variable `hzTestResult`.
hz_whole_data_cmd = 'hzTestResult <- hzTest(Xr, qqplot = FALSE)'
ro.r(hz_whole_data_cmd)

# Pull the R result object back into Python for later inspection.
hzTestStat = ro.r('hzTestResult')
In [11]:
# Have R print the whole-data test result stored in `hzTestResult`. The call is
# left as the cell's bare last expression, so the notebook also shows its
# return value as this cell's Out[...] entry.
ro.r('print(hzTestResult)')
Out[11]:
In [19]:
# Sanity check on the R side: print the number of cluster labels and the number
# of rows in Xr. Presumably these are expected to match (one label per row) --
# verify by eye in the printed output.
ro.r('print(length(ClusterIdx))')
ro.r('print(nrow(Xr))')
Out[19]: