In [113]:
import sys
sys.path.append('./../hyperAFM')
sys.path.append('./../Jessica')
sys.path.append('./../data')

import numpy as np
from util import HyperImage, load_ibw
from gen_features import get_hyper_peaks
from PCAsynthetic import makefeaturematrix, stdfeature
import FindPeaks 
#for standardizing 
from sklearn.preprocessing import scale

#PCA packages
from sklearn.decomposition import PCA as sklearnPCA
from matplotlib.mlab import PCA

#linear regression package
from sklearn import linear_model 

import matplotlib.pyplot as plt
%matplotlib inline

In [122]:
#import hyperspectral image 
hyperimage = HyperImage('./../../Desktop/20170706_0712_0726_P3HTPMMA_hyper/Film10_0026.txt')
hyper = hyperimage.hyper_image
wavelengths = hyperimage.wavelength_data

#import corresponding cAFM image
cAFMimage = load_ibw('./../../Desktop/20170724_0725_0726_PMMAP3HT_cAFM/cAFM3_0000.ibw')
cAFM = cAFMimage[:,:,3]
cAFMlist = cAFM.reshape((65536, 1))

In [3]:
peaklocs, averagespectrum = get_hyper_peaks(hyper, threshold = 0.1)

In [103]:
plt.plot(averagespectrum)
plt.title('Average Spectrum')
indices, peaksdict = FindPeaks.FindPeaks(averagespectrum, thres=0.1, min_dist=1)
indices


Out[103]:
array([ 62, 168, 199, 233, 294, 382, 418, 521], dtype=int64)

In [97]:
wavelengths[168]


Out[97]:
1539.3010752688199

In [15]:
#standardize feature matrix to unit variance and zero mean 
featurematrix = makefeaturematrix(hyper, averagespectrum)
featurematrix_std = stdfeature(featurematrix, axis = 0)
mean = featurematrix_std.mean(axis=0)
variance = featurematrix_std.std(axis=0)
print(mean, variance)


(array([  9.26536307e-15,   1.28552501e-14,   1.07664816e-14,
        -3.04528944e-14,  -4.60071027e-16,   1.52243838e-15,
         2.08173068e-14,  -1.79396598e-14]), array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]))

In [46]:
#define number of principal components 
sklearn_pca = sklearnPCA(n_components=8)

#matrix with each sample in terms of the PCs
SkPC = sklearn_pca.fit_transform(featurematrix_std)

#covariance matrix 
Skcov = sklearn_pca.get_covariance()

#score matrix 
Skscore = sklearn_pca.score_samples(featurematrix_std)

#loadings
Skloadings = sklearn_pca.components_

#explained variance
Skvariance = sklearn_pca.explained_variance_
Skvarianceratio = sklearn_pca.explained_variance_ratio_

In [40]:
SkPCreshape = SkPC.reshape(256,256,8)

In [110]:
plt.imshow(SkPCreshape[:,:,0])
plt.colorbar()


Out[110]:
<matplotlib.colorbar.Colorbar at 0x393a0d68>

In [87]:
plt.scatter([0,1,2,3,4,5,6,7],Skloadings[0,:])


Out[87]:
<matplotlib.collections.PathCollection at 0x38d0c400>

Matlab's PCA


In [93]:
mlPCA = PCA(featurematrix_std)
#get projections of samples into PCA space
mltrans = mlPCA.Y
#reshape
mltransreshape = mltrans.reshape((256,256,8))
mlloadings = mlPCA.Wt

plt.imshow(mltransreshape[:,:,2])
plt.colorbar()


Out[93]:
<matplotlib.colorbar.Colorbar at 0x381610f0>

In [83]:
#show contributions from each peak to each principal component

x = [0, 1, 2, 3, 4, 5, 6, 7]
fig, ax = plt.subplots(nrows=4,ncols=2)

plt.subplot(4,2,1)
plt.scatter(x, mlloadings[0,:])

plt.subplot(4,2,2)
plt.scatter(x, mlloadings[1,:])

plt.subplot(4,2,3)
plt.scatter(x, mlloadings[2,:])

plt.subplot(4,2,4)
plt.scatter(x, mlloadings[3,:])

plt.subplot(4,2,5)
plt.scatter(x, mlloadings[4,:])

plt.subplot(4,2,6)
plt.scatter(x, mlloadings[5,:])

plt.subplot(4,2,7)
plt.scatter(x, mlloadings[6,:])

plt.subplot(4,2,8)
plt.scatter(x, mlloadings[7,:])

fig.tight_layout()
plt.show()



In [130]:
regr = linear_model.LinearRegression()

In [177]:
PC1 = mltransreshape[:,:,0] 
PC1list = PC1.reshape((65536,))
PC2 = mltransreshape[:,:,1]
PC2list = PC2.reshape((65536,))
PC3 = mltransreshape[:,:,2]
PC3list = PC3.reshape((65536,))
PC4 = mltransreshape[:,:,3]
PC4list = PC4.reshape((65536,))
PC5 = mltransreshape[:,:,4]
PC5list = PC5.reshape((65536,))
PC6 = mltransreshape[:,:,5]
PC6list = PC6.reshape((65536,))

#get variables in appropriate format
d = {}
training_x = []
for x in range(55000, 65536): 
    d[x] = [PC1list[x], PC2list[x], PC3list[x], PC4list[x], PC5list[x], PC6list[x]]
    training_x.append(d[x])

In [179]:
regr.fit(training_x, cAFMlist[55000:])


Out[179]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [147]:
plt.scatter(PC1list[21845:], cAFMlist[55000:])
plt.ylim(1.0e-13, 1.0e-9)


Out[147]:
(1e-13, 1e-09)

In [149]:
PC1list.type()



AttributeErrorTraceback (most recent call last)
<ipython-input-149-e00aeb5cfe38> in <module>()
----> 1 PC1list.type()

AttributeError: 'numpy.ndarray' object has no attribute 'type'

In [175]:


In [ ]:


In [163]:
PC1list


Out[163]:
array([-1.11465546, -0.2688974 , -1.29260573, ..., -1.99052596,
       -1.5671271 , -1.62326712])

In [ ]: