notebook.community

Edit and run



In [113]:

    
import sys
sys.path.append('./../hyperAFM')
sys.path.append('./../Jessica')
sys.path.append('./../data')

import numpy as np
from util import HyperImage, load_ibw
from gen_features import get_hyper_peaks
from PCAsynthetic import makefeaturematrix, stdfeature
import FindPeaks 
#for standardizing 
from sklearn.preprocessing import scale

#PCA packages
from sklearn.decomposition import PCA as sklearnPCA
from matplotlib.mlab import PCA

#linear regression package
from sklearn import linear_model 

import matplotlib.pyplot as plt
%matplotlib inline



In [122]:

    
#import hyperspectral image 
hyperimage = HyperImage('./../../Desktop/20170706_0712_0726_P3HTPMMA_hyper/Film10_0026.txt')
hyper = hyperimage.hyper_image
wavelengths = hyperimage.wavelength_data

#import corresponding cAFM image
cAFMimage = load_ibw('./../../Desktop/20170724_0725_0726_PMMAP3HT_cAFM/cAFM3_0000.ibw')
cAFM = cAFMimage[:,:,3]
cAFMlist = cAFM.reshape((65536, 1))



In [3]:

    
peaklocs, averagespectrum = get_hyper_peaks(hyper, threshold = 0.1)



In [103]:

    
plt.plot(averagespectrum)
plt.title('Average Spectrum')
indices, peaksdict = FindPeaks.FindPeaks(averagespectrum, thres=0.1, min_dist=1)
indices









    Out[103]:





array([ 62, 168, 199, 233, 294, 382, 418, 521], dtype=int64)



In [97]:

    
wavelengths[168]









    Out[97]:





1539.3010752688199



In [15]:

    
#standardize feature matrix to unit variance and zero mean 
featurematrix = makefeaturematrix(hyper, averagespectrum)
featurematrix_std = stdfeature(featurematrix, axis = 0)
mean = featurematrix_std.mean(axis=0)
variance = featurematrix_std.std(axis=0)
print(mean, variance)









    



(array([  9.26536307e-15,   1.28552501e-14,   1.07664816e-14,
        -3.04528944e-14,  -4.60071027e-16,   1.52243838e-15,
         2.08173068e-14,  -1.79396598e-14]), array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]))



In [46]:

    
#define number of principal components 
sklearn_pca = sklearnPCA(n_components=8)

#matrix with each sample in terms of the PCs
SkPC = sklearn_pca.fit_transform(featurematrix_std)

#covariance matrix 
Skcov = sklearn_pca.get_covariance()

#score matrix 
Skscore = sklearn_pca.score_samples(featurematrix_std)

#loadings
Skloadings = sklearn_pca.components_

#explained variance
Skvariance = sklearn_pca.explained_variance_
Skvarianceratio = sklearn_pca.explained_variance_ratio_



In [40]:

    
SkPCreshape = SkPC.reshape(256,256,8)



In [110]:

    
plt.imshow(SkPCreshape[:,:,0])
plt.colorbar()









    Out[110]:





<matplotlib.colorbar.Colorbar at 0x393a0d68>



In [87]:

    
plt.scatter([0,1,2,3,4,5,6,7],Skloadings[0,:])









    Out[87]:





<matplotlib.collections.PathCollection at 0x38d0c400>

Matlab's PCA



In [93]:

    
mlPCA = PCA(featurematrix_std)
#get projections of samples into PCA space
mltrans = mlPCA.Y
#reshape
mltransreshape = mltrans.reshape((256,256,8))
mlloadings = mlPCA.Wt

plt.imshow(mltransreshape[:,:,2])
plt.colorbar()









    Out[93]:





<matplotlib.colorbar.Colorbar at 0x381610f0>



In [83]:

    
#show contributions from each peak to each principal component

x = [0, 1, 2, 3, 4, 5, 6, 7]
fig, ax = plt.subplots(nrows=4,ncols=2)

plt.subplot(4,2,1)
plt.scatter(x, mlloadings[0,:])

plt.subplot(4,2,2)
plt.scatter(x, mlloadings[1,:])

plt.subplot(4,2,3)
plt.scatter(x, mlloadings[2,:])

plt.subplot(4,2,4)
plt.scatter(x, mlloadings[3,:])

plt.subplot(4,2,5)
plt.scatter(x, mlloadings[4,:])

plt.subplot(4,2,6)
plt.scatter(x, mlloadings[5,:])

plt.subplot(4,2,7)
plt.scatter(x, mlloadings[6,:])

plt.subplot(4,2,8)
plt.scatter(x, mlloadings[7,:])

fig.tight_layout()
plt.show()



In [130]:

    
regr = linear_model.LinearRegression()



In [177]:

    
PC1 = mltransreshape[:,:,0] 
PC1list = PC1.reshape((65536,))
PC2 = mltransreshape[:,:,1]
PC2list = PC2.reshape((65536,))
PC3 = mltransreshape[:,:,2]
PC3list = PC3.reshape((65536,))
PC4 = mltransreshape[:,:,3]
PC4list = PC4.reshape((65536,))
PC5 = mltransreshape[:,:,4]
PC5list = PC5.reshape((65536,))
PC6 = mltransreshape[:,:,5]
PC6list = PC6.reshape((65536,))

#get variables in appropriate format
d = {}
training_x = []
for x in range(55000, 65536): 
    d[x] = [PC1list[x], PC2list[x], PC3list[x], PC4list[x], PC5list[x], PC6list[x]]
    training_x.append(d[x])



In [179]:

    
regr.fit(training_x, cAFMlist[55000:])









    Out[179]:





LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)



In [147]:

    
plt.scatter(PC1list[21845:], cAFMlist[55000:])
plt.ylim(1.0e-13, 1.0e-9)









    Out[147]:





(1e-13, 1e-09)



In [149]:

    
PC1list.type()









    




AttributeErrorTraceback (most recent call last)
<ipython-input-149-e00aeb5cfe38> in <module>()
----> 1 PC1list.type()

AttributeError: 'numpy.ndarray' object has no attribute 'type'



In [175]:



In [ ]:



In [163]:

    
PC1list









    Out[163]:





array([-1.11465546, -0.2688974 , -1.29260573, ..., -1.99052596,
       -1.5671271 , -1.62326712])



In [ ]: