In [1]:
import sys
sys.path.append('./../hyperAFM')
sys.path.append('./../Jessica')
sys.path.append('./../Data')
import numpy as np
from util import HyperImage, load_ibw, HyperSlice
from gen_features import get_hyper_peaks
from PCAsynthetic import makefeaturematrix, stdfeature
import FindPeaks
#for standardizing
from sklearn.preprocessing import scale
#PCA packages
from sklearn.decomposition import PCA as sklearnPCA
from matplotlib.mlab import PCA
#linear regression package
from sklearn import linear_model
#stats/math
import math
from scipy import stats
#3-D plot
from mpl_toolkits.mplot3d import Axes3D
#import image alignment
from scipy import signal, optimize
from scipy.ndimage import rotate
from skimage.measure import compare_ssim as ssim
from skimage.feature import register_translation
from scipy.ndimage import fourier_shift
from scipy.optimize import minimize
from imagealignment import *
#error metrics
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
#import hyperspectral image
hyperimage = HyperImage('./../../Desktop/20170706_0712_0726_P3HTPMMA_hyper/Film10_0026.txt')
hyper = hyperimage.hyper_image
hypertopo = hyperimage.channel_data[:,:,0]
#flatten
hypertopo = signal.detrend(hypertopo, axis=1, type="linear")
wavelengths = hyperimage.wavelength_data
#import second hyperspectral image
hyperimage1 = HyperImage('./../../Desktop/20170706_0712_0726_P3HTPMMA_hyper/Film10_0005.txt')
hyper1 = hyperimage1.hyper_image
hypertopo1 = signal.detrend(hyperimage1.channel_data[:,:,0], axis=1, type = 'linear')
wavelengths = hyperimage1.wavelength_data
#import corresponding cAFM image
cAFMimage = load_ibw('./../../Desktop/20170724_0725_0726_PMMAP3HT_cAFM/cAFM3_0000.ibw')
cAFMtopo = cAFMimage[:,:,0]
cAFMtopo_mean = cAFMtopo.mean()
#flatten
cAFMtopo = signal.detrend(cAFMtopo, axis=1, type="linear")
cAFM = cAFMimage[:,:,3]
#store cAFM values in list.
cAFMlist = cAFM.reshape((65536,))
#import second corresponding cAFM image
cAFMimage1 = load_ibw('./../../Desktop/20170724_0725_0726_PMMAP3HT_cAFM/Film10cAFM_0001.ibw')
cAFMtopo1 =signal.detrend(cAFMimage1[:,:,0], axis = 1, type = "linear")
cAFM1 = cAFMimage1[:,:,3]
cAFMlist1 = cAFM1.reshape((65536,))
#laser power spectrum
laser = np.loadtxt('./../Data/Q0148.txt', delimiter = '\t' )
In [ ]:
laser = np.flip(laser, 0)
plt.plot(laser[:,0], laser[:,1])
In [ ]:
#laser powers corresponding to 8 peaks used in PCA
list_laser = [laser[32,1], laser[74,1], laser[87,1], laser[94,1], laser[118,1], laser[153,1], laser[167,1], laser[208,1]]
In [3]:
def showsave(img, filename, cmap = 'viridis'):
fig = plt.imshow(img, cmap = cmap)
imgstd = img.std()
imgmean = img.mean()
ll = imgmean - (2*imgstd)
ul = imgmean + (2*imgstd)
plt.clim(ll,ul)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
#plt.colorbar()
plt.savefig(filename + '.png')
return
def scatter(x,y, invert = True):
plt.scatter(x, y)
if invert:
plt.gca().invert_xaxis()
In [ ]:
showsave(hypertopo, 'hypertopo')
In [ ]:
hyper1750 = HyperSlice(hyperimage, 1747, 1755)*10**6
showsave(hyper1750, '1750slice')
In [ ]:
hyper832 = HyperSlice(hyperimage,827,837)*10**6
showsave(hyper832, '832slice')
In [ ]:
import matplotlib.cm as cm
In [ ]:
#get random points on image
np.random.seed(2)
n = 10
xs = []
ys = []
for i in range(n):
x = np.random.randint(0,256)
y = np.random.randint(0,256)
xs.append(x)
ys.append(y)
#xs = [50, 55, 109, 145, 15, 199, 38, 120]
#ys = [185, 65, 244, 85, 30, 222, 140, 5]
fig, ax = plt.subplots()
cax = ax.imshow(HyperSlice(hyperimage, 1747, 1755)*10**6, cmap = 'viridis')
cax.axes.get_xaxis().set_visible(False)
cax.axes.get_yaxis().set_visible(False)
img = HyperSlice(hyperimage, 1747, 1755)*10**6
mean = img.mean()
std= img.std()
ll = mean - (2*std)
ul = mean + (2*std)
cbar = fig.colorbar(cax)
cbar.set_clim(ll,ul)
for x,y in zip(xs,ys):
ax.scatter(x,y, marker='D', s = 50)
colormap = plt.cm.gnuplot
colorst = [colormap(i) for i in np.linspace(0, 0.8,len(ax.collections))]
for t,j1 in enumerate(ax.collections):
j1.set_color(colorst[t])
fig.savefig('1750slice_pts')
In [ ]:
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
for i in range(n):
ax1.plot(hyperimage.wavelength_data, hyper[xs[i],ys[i],:]*10**6+40*i)
ax1.set_xlabel('Wavenumber (cm$^{-1}$)', fontsize=16)
ax1.set_ylabel('Intensity (a.u.)', fontsize=16)
fig1.gca().invert_xaxis()
colormap = plt.cm.gnuplot
colors = [colormap(i) for i in np.linspace(0,0.8,len(ax1.lines))]
for i, j in enumerate(ax1.lines):
j.set_color(colors[i])
fig1.savefig('ptspect')
In [ ]:
fig = plt.figure()
plt.plot(hyperimage.wavelength_data, averagespectrum*10**6, color = 'black')
plt.scatter(list_wavenumber, averagespectrum[indices]*10**6, color = 'blue')
plt.gca().invert_xaxis()
ax = plt.gca()
for tick in ax.xaxis.get_major_ticks():
tick.label1.set_fontweight('bold')
tick.label1.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label1.set_fontsize(14)
tick.label1.set_fontweight('bold')
plt.xlabel('Wavenumber cm$^{-1}$', fontsize=16, fontweight='bold')
plt.ylabel('Intensity (a.u.)', fontsize=16, fontweight='bold')
fig.savefig('avgspec')
In [ ]:
from dipy.align.imwarp import SymmetricDiffeomorphicRegistration
from dipy.align.imaffine import AffineRegistration
from dipy.align.metrics import SSDMetric, CCMetric, EMMetric
import dipy.align.imwarp as imwarp
from dipy.viz import regtools
In [ ]:
moving = cAFMtopo
static = hypertopo
dim = static.ndim
metric = SSDMetric(dim)
level_iters = [200, 100, 50, 25]
sdr = SymmetricDiffeomorphicRegistration(metric, level_iters, inv_iter = 50)
mapping = sdr.optimize(static, moving)
warped_moving = mapping.transform(moving, 'linear')
regtools.overlay_images(static, warped_moving, 'Static','Overlay','Warped moving',
'direct_warp_result.png')
In [ ]:
mapping = sdr.optimize(static,moving)
want to start with images as close to solution as possible
In [4]:
static = hypertopo
moving = cAFMtopo
In [5]:
showsave(moving, 'moving')
In [6]:
showsave(static, 'static')
In [7]:
rotated = rot(cAFMtopo, deg = -4)
cAFMrotated = rot(cAFM, deg = -4)
showsave(rotated, 'rotated')
In [8]:
import numpy as np
from dipy.viz import regtools
from dipy.align.imaffine import (AffineMap,
MutualInformationMetric,
AffineRegistration)
from dipy.align.transforms import (AffineTransform2D,
RigidTransform2D,
RotationTransform2D,
ScalingTransform2D,
TranslationTransform2D)
First, translate the images in x-y space
In [9]:
nbins = 32
sampling_prop = None
metric = MutualInformationMetric(nbins, sampling_prop)
level_iters = [10000, 1000, 100]
sigmas = [3.0, 1.0, 0.0]
factors = [4, 2, 1]
affreg = AffineRegistration(metric=metric,
level_iters=level_iters,
sigmas=sigmas,
factors=factors)
In [10]:
transform = TranslationTransform2D()
params0= None
static = hypertopo
moving = rotated
translation = affreg.optimize(static, moving, transform, params0, starting_affine=None)
transformed = translation.transform(moving)
cAFM_transformed = translation.transform(cAFMrotated)
showsave(transformed, 'translation')
Now do rigid translation
In [11]:
starting_affine = translation.affine
transform = RigidTransform2D()
rigid = affreg.optimize(static, moving, transform, params0, starting_affine = starting_affine)
transformed = rigid.transform(moving)
cAFM_transformed = rigid.transform(cAFMrotated)
showsave(transformed, 'rigid')
In [12]:
starting_affine = rigid.affine
transform = AffineTransform2D()
affine = affreg.optimize(static, moving, transform, params0, starting_affine = starting_affine)
transformed = affine.transform(moving)
cAFM_transformed = affine.transform(cAFMrotated)
showsave(transformed, 'affine2')
In [13]:
error = transformed - static
showsave(error, 'alignment_error')
In [14]:
showsave(transformed[20:245, 20:245], 'affine2_crop')
In [15]:
fig = plt.imshow(cAFM[35:245, 25:245])
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.clim(0.4e-10,1.25e-10)
plt.colorbar()
plt.savefig('cAFM_crop')
In [ ]:
fig = plt.imshow(cAFM_transformed[35:245, 25:245])
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.clim(0.4e-10,1.25e-10)
plt.colorbar()
plt.savefig('cAFM_transformedcrop')
In [ ]:
cAFMstd = cAFMtopo.std()
cAFMmean = cAFMtopo.mean()
ll = cAFMmean - (2*cAFMstd)
ul = cAFMmean + (2*cAFMstd)
fig = plt.imshow(cAFMtopo+cAFMmean)
plt.clim(ll,ul)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig('cAFMtopo.png')
In [ ]:
hyperstd = hypertopo.std()
hypermean = hypertopo.mean()
ll = hypermean - (2*hyperstd)
ul = hypermean + (2*hyperstd)
fig = plt.imshow(hypertopo)
plt.clim(ll, ul)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig('hypertopo.png')
In [ ]:
rotated = rot(cAFMtopo1, deg = 4)
cAFMrotated2 = rot(cAFM1, deg = 4)
In [ ]:
static1 = hypertopo1
moving1 = cAFMtopo1
rotated1 = rot(cAFMtopo1, deg = 4)
moving1 = rotated1
cAFMrotated1 = rot(cAFM1, deg = 4)
transform = TranslationTransform2D()
params0= None
static = hypertopo1
moving = rotated1
translation = affreg.optimize(static1, moving1, transform, params0, starting_affine=None)
transformed = translation.transform(moving1)
cAFM_transformed = translation.transform(cAFMrotated1)
starting_affine = translation.affine
transform = RigidTransform2D()
rigid = affreg.optimize(static1, moving1, transform, params0, starting_affine = starting_affine)
transformed = rigid.transform(moving1)
cAFM_transformed = rigid.transform(cAFMrotated1)
starting_affine = rigid.affine
transform = AffineTransform2D()
affine = affreg.optimize(static1, moving1, transform, params0, starting_affine = starting_affine)
transformed = affine.transform(moving1)
cAFM_transformed = affine.transform(cAFMrotated1)
showsave(transformed, 'affine2')
In [ ]:
showsave(cAFM_transformed[60:225, 45:250], 'transformed2')
In [ ]:
showsave(transformed[60:225, 45:250], 'transformed2')
In [ ]:
cAFM_transformed2 = cAFMrotated[60:225, 45:250]
showsave(hypertopo1[60:225, 45:250], hypertopo1crop)
In [ ]:
cAFM2 = cAFMtransformed
In [ ]:
offimg_shift, offimg_rot, deg_opt, cAFMrot = shiftandrotate(hypertopo, cAFMtopo, cAFM, tol=1e-14, guessang = -1)
cAFMrot_crop = offimg_rot
cAFMrot_cropstd = offimg_rot.std()
cAFMrot_cropmean = offimg_rot.mean()
cll = cAFMrot_cropmean - (2*cAFMrot_cropstd)
cul = cAFMrot_cropmean + (2*cAFMrot_cropstd)
fig=plt.imshow(offimg_rot)
plt.clim(cll, cul)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig('cAFMrot')
In [ ]:
fig=plt.imshow(hypertopo)
#plt.clim(ll,ul)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
hypertopostd = hypertopo.std()
hypertopomean = hypertopo.mean()
cll = hypertopomean - (2*hypertopostd)
cul = hypertopomean + (2*hypertopostd)
plt.savefig('hyp')
In [ ]:
#see if deg_opt returns something reasonable after having shifted/rotated already
deg_opt, error=optang(hypertopo, offimg_rot, guessang=1)
deg_opt
In [16]:
peaklocs, averagespectrum = get_hyper_peaks(hyper, threshold = 0.1)
In [17]:
indices, peaksdict = FindPeaks.FindPeaks(averagespectrum, thres=0.1, min_dist=1)
from peakutils import plot
plt.figure(figsize=(10,7))
plot.plot(hyperimage.wavelength_data,averagespectrum*10**6, peaklocs)
plt.gca().invert_xaxis()
plt.title('Average Spectrum', fontweight='bold', fontsize = 20)
plt.xlabel('Wavenumber (cm$^{-1}$)', fontweight = 'bold', fontsize = 16)
plt.ylabel('Intensity ($\mu$V)', fontweight='bold', fontsize = 16)
plt.rc('xtick.major', size=5, pad=7)
plt.savefig('AverageSpectrum.png')
list_wavenumber = []
for i in range(len(indices)):
list_wavenumber.append(hyperimage.wavelength_data[indices[i]])
for i, el in enumerate(list_wavenumber):
list_wavenumber[i] = int(el)
In [20]:
list_wavenumber
Out[20]:
List of wavenumbers identified as peaks
In [ ]:
list_laser_std = stdfeature(list_laser, axis=0)
mean1 = list_laser_std.mean(axis=0)
var1 = list_laser_std.std(axis=0)
In [18]:
#standardize feature matrix to unit variance and zero mean
featurematrix = makefeaturematrix(hyper[35:245, 25:245, :], averagespectrum)
featurematrix_std = stdfeature(featurematrix, axis = 0)
mean = featurematrix_std.mean(axis=0)
variance = featurematrix_std.std(axis=0)
In [19]:
def fullspect_ft(spectralmatrix):
xaxis = spectralmatrix.shape[0]
yaxis = spectralmatrix.shape[1]
zaxis = spectralmatrix[0,0,:].shape[0]
featurematrix = np.zeros((xaxis*yaxis, zaxis))
counter = -1
#loop over elements in spectralmatrix
for x in range(xaxis):
for y in range(yaxis):
counter +=1
for z in range(zaxis):
featurematrix[counter]=(spectralmatrix[x,y,:])
return featurematrix
specfeat = fullspect_ft(hyper[35:245, 25:245, :])
specfeat_std = stdfeature(specfeat, axis = 0)
In [ ]:
z = np.dot(specfeat_std, Skloadings.T)
zreshape = z.reshape(256,256, 8)
z0_std = zreshape[:,:,0].std()
z0_mean = zreshape[:,:,0].mean()
zll = z0_mean - (2*z0_std)
zul = z0_mean + (2*z0_std)
#first PC
plt.imshow(zreshape[:,:,0])
plt.clim(zll,zul)
In [ ]:
#define number of principal components
sklearn_pca = sklearnPCA(n_components=8)
#matrix with each sample in terms of the PCs
SkPC = sklearn_pca.fit_transform(specfeat_std)
#covariance matrix
Skcov = sklearn_pca.get_covariance()
#loadings
Skloadings = sklearn_pca.components_
#score matrix
Skscore = sklearn_pca.score_samples(specfeat_std)
#explained variance
Skvariance = sklearn_pca.explained_variance_
Skvarianceratio = sklearn_pca.explained_variance_ratio_
In [ ]:
SkPCreshape = SkPC.reshape(256,256,8)
In [ ]:
Skavg = SkPCreshape[:,:,0].mean()
Skstd = SkPCreshape[:,:,0].std()
Skll = Skavg - 2*Skstd
Skul = Skavg + 2*Skstd
plt.imshow(SkPCreshape[:,:,0])
plt.colorbar()
plt.clim(Skll, Skul)
In [ ]:
plt.scatter([0,1,2,3,4,5,6,7],Skloadings[0,:])
In [22]:
mlvariance[0:65]
Out[22]:
In [23]:
#with fullspec feat std
mlPCA = PCA(specfeat_std)
#get projections of samples into PCA space
mltrans = mlPCA.Y
#reshape
mltransreshape = mltrans.reshape((210,220,559))
mltransreshape_flat0 = signal.detrend(mltransreshape[:,:,0], axis=1, type="linear")
mlloadings = mlPCA.Wt
mlvariance = mlPCA.fracs
mlavg = mltransreshape[:,:,0].mean()
mlstd = mltransreshape[:,:,0].std()
mlll = mlavg - 2*mlstd
mlul = mlavg + 2*mlstd
fig = plt.imshow(mltransreshape[:,:,1])
plt.colorbar()
plt.clim(mlll, mlul)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig('PC1_scoreimage')
Project a different image onto the PCs derived from this image
In [ ]:
#project onto existing PCs
z = np.dot(specfeat_std, mlloadings.T)
In [ ]:
z.shape
In [ ]:
mlavg = mltransreshape[:,:,1].mean()
mlstd = mltransreshape[:,:,1].std()
mlll = mlavg - 2*mlstd
mlul = mlavg + 2*mlstd
fig = plt.imshow(mltransreshape[:,:,1])
plt.colorbar()
plt.clim(mlll, mlul)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig('PC2_scoreimage')
In [ ]:
mlavg = mltransreshape[:,:,2].mean()
mlstd = mltransreshape[:,:,2].std()
mlll = mlavg - 2*mlstd
mlul = mlavg + 2*mlstd
fig = plt.imshow(mltransreshape[:,:,2])
plt.colorbar()
plt.clim(mlll, mlul)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig('PC3_scoreimage')
Loadings plot for PC1 when using entire spectrum; interesting
Find peaks in PC1 and store into list
In [26]:
pc1indices, pc1peaks = FindPeaks.FindPeaks(mlloadings[1,:], thres=0.1, min_dist=1)
pc1_wavenumber = []
for i in range(len(pc1indices)):
pc1_wavenumber.append(hyperimage.wavelength_data[pc1indices[i]])
for i, el in enumerate(pc1_wavenumber):
pc1_wavenumber[i] = int(el)
In [27]:
#show PC1 loading 'spectra' with peaks
fig = plt.figure()
plt.plot(hyperimage.wavelength_data, mlloadings[0,:], color = 'black')
plt.scatter(pc1_wavenumber, mlloadings[0,pc1indices])
plt.gca().invert_xaxis()
ax = plt.gca()
for tick in ax.xaxis.get_major_ticks():
tick.label1.set_fontweight('bold')
tick.label1.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label1.set_fontsize(14)
tick.label1.set_fontweight('bold')
plt.xlabel('Wavenumber cm$^{-1}$', fontsize=16, fontweight='bold')
plt.ylabel('Intensity (a.u.)', fontsize=16, fontweight='bold')
fig.savefig('PC1 loading')
Find peaks for PC3, store into list
In [ ]:
pc3indices, pc3peaks = FindPeaks.FindPeaks(mlloadings[1,:], thres=0.1, min_dist=1)
pc3_wavenumber = []
for i in range(len(pc3indices)):
pc3_wavenumber.append(hyperimage.wavelength_data[pc3indices[i]])
for i, el in enumerate(pc3_wavenumber):
pc3_wavenumber[i] = int(el)
In [ ]:
#show PC2 loading 'spectra' with peaks
fig = plt.figure()
plt.plot(hyperimage.wavelength_data, mlloadings[2,:], color = 'black')
plt.scatter(pc3_wavenumber, mlloadings[2,pc3indices])
plt.scatter([1757, 1477], mlloadings[2,[59,199]], color = 'orange')
plt.gca().invert_xaxis()
ax = plt.gca()
for tick in ax.xaxis.get_major_ticks():
tick.label1.set_fontweight('bold')
tick.label1.set_fontsize(14)
for tick in ax.yaxis.get_major_ticks():
tick.label1.set_fontsize(14)
tick.label1.set_fontweight('bold')
plt.xlabel('Wavenumber cm$^{-1}$', fontsize=16, fontweight='bold')
plt.ylabel('Intensity (a.u.)', fontsize=16, fontweight='bold')
fig.savefig('PC1 loading')
In [ ]:
pc3indices
In [ ]:
pc3_wavenumber
In [ ]:
plt.scatter(hyperimage.wavelength_data, mlloadings[1,:])
plt.gca().invert_xaxis()
In [ ]:
plt.scatter(hyperimage.wavelength_data, mlloadings[2,:])
plt.gca().invert_xaxis()
In [ ]:
plt.figure(figsize=(7,5))
fig = plt.scatter(list_wavenumber , mlloadings[0,:])
plt.title('Loadings Plot for PC2', fontweight='bold', fontsize = 16)
plt.xlabel('Wavenumber (cm$^{-1}$)', fontweight = 'bold' , fontsize = 14)
plt.gca().invert_xaxis()
plt.ylabel('Loading', fontweight = 'bold', fontsize = 14)
plt.savefig('PC1loadings')
In [ ]:
sum(mlvariance[0:7])
In [28]:
regr = linear_model.LinearRegression()
#get variables in appropriate format
d = {}
training_x = []
for x in range(0, 4000):
d[x] = mltrans[x,0:63]
training_x.append(d[x])
training_x = np.stack(training_x, axis=0)
In [29]:
mltrans.shape
Out[29]:
In [30]:
test_x = []
d= {}
for x in range(0,46200):
d[x] = mltrans[x,0:63]
test_x.append(d[x])
test_x = np.stack(test_x, axis=0)
In [ ]:
#for list of cAFM values of second set of images
cAFMshiftlist = cAFM_transformed[60:225, 45:250].reshape((33825,1))
In [32]:
cAFMshiftlist = cAFM_transformed[35:245, 25:245].reshape((46200,1))
regr.fit(training_x, cAFMshiftlist[:4000])
Out[32]:
In [ ]:
#store intercepts and B values from regression
#B0 = regr.intercept_
#B0_1 = regr2.intercept_
#for i in range (1, 9):
# vars()['B'+str(i)] = regr.coef_[0,i-1]
#for i in range(1,9):
# vars()['B'+str(i)] = regr2.coef_[0,i-1]
In [33]:
cAFMpred = regr.predict(test_x)
cAFMpredimage= cAFMpred.reshape(210,220)
#cAFMpredimage = cAFMpredimage[56:252,:]
fig=plt.imshow(cAFMpredimage)
plt.colorbar()
#mean = cAFMpredimage.mean()
#std = cAFMpredimage.std()
mean = cAFM[35:245,25:245].mean()
std = cAFM[35:245,25:245].std()
plt.clim(mean-2*std, mean+2*std)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig('cAFMpredimage')
In [ ]:
fig=plt.imshow(cAFM1[60:225,45:250])
mean = cAFM1[60:225,45:250].mean()
std = cAFM1[60:225,45:250].std()
plt.clim(mean-2*std, mean+2*std)
#plt.clim(0.4e-10, 1.2e-10)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.colorbar()
plt.savefig('cAFMrealimage')
In [ ]:
error = cAFM_transformed[35:245, 25:245] - cAFMpredimage
fig = plt.imshow(error)
mean = error.mean()
std = error.std()
#plt.clim(mean-2*std, mean+2*std)
plt.colorbar()
#plt.clim(0.4e-10, 1.2e-10)
fig.axes.get_xaxis().set_visible(False)
fig.axes.get_yaxis().set_visible(False)
plt.savefig('error image')
In [ ]:
plt.plot(cAFMlist, 'o')
plt.ylim(0, 2e-10)
#plt.xlim(0, 0.1e-9)
In [ ]:
plt.subplot(311)
plt.scatter(PC1list,cAFMlist)
#plt.plot(training_x[:,0],regr2.predict(training_x),color='red',lw='3')
plt.ylabel('cAFM')
plt.xlabel('PC1')
plt.ylim(8e-15, 8e-10)
plt.subplot(312)
plt.scatter(PC2list,cAFMlist)
#plt.plot(training_x[:,1],regr2.predict(training_x),color='red',lw='3')
plt.ylabel('cAFM')
plt.xlabel('PC2')
plt.ylim(8e-20, 8e-10)
plt.subplot(313)
plt.scatter(PC3list,cAFMlist)
#plt.plot(training_x[:,2],regr2.predict(training_x),color='red')
plt.ylabel('cAFM')
plt.xlabel('PC3')
plt.ylim(8e-15, 6e-10)
plt.tight_layout()
In [ ]:
X1 = mltrans[:,0]
X2 = mltrans[:,1]
X = training_x[:,0:2]
xx1, xx2 = np.meshgrid(np.linspace(X1.min(), X1.max(), 200),
np.linspace(X2.min(), X2.max(), 200))
z = B0 + B1*xx1 + B2*xx2
fig = plt.figure(figsize=(12,8))
ax = Axes3D(fig, azim=-115, elev=15)
#hyperplane
surf = ax.plot_surface(xx1, xx2, z, cmap=plt.cm.RdBu_r)
resid = cAFMlist[5000:] - regr.predict(training_x)
newX = np.concatenate((X,resid),1)
ax.scatter(newX[:,0], newX[:,1], newX[:,2] , color='black', alpha=1.0, facecolor='white')
#ax.scatter(newX[:,0], newX[:,1], newX[:,2] < 0 , color='black', alpha=1.0)
#ax.scatter(X1, X2, cAFMlist, color='black', alpha=1.0, facecolor='white')
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_zlabel('cAFM')
In [35]:
from sklearn.cluster import KMeans
import random
In [36]:
cAFMpair = np.zeros((46200,2))
cAFMpair = np.append(cAFMshiftlist, cAFMpred, axis=1)
cAFMerr = np.zeros((46200,1))
for i in range(cAFMpair.shape[0]):
cAFMerr[i]= (cAFMpair[i,0] - cAFMpair[i,1])
In [37]:
kmeans=KMeans(n_clusters=8).fit(cAFMerr)
In [56]:
random.seed(12)
trans = kmeans.transform(cAFMerr)
y_pred = KMeans(n_clusters=8, random_state = 12).fit_predict(cAFMerr)
fig = plt.scatter(cAFMpair[:,0], cAFMpair[:,1], c= y_pred)
plt.xlim(0.38e-10, 1.2e-10)
plt.ylim(-0.5e-10, 1.5e-10)
plt.xlabel('cAFM actual')
plt.ylabel('cAFM predicted')
plt.savefig('clusterscatter')
In [104]:
cAFMerr.shape
Out[104]:
In [102]:
cAFMpair.shape
Out[102]:
In [54]:
labelimage = np.reshape(kmeans.labels_, (210,220))
fig = plt.imshow(labelimage, cmap = 'viridis')
plt.colorbar()
plt.savefig('clusterimg')
In [ ]:
def betas(regr):
"""Gets B0, B1, B2...etc from linear regression"""
ncoefs = regr.coef_.shape[1] + 1
coefs = np.zeros((1,ncoefs))
coefs[0] = regr.intercept_
for i in range(regr.coef_.shape[1]):
i += 1
coefs[0,i] = regr.coef_[0,(i-1)]
return coefs
In [ ]:
coefs = betas(regr)
B0 = coefs[0,0]
B1 = coefs[0,1]
B2 = coefs[0,2]
#B3 = coefs[0,3]
#B4 = coefs[0,4]
In [ ]:
def square(list):
return [i ** 2 for i in list]
In [ ]:
#calc training RSS, TSS, and Rsquared
start = 0 #starting index in training data
end = 4000
x = cAFMshiftlist[start:end]
y = regr.predict(training_x)
z_train = []
for i in range(end-start):
z_train.append(cAFMshiftlist[i] - regr.predict(training_x)[i])
RSS_train = sum(square(z_train))
TSS_train = sum((cAFMshiftlist[:end] - cAFMshiftlist[:end].mean())**2)
Rsquared_train = 1 - (RSS_train/TSS_train)
print(Rsquared_train)
In [ ]:
#calc test RSS, TSS, and Rsquared
start = 0 #starting index in training data
end = 33825
x = cAFMshiftlist[start:end]
y = regr.predict(test_x)
z_test = []
for i in range(33825):
z_test.append(cAFMshiftlist[i] - regr.predict(test_x)[i])
RSS_test = sum(square(z_test))
TSS_test = sum((cAFMshiftlist[start:end] - cAFMshiftlist[start:end].mean())**2)
Rsquared_test = 1- (RSS_test/TSS_test)
print(Rsquared_test)
In [ ]:
Radj_num = (1-Rsquared_test)*(33824)
Radj_den = 33825-63-1
Radj = 1 - (Radj_num/Radj_den)
print(Radj)
In [ ]:
n = 4000 #no. of obvs used to train
start_train = 0
end_train = 4000
start_test= end_train
end_test = 46200
MSE_train = mean_squared_error(cAFMshiftlist[start_train: end_train], cAFMpred[start_train:end_train])
MSE_test = mean_squared_error(cAFMshiftlist[start_test: end_test], cAFMpred[start_test:end_test])
RSE_train = (math.sqrt(RSS_train/((end_train-start_train)-2))) #RSE is an estimate of the the standard deviation in the population.
RSE_test = (math.sqrt(RSS_test/((end_test-start_test)-2)))
std_train = RSE_train**2
std_test = RSE_test**2
SEB0 = math.sqrt((std**2)*(1/n + (cAFMshiftlist[start:].mean()**2)/sum(((cAFMshiftlist[start:]-cAFMshiftlist[start:].mean())**2))))
SEB1num = std**2
SEB1den = sum((mltrans[start:end,0]-mltrans[start:end,0].mean())**2)
SEB2den = sum((mltrans[start:end,1]-mltrans[start:end,1].mean())**2)
SEB3den = sum((mltrans[start:end,2]-mltrans[start:end,2].mean())**2)
SEB4den = sum((mltrans[start:end,3]-mltrans[start:end,3].mean())**2)
SEB1 = math.sqrt(SEB1num/SEB1den)
SEB2 = math.sqrt(SEB1num/SEB2den)
SEB3 = math.sqrt(SEB1num/SEB3den)
SEB4 = math.sqrt(SEB1num/SEB4den)
tB1 = regr.coef_[0,0]/SEB1
tB2 = regr.coef_[0,1]/SEB2
#tB3 = regr.coef_[0,2]/SEB3
#tB4 = regr.coef_[0,3]/SEB4
#trying bootstrap coefs:
#tB1 = B1/SEB1
#tB2 = B2/SEB2
#tB3 = B3/SEB3
#tB4 = B4/SEB4
#calc p values:
pB1=stats.t.sf(np.abs(tB1), n-1)*2
pB2=stats.t.sf(np.abs(tB2), n-1)*2
#pB3=stats.t.sf(np.abs(tB3), n-1)*2
print("P values are:", pB1, pB2)
In [ ]:
RSE_test
In [ ]:
B0ll = B2 - 2*SEB2
B0ul = B2 + 2*SEB2
np.set_printoptions(precision=12)
B0ll, B0ul
In [ ]:
predictors = 63
Fnumerator = (TSS_test-RSS_test)/predictors
Fdenominator = RSS_test/((33825)-predictors-1)
F= Fnumerator/Fdenominator
print(F, 'the p-value generated from the F-statistic is significant at level 0.05.')
In [ ]:
1-(RSS/TSS)
In [ ]:
RSS/TSS
In [ ]:
import random
uses results from affine transformation/cropping in registration step.
In [ ]:
def bootstrap(data, yact, samplesize, iterations):
"""This function draws bootstrap data sets that will be used to regress PCE onto V_OC, E_lumo, and mass with
each data set.
It takes as parameters a dataset in the form of a pandas array, the sample size, and iterations (number
of bootstrap samples of the specified sample size.) The function will return the MSE and the
three relevant beta coefficients: B0, Bmass, Bvoc, Blumo."""
#create variables to cumulative MSE, B0, Bmass, Bvoc, and Blumo. Divide by n at the end of everything to obtain
#average
cumulativeMSE=0
cumulativeRSS=0
TSS = 0
cumulativeB0=0
cumulativeB1=0
cumulativeB2=0
cumulativeB3=0
cumulativeB4=0
#initiate tuple of lists of running averages of MSE, intercept and betas
runningaverages = ([], [], [], [], [], [], [], [], [], [])
for i in range(iterations):
#set up empty matrix with appropriate number of rows to store values for the 4 relevant variables
iDataSet=np.zeros((samplesize,6))
for j in range(samplesize):
#pick random sample
row=random.randint(0,46199)
iDataSet[j,0]=data[row,0]
iDataSet[j,1]=data[row,1]
iDataSet[j,2]=data[row,2]
iDataSet[j,3]=data[row,3]
iDataSet[j,4]=data[row,4]
iDataSet[j, 5]=yact[row]
#exit inner for loop, now that we have the data we want
#regress pce onto mass, voc, elumoalpha
LinearRegr=linear_model.LinearRegression()
#feed array into LinearRegr.fit to do multiple linear regression
LinearRegr.fit(iDataSet[:,0:4], iDataSet[:,5])
cumulativeMSE += sum((iDataSet[:,5] - LinearRegr.predict(iDataSet[:, 0:4]))**2)/(samplesize)
RSS = sum( i**2 for i in (iDataSet[:,5] - LinearRegr.predict(iDataSet[:, 0:4])))
RSE = (math.sqrt(RSS/(40-2)))
TSS = sum(i **2 for i in (iDataSet[:,5] - iDataSet[:,5].mean()))
cumulativeB0 += LinearRegr.intercept_
cumulativeB1 += LinearRegr.coef_[0]
cumulativeB2 += LinearRegr.coef_[1]
cumulativeB3 += LinearRegr.coef_[2]
cumulativeB4 += LinearRegr.coef_[3]
#take averages
runningaverages[0].append(cumulativeMSE/(i+1))
runningaverages[1].append(cumulativeB0/(i+1))
runningaverages[2].append(cumulativeB1/(i+1))
runningaverages[3].append(cumulativeB2/(i+1))
runningaverages[4].append(cumulativeB3/(i+1))
runningaverages[5].append(cumulativeB4/(i+1))
runningaverages[6].append(RSS)
runningaverages[7].append(TSS)
runningaverages[8].append(RSE)
return runningaverages
In [ ]:
In [ ]:
data=mltrans
iterations=40
boot100=bootstrap(data, cAFMshiftlist, 100, iterations)
boot1000=bootstrap(data, cAFMshiftlist, 1000, iterations)
boot5000=bootstrap(data, cAFMshiftlist, 5000, iterations)
boot10000=bootstrap(data, cAFMshiftlist, 10000, iterations)
In [ ]:
Rsquared = (np.array(boot10000[7]) - np.array(boot10000[6]))/(np.array(boot10000[7]))
num = (1-Rsquared)*9999
denom = 9999-63-1
adjRsquared = 1 - num/denom
Rsquared
In [ ]:
Fnum = (np.array(boot5000[7]) - np.array(boot5000[6]))/5
Fdenom = np.array(boot5000[6])/(5000-5-1)
F = Fnum/Fdenom
F
In [ ]:
x=range(1,41)
plt.plot(x, boot100[0], color = "orange", label="100")
plt.plot(x, boot1000[0], color = "magenta", label="1000")
plt.plot(x, boot5000[0], color = "lightgreen", label="5000")
plt.legend(title = "Sample Size")
plt.xlabel("No iterations")
plt.ylabel("MSE")
plt.title("Running Average of MSE")
In [ ]:
#make figure
plt.figure(figsize=(20,15))
x=range(1,41)
#MSE plot
plt.subplot(221)
plt.plot(x, boot100[1], color = "orange", label="100")
plt.plot(x, boot1000[1], color = "magenta", label="1000")
plt.plot(x, boot5000[1], color = "lightgreen", label="5000")
plt.legend(loc="upper right", title ="Sample Size", fontsize="14")
plt.xlabel("No. of Iterations", fontsize="18")
plt.ylabel("Intercept", fontsize="18")
plt.title("Running Average of Intercept", fontsize="20")
#B_mass plot
plt.subplot(222)
plt.plot(x, boot100[2], color = "orange", label="100")
plt.plot(x, boot1000[2], color = "magenta", label="1000")
plt.plot(x, boot5000[2], color = "lightgreen", label="5000")
plt.legend(loc="lower right", title ="Sample Size", fontsize="14")
plt.xlabel("No. of Iterations", fontsize="18")
plt.ylabel("$B_{mass}$", fontsize="18")
plt.title("Running Average of B$_{mass}$", fontsize="20")
#B_VOC plot
plt.subplot(223)
plt.plot(x, boot100[3], color = "orange", label="100")
plt.plot(x, boot1000[3], color = "magenta", label="1000")
plt.plot(x, boot5000[3], color = "lightgreen", label="5000")
plt.legend(loc="upper right", title ="Sample Size", fontsize="14")
plt.xlabel("No. of Iterations", fontsize="18")
plt.ylabel("$B_{V_{OC}}$", fontsize="18")
plt.title("Running Average of $B_{V_{oc}}$", fontsize="20")
#B_LUMO plot
plt.subplot(224)
plt.plot(x, boot100[4], color = "orange", label="100")
plt.plot(x, boot1000[4], color = "magenta", label="1000")
plt.plot(x, boot5000[4], color = "lightgreen", label="5000")
plt.legend(loc="upper right", title ="Sample Size", fontsize="14")
plt.xlabel("No. of Iterations", fontsize="18")
plt.ylabel("B$_{LUMO}$", fontsize="18")
plt.title("Running Average of $B_{LUMO}$", fontsize="20")
In [ ]:
B0 = boot5000[1][39]
B1 = boot5000[2][39]
B2 = boot5000[3][39]
B3 = boot5000[4][39]
B4 = boot5000[5][39]