This notebook was put together by [Wesley Beckner](http://wesleybeckner.github.io/).
In [1]:
import numpy as np
import scipy.io
import scipy.optimize
from scipy import stats
import matplotlib.pyplot as plt
from matplotlib import gridspec
import pandas
#import magni
import math
from PIL import Image
#import seaborn as sns; sns.set()
from sklearn.model_selection import train_test_split  # cross_validation was renamed model_selection in sklearn 0.18
from sklearn import metrics
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
%matplotlib inline
def myround(x, base):
    return float(base) * round(float(x) / float(base))
params = {
    'lines.markersize': 3,
    'axes.labelsize': 20,
    'font.size': 20,
    'legend.fontsize': 10,
    'xtick.labelsize': 30,
    'ytick.labelsize': 30,
    'text.usetex': False,
}
plt.rcParams.update(params)
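As a quick sanity check, myround snaps a value to the nearest multiple of base:
print(myround(0.1087, 0.001))  # 0.109
print(myround(7.3, 0.5))       # 7.5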
We would like to use random forests, a kind of non-parametric supervised learning algorithm, to predict the photoluminescence (PL) of a material given some AFM data.
Working notes:
CPD (light) minus CPD (dark) is the photocurrent voltage.
Maybe do not include CPD at all.
Focus on the technique; this is a useful demonstration.
Sarah is working on a subsequent paper with an actual application of the technique.
Can we predict PL from signals that are not diffraction limited, especially at the grain boundaries?
Discuss the important features uncovered by the regression.
A chemical-mapping microscope might correlate with PL.
Data from Sarah plus dark-current pcAFM.
Check out the Oak Ridge papers on data science + AFM (Sergei Kalinin, ORNL).
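In outline, each pixel is treated as one sample: the four AFM channels (height, potential, phase, amplitude) supply the feature vector and the PL value at that pixel is the regression target. A minimal sketch of that setup, using the array names loaded in the cells below:
# sketch: pixel-wise regression of PL on the four AFM channels
# Xpix = np.stack([Ht1.ravel(), Po1.ravel(), Ph1.ravel(), Am1.ravel()], axis=1)
# ypix = Pl1.ravel()
# RandomForestRegressor(n_estimators=10).fit(Xpix, ypix)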
In [3]:
Ht1 = np.loadtxt('Ht.txt',skiprows=0, dtype=np.float64)
Po1 = np.loadtxt('Po.txt',skiprows=0, dtype=np.float64)
Ph1 = np.loadtxt('Ph.txt',skiprows=0, dtype=np.float64)
Am1 = np.loadtxt('Am.txt',skiprows=0, dtype=np.float64)
Pl1 = np.loadtxt('Pl.txt',skiprows=0, dtype=np.float64)
fig = plt.figure(figsize=(18,8))
ht_ax = fig.add_subplot(241)
po_ax = fig.add_subplot(242)
ph_ax = fig.add_subplot(245)
am_ax = fig.add_subplot(246)
pl_ax = fig.add_subplot(122)
ht_ax.imshow(Ht1, cmap='viridis')#cmap='afmhot'
ht_ax.set_title('Height')
ht_ax.axis('off')
po_ax.imshow(Po1, cmap='viridis')#, cmap='cubehelix'
po_ax.set_title('Potential')
po_ax.axis('off')
ph_ax.imshow(Ph1, cmap='viridis')
ph_ax.set_title('Phase')
ph_ax.axis('off')
am_ax.imshow(Am1, cmap='viridis')
am_ax.set_title('Amplitude')
am_ax.axis('off')
pl_ax.imshow(Pl1, cmap='viridis')
pl_ax.set_title('Photoluminescence')
pl_ax.axis('off')
# flatten the images
Ht1_flat = Ht1.flatten()
Po1_flat = Po1.flatten()
Ph1_flat = Ph1.flatten()
Am1_flat = Am1.flatten()
Pl1_flat = Pl1.flatten()
plt.show()
In [2]:
Ht2 = np.loadtxt('MABr.1.Ht.txt',skiprows=0, dtype=np.float64)
Po2 = np.loadtxt('MABr.1.Po.txt',skiprows=0, dtype=np.float64)
Ph2 = np.loadtxt('MABr.1.Ph.txt',skiprows=0, dtype=np.float64)
Am2 = np.loadtxt('MABr.1.Am.txt',skiprows=0, dtype=np.float64)
Pl2 = np.loadtxt('MABr.1.Pl.txt',skiprows=0, dtype=np.float64)
fig = plt.figure(figsize=(18,8))
ht_ax = fig.add_subplot(241)
po_ax = fig.add_subplot(242)
ph_ax = fig.add_subplot(245)
am_ax = fig.add_subplot(246)
pl_ax = fig.add_subplot(122)
ht_ax.imshow(Ht2, cmap='viridis')#cmap='afmhot'
ht_ax.set_title('Height')
ht_ax.axis('off')
po_ax.imshow(Po2, cmap='viridis')#, cmap='cubehelix'
po_ax.set_title('Potential')
po_ax.axis('off')
ph_ax.imshow(Ph2, cmap='viridis')
ph_ax.set_title('Phase')
ph_ax.axis('off')
am_ax.imshow(Am2, cmap='viridis')
am_ax.set_title('Amplitude')
am_ax.axis('off')
pl_ax.imshow(Pl2, cmap='viridis')
pl_ax.set_title('Photoluminescence')
pl_ax.axis('off')
# flatten the images
Ht2_flat = Ht2.flatten()
Po2_flat = Po2.flatten()
Ph2_flat = Ph2.flatten()
Am2_flat = Am2.flatten()
Pl2_flat = Pl2.flatten()
plt.show()
In [3]:
X = [Ht2_flat, Po2_flat, Ph2_flat, Am2_flat]
X = np.array(X).T
Y = np.array(Pl2_flat).T
print(X.shape)
print(Y.shape)
In [4]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X, Y, random_state=0)
clf = DecisionTreeRegressor(max_depth=11)
clf.fit(Xtrain, ytrain)
ypred = clf.predict(Xtest)
In [5]:
metrics.mean_squared_error(ytest, ypred)
Out[5]:
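To put this error in context, a useful baseline is to always predict the training-set mean; the fitted tree should beat it comfortably. A quick check on the same split:
# naive baseline: predict the mean PL of the training set everywhere
baseline = np.full_like(ytest, ytrain.mean())
print(metrics.mean_squared_error(ytest, baseline))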
In [7]:
Xtrain = np.array([Ht2_flat[0:31625], Po2_flat[0:31625], Ph2_flat[0:31625], Am2_flat[0:31625]]).T
Xtest = np.array([Ht2_flat[31625:], Po2_flat[31625:], Ph2_flat[31625:], Am2_flat[31625:]]).T
Ytrain = np.array(Pl2_flat[0:31625])
Ytest = np.array(Pl2_flat[31625:])
clf = DecisionTreeRegressor(max_depth=11)
clf.fit(Xtrain, Ytrain)
Ypred = clf.predict(Xtest)
print(metrics.mean_squared_error(Ytest, Ypred))
x = Ht2.shape[0]
y = Ht2.shape[1]
k = 0
merge = np.concatenate((Ytrain, Ypred))
Pl_predict = np.zeros((x, y))
for i in range(x):
    for j in range(y):
        Pl_predict[i, j] = merge[k]
        k = k + 1
fig = plt.figure(figsize=(18,12))
pl_ax = fig.add_subplot(121)
pl_ax.imshow(Pl_predict, cmap='viridis')
pl_ax.set_title('Photoluminescence')
pl_ax.axis('off')
pl_ax = fig.add_subplot(122)
cax = pl_ax.imshow(Pl2, cmap='viridis')
pl_ax.set_title('Photoluminescence')
pl_ax.axis('off')
fig.colorbar(cax)
Out[7]:
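Since the notes above ask which inputs the regression finds important, it is worth reading the fitted tree's feature importances; a sketch, with channel names matching the column order of Xtrain:
# relative importance of each input channel in the fitted tree
for name, imp in zip(['Height', 'Potential', 'Phase', 'Amplitude'],
                     clf.feature_importances_):
    print(name, imp)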
In [11]:
Xtrain = np.array([Ht2_flat[0:31625], Po2_flat[0:31625], Ph2_flat[0:31625], Am2_flat[0:31625]]).T
Xtest = np.array([Ht2_flat[31625:], Po2_flat[31625:], Ph2_flat[31625:], Am2_flat[31625:]]).T
Ytrain = np.array(Pl2_flat[0:31625])
Ytest = np.array(Pl2_flat[31625:])
depths = 15
trees = 15
x = Ht2.shape[0] // 2 * trees
y = Ht2.shape[1]
k = 0
prediction = []
scores = []
for q in range(1, depths + 1):
    for r in range(1, trees + 1):
        clf = RandomForestRegressor(max_depth=q, n_estimators=r, bootstrap=True)
        clf.fit(Xtrain, Ytrain)
        hold = clf.predict(Xtest)
        scores.append(metrics.mean_squared_error(Ytest, hold))
        prediction.append(hold)
        k = k + 1
k = 0
merge = np.array(prediction).flatten()
Pl_predict = np.zeros((x, y * depths))
for l in range(depths):
    for i in range(x):
        for j in range(y):
            Pl_predict[i, j + (l * y)] = merge[k]
            k = k + 1
fig = plt.figure(figsize=(20, 10))
pl_ax = fig.add_subplot(3, 1, (1, 2))
pl_ax.imshow(Pl_predict, cmap='viridis')  # seismic
pl_ax.set_title('Photoluminescence Predictions', size=40)
pl_ax.set_ylabel(r'$\longleftarrow$ Trees', size=30)
pl_ax.set_xlabel(r'Depth $\longrightarrow$', size=30)
pl_ax.axes.get_xaxis().set_ticks([])
pl_ax.axes.get_yaxis().set_ticks([])
pl_ax2 = fig.add_subplot(4, 1, 4)
pl_ax2.set_title('Actual', size=40)
pl_ax2.imshow(Pl2[Pl2.shape[0] // 2:, :], cmap='viridis')
pl_ax2.axes.get_xaxis().set_ticks([])
pl_ax2.axes.get_yaxis().set_ticks([])
fig.savefig('afm_depth', bbox_inches='tight', dpi=900)
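The sweep stores one MSE per (depth, trees) pair in scores, row-major over depth then trees, so the best setting can be recovered with argmin; a sketch:
# locate the (max_depth, n_estimators) pair with the lowest test MSE
best = int(np.argmin(scores))
print('depth:', best // trees + 1,
      'trees:', best % trees + 1,
      'MSE:', scores[best])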
In [12]:
k = 0
merge = np.array(prediction).flatten()
Pl_predict = np.zeros((x, y * depths))
for l in range(depths):
    for i in range(trees):
        for a in range(125):
            for j in range(y):
                Pl_predict[a + (125 * i), j + (l * y)] = scores[k]
        k = k + 1
fig = plt.figure(figsize=(20, 10))
fig.clf()
pl_ax = fig.add_subplot(3, 1, (1, 2))
cax = pl_ax.imshow(Pl_predict, interpolation='nearest', cmap='viridis', vmin=0.109, vmax=0.152)  # seismic
fig.colorbar(cax)
pl_ax.set_title('Photoluminescence Prediction Error', size=40)
pl_ax.set_ylabel(r'$\longleftarrow$ Trees', size=30)
pl_ax.set_xlabel(r'Depth $\longrightarrow$', size=30)
pl_ax.axes.get_xaxis().set_ticks([])
pl_ax.axes.get_yaxis().set_ticks([])
fig.savefig('afm_depth_score', bbox_inches='tight')
In [12]:
x = Ht2.shape[0] // 2 * trees
y = Ht2.shape[1]
k = 0
merge = np.array(prediction).flatten()
Pl_predict = np.zeros((x, y * depths))
for l in range(depths):
    for i in range(x):
        for j in range(y):
            Pl_predict[i, j + (l * y)] = merge[k]
            k = k + 1
fig = plt.figure(figsize=(30, 10))
pl_ax = fig.add_subplot(6, 4, (1, 16))
pl_ax.imshow(Pl_predict, cmap='seismic')
pl_ax.set_title('Photoluminescence Predictions', size=40)
pl_ax.set_ylabel(r'$\longleftarrow$ Trees', size=30)
pl_ax.set_xlabel(r'Depth $\longrightarrow$', size=30)
pl_ax.axes.get_xaxis().set_ticks([])
pl_ax.axes.get_yaxis().set_ticks([])
pl_ax2 = fig.add_subplot(6, 4, (21, 24))
pl_ax2.set_title('Photoluminescence Actual', size=40)
pl_ax2.imshow(Pl2[Pl2.shape[0] // 2:, :], cmap='seismic')
pl_ax2.axes.get_xaxis().set_ticks([])
pl_ax2.axes.get_yaxis().set_ticks([])
fig.savefig('afm_depth', bbox_inches='tight', dpi=300)
In [8]:
### User-specified parameters
inputs = [Ht2, Po2, Ph2, Am2]
x7x7 = [-3, -2, -1, 0, 1, 2, 3]
x5x5 = [-2, -1, 0, 1, 2]
x3x3 = [-1, 0, 1]
scores = [0.11, 0.108, 0.105]
stuff = [x3x3, x5x5, x7x7]
morestuff = ['3x3', '5x5', '7x7']
depths = 1
trees = 1
### Create training and testing arrays
x = Po2.shape[0] // 2
x2 = Po2.shape[0]
y = Po2.shape[1]
fig = plt.figure(figsize=(10, 10))
for wes in range(3):
    pixelContext = stuff[wes]
    Pl_predict = np.load('%s.npy' % (morestuff[wes]))
    if wes == 2:
        pl_ax.text(-130, -50, 'Predictions (array, error)', size=30)
        pl_ax.text(-130, 280, r'Larger feature vector, lower error $\longrightarrow$')
    pl_ax = fig.add_subplot(1, 4, (wes + 1))
    pl_ax.imshow(Pl_predict.T, cmap='viridis')
    pl_ax.axes.get_xaxis().set_ticks([])
    pl_ax.axes.get_yaxis().set_ticks([])
    pl_ax.set_title('%s, %s' % (morestuff[wes], scores[wes]), size=24)
pl_ax2 = fig.add_subplot(1, 4, 4)
pl_ax2.set_title('Actual', size=30).set_position([.5, 1.1])
pl_ax2.imshow(Pl2[Pl2.shape[0] // 2:, :].T, cmap='viridis')
pl_ax2.axes.get_xaxis().set_ticks([])
pl_ax2.axes.get_yaxis().set_ticks([])
fig.savefig('vector_variation_small_multiple', bbox_inches='tight')
In [9]:
### User-specified parameters
inputs = [Ht2, Po2, Ph2, Am2]
x7x7 = [-3, -2, -1, 0, 1, 2, 3]
x5x5 = [-2, -1, 0, 1, 2]
x3x3 = [-1, 0, 1]
stuff = [x3x3, x5x5, x7x7]
morestuff = ['3x3', '5x5', '7x7']
depths = 1
trees = 1
### Create training and testing arrays
x = Po2.shape[0] // 2
x2 = Po2.shape[0]
y = Po2.shape[1]
fig = plt.figure(figsize=(10, 10))
for wes in range(3):
    pixelContext = stuff[wes]
    Xtrain = np.zeros(((y - (max(pixelContext) * 2)) * (x - max(pixelContext)),
                       (len(pixelContext) * len(pixelContext) * len(inputs))))
    k = 0
    for p in range(max(pixelContext), x):
        for q in range(max(pixelContext), y - max(pixelContext)):
            j = 0
            for h, i in enumerate(inputs):
                for l in pixelContext:
                    for m in pixelContext:
                        Xtrain[k, j] = i[(p + l), (q + m)]
                        j = j + 1
            k = k + 1
    Xtest = np.zeros(((y - (max(pixelContext) * 2)) * (x - max(pixelContext)),
                      (len(pixelContext) * len(pixelContext) * len(inputs))))
    k = 0
    for p in range(x, x2 - max(pixelContext)):
        for q in range(max(pixelContext), y - max(pixelContext)):
            j = 0
            for h, i in enumerate(inputs):
                for l in pixelContext:
                    for m in pixelContext:
                        Xtest[k, j] = i[(p + l), (q + m)]
                        j = j + 1
            k = k + 1
    Ytrain = np.zeros(((y - (max(pixelContext) * 2)) * (x - max(pixelContext))))
    k = 0
    for p in range(max(pixelContext), x):
        for q in range(max(pixelContext), y - max(pixelContext)):
            Ytrain[k] = Pl2[p, q]
            k = k + 1
    Ytest = np.zeros(((y - (max(pixelContext) * 2)) * (x - max(pixelContext))))
    k = 0
    for p in range(x, x2 - max(pixelContext)):
        for q in range(max(pixelContext), y - max(pixelContext)):
            Ytest[k] = Pl2[p, q]
            k = k + 1
    ### Run algorithm
    k = 0
    prediction = []
    q = 5
    n = 50
    clf = RandomForestRegressor(max_depth=q, n_estimators=n, bootstrap=True)
    clf.fit(Xtrain, Ytrain)
    hold = clf.predict(Xtest)
    score = metrics.mean_squared_error(Ytest, hold)
    roundscore = myround(score, 0.001)
    print(roundscore)
    prediction.append(hold)
    k = k + 1
    k = 0
    merge = np.array(prediction).flatten()
    Pl_predict = np.zeros(((x - max(pixelContext)) * trees,
                           (y - (max(pixelContext) * 2)) * depths))
    for l in range(depths):
        for i in range((x - max(pixelContext)) * trees):
            for j in range(y - (max(pixelContext) * 2)):
                Pl_predict[i, j + (l * (y - (max(pixelContext) * 2)))] = merge[k]
                k = k + 1
    np.save('%s' % (morestuff[wes]), Pl_predict)
    if wes == 2:
        pl_ax.text(-130, -50, 'Predictions (array, error)', size=30)
        pl_ax.text(-130, 280, r'Larger feature vector, lower error $\longrightarrow$')
    pl_ax = fig.add_subplot(1, 4, (wes + 1))
    pl_ax.imshow(Pl_predict.T, cmap='viridis')
    pl_ax.axes.get_xaxis().set_ticks([])
    pl_ax.axes.get_yaxis().set_ticks([])
    pl_ax.set_title('%s, %s' % (morestuff[wes], roundscore), size=24)
pl_ax2 = fig.add_subplot(1, 4, 4)
pl_ax2.set_title('Actual', size=30).set_position([.5, 1.1])
pl_ax2.imshow(Pl2[Pl2.shape[0] // 2:, :].T, cmap='viridis')
pl_ax2.axes.get_xaxis().set_ticks([])
pl_ax2.axes.get_yaxis().set_ticks([])
fig.savefig('vector_variation_MABr', bbox_inches='tight')
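The nested loops above are transparent but slow. The same context-window features can be assembled with array slicing; a minimal sketch under the same conventions (a radius r stands in for pixelContext, and rows x..x2 form the test half), offered as an equivalent formulation rather than a drop-in replacement:
def context_features(channels, p_lo, p_hi, r, y):
    # one column per (channel, row offset, column offset), matching the loop order above
    cols = []
    for c in channels:
        for l in range(-r, r + 1):
            for m in range(-r, r + 1):
                cols.append(c[p_lo + l:p_hi + l, r + m:y - r + m].ravel())
    return np.stack(cols, axis=1)

r = 1  # radius 1 is the 3x3 window; 2 gives 5x5, 3 gives 7x7
Xtrain_fast = context_features(inputs, r, x, r, y)       # training rows r..x-1
Xtest_fast = context_features(inputs, x, x2 - r, r, y)   # test rows x..x2-r-1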
In [9]:
###User specified parameters
inputs = [Ht2, Po2, Ph2, Am2]
x7x7 = [-3, -2, -1, 0, 1, 2, 3]
x5x5 = [-2, -1, 0, 1, 2]
x3x3 = [-1, 0, 1]
stuff = [x3x3, x5x5, x7x7]
morestuff = ['3x3', '5x5', '7x7']
depths = 1
trees = 1
###Create training and testing arrays
x = Po2.shape[0]/2
x2 = Po2.shape[0]
y = Po2.shape[1]
fig = plt.figure(figsize=(10,10))
for wes in range(3):
pixelContext = stuff[wes]
###FOR FIRST ALGORITHM
Xtrain = np.zeros(((y-(max(pixelContext)*2))*(x-max(pixelContext)),(len(pixelContext)*len(pixelContext)\
*len(inputs))))
k=0
for p in range(max(pixelContext),x):
for q in range(max(pixelContext),y-max(pixelContext)):
j=0
for h, i in enumerate(inputs):
for l in pixelContext:
for m in pixelContext:
Xtrain[k,j]=i[(p+l),(q+m)]
j=j+1
k = k + 1
Xtest = np.zeros(((y-(max(pixelContext)*2))*(x-max(pixelContext)),(len(pixelContext)*len(pixelContext)\
*len(inputs))))
k=0
for p in range(x,x2-max(pixelContext)):
for q in range(max(pixelContext),y-max(pixelContext)):
j=0
for h, i in enumerate(inputs):
for l in pixelContext:
for m in pixelContext:
Xtest[k,j]=i[(p+l),(q+m)]
j=j+1
k = k + 1
Ytrain = np.zeros(((y-(max(pixelContext)*2))*(x-max(pixelContext))))
k=0
for p in range(max(pixelContext),x):
for q in range(max(pixelContext),y-max(pixelContext)):
Ytrain[k]=Pl2[p,q]
k = k + 1
Ytest = np.zeros(((y-(max(pixelContext)*2))*(x-max(pixelContext))))
k=0
for p in range(x,x2-max(pixelContext)):
for q in range(max(pixelContext),y-max(pixelContext)):
Ytest[k]=Pl2[p,q]
k = k + 1
###FOR SECOND ALGORITHM
Xtrain2 = np.zeros(((y-(max(pixelContext)*2))*(x-max(pixelContext)),(len(pixelContext)*len(pixelContext)\
*len(inputs))))
k=0
for p in range(max(pixelContext),x):
for q in range(max(pixelContext),y-max(pixelContext)):
j=0
for h, i in enumerate(inputs):
for l in pixelContext:
for m in pixelContext:
Xtrain2[k,j]=i[(p+l),(q+m)]
j=j+1
k = k + 1
Xtest2 = np.zeros(((y-(max(pixelContext)*2))*(x-max(pixelContext)),(len(pixelContext)*len(pixelContext)\
*len(inputs))))
k=0
for p in range(x,x2-max(pixelContext)):
for q in range(max(pixelContext),y-max(pixelContext)):
j=0
for h, i in enumerate(inputs):
for l in pixelContext:
for m in pixelContext:
Xtest2[k,j]=i[(p+l),(q+m)]
j=j+1
k = k + 1
Ytrain2 = np.zeros(((y-(max(pixelContext)*2))*(x-max(pixelContext))))
k=0
for p in range(max(pixelContext),x):
for q in range(max(pixelContext),y-max(pixelContext)):
Ytrain2[k]=Pl2[p,q]
k = k + 1
Ytest2 = np.zeros(((y-(max(pixelContext)*2))*(x-max(pixelContext))))
k=0
for p in range(x,x2-max(pixelContext)):
for q in range(max(pixelContext),y-max(pixelContext)):
Ytest2[k]=Pl2[p,q]
k = k + 1
###Run Algorithm
k=0
prediction = []
q=5
n=50
clf = RandomForestRegressor(max_depth=q, n_estimators=n, bootstrap=True)
clf.fit(Xtrain, Ytrain)
hold = clf.predict(Xtest)
score = metrics.mean_squared_error(Ytest, hold)
roundscore = myround(score, 0.001)
print(roundscore)
prediction.append(hold)
k = k + 1
k=0
merge = (np.array(prediction).flatten())
Pl_predict = np.zeros(((x-max(pixelContext))*trees,(y-(max(pixelContext)*2))*depths))
for l in range(depths):
for i in range((x-max(pixelContext))*trees):
for j in range (y-(max(pixelContext)*2)):
Pl_predict[i,j+(l*(y-(max(pixelContext)*2)))] = merge[k]
k = k + 1
np.save('%s' %(morestuff[wes]),Pl_predict)
if wes == 2:
pl_ax.text(-130,-50,'Predictions (array, error)', size=30)#.set_position([.5, 1.2])
pl_ax.text(-130,280, 'Larger feature vector, lower error $\longrightarrow$')
pl_ax = fig.add_subplot(1,4,(wes+1))
pl_ax.imshow(Pl_predict.T, cmap='viridis')
#pl_ax.set_title('%s Feature Vector, score: %s' %(morestuff[wes],scores[wes]), size=24)
#pl_ax.set_ylabel('$\longleftarrow$ Trees', size=30)
#pl_ax.set_xlabel('Depth $\longrightarrow$', size=30)
pl_ax.axes.get_xaxis().set_ticks([])
pl_ax.axes.get_yaxis().set_ticks([])
pl_ax.set_title('%s, %s' %(morestuff[wes],roundscore), size=24)
pl_ax2 = fig.add_subplot(1,4,4)
pl_ax2.set_title('Actual', size=30).set_position([.5, 1.1])
pl_ax2.imshow(Pl2[Pl2.shape[0]/2:,:].T, cmap='viridis')
pl_ax2.axes.get_xaxis().set_ticks([])
pl_ax2.axes.get_yaxis().set_ticks([])
fig.subplots_adjust(left=None, bottom=None, right=None, top=None,\
wspace=None, hspace=None)
fig.savefig(filename='vector_variation_MABr', bbox_inches='tight')
In [14]:
### User-specified parameters
inputs = [Ht2, Po2, Ph2, Am2]
x7x7 = [-3, -2, -1, 0, 1, 2, 3]
x5x5 = [-2, -1, 0, 1, 2]
x3x3 = [-1, 0, 1]
x1x1 = [0]
pixelContext = x1x1
depths = 1
trees = 1
### Create training and testing arrays
x = Po2.shape[0] // 2
x2 = Po2.shape[0]
y = Po2.shape[1]
Xtrain = np.zeros(((y - (max(pixelContext) * 2)) * (x - max(pixelContext)),
                   (len(pixelContext) * len(pixelContext) * len(inputs))))
k = 0
for p in range(max(pixelContext), x):
    for q in range(max(pixelContext), y - max(pixelContext)):
        j = 0
        for h, i in enumerate(inputs):
            for l in pixelContext:
                for m in pixelContext:
                    Xtrain[k, j] = i[(p + l), (q + m)]
                    j = j + 1
        k = k + 1
Xtest = np.zeros(((y - (max(pixelContext) * 2)) * (x - max(pixelContext)),
                  (len(pixelContext) * len(pixelContext) * len(inputs))))
k = 0
for p in range(x, x2 - max(pixelContext)):
    for q in range(max(pixelContext), y - max(pixelContext)):
        j = 0
        for h, i in enumerate(inputs):
            for l in pixelContext:
                for m in pixelContext:
                    Xtest[k, j] = i[(p + l), (q + m)]
                    j = j + 1
        k = k + 1
Ytrain = np.zeros(((y - (max(pixelContext) * 2)) * (x - max(pixelContext))))
k = 0
for p in range(max(pixelContext), x):
    for q in range(max(pixelContext), y - max(pixelContext)):
        Ytrain[k] = Pl2[p, q]
        k = k + 1
Ytest = np.zeros(((y - (max(pixelContext) * 2)) * (x - max(pixelContext))))
k = 0
for p in range(x, x2 - max(pixelContext)):
    for q in range(max(pixelContext), y - max(pixelContext)):
        Ytest[k] = Pl2[p, q]
        k = k + 1
### Run algorithm
k = 0
prediction = []
q = 15
n = 15
clf = RandomForestRegressor(max_depth=q, n_estimators=n, bootstrap=True)
clf.fit(Xtrain, Ytrain)
hold = clf.predict(Xtest)
print(metrics.mean_squared_error(Ytest, hold))
prediction.append(hold)
k = k + 1
k = 0
merge = np.array(prediction).flatten()
Pl_predict = np.zeros(((x - max(pixelContext)) * trees,
                       (y - (max(pixelContext) * 2)) * depths))
for l in range(depths):
    for i in range((x - max(pixelContext)) * trees):
        for j in range(y - (max(pixelContext) * 2)):
            Pl_predict[i, j + (l * (y - (max(pixelContext) * 2)))] = merge[k]
            k = k + 1
fig = plt.figure(figsize=(30, 10))
pl_ax = fig.add_subplot(1, 2, 1)
pl_ax.imshow(Pl_predict, cmap='viridis')
pl_ax.set_title('Photoluminescence Predictions, 1x1 Pixel Contextualization', size=40)
pl_ax.axes.get_xaxis().set_ticks([])
pl_ax.axes.get_yaxis().set_ticks([])
pl_ax2 = fig.add_subplot(1, 2, 2)
pl_ax2.set_title('Photoluminescence Actual', size=40)
pl_ax2.imshow(Pl2[Pl2.shape[0] // 2:, :], cmap='viridis')
pl_ax2.axes.get_xaxis().set_ticks([])
pl_ax2.axes.get_yaxis().set_ticks([])
fig.savefig('1x1', bbox_inches='tight', dpi=300)
In [65]:
pixelContext = [-2, -1, 0, 1, 2]
list(range(len(pixelContext)))  # range() takes an int, not a list; len() gives the window size
In [179]:
x = Po2.shape[0] // 2
x2 = Po2.shape[0]
y = Po2.shape[1]
Xtest = np.zeros(((y - 2) * (x - 1), 9))
k = 0
for p in range(x, x2 - 1):
    for q in range(1, y - 1):
        Xtest[k, 0] = Po2[p - 1, q - 1]
        Xtest[k, 1] = Po2[p - 1, q]
        Xtest[k, 2] = Po2[p - 1, q + 1]
        Xtest[k, 3] = Po2[p, q - 1]
        Xtest[k, 4] = Po2[p, q]
        Xtest[k, 5] = Po2[p, q + 1]
        Xtest[k, 6] = Po2[p + 1, q - 1]
        Xtest[k, 7] = Po2[p + 1, q]
        Xtest[k, 8] = Po2[p + 1, q + 1]
        k = k + 1
In [155]:
x = Po2.shape[0] // 2
x2 = Po2.shape[0]
y = Po2.shape[1]
Xtrain = np.zeros(((y - 2) * (x - 1), 9))
k = 0
for p in range(1, x):
    for q in range(1, y - 1):
        Xtrain[k, 0] = Po2[p - 1, q - 1]
        Xtrain[k, 1] = Po2[p - 1, q]
        Xtrain[k, 2] = Po2[p - 1, q + 1]
        Xtrain[k, 3] = Po2[p, q - 1]
        Xtrain[k, 4] = Po2[p, q]
        Xtrain[k, 5] = Po2[p, q + 1]
        Xtrain[k, 6] = Po2[p + 1, q - 1]
        Xtrain[k, 7] = Po2[p + 1, q]
        Xtrain[k, 8] = Po2[p + 1, q + 1]
        k = k + 1
In [189]:
1255/5
Out[189]:
In [40]:
splitx = len(Ht2[:, 0]) // 2
splity = len(Ht2[0, :]) // 2
### set X train/test data
sets = [Ht2, Po2, Ph2, Am2]
for p, q in enumerate(sets):  # select top-left quadrant for testing
    iarray = q
    x = iarray.shape[0] // 2
    y = iarray.shape[1] // 2
    k = 0
    test = np.zeros((x * y))
    for i in range(0, x):
        for j in range(0, y):
            test[k] = iarray[i, j]
            k = k + 1
    sets[p] = test
Xtest = np.array(sets).T
sets = [Ht2, Po2, Ph2, Am2]
for p, q in enumerate(sets):  # select top-right and bottom-half quadrants for training
    iarray = q
    x = iarray.shape[0] // 2  # keep the buffer size consistent across iterations
    xstart = iarray.shape[0] // 2 + 1  # don't overlap with the test set
    xend = iarray.shape[0]
    y = iarray.shape[1] // 2
    k = 0
    test = np.zeros((x * y * 3))
    for i in range(xstart, xend):
        for j in range(0, y):
            test[k] = iarray[i, j]
            k = k + 1
    x = iarray.shape[0]
    ystart = iarray.shape[1] // 2 + 1  # don't overlap with the test set
    yend = iarray.shape[1]
    for i in range(x):
        for j in range(ystart, yend):
            test[k] = iarray[i, j]
            k = k + 1
    sets[p] = test
Xtrain = np.array(sets).T
### Set Y train/test data
sets = [Pl2]
for p, q in enumerate(sets):  # select top-left quadrant for testing
    iarray = q
    x = iarray.shape[0] // 2
    y = iarray.shape[1] // 2
    k = 0
    test = np.zeros((x * y))
    for i in range(0, x):
        for j in range(0, y):
            test[k] = iarray[i, j]
            k = k + 1
    sets[p] = test
Ytest = np.array(sets).ravel()  # flatten to 1-D for sklearn
sets = [Pl2]
for p, q in enumerate(sets):  # select top-right and bottom-half quadrants for training
    iarray = q
    x = iarray.shape[0] // 2  # keep the buffer size consistent across iterations
    xstart = iarray.shape[0] // 2 + 1  # don't overlap with the test set
    xend = iarray.shape[0]
    y = iarray.shape[1] // 2
    k = 0
    test = np.zeros((x * y * 3))
    for i in range(xstart, xend):
        for j in range(0, y):
            test[k] = iarray[i, j]
            k = k + 1
    x = iarray.shape[0]
    ystart = iarray.shape[1] // 2 + 1  # don't overlap with the test set
    yend = iarray.shape[1]
    for i in range(x):
        for j in range(ystart, yend):
            test[k] = iarray[i, j]
            k = k + 1
    sets[p] = test
Ytrain = np.array(sets).ravel()  # flatten to 1-D for sklearn
clf = DecisionTreeRegressor(max_depth=11)
clf.fit(Xtrain, Ytrain)
Ypred = clf.predict(Xtest)
print(metrics.mean_squared_error(Ytest, Ypred))
In [42]:
sets = [Ht2, Po2, Ph2, Am2]
for p, q in enumerate(sets):  # select top-right and bottom-half quadrants for training
    iarray = q
    x = iarray.shape[0] // 2  # keep the buffer size consistent across iterations
    xstart = iarray.shape[0] // 2 + 1  # don't overlap with the test set
    xend = iarray.shape[0]
    y = iarray.shape[1] // 2
    k = 0
    test = np.zeros((x * y * 3))
    for i in range(xstart, xend):
        for j in range(0, y):
            test[k] = iarray[i, j]
            k = k + 1
    x = iarray.shape[0]
    ystart = iarray.shape[1] // 2 + 1  # don't overlap with the test set
    yend = iarray.shape[1]
    for i in range(x):
        for j in range(ystart, yend):
            test[k] = iarray[i, j]
            k = k + 1
    sets[p] = test
Xtrain = np.array(sets).T
Xtrain
Out[42]:
In [37]:
Xtrain = np.array([Ht2_flat[0:31625], Po2_flat[0:31625], Ph2_flat[0:31625], Am2_flat[0:31625]]).T
Xtest = np.array([Ht2_flat[31625:], Po2_flat[31625:], Ph2_flat[31625:], Am2_flat[31625:]]).T
Ytrain = np.array(Pl2_flat[0:31625])
Ytest = np.array(Pl2_flat[31625:])
clf = DecisionTreeRegressor(max_depth=11)
clf.fit(Xtrain, Ytrain)
Ypred = clf.predict(Xtest)
print(metrics.mean_squared_error(Ytest, Ypred))
x = Ht2.shape[0]
y = Ht2.shape[1]
k = 0
merge = np.concatenate((Ytrain, Ypred))
Pl_predict = np.zeros((x, y))
for i in range(x):
    for j in range(y):
        Pl_predict[i, j] = merge[k]
        k = k + 1
fig = plt.figure(figsize=(18,12))
pl_ax = fig.add_subplot(121)
pl_ax.imshow(Pl_predict, cmap='viridis')
pl_ax.set_title('Photoluminescence')
pl_ax.axis('off')
pl_ax = fig.add_subplot(122)
pl_ax.imshow(Pl2, cmap='viridis')
pl_ax.set_title('Photoluminescence')
pl_ax.axis('off')
Out[37]:
In [13]:
x = Po2.shape[0] // 2
x2 = Po2.shape[0]
y = Po2.shape[1]
channels = [Ht2, Po2, Ph2, Am2]
offsets = [-1, 0, 1]  # 3x3 context window
Xtrain = np.zeros(((y - 2) * (x - 1), 36))
k = 0
for p in range(1, x):
    for q in range(1, y - 1):
        j = 0
        for c in channels:  # columns 0-8 Ht2, 9-17 Po2, 18-26 Ph2, 27-35 Am2
            for dp in offsets:
                for dq in offsets:
                    Xtrain[k, j] = c[p + dp, q + dq]
                    j = j + 1
        k = k + 1
Xtest = np.zeros(((y - 2) * (x - 1), 36))
k = 0
for p in range(x, x2 - 1):
    for q in range(1, y - 1):
        j = 0
        for c in channels:
            for dp in offsets:
                for dq in offsets:
                    Xtest[k, j] = c[p + dp, q + dq]
                    j = j + 1
        k = k + 1
Ytrain = np.zeros(((y - 2) * (x - 1)))
k = 0
for p in range(1, x):
    for q in range(1, y - 1):
        Ytrain[k] = Pl2[p, q]
        k = k + 1
Ytest = np.zeros(((y - 2) * (x - 1)))
k = 0
for p in range(x, x2 - 1):
    for q in range(1, y - 1):
        Ytest[k] = Pl2[p, q]
        k = k + 1
depths = 20
trees = 10
k = 0
prediction = []
for q in range(1, depths + 1):
    for r in range(1, trees + 1):
        clf = RandomForestRegressor(max_depth=q, n_estimators=r, bootstrap=True)
        clf.fit(Xtrain, Ytrain)
        hold = clf.predict(Xtest)
        print(metrics.mean_squared_error(Ytest, hold))
        prediction.append(hold)
        k = k + 1
k = 0
merge = np.array(prediction).flatten()
Pl_predict = np.zeros(((x - 1) * trees, (y - 2) * depths))
for l in range(depths):
    for i in range((x - 1) * trees):
        for j in range(y - 2):
            Pl_predict[i, j + (l * (y - 2))] = merge[k]
            k = k + 1
fig = plt.figure(figsize=(30, 10))
pl_ax = fig.add_subplot(1, 3, (1, 2))
pl_ax.imshow(Pl_predict, cmap='viridis')
pl_ax.set_title('Photoluminescence Predictions, 3x3 Pixel Contextualization', size=40)
pl_ax.set_ylabel(r'$\longleftarrow$ Trees', size=30)
pl_ax.set_xlabel(r'Depth $\longrightarrow$', size=30)
pl_ax.axes.get_xaxis().set_ticks([])
pl_ax.axes.get_yaxis().set_ticks([])
pl_ax2 = fig.add_subplot(1, 3, 3)
pl_ax2.set_title('Photoluminescence Actual', size=40)
pl_ax2.imshow(Pl2[Pl2.shape[0] // 2:, :], cmap='viridis')
pl_ax2.axes.get_xaxis().set_ticks([])
pl_ax2.axes.get_yaxis().set_ticks([])
fig.savefig('afm_depth', bbox_inches='tight')
In [9]:
# scratch: gather corner-neighbour values for each interior pixel
# (the original cell assigned the same element four times; the four
# corner offsets below are a guess at the intended indices)
iarray = Ht2
x = Ht2.shape[0]
y = Ht2.shape[1]
k = 0
test = np.zeros(((x - 2) * (y - 2), 4))
for i in range(1, x - 1):
    for j in range(1, y - 1):
        test[k, 0] = iarray[i - 1, j - 1]
        test[k, 1] = iarray[i - 1, j + 1]
        test[k, 2] = iarray[i + 1, j - 1]
        test[k, 3] = iarray[i + 1, j + 1]
        k = k + 1
In [9]:
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import pearsonr
pearson_coefficient = pearsonr(Ht1_flat, Po1_flat)
print('Pearson Coefficient = {:.3f}'.format(pearson_coefficient[0]))
fig = plt.figure(figsize=(10, 8))
gs = gridspec.GridSpec(1, 1)
ax = plt.subplot(gs[0, 0])
ax.scatter(Ht1_flat, Po1_flat)
ax.set_title('Perovskite Data')
ax.set_xlabel('Height')
ax.set_ylabel('Potential')
plt.show()
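The same test can be run for each channel directly against PL, which speaks to the earlier note about which signals might correlate with photoluminescence; a quick sketch over the flattened arrays already in memory:
# Pearson correlation of each AFM channel with PL
for name, chan in [('Height', Ht1_flat), ('Potential', Po1_flat),
                   ('Phase', Ph1_flat), ('Amplitude', Am1_flat)]:
    r_val, _ = pearsonr(chan, Pl1_flat)
    print('{} vs PL: r = {:.3f}'.format(name, r_val))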
In [33]:
from sklearn import preprocessing
# robust_scale expects a 2-D array, so reshape the flat vectors to columns
height_flat_norm = preprocessing.robust_scale(Ht1_flat.reshape(-1, 1)).ravel()
potential_flat_norm = preprocessing.robust_scale(Po1_flat.reshape(-1, 1)).ravel()
fig = plt.figure(figsize=(10, 8))
gs = gridspec.GridSpec(1, 1)
ax = plt.subplot(gs[0, 0])
ax.scatter(height_flat_norm, potential_flat_norm)
ax.set_title('Perovskite Data')
ax.set_xlabel('Height')
ax.set_ylabel('Potential')
plt.show()
In [1]:
#!/usr/bin/python
#
# Meg Drouhard
# Hierarchical Agglomerative Clustering (HAC) with SciPy for CEI data
# 2/2016
import numpy as np
import scipy.cluster.hierarchy as hac
import scipy.spatial.distance as dist
import pylab
import argparse
import sys

def buildLinkageMatrix(dataMatrix, distanceMeasure):
    # calculate pairwise distances; hac.linkage expects the condensed
    # distance matrix from pdist, not the redundant square form
    distanceMatrix = dist.pdist(dataMatrix, distanceMeasure)
    linkageMatrix = hac.linkage(distanceMatrix)
    return linkageMatrix

def saveDendrogram(linkageMatrix, label):
    dendrogramName = 'Dendrogram-' + label + '.png'
    # Create and save dendrogram image
    dataDendrogram = hac.dendrogram(linkageMatrix)
    pylab.savefig(dendrogramName)

def printStats(linkageMatrix):
    # Calculate cluster statistics
    copheneticMatrix = hac.cophenet(linkageMatrix)
    copheneticMedian = np.median(copheneticMatrix)
    inconsistencyMatrix = hac.inconsistent(linkageMatrix)
    maxInconsistencyMatrix = hac.maxinconsts(linkageMatrix, inconsistencyMatrix)
    maxInconsistencyMedian = np.median(maxInconsistencyMatrix)
    maxdistsMatrix = hac.maxdists(linkageMatrix)
    maxdistsMedian = np.median(maxdistsMatrix)
    # print stats
    # Cophenetic distance = intergroup dissimilarity
    print("Median Cophenetic distance: " + str(copheneticMedian))
    print("Median Maximum Inconsistency: " + str(maxInconsistencyMedian))
    print("Median MaxDist: " + str(maxdistsMedian))

###############################################################################
# Parse arguments
parser = argparse.ArgumentParser(description='Cluster weighted text hierarchically.')
parser.add_argument('-i',
                    '--input-file',
                    dest='inputfile',
                    required=True,
                    help='input file name')
parser.add_argument('-l',
                    '--label',
                    dest='inputlabel',
                    required=False,
                    help='label for output files')
args = parser.parse_args()
# set up the path argument if provided
if args.inputfile:
    fname = args.inputfile
else:
    sys.exit('Error: missing input file.')
label = ''
if args.inputlabel:
    label = args.inputlabel
with open(fname, 'r') as f:
    # Load data
    data = np.loadtxt(f, skiprows=0)
# reshape as 256x256 matrix
dataMatrix = np.reshape(data, (256, 256))
print(dataMatrix.shape)
# Plot initial data as image
pylab.imshow(dataMatrix)
pylab.show()
# Build linkage matrices for clustering
# Euclidean distance
euclideanLinkageMatrix = buildLinkageMatrix(dataMatrix, 'euclidean')
# Cosine distance
cosineLinkageMatrix = buildLinkageMatrix(dataMatrix, 'cosine')
# Hamming distance
hammingLinkageMatrix = buildLinkageMatrix(dataMatrix, 'hamming')
# Jaccard distance
jaccardLinkageMatrix = buildLinkageMatrix(dataMatrix, 'jaccard')
# save dendrograms
saveDendrogram(euclideanLinkageMatrix, label + '-Euclidean')
saveDendrogram(cosineLinkageMatrix, label + '-Cosine')
saveDendrogram(hammingLinkageMatrix, label + '-Hamming')
saveDendrogram(jaccardLinkageMatrix, label + '-Jaccard')
# Calculate and print cluster statistics
print("Cluster Statistics:")
print("Euclidean Distance:")
printStats(euclideanLinkageMatrix)
print("")
print("Cosine Distance:")
printStats(cosineLinkageMatrix)
print("")
print("Hamming Distance:")
printStats(hammingLinkageMatrix)
print("")
print("Jaccard Distance:")
printStats(jaccardLinkageMatrix)
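This script is meant to be run from the command line, e.g. python hac_cei.py -i Ht.txt -l height (the script file name here is hypothetical); calling argparse inside the notebook would fail on the notebook's own argv. The clustering helpers can still be exercised in-notebook on one of the AFM maps:
# in-notebook check of the linkage helper on the height map
euclidean_linkage = buildLinkageMatrix(Ht1, 'euclidean')
printStats(euclidean_linkage)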
In [ ]: