Test that I am extracting the parameters correctly


In [ ]:
out_test = r_mlp.predict(x3)
out_test = scaler_y.inverse_transform(out_test)
# Pull the weights and biases out of each layer (Parameters tuples from get_parameters())
w1 = r_mlp.get_parameters()[0].weights
w2 = r_mlp.get_parameters()[1].weights
w3 = r_mlp.get_parameters()[2].weights
b1 = r_mlp.get_parameters()[0].biases
b2 = r_mlp.get_parameters()[1].biases
b3 = r_mlp.get_parameters()[2].biases

xscale_min = scaler_x.data_min_
xscale_max = scaler_x.data_max_
yscale_absmax = scaler_y.max_abs_

# Manual forward pass: two ReLU hidden layers, linear output
# (x3 is already in scaled units, so xscale_min/xscale_max are not needed here)
out_test_check = np.dot(x3, w1) + b1
out_test_check[out_test_check < 0] = 0
out_test_check = np.dot(out_test_check, w2) + b2
out_test_check[out_test_check < 0] = 0
out_test_check = np.dot(out_test_check, w3) + b3

# Undo the MaxAbsScaler on y and compare against predict(); the residual should be ~0
out_test_check = out_test_check * yscale_absmax
plt.plot(out_test - out_test_check)
plt.show()
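
The same check can be written once as a reusable helper rather than unrolled layer by layer. A minimal sketch, assuming the Parameters tuples returned by get_parameters() and ReLU on every layer except the last:


In [ ]:
# Sketch: generic manual forward pass through an MLP whose parameters come
# from get_parameters() (namedtuples with .weights and .biases).
# Assumes ReLU on all hidden layers and a linear output layer.
def manual_forward(x, params):
    out = x
    for i, layer in enumerate(params):
        out = np.dot(out, layer.weights) + layer.biases
        if i < len(params) - 1:
            out = np.maximum(out, 0)  # ReLU
    return out

# Should reproduce out_test_check above (before the y rescaling), e.g.:
# np.allclose(manual_forward(x3, r_mlp.get_parameters()) * yscale_absmax, out_test_check)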

Check that I understand what classification is doing


In [ ]:
c_w1 = c_mlp.get_parameters()[0].weights
c_w2 = c_mlp.get_parameters()[1].weights
c_w3 = c_mlp.get_parameters()[2].weights
c_b1 = c_mlp.get_parameters()[0].biases
c_b2 = c_mlp.get_parameters()[1].biases
c_b3 = c_mlp.get_parameters()[2].biases

# Manual forward pass through the classifier: two ReLU hidden layers, linear logits
out_test_check = np.dot(x1, c_w1) + c_b1
out_test_check[out_test_check < 0] = 0
out_test_check = np.dot(out_test_check, c_w2) + c_b2
out_test_check[out_test_check < 0] = 0
out_test_check = np.dot(out_test_check, c_w3) + c_b3

# Softmax over the two output logits
expo = np.exp(out_test_check)
expos = np.sum(expo, axis=1)

foo = np.empty((x1.shape[0], 2))
foo[:, 0] = expo[:, 0] / expos
foo[:, 1] = expo[:, 1] / expos
# Threshold the class-1 probability at 0.5 to get hard labels
ff = np.zeros(x1.shape[0])
ff[foo[:, 1] > 0.5] = 1.
print(x1.shape)
ee = c_mlp.predict(x1)
ee = np.squeeze(ee)
print(ee.shape)
print(ff.shape)
# Count disagreements between the manual labels (ff) and predict() (ee)
print(np.sum(np.logical_and(ff == 0, ee == 1)))
print(np.sum(np.logical_and(ff == 1, ee == 0)))

# Unnormalized exponential of the class-1 logit
rexpo = np.exp(out_test_check[:, 1])
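
One caveat with the hand-rolled softmax above: np.exp overflows for large logits. The standard fix is to subtract the row maximum before exponentiating, which leaves the probabilities unchanged. A minimal sketch:


In [ ]:
# Sketch: numerically stable softmax over the class axis.
# exp(z - max(z)) gives the same probabilities as exp(z) but cannot overflow.
def stable_softmax(logits):
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=1, keepdims=True)

# Should match foo above up to floating-point error:
# np.allclose(stable_softmax(out_test_check), foo)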

Plot (1-d) histograms at each input and output level


In [ ]:
# figsize must go to subplots(); a separate plt.figure() call just creates an unused figure
_, ax = plt.subplots(lev.size, 2, sharex=True, figsize=(8, 40))
for i in range(lev.size):
    step = .05
    bins = np.arange(-1, 1 + step, step)
    # Density-normalized histograms of T inputs (left) and T outputs (right) at each level
    # (matplotlib's old normed= keyword was removed; density= is the replacement)
    n, bins, _ = ax[i, 0].hist(unpack(x_train_norm, 'T')[:, i], bins=bins, facecolor='yellow', alpha=0.5, density=True)
    n2, bins2, _ = ax[i, 1].hist(unpack(y_train_norm, 'T')[:, i], bins=bins, facecolor='blue', alpha=0.5, density=True)

    ax[i, 0].set_xlim((-1, 1))
    ax[i, 0].set_ylim(0, np.amax(n))
    ax[i, 1].set_ylim(0, np.amax(n2))

    print(np.amax(n))
    print(np.amax(n2))
    #ax[i,1].hist(unpack(x_train_norm,'q')[:,i]*step,bins=np.arange(-1,1+step,step),facecolor='yellow',alpha=0.5,density=True)
    #ax[i,1].hist(unpack(y_train_norm,'q')[:,i]*step,bins=np.arange(-1,1+step,step),facecolor='blue'  ,alpha=0.5,density=True)
    #ax[i,1].set_xlim([-1,1])

    #plt.subplot(lev.size,2,i+1+lev.size)
    #plt.hist(y_train_norm[:,i],100,facecolor='green')
    #ax[i,0].get_yaxis().set_visible(False)
    #n, bins, patches = plt.hist(y_train_norm[:,28], 100, density=True, facecolor='green', alpha=0.75)
plt.show()
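
Note that density=True normalizes each histogram so that its integral (not its sum) is one, so with 0.05-wide bins the printed peak values can legitimately exceed 1. A quick check with np.histogram, as a sketch:


In [ ]:
# Sketch: confirm the density normalization integrates to 1 for one level.
vals = unpack(x_train_norm, 'T')[:, 0]
n, edges = np.histogram(vals, bins=np.arange(-1, 1.05, .05), density=True)
print(np.sum(n * np.diff(edges)))  # should print ~1.0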

Calculate out-of-bag error importance for the random forest regressor


In [ ]:
oob_mlp = RandomForestRegressor(n_estimators=30)

oob_mlp.fit(x2, y2)
indmin = np.argmin(oob_mlp.feature_importances_)
print(indmin)
print(np.min(oob_mlp.feature_importances_))
print(oob_mlp.score(x3, y3))

# Importance profiles on the full feature set (plot before dropping any columns)
plt.plot(unpack(oob_mlp.feature_importances_, 'T'), lev, label='T')
plt.plot(unpack(oob_mlp.feature_importances_, 'q'), lev, label='q')
plt.ylim((1, 0))
plt.legend()
plt.show()

# Refit with the zero-importance features masked out; the boolean mask is defined
# over the original columns, so drop the least-important column only afterwards
oob_mlp2 = RandomForestRegressor(n_estimators=30)
oob_mlp2.fit(x2[:, oob_mlp.feature_importances_ > 0.], cv2)
x2 = np.delete(x2, indmin, 1)
x3 = np.delete(x3, indmin, 1)  # keep the test-set columns in sync

#oob_mlp2.feature_importances_.shape
np.argmin(oob_mlp.feature_importances_)
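
The cell above drops one feature by hand; the same idea can be iterated, refitting and removing the least-important column until the held-out score stops improving. A rough sketch (assumes x2 and x3 keep matching columns, as above):


In [ ]:
# Sketch: greedy backward elimination driven by feature_importances_.
# Repeatedly drop the least-important column while the test score holds up.
x2_sel, x3_sel = x2.copy(), x3.copy()
best_score = -np.inf
while x2_sel.shape[1] > 1:
    rf = RandomForestRegressor(n_estimators=30)
    rf.fit(x2_sel, y2)
    score = rf.score(x3_sel, y3)
    if score < best_score:
        break  # the last removal hurt; stop
    best_score = score
    drop = np.argmin(rf.feature_importances_)
    x2_sel = np.delete(x2_sel, drop, 1)
    x3_sel = np.delete(x3_sel, drop, 1)
print(x2_sel.shape, best_score)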

Principal Component Analysis attempt


In [ ]:
from sklearn.decomposition import PCA
from sklearn import preprocessing
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
from importlib import reload  # Python 3: reload lives in importlib
%matplotlib inline
reload(nnload)
x, y, cv, Pout, lat, lev, dlev, timestep = nnload.loaddata(data_dir + 'nntest.nc', minlev,
                                                       rainonly=rainonly) #,all_lats=False,indlat=8)
print(x.shape)
pcax = PCA(n_components=10)
pcay = PCA(n_components=10)
# Standardize before PCA so no single input dominates the components
xpp = preprocessing.StandardScaler()
ypp = preprocessing.StandardScaler()
x = xpp.fit_transform(x)
y = ypp.fit_transform(y)
x = pcax.fit_transform(x)
y = pcay.fit_transform(y)
# Subsample data
x1, x2, x3, y1, y2, y3 = nnload.subsample(x, y, N_samples=10000)

print(pcax.explained_variance_ratio_)
print(x2.shape)
# Leading component split across the two input halves (T levels, then q levels)
plt.plot(pcax.components_[0, 0:15], lev, color='blue')
plt.plot(pcax.components_[0, 15:30], lev, color='red')
plt.show()
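
A quick way to judge whether 10 components are enough is the cumulative explained-variance curve. A minimal sketch using the fitted pcax:


In [ ]:
# Sketch: cumulative explained variance of the fitted PCA,
# useful for choosing n_components.
cumvar = np.cumsum(pcax.explained_variance_ratio_)
plt.plot(np.arange(1, cumvar.size + 1), cumvar, marker='o')
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.ylim((0, 1))
plt.show()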