In [2]:
import nsaba.nsaba as na
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import scipy as sp
import random as rn
import time
import matplotlib as mt
import matplotlib.pyplot as plt
mt.style.use('ggplot')
%matplotlib inline
In [3]:
dd = '../../data_dir'
na.Nsaba.aba_load(dd)
na.Nsaba.ns_load(dd)
In [4]:
na.Nsaba.ns_load_id_dict()
In [5]:
para = na.Nsaba()
para.load_ge_pickle('Nsaba_ABA_ge.pkl', dd)
In [6]:
feat_df = para.get_ns_struct("features_df")
In [7]:
_, df = para._term_to_coords('reward', -1)
In [8]:
df.reward.hist(bins=10);
plt.title("Term Distribution");
plt.yscale('log')
plt.xlabel('reward');
In [9]:
X = df.loc[:, 'x':'z'].as_matrix()
y = df.loc[:, 'reward'].as_matrix()
In [10]:
%%time
clf = RandomForestRegressor(n_estimators=50, n_jobs=4)
clf.fit(X,y)
In [11]:
%%time
# For comparison with NSABA KNN
para.get_ns_act(term='reward', k=10)
In [12]:
packer = lambda x,y: [[list(el),y] for el in x]
In [13]:
def Xy_split(mat):
X = []
y = []
for row in mat:
X.append(row[0])
y.append(row[1])
return np.array(X), np.array(y)
In [14]:
A_validation = []
R_validation = []
top_id_coords = para.term_to_coords('reward', 8)
for pmid, coords in top_id_coords:
A_validation += packer(coords, feat_df.loc[feat_df.pmid == pmid, "reward"].iloc[0])
R_validation = rn.sample(zip(X,y), 1000)
Ax, Ay = Xy_split(A_validation)
Rx, Ry = Xy_split(R_validation)
In [367]:
ApredRF = clf.predict(Ax)
RpredRF = clf.predict(Rx)
In [368]:
plt.plot(RpredRF, 'ro', label="RF");
plt.plot(Ry, 'bo')
plt.title("Random Sample Prediction");
plt.legend();
In [ ]:
In [369]:
plt.plot(ApredRF, 'ro', label='RF');
plt.plot(Ay, 'bo');
plt.title("High Term Prediction");
plt.legend();
In [370]:
Rpred = para.coords_to_ns_act(Rx, 'reward')
In [371]:
# Local Neighbors Estimate
plt.plot(Rpred, 'ro', label="LNE");
plt.plot(Ry, 'bo')
plt.title("Random Sample Prediction");
plt.legend();
In [372]:
Apred = para.coords_to_ns_act(Ax, 'reward')
In [373]:
plt.plot(Apred, 'ro', label='LNE');
plt.plot(Ay, 'bo');
plt.title("High Activation Prediction");
plt.legend();
In [375]:
print "RF Random Sample Residual: %f" % np.sum(np.abs(RpredRF - Ry))
print "LNE Random Sample Residual: %f" % np.sum(np.abs(Rpred - Ry))
print "RF High Activation Residual: %f" % np.sum(np.abs(ApredRF - Ay))
print "LNE High Activation Residual: %f" % np.sum(np.abs(Apred - Ay))
In [ ]: