In [1]:
from scipy.special import expit
from rbmpy.rbm import RBM
from rbmpy.sampler import DirtyCorrectionMulDimSampler, VanillaSampler, ContinuousSampler, ContinuousApproxSampler, ContinuousApproxMulDimSampler, ApproximatedSampler, LayerWiseApproxSampler, ApproximatedMulDimSampler
from rbmpy.trainer import VanillaTrainier
from rbmpy.performance import Result
import numpy as np
import rbmpy.datasets, rbmpy.performance, rbmpy.plotter, pickle, rbmpy.rbm, os, logging, rbmpy.sampler, math
from rbmpy.rbm import weights_into_hiddens
from rbmpy.progress import Progress
from scipy.spatial.distance import cosine
import rbmpy.plotter as pp
from numpy import newaxis
from collections import Counter
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
logger = logging.getLogger()
# Set the logging level to logging.INFO
logger.setLevel(logging.INFO)
%matplotlib inline
In [2]:
from IPython.core.debugger import Tracer
In [3]:
#load the data
import glob
result_file_name = glob.glob("Results/approx_correction/job-2618368--task-1")
raw_data = []
for result in result_file_name:
    with open(result, "rb") as f:
        raw_data.append(pickle.load(f))

other_file_name = glob.glob("Results/2613190/*")
other_raw_data = []
for result in other_file_name:
    with open(result, "rb") as f:
        other_raw_data.append(pickle.load(f))
In [4]:
def get_data(size):
    mnist_data = dict()
    for i in range(0, 10):
        ds = np.load("datasets/{}.npy".format(i))[:size]
        try:
            with open("models/{}_models".format(i), 'rb') as f:
                model = pickle.load(f)
            mnist_data[i] = (model, ds)
        except FileNotFoundError:
            logging.warning("There is no model for {}".format(i))
    return mnist_data
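A quick usage note (hypothetical call, assuming the datasets/ and models/ files referenced above exist): get_data returns a dict keyed by digit, with a (trained model, image array) tuple per digit, so a model and its images can be unpacked directly.
# hypothetical usage sketch, not run as part of this notebook:
# mnist_data = get_data(200)           # first 200 images per digit
# model_3, images_3 = mnist_data[3]    # RBM for digit 3 and its images (28x28 each, per the reshapes below)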
In [28]:
# (Re)defined locally; this shadows rbmpy.performance.Result imported above.
class Result(object):

    def __init__(self):
        self.orbm_a_recons = {}
        self.orbm_b_recons = {}
        self.rbm_a_recons = {}
        self.rbm_b_recons = {}
        self.orbm_a_scores = {}
        self.orbm_b_scores = {}
        self.rbm_a_scores = {}
        self.rbm_b_scores = {}

    def safe_add(self, key, value, score_dict):
        if key not in score_dict:
            score_dict[key] = []
        score_dict[key].append(value)

    # orbm_a_recons, orbm_b_recons, rbm_a_recons, rbm_b_recons, orbm_a_score, orbm_b_score, rbm_a_score, rbm_b_score
    def add_scores(self, model_ids, orbm_a_recons, orbm_b_recons, rbm_a_recons, rbm_b_recons, orbm_a, orbm_b, rbm_a, rbm_b):
        self.safe_add(model_ids, orbm_a_recons, self.orbm_a_recons)
        self.safe_add(model_ids, orbm_b_recons, self.orbm_b_recons)
        self.safe_add(model_ids, rbm_a_recons, self.rbm_a_recons)
        self.safe_add(model_ids, rbm_b_recons, self.rbm_b_recons)
        self.safe_add(model_ids, orbm_a, self.orbm_a_scores)
        self.safe_add(model_ids, orbm_b, self.orbm_b_scores)
        self.safe_add(model_ids, rbm_a, self.rbm_a_scores)
        self.safe_add(model_ids, rbm_b, self.rbm_b_scores)

    def highest_scores(self, key, n):
        """Grab the n highest scores for the given key."""
        pass

    def ndarrayify(self, score_dict):
        nd_score_dict = {}
        for key in score_dict:
            nd_score_dict[key] = np.array(score_dict[key])
        return nd_score_dict

    def mean_scores(self):
        or_a = self.ndarrayify(self.orbm_a_scores)
        or_b = self.ndarrayify(self.orbm_b_scores)
        r_a = self.ndarrayify(self.rbm_a_scores)
        r_b = self.ndarrayify(self.rbm_b_scores)
        or_a = {k: v.mean(axis=0) for k, v in or_a.items()}
        or_b = {k: v.mean(axis=0) for k, v in or_b.items()}
        r_a = {k: v.mean(axis=0) for k, v in r_a.items()}
        r_b = {k: v.mean(axis=0) for k, v in r_b.items()}
        return or_a, or_b, r_a, r_b

    def matrix_for_scores(self, scores):
        x_idx, y_idx = list(map(list, zip(*scores.keys())))
        matrix = np.ones(shape=(max(x_idx) + 1, max(y_idx) + 1))
        for key in scores:
            matrix[key[0], key[1]] = scores[key].sum()
        return matrix

    def mean_score_matrices(self):
        or_a, or_b, r_a, r_b = self.mean_scores()
        return self.matrix_for_scores(or_a), self.matrix_for_scores(or_b), self.matrix_for_scores(r_a), self.matrix_for_scores(r_b)


def ll_image_wise_score(v, v_prime):
    # return -(abs(v - v_prime)).sum(1)
    return (v * np.log(v_prime) + ((1 - v) * np.log(1 - v_prime))).sum(1)


def cosine_score(v, v_prime):
    scores = []
    for i in range(v.shape[0]):
        scores.append(1 - cosine(v[i, :], v_prime[i, :]))
    return np.array(scores)


def unzip_and_ndarrays(list_of_2d_tuple):
    a = list(map(list, zip(*list_of_2d_tuple)))
    return np.array(a[0]), np.array(a[1])


def scores_for_recons(target_recons, orbm_recons, rbm_recons):
    target_a_recons, target_b_recons = target_recons
    orbm_a_recons, orbm_b_recons = unzip_and_ndarrays(orbm_recons)
    rbm_a_recons, rbm_b_recons = unzip_and_ndarrays(rbm_recons)
    # Calculate the scores
    orbm_a_score = ll_image_wise_score(target_a_recons, orbm_a_recons)
    orbm_b_score = ll_image_wise_score(target_b_recons, orbm_b_recons)
    rbm_a_score = ll_image_wise_score(target_a_recons, rbm_a_recons)
    rbm_b_score = ll_image_wise_score(target_b_recons, rbm_b_recons)
    return orbm_a_recons, orbm_b_recons, rbm_a_recons, rbm_b_recons, orbm_a_score, orbm_b_score, rbm_a_score, rbm_b_score
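For reference, ll_image_wise_score(v, v_prime) returns, for each image (row), the Bernoulli log-likelihood of the binary target v under the reconstruction probabilities v_prime, summed over pixels: sum_j [ v_j * log(v'_j) + (1 - v_j) * log(1 - v'_j) ]. The commented-out alternative is a negative per-image L1 distance. A small hedged check with toy values (not notebook data):
# toy sanity check, assuming v is binary and v_prime stays strictly inside (0, 1)
# (np.log gives -inf for a pixel where v_prime is exactly 0 or 1):
# v = np.array([[1.0, 0.0, 1.0]])          # one three-pixel "image"
# v_prime = np.array([[0.9, 0.2, 0.8]])    # its reconstruction probabilities
# ll_image_wise_score(v, v_prime)          # array([-0.5516...]) == log(0.9) + log(0.8) + log(0.8)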
In [29]:
mnist_data = get_data(None)
In [30]:
def ds_from_mnist_ids(key, size):
    a_data = mnist_data[key[0]][1][:size]
    b_data = mnist_data[key[1]][1][:size]
    return rbmpy.datasets.flatten_data_set(a_data), rbmpy.datasets.flatten_data_set(b_data)
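Judging by the .reshape(28, 28) calls applied to individual rows later in the notebook, each returned array should be (size, 784), one flattened 28x28 image per row. A hypothetical call for illustration:
# a_flat, b_flat = ds_from_mnist_ids((5, 4), 200)   # hypothetical key/size
# a_flat.shape                                      # expected: (200, 784)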
In [31]:
# # def scores_for_recons(target_recons,obrm_recons, rbm_recons, flattened_ds_comp)
# temp_orbm = raw_data[0][(1,1)]["ORBM_RECONS"]
# temp_rbm = raw_data[0][(1,1)]["RBM_RECONS"]
# temp_ds_comp = raw_data[0][(1,1)]["DS_COMP"].reshape(temp_ds_comp.shape[0], temp_ds_comp.shape[1] * temp_ds_comp.shape[2])
# scores_for_recons((temp_ds_comp,temp_ds_comp),temp_orbm,temp_rbm, temp_ds_comp)[0].shape
# flattened_ds_comp = ds_comp.reshape(ds_comp.shape[0], ds_comp.shape[1] * ds_comp.shape[2])
In [32]:
result = Result()
for run in raw_data:
    for key in run:
        # key = (2,1)
        current_comp = run[key]
        orbm_recons = current_comp["ORBM_RECONS"]
        rbm_recons = current_comp["RBM_RECONS"]
        size = current_comp["DS_COMP"].shape[0]
        target_recons = ds_from_mnist_ids(key, size)
        result.add_scores(key, *scores_for_recons(target_recons, orbm_recons, rbm_recons))
In [33]:
result.orbm_a_recons[(7,3)][0].shape
Out[33]:
In [34]:
a = result.mean_scores()
o_a, o_b, r_a, r_b = result.mean_score_matrices()
In [35]:
def plot_matrix(m, ticks):
    plt.imshow(m, interpolation='nearest', cmap=plt.cm.RdYlBu)
    plt.xticks(ticks)
    plt.yticks(ticks)
    plt.colorbar()
    plt.show()
In [36]:
raw_data[0].keys()
Out[36]:
In [37]:
ticks = np.arange(10)
plot_matrix(o_a, ticks)
plot_matrix(o_b, ticks)
# count cells where the transposed (ORBM - RBM) B-score advantage differs from the A-score advantage
49 - np.where(np.isclose((o_b - r_b).T, (o_a - r_a)), 1, 0).sum()
In [85]:
comp = (5,4)
or_score_of_interest = np.array(result.orbm_a_scores[comp])
or_recon_of_interest = np.array(result.orbm_a_recons[comp])
or_b_recon = np.array(result.orbm_b_recons[comp])
r_score_of_interest = np.array(result.rbm_a_scores[comp])
r_recon_of_interest = np.array(result.rbm_a_recons[comp])
r_b_recon = np.array(result.rbm_b_recons[comp])
or_max_idx = np.unravel_index(or_score_of_interest.argmax(), or_score_of_interest.shape)
# r_max_idx = np.unravel_index(r_score_of_interest.argmax(), r_score_of_interest.shape)
# pp.image(or_recon_of_interest[or_max_idx[0], or_max_idx[1], :].reshape(28,28))
# pp.image(r_recon_of_interest[r_max_idx[0], or_max_idx[1], :].reshape(28,28))
# # pp.image(mnist_data[comp[0]][1][or_max_idx[1]])
# target = mnist_data[comp[0]][1][or_max_idx[1]]
# van_sampler = ContinuousSampler(mnist_data[comp[0]][0])
# pp.image(van_sampler.reconstruction_given_visible(target.reshape(28 * 28)).reshape(28, 28))
In [86]:
# or_score_of_interest[run].argsort()
# assumes `run` holds an integer run index (it is set to 0 in the cell below)
or_score_of_interest[run][np.flipud(or_score_of_interest[run].argsort())]
Out[86]:
In [89]:
def ppp(d, title):
    plt.suptitle(title)
    plt.imshow(d, interpolation='nearest', cmap="Greys", vmin=0, vmax=1)
In [91]:
run = 0
range_length = 5
# idx = np.flipud(or_score_of_interest[run].argsort())[:range_length]
idx = or_score_of_interest[run].argsort()[:range_length]
or_recon_of_interest[0].shape
cool_i = 1
for i in idx:
    print("orbm {}".format(or_score_of_interest[run][i]))
    print("rbm {}".format(r_score_of_interest[run][i]))

    ppp(r_recon_of_interest[run, i].reshape(28, 28), "RBM A Reconstruction")
    plt.savefig("Assets/bad-rbm-{}-rank{}".format(comp[0], cool_i))
    plt.show()

    ppp(r_b_recon[run, i].reshape(28, 28), "RBM B Reconstruction")
    plt.savefig("Assets/bad-rbm-{}-rank{}".format(comp[1], cool_i))
    plt.show()

    ppp(or_recon_of_interest[run, i].reshape(28, 28), "ORBM A Reconstruction")
    plt.savefig("Assets/bad-orbm-{}-rank{}".format(comp[0], cool_i))
    plt.show()

    ppp(or_b_recon[run, i].reshape(28, 28), "ORBM B Reconstruction")
    plt.savefig("Assets/bad-orbm-{}-rank{}".format(comp[1], cool_i))
    plt.show()

    d1 = mnist_data[comp[0]][1][i]
    d2 = mnist_data[comp[1]][1][i]

    ppp(d1, "")
    plt.savefig("Assets/comp-i-{}-rank{}".format(comp[0], cool_i))
    plt.show()

    ppp(d2, "")
    plt.savefig("Assets/comp-i-{}-rank{}".format(comp[1], cool_i))
    plt.show()

    ppp(np.maximum(d1, d2), "")
    plt.savefig("Assets/comp-i-{}-{}-rank{}".format(comp[0], comp[1], cool_i))
    plt.show()

    cool_i += 1
In [48]:
a = or_recon_of_interest[or_max_idx].reshape(28,28)
b = r_recon_of_interest[or_max_idx].reshape(28,28)
t = ds_from_mnist_ids(comp,size=200)[0][or_max_idx[1]].reshape(28,28)
In [49]:
pp.image(a)
pp.image(b)
pp.image(t)
pp.image(t-a)
print(abs(t - a).sum())
print(abs(t - b).sum())
or_score_of_interest[or_max_idx]
In [ ]:
or_score_of_interest.max()
In [ ]:
or_max_idx
In [ ]:
print("orbm{}".format(np.array(result.orbm_a_scores[(1,2)]).mean(0).max()))
print("rbm{}".format(np.array(result.rbm_a_scores[(1,2)]).mean(0).max()))
In [ ]:
key = (1,7)
for key in result.rbm_a_scores.keys():
    plt.suptitle("{}".format(key))
    plt.plot(np.array(result.rbm_a_scores[key]).mean(0), np.array(result.orbm_a_scores[key]).mean(0), '.k')
    # plt.scatter(np.array(result.rbm_a_scores[key]).mean(0), np.array(result.orbm_a_scores[key]).mean(0), s=20, cmap=plt.cm.Greys)
    plt.plot([0.4, 1], [0.4, 1])
    plt.axis('equal')
    plt.ylabel("ORBM Scores")
    plt.xlabel("RBM Scores")
    plt.savefig("Results/plots/" + str(key) + ".png")
    plt.show()
In [ ]:
mnist_data[2][0].visible_bias.max()
In [ ]:
!say "I'm finished!!!"
In [ ]: