In [ ]:
RCL_BASE=('http://redcatlabs.com/downloads/'+
'deep-learning-workshop/notebooks/data/'+
'research/ICONIP-2016/')
"""
# http://redcatlabs.com/downloads/deep-learning-workshop/LICENSE
# Files in : ${RCL_BASE} :
# :: These are either as downloaded from GloVe site, or generated by Levy code
# The downloadable pretrained GloVe is much larger, since it is 400k words,
# whereas the 'home-grown' GloVe (based on the 1-billion-word corpus and a Wikipedia
# snapshot) has a vocabulary of 2^17 words, i.e. a ~131k vocab size
# 507,206,240 Oct 25 2015 2-pretrained-vectors_glove.6B.300d.hkl
# 160,569,440 May 14 14:57 1-glove-1-billion-and-wiki_window11-lc-36_vectors.2-17.hkl
"""
import os, requests
def get_embedding_file( hkl ):
if os.path.isfile( hkl ):
print("%s already available locally" % (hkl,))
else:
# ... requests.get( RCL_BASE + basename(hkl))
print("Downloading : %s" % (hkl,))
with open(hkl, 'wb') as handle:
response = requests.get(RCL_BASE + (hkl.replace('../data/','')), stream=True)
            if not response.ok:
                # Something went wrong - don't write the error response into the target file
                print("Failed to download %s" % (hkl,))
                return
for block in response.iter_content(64*1024):
handle.write(block)
print("Downloading : %s :: DONE" % (hkl,))
In [ ]:
default_embedding_file = '../data/1-glove-1-billion-and-wiki_window11-lc-36_vectors.2-17.hkl'
#default_embedding_file = '../data/2-pretrained-vectors_glove.6B.300d.hkl'
get_embedding_file( default_embedding_file )
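As a quick check that the download completed (a small added cell, assuming the one above has run), confirm the file exists and report its size :
In [ ]:
# Confirm the embedding file is present and report its size on disk
size_mb = os.path.getsize(default_embedding_file) / (1024. * 1024.)
print("%s : %.1f MB" % (default_embedding_file, size_mb))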
In [ ]:
import time
import numpy as np
import theano
import lasagne
# http://blog.mdda.net/oss/2016/04/07/nvidia-on-fedora-23
#theano.config.nvcc.flags = '-D_GLIBCXX_USE_CXX11_ABI=0'
import sklearn.preprocessing
import hickle
d = hickle.load(default_embedding_file)
vocab, embedding = d['vocab'], d['embedding']
vocab_np = np.array(vocab, dtype=str)
vocab_orig=vocab_np.copy()
#dictionary = dict( (word.lower(), i) for i,word in enumerate(vocab) )
dictionary = dict( (word, i) for i,word in enumerate(vocab) if i<len(embedding) )
print("Embedding loaded :", embedding.shape) # (vocab_size, embedding_dimension)=(rows, columns)
def NO_NEED_save_to_txt(embedding_save, save_filename_txt):
    with open(save_filename_txt, 'w') as f:  # text mode, since formatted strings are written
for l in range(0, embedding_save.shape[0]):
f.write("%s %s\n" % (
vocab[l],
' '.join([ ('0' if x==0. else ("%.6f" % (x,))) for x in embedding_save[l, :].tolist() ]), )
)
def save_embedding_to_hickle(vocab, embedding_save, save_filename_hkl, vocab_orig=None):
print("About to save to %s" % (save_filename_hkl,))
d=dict(
vocab=vocab,
        vocab_orig=vocab if vocab_orig is None else vocab_orig,
embedding=embedding_save,
)
hickle.dump(d, save_filename_hkl, mode='w', compression='gzip')
print("Saved to %s" % (save_filename_hkl,))
In [ ]:
quantisation_levels = 8
In [ ]:
def np_int_list(n, mult=100., size=3): # size includes the +/-
return "[ " + (', '.join([ ('% +*d') % (size,x,) for x in (n * mult).astype(int).tolist()])) + " ]"
## Quantise each embedding entry onto a small set of levels, optimised separately per dimension
# Suppose that v is a vector of quantisation levels
# and c is the list of numbers to be quantised :
# each c becomes c', the closest value in v
# :: iteratively update v so that the total (c - c')^2 is as low as possible (i.e. Lloyd's algorithm)
# (a quick verification cell follows the quantisation below)
c_length = embedding.shape[0]
embedding_quantised = np.zeros_like(embedding)
t0 = time.time()
for d in range(embedding.shape[1]): # Quantise each dimension separately
levels = quantisation_levels
i_step = int(c_length/levels)
i_start = int(i_step/2)
v_indices = np.arange(start=i_start, stop=c_length, step=i_step, dtype='int')
#if d != 9: continue # Weird distribution
#if d != 1: continue # Very standard example
# Initialise v by sorting c, and placing them evenly through the list
e_column = embedding[:,d].astype('float32')
c_sorted = np.sort( e_column )
v_init = c_sorted[ v_indices ]
# the v_init are the initial centers
v=v_init
t1 = time.time()
epochs=0
for epoch in range(0, 1000):
#print(" Dimension:%3d, Epoch:%3d, %s" % (d, epoch, np_int_list(v),))
        # work out the mid-points between adjacent levels
mids_np = (v[:-1] + v[1:])/2.
mids = mids_np.tolist()
mids.insert( 0, c_sorted[0] )
mids.append( c_sorted[-1] +1 )
centroids=[]
for i in range( 0, len(mids)-1 ):
pattern = np.where( (mids[i] <= c_sorted) & (c_sorted < mids[i+1]) )
centroids.append( c_sorted[ pattern ].mean() )
centroids_np = np.array(centroids)
if np.allclose(v, centroids_np):
if epochs>200: # This only prints out for 'long convergence cases'
print(" NB : long running convergence : embedding[%3d] - took %d epochs" % (d, epochs,))
break
v = centroids_np
epochs += 1
if d % 10 ==0:
print("Ran embedding[%3d] - average time for convergence : %6.2fms" % (d, (time.time() - t1)/epochs*1000.,))
#print("Check col updated: before ", np_int_list(embedding[0:20,d]))
# Ok, so now we have the centers in v, and the mids in 'mids'
for i in range( 0, len(mids)-1 ):
pattern = np.where( (mids[i] <= e_column) & (e_column < mids[i+1]) )
embedding_quantised[pattern, d] = v[i]
#print("Check col updated: after ", np_int_list(embedding_quantised[0:20,d]))
if False:
offset=101010 # Check rare-ish words
for d in range(5, embedding_quantised.shape[1], 25):
print("Col %3d updated: " % (d,), np_int_list(embedding_quantised[(offset+0):(offset+20),d]))
embedding_normed = sklearn.preprocessing.normalize(embedding_quantised, norm='l2', axis=1, copy=True)
print("Quantisation finished : results in embedding_quantised and (same, but normalised) in embedding_normed")
To save the created embedding, execute the following :
In [ ]:
# Save the embedding_normed as a hickle file (easy to reload into the 'explore' workbook)
save_embedding_to_hickle(vocab, embedding_normed, '../data/lloyds_normed_%d.hkl' % (quantisation_levels, ) )
The sparse coding itself can also be run as a stand-alone script; the command lines below first train the sparsifier and then regenerate the sparse embeddings from a saved checkpoint (the cells that follow walk through the same steps inside the notebook) :
python sparsify_lasagne.py \
    --mode=train \
    --version=21 \
    --save='./sparse.6B.300d_S-21_2n-shuf-noise-after-norm_.2.01_6-75_%04d.hkl' \
    --sparsity=0.0675 \
    --random=1 \
    --iters=4000 | tee sparse.6B.300d_S-21_2n-shuf-noise-after-norm_.2.01_6-75.log
#sparse_dim = 1024, pre-num_units=embedding_dim*8,
# -> 4.0 l2 in 4.0k epochs (sigma=39) # sparsity_std_:, 0.4742,
python sparsify_lasagne.py \
    --mode=predict \
    --version=21 \
    --load='./sparse.6B.300d_S-21_2n-shuf-noise-after-norm_.2.01_6-75_4000.hkl' \
    --sparsity=0.0675 \
    --random=1 \
    --output=sparse.6B.300d_S-21_2n-shuf-noise-after-norm_.2.01_6-75_4000_GPU-sparsity_recreate.hkl \
    --direct=sparse.6B.300d_S-21_2n-shuf-noise-after-norm_.2.01_6-75_4000_GPU-sparse_matrix.hkl
In [ ]:
sparse_dim,sparsity_goal = 1024, 0.0675
#sparse_dim,sparsity_goal = 4096, 0.0150
shuffle_vocab = True
batchsize = 16384 # (GTX760 requires <20000)
pre_normalize = False
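For a feel for the target code size (an illustrative calculation, not part of the original notebook) :
In [ ]:
# sparsity_goal is the fraction of the sparse_dim units allowed to be active per word,
# so the expected number of active units per word vector is simply their product
print("Target active units per word : ~%.0f of %d (sparsity %.4f)" %
      (sparse_dim * sparsity_goal, sparse_dim, sparsity_goal))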
In [ ]:
default_save_file_fmt = './data/sparse.6B.300d_jupyter_%04d.hkl'  # the %04d slot is filled with the epoch number when checkpointing
"""
parser = argparse.ArgumentParser(description='')
parser.add_argument('-m','--mode', help='(train|predict)', type=str, default=None)
parser.add_argument('-i','--iters', help='Number of iterations', type=int, default=10000)
parser.add_argument('-o','--output', help='hickle to *create* embedding for testing', type=str, default=None)
parser.add_argument('-d','--direct', help='hickle to *create* *binary* embedding for testing', type=str, default=None)
parser.add_argument('-p','--param', help='Set param value initially', type=float, default=None)
args = parser.parse_args()
print("Mode : %s" % (args.mode,))
"""
if shuffle_vocab:
np.random.seed(1) # No need to get fancy - just want to mix up the word frequencies into different batches
perm = np.random.permutation(len(embedding))
embedding = embedding[perm]
vocab = vocab_np[perm].tolist()
dictionary = dict( (word, i) for i,word in enumerate(vocab) )
print("Embedding loaded :", embedding.shape) # (vocab_size, embedding_dimension)=(rows, columns)
print("Device=%s, OpenMP=%s" % (theano.config.device, ("True" if theano.config.openmp else "False"), ))
def np_int_list(n, mult=100., size=3): # size includes the +/-
return "[ " + (', '.join([ ('% +*d') % (size,x,) for x in (n * mult).astype(int).tolist()])) + " ]"
embedding_dim = embedding.shape[1]
In [ ]:
mode='train'
#mode='predict'
In [ ]:
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
class SparseWinnerTakeAllLayer(lasagne.layers.Layer):
def __init__(self, incoming, sparsity=0.05, **kwargs):
super(SparseWinnerTakeAllLayer, self).__init__(incoming, **kwargs)
self.sparsity = sparsity
def get_output_for(self, input, **kwargs):
"""
Parameters
----------
input : tensor
output from the previous layer
"""
# Sort within batch (Very likely on the CPU)
# theano.tensor.sort(self, axis, kind, order)
sort_input = input.sort( axis=0, kind='quicksort' )
# Find kth value
hurdles_raw = sort_input[ int( batchsize * (1.0 - self.sparsity) ), : ]
hurdles = theano.tensor.maximum(hurdles_raw, 0.0) # rectification...
# switch based on >kth value (or create mask), all other entries are zero
masked = theano.tensor.switch( theano.tensor.ge(input, hurdles), input, 0.0)
return masked
class SparseWinnerTakeAllLayerApprox(lasagne.layers.Layer):
def __init__(self, incoming, approx_sparsity=0.12, **kwargs):
super(SparseWinnerTakeAllLayerApprox, self).__init__(incoming, **kwargs)
self.sparsity = approx_sparsity
def get_output_for(self, input, **kwargs):
"""
Parameters
----------
input : tensor
output from the previous layer
"""
# input_shape is [ #in_batch, #vector_entries ] ~ [ 20k, 1024 ]
current_sparsity = self.sparsity
#print(current_sparsity) # A theano variable
if False:
# This is an 'advanced' tail-aware hurdle-level prediction.
# In the end, it works less well than the binary-search version below
# Find the max value in each column - this is the k=1 (top-most) entry
hurdles_max = input.max( axis=0 )
# Find the min value in each column - this is the k=all (bottom-most) entry
#hurdles_min = input.min( axis=0 )
# Let's guess (poorly) that the sparsity hurdle is (0... sparsity ...100%) within these bounds
#hurdles_guess = hurdles_max * (1.0 - current_sparsity) + hurdles_min * current_sparsity
#hurdles_guess = (hurdles_min + hurdles_max)/2.0
# New approach : We know that the mean() is zero and the std() is 1
# simulations suggest that the more stable indicators are at fractions of the max()
hurdles_hi = hurdles_max * 0.5
hurdles_lo = hurdles_max * 0.3
# Now, let's find the actual sparsity that this creates
sparsity_flag_hi = theano.tensor.switch( theano.tensor.ge(input, hurdles_hi), 1.0, 0.0)
sparsity_real_hi = sparsity_flag_hi.mean(axis=0) # Should be ~ sparsity (likely to be lower, though)
sparsity_flag_lo = theano.tensor.switch( theano.tensor.ge(input, hurdles_lo), 1.0, 0.0)
sparsity_real_lo = sparsity_flag_lo.mean(axis=0) # Should be ~ sparsity (likely to be higher, though)
# But this is wrong! Let's do another estimate (will be much closer, hopefully) using this knowledge
# For each column, the new hurdle guess
#hurdles_better = hurdles_max - (hurdles_max - hurdles_guess) * (
# current_sparsity / (sparsity_guess_real + 0.00001) ) )
if False: # This assumes that the distribution tails are linear (which is not true)
hurdles_interp = hurdles_hi + (hurdles_lo-hurdles_hi) * (
(current_sparsity - sparsity_real_hi) / ((sparsity_real_lo - sparsity_real_hi)+0.00001) )
else: # Assume that the areas under the tails are ~ exp(-x*x)
# See (2) in : https://math.uc.edu/~brycw/preprint/z-tail/z-tail.pdf
# *** See (Remark 15) in : http://m-hikari.com/ams/ams-2014/ams-85-88-2014/epureAMS85-88-2014.pdf
def tail_transform(z):
return theano.tensor.sqrt( -theano.tensor.log( z ) )
tail_target = tail_transform(current_sparsity)
tail_hi = tail_transform(sparsity_real_hi)
tail_lo = tail_transform(sparsity_real_lo)
hurdles_interp = hurdles_hi + (hurdles_lo-hurdles_hi) * (
(tail_target - tail_hi) / ((tail_lo - tail_hi)+0.00001) )
            #hurdles = theano.tensor.maximum(hurdles_better, 0.0) # rectification... at minimum...
# (also solves everything-blowing-up problem)
hurdles = hurdles_interp.clip(hurdles_max*0.2, hurdles_max*0.9)
if True: # Simple, but effective : Binary search
hurdles_hi, hurdles_lo = [], []
hurdles_guess = []
sparsity_flag = []
sparsity_real = []
sparsity_hi, sparsity_lo = [], []
# Find the max value in each column - this is the k=1 (top-most) entry
hurdles_max = input.max( axis=0 )
hurdles_hi.append(hurdles_max)
            sparsity_hi.append( theano.tensor.ones_like(hurdles_max) * (1./batchsize) )  # at the max hurdle, exactly one element per column passes
hurdles_lo_temp = input.mean( axis=0 ) # Different estimate idea...
hurdles_lo.append(hurdles_lo_temp)
sparsity_lo_temp = theano.tensor.switch( theano.tensor.ge(input, hurdles_lo_temp), 1.0, 0.0)
sparsity_lo.append( sparsity_lo_temp.mean(axis=0) )
for i in range(10):
if True: # WINS THE DAY!
hurdles_guess.append(
(
(hurdles_lo[-1] + hurdles_hi[-1]) * 0.5
)
)
if False: # A 'better approximation' that is actually worse
hurdles_guess.append(
(
hurdles_hi[-1] + (hurdles_lo[-1] - hurdles_hi[-1]) *
(current_sparsity - sparsity_hi[-1]) / ((sparsity_lo[-1] - sparsity_hi[-1])+0.000001)
).clip(hurdles_lo[-1], hurdles_hi[-1])
)
if False: # Another 'better approximation' that is actually worse
# switch on closeness to getting it correct
hurdles_guess.append(
theano.tensor.switch( theano.tensor.lt( sparsity_lo[-1], current_sparsity * 2.0 ),
(
hurdles_hi[-1] + (hurdles_lo[-1] - hurdles_hi[-1]) *
(current_sparsity - sparsity_hi[-1]) / ((sparsity_lo[-1] - sparsity_hi[-1])+0.000001)
).clip(hurdles_lo[-1], hurdles_hi[-1]),
(
(hurdles_lo[-1] + hurdles_hi[-1]) * 0.5
)
)
)
sparsity_flag.append( theano.tensor.switch( theano.tensor.ge(input, hurdles_guess[-1] ), 1.0, 0.0) )
sparsity_real.append( sparsity_flag[-1].mean(axis=0) )
                # So, based on whether the target sparsity is greater or less than the realised sparsity, tighten the hi or lo bound
hurdles_lo.append(
theano.tensor.switch( theano.tensor.gt(current_sparsity, sparsity_real[-1]),
hurdles_lo[-1], hurdles_guess[-1])
)
hurdles_hi.append(
theano.tensor.switch( theano.tensor.le(current_sparsity, sparsity_real[-1]),
hurdles_hi[-1], hurdles_guess[-1])
)
hurdles = hurdles_guess[-1]
#hurdles = hurdles_lo[-1] # Better to bound this at the highest relevant sparsity...
masked = theano.tensor.switch( theano.tensor.ge(input, hurdles), input, 0.0)
return masked
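The layers above are easier to follow with a small NumPy reference (an illustrative sketch on random data, not part of the training graph): find the exact per-column hurdle by sorting, apply the same rectified mask, and note how the sqrt(-log z) tail transform behaves :
In [ ]:
# NumPy illustration of the per-column winner-take-all mask (toy data only)
rng = np.random.RandomState(0)
toy = rng.normal(size=(1000, 8)).astype('float32')   # stand-in for a batch of activations
target_sparsity = 0.0675

# Exact hurdle : the value at the (1-sparsity) quantile of each column, rectified at zero
k = int(toy.shape[0] * (1.0 - target_sparsity))
hurdles = np.maximum(np.sort(toy, axis=0)[k, :], 0.0)
masked = np.where(toy >= hurdles, toy, 0.0)
print("Realised sparsity per column :", (masked > 0).mean(axis=0))

# The tail transform used by the approximate layer : for a Gaussian tail, the hurdle
# is roughly linear in sqrt(-log(tail probability)), which is why interpolating in
# that space gives a better first guess than interpolating the probabilities directly
for z in (0.5, 0.1, 0.0675, 0.01):
    print("tail probability %.4f -> sqrt(-log z) = %.3f" % (z, np.sqrt(-np.log(z))))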
In [ ]:
embedding_N = (embedding) # No Normalization by default
if pre_normalize:
embedding_std = np.std(embedding, axis=1)
embedding_N = embedding / embedding_std[:, np.newaxis] # Try Normalizing std(row) == 1, making sure shapes are right
embedding_shared = theano.shared(embedding_N.astype('float32')) # 400000, 300
embedding_shared.name = "embedding_shared"
batch_start_index = theano.tensor.scalar('batch_start_index', dtype='int32')
embedding_batch = embedding_shared[ batch_start_index:(batch_start_index+batchsize) ]
network = lasagne.layers.InputLayer(
( batchsize, embedding_dim ),
input_var=embedding_batch,
)
pre_hidden_dim=embedding_dim*8 ## For sparse_dim=1024 and below
if sparse_dim>1024*1.5:
pre_hidden_dim=sparse_dim*2 ## Larger sparse_dim
network = lasagne.layers.DenseLayer(
network,
num_units=pre_hidden_dim,
nonlinearity=lasagne.nonlinearities.rectify,
W=lasagne.init.GlorotUniform(),
b=lasagne.init.Constant(0.)
)
network = lasagne.layers.DenseLayer(
network,
num_units=sparse_dim,
nonlinearity=lasagne.nonlinearities.identity,
W=lasagne.init.GlorotUniform(),
b=lasagne.init.Constant(0.)
)
sparse_embedding_batch_linear=network
#def hard01(x):
# # http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.switch
# #return theano.tensor.switch( theano.tensor.gt(x, 0.), 0.95, 0.05)
# return theano.tensor.switch( theano.tensor.gt(x, 0.), 1.0, 0.0)
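A quick summary of the encoder built so far (an added check, using Lasagne's layer helpers) :
In [ ]:
# List each layer with its output shape, plus the trainable parameter count so far
for layer in lasagne.layers.get_all_layers(network):
    print("%-20s %s" % (type(layer).__name__, layer.output_shape))
print("Trainable parameters so far : %d" % (lasagne.layers.count_params(network, trainable=True),))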
In [ ]:
if mode == 'train':
# This adds some 'fuzziness' to smooth out the training process
sigma = theano.tensor.scalar(name='sigma', dtype='float32')
embedding_batch_middle = lasagne.layers.batch_norm(
lasagne.layers.NonlinearityLayer( network, nonlinearity=lasagne.nonlinearities.rectify )
)
embedding_batch_middle = lasagne.layers.GaussianNoiseLayer(
embedding_batch_middle,
sigma=0.2 * theano.tensor.exp((-0.01) * sigma ) # Noise should die down over time...
)
sparsity_blend = theano.tensor.exp((-10.) * sigma ) # Goes from 1 to epsilon
current_sparsity = 0.50*(sparsity_blend) + sparsity_goal*(1. - sparsity_blend)
sparse_embedding_batch_squashed = SparseWinnerTakeAllLayerApprox(
embedding_batch_middle,
approx_sparsity=current_sparsity
)
elif mode == 'predict':
embedding_batch_middle = lasagne.layers.batch_norm(
lasagne.layers.NonlinearityLayer( network, nonlinearity=lasagne.nonlinearities.rectify )
)
#sparse_embedding_batch_squashed = SparseWinnerTakeAllLayer(
# embedding_batch_middle,
# sparsity=sparsity_goal,
# )
sparse_embedding_batch_squashed = SparseWinnerTakeAllLayerApprox(
embedding_batch_middle,
approx_sparsity=sparsity_goal, # Jam the actual (final) value in...
)
sparse_embedding_batch_probs = sparse_embedding_batch_squashed
network = sparse_embedding_batch_squashed
network = lasagne.layers.DenseLayer(
network,
num_units=embedding_dim,
nonlinearity=lasagne.nonlinearities.linear,
W=lasagne.init.GlorotUniform(),
b=lasagne.init.Constant(0.)
)
prediction = lasagne.layers.get_output(network)
l2_error = lasagne.objectives.squared_error( prediction, embedding_batch )
l2_error_mean = l2_error.mean() # This is a per-element error term
interim_output = lasagne.layers.get_output(sparse_embedding_batch_probs)
# Count the number of positive entries
sparse_flag = theano.tensor.switch( theano.tensor.ge(interim_output, 0.0001), 1.0, 0.0)
#sparsity_mean = sparse_flag.mean() / sparsity_goal # This is a number 0..1, where 1.0 = perfect = on-target
sparsity_mean = sparse_flag.mean() * 100. # This is realised sparsity
sparsity_std = (sparse_flag.mean(axis=1) / sparsity_goal).std() # assess the 'quality' of the sparsity per-row
# This is to monitor learning (not direct it)
sparsity_probe = sparse_flag.mean(axis=1) / sparsity_goal # sparsity across rows may not be ===1.0
#sparsity_probe = sparse_flag.mean(axis=0) / sparsity_goal # sparsity across columns should be ===1.0 (if approx works)
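The training schedules above are driven by sigma_; the following (an added illustration, evaluating the same formulas in NumPy) shows how the injected noise and the target sparsity anneal as sigma_ grows :
In [ ]:
# Evaluate the annealing schedules from the 'train' branch above for a few sigma_ values
for s in (0.0, 0.05, 0.1, 0.2, 0.5, 1.0):
    noise_std = 0.2 * np.exp(-0.01 * s)                       # GaussianNoiseLayer sigma
    blend = np.exp(-10.0 * s)                                 # goes from 1 towards 0
    target = 0.50 * blend + sparsity_goal * (1.0 - blend)     # annealed sparsity target
    print("sigma_=%4.2f : noise std=%.4f, target sparsity=%.4f" % (s, noise_std, target))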
In [ ]:
sparsity_cost=0.0
if mode == 'train':
mix = theano.tensor.scalar(name='mix', dtype='float32')
sparsity_cost = -mix*sparsity_mean/1000. # The 1000 factor is because '10' l2 is Ok, and 1 sparsity_mean is Great
if version==20 or version==21:
sparsity_cost = mix*0.
cost = l2_error_mean + sparsity_cost
params = lasagne.layers.get_all_params(network, trainable=True)
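Before building the update rules, it can be useful to confirm what will actually be trained (an added check) :
In [ ]:
# Report the parameter tensors that the optimiser will update, and their total size
print("Trainable parameter tensors : %s" % (params,))
print("Total trainable parameters  : %d" % (lasagne.layers.count_params(network, trainable=True),))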
In [ ]:
epoch_base=0
if args.load:
load_vars = hickle.load(args.load)
print("Saved file had : Epoch:%4d, sigma:%5.2f" % (load_vars['epoch'], load_vars['sigma'], ) )
#fraction_of_vocab=fraction_of_vocab
epoch_base = load_vars['epoch']
if 'layer_names' in load_vars:
layer_names = load_vars['layer_names']
else:
i=0
layer_names=[]
while "Lasagne%d" % (i,) in load_vars:
layer_names.append( "Lasagne%d" % (i,) )
i=i+1
layers = [ load_vars[ ln ] for ln in layer_names ]
lasagne.layers.set_all_param_values(network, layers)
In [ ]:
if mode == 'train':
updates = lasagne.updates.adam( cost, params )
iterate_net = theano.function(
[batch_start_index,sigma,mix],
[l2_error_mean,sparsity_mean,sparsity_std,sparsity_probe],
updates=updates,
allow_input_downcast=True,
on_unused_input='warn',
)
print("Built Theano op graph")
sigma_ = 0.0
mix_ = 0.0
if args.param:
mix_=args.param
t0 = time.time()
for epoch in range(epoch_base, epoch_base+args.iters):
t1 = time.time()
fraction_of_vocab = 1.0
max_l2_error_mean=-1000.0
batch_list = np.array( range(0, int(embedding.shape[0]*fraction_of_vocab), batchsize) )
batch_list = np.random.permutation( batch_list )
for b_start in batch_list.astype(int).tolist():
#l2_error_mean_,sparsity_mean_ = iterate_net(b_start)
l2_error_mean_,sparsity_mean_,sparsity_std_,sparsity_probe_ = iterate_net(b_start, sigma_, mix_)
print(" epoch:,%4d, b:,%7d, l2:,%9.2f, sparsity_mean_:,%9.4f, sparsity_std_:,%9.4f, sigma:,%5.2f, mix:,%5.2f, " %
(epoch, b_start, 1000*l2_error_mean_, sparsity_mean_, sparsity_std_, sigma_, mix_, ))
if b_start==0:
#print("Hurdles : " + np_int_list( sparsity_probe_[0:100] ))
print(" Row-wise sparsity : " + np_int_list( sparsity_probe_[0:30] ))
#print(" %d, vector_probe : %s" % (epoch, np_int_list( np.sort(sparsity_probe_[0:100]) ), ))
#print(" %d, vector_probe : %s" % (epoch, np_int_list( sparsity_probe_[0:100] ), ))
#print(" vector_probe : " + np_int_list( sparsity_probe_[0:1000] ))
if max_l2_error_mean<l2_error_mean_:
max_l2_error_mean=l2_error_mean_
print("Time per 100k words %6.2fs" % ((time.time() - t1)/embedding.shape[0]/fraction_of_vocab*1000.*100., ))
#exit()
boil_limit=10.
if pre_normalize:
boil_limit=40.
if max_l2_error_mean*1000.<boil_limit:
print("max_l2_error_mean<%6.2f - increasing sparseness emphasis" % (boil_limit,))
sigma_ += 0.01
mix_ += 0.1
if (epoch +1) % 10 == 0:
save_vars = dict(
version=version,
epoch=epoch,
sigma=sigma_,
mix=mix_,
fraction_of_vocab=fraction_of_vocab
)
layer_names = []
for i,p in enumerate(lasagne.layers.get_all_param_values(network)):
if len(p)>0:
name = "Lasagne%d" % (i,)
save_vars[ name ] = p
layer_names.append( name )
save_vars[ 'layer_names' ] = layer_names
epoch_thinned = int(epoch/100)*100
hickle.dump(save_vars, args.save % (epoch_thinned,), mode='w', compression='gzip')
In [ ]:
if args.load and mode == 'predict':
print("Parameters : ", lasagne.layers.get_all_params(network))
get_sparse_linear = theano.function( [batch_start_index], [ lasagne.layers.get_output(sparse_embedding_batch_linear), ]) # allow_input_downcast=True
predict_net = theano.function( [batch_start_index], [l2_error_mean,sparsity_mean], allow_input_downcast=True )
predict_emb = theano.function( [batch_start_index], [prediction], allow_input_downcast=True )
predict_bin = theano.function( [batch_start_index], [ lasagne.layers.get_output(sparse_embedding_batch_squashed),])
print("Built Theano op graph")
    if True: # Shows the reconstruction error and realised sparsity for each batch
for b_start in range(0, int(embedding.shape[0]), batchsize):
l2_error_mean_,sparsity_mean_ = predict_net(b_start)
print(" epoch:%4d, b:%7d, l2:%12.4f, sparsity:%6.4f - hard01" %
(epoch_base, b_start, 1000*l2_error_mean_, sparsity_mean_, ))
if False: # Shows the linear range of the sparse layer (pre-squashing)
for b_start in range(0, int(embedding.shape[0]), batchsize * 5):
sparse_embedding_batch_linear_, = get_sparse_linear(b_start)
for row in range(0,100,5):
print(np_int_list( sparse_embedding_batch_linear_[row][0:1000:50], mult=10, size=4 ))
if args.output:
predictions=[]
for b_start in range(0, int(embedding.shape[0]), batchsize):
prediction_, = predict_emb(b_start)
predictions.append( np.array( prediction_ ) )
print(" epoch:%3d, b:%7d, Downloading - reconstructed array" %
(epoch_base, b_start, ))
embedding_prediction = np.concatenate(predictions, axis=0)
predictions=None
print("About to save to %s" % (args.output,))
d=dict(
vocab=vocab,
vocab_orig=vocab_orig,
embedding=embedding_prediction,
)
hickle.dump(d, args.output, mode='w', compression='gzip')
if args.direct:
predictions=[]
for b_start in range(0, int(embedding.shape[0]), batchsize):
binarised_, = predict_bin(b_start)
#predictions.append( np.where( binarised_>0.5, 1., 0. ).astype('float32') )
predictions.append( binarised_.astype('float32') )
#print(" epoch:%3d, b:%7d, Downloading - hard01 to binary" %
print(" epoch:%3d, b:%7d, Downloading - sparse data" %
(epoch_base, b_start, ))
embedding_prediction = np.concatenate(predictions, axis=0)
predictions=None
print("About to save sparse version to %s" % (args.direct,))
d=dict(
vocab=vocab,
vocab_orig=vocab_orig,
embedding=embedding_prediction,
)
hickle.dump(d, args.direct, mode='w', compression='gzip')
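Finally, if a sparse embedding file was written out above, it can be reloaded and checked (an added verification, assuming args.direct was set and the predict cell has run) :
In [ ]:
# Reload the saved sparse embedding (if any) and report how sparse it actually is
if args.direct and os.path.isfile(args.direct):
    saved = hickle.load(args.direct)
    emb_sparse = saved['embedding']
    print("Reloaded %s : shape=%s, non-zero fraction=%.4f (goal %.4f)" %
          (args.direct, emb_sparse.shape, (emb_sparse > 0).mean(), sparsity_goal))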