In [1]:
from blocks.model import Model
# Import the names actually used below instead of `from network import *`,
# so readers can see where Network / NetworkType come from.
from network import Network, NetworkType
from numpy import array, load
from theano import function
from theano.tensor.sharedvar import SharedVariable
from theano.tensor import matrix, TensorType
from util import StateComputer
In [15]:
# Pickled parameters of the trained sequence generator (file name suggests a
# 3-layer LSTM with 512 units per layer — confirm against training config)
# and the saved index -> character vocabulary mapping.
MODEL_FILE = './models/seqgen_lstm_512_512_512__q0.pkl'
IX_2_CHAR_FILE = './data/lk_ix2char.npy'
In [16]:
# The .npy file stores a pickled dict inside a 0-d object array; .item()
# unwraps it.  encoding='latin1' lets numpy unpickle objects that were
# saved under Python 2.
ix2char = load(IX_2_CHAR_FILE, encoding='latin1').item()
# Rebuild the network with one input dimension per vocabulary entry and
# load the trained parameters into it.
nt = Network(NetworkType.LSTM, input_dim=len(ix2char))
nt.set_parameters(MODEL_FILE)
In [5]:
# Build the sampling graph: generate as many steps / batch entries as the
# symbolic input nt.x provides.
model = Model(nt.generator.generate(n_steps=nt.x.shape[0], batch_size=nt.x.shape[1]))
param_dict = model.get_parameter_dict()
# Shared variables holding the learned initial state / cells of each of the
# three LSTM layers.  They are mutated during generation to carry state
# across single-step calls of gen_func.
init_state_0 = param_dict['/sequencegenerator/with_fake_attention/transition/layer#0.initial_state']
init_state_1 = param_dict['/sequencegenerator/with_fake_attention/transition/layer#1.initial_state']
init_state_2 = param_dict['/sequencegenerator/with_fake_attention/transition/layer#2.initial_state']
init_cells_0 = param_dict['/sequencegenerator/with_fake_attention/transition/layer#0.initial_cells']
init_cells_1 = param_dict['/sequencegenerator/with_fake_attention/transition/layer#1.initial_cells']
init_cells_2 = param_dict['/sequencegenerator/with_fake_attention/transition/layer#2.initial_cells']
# Keep copies of the trained initial values so reset_generator() can
# restore them after a generation run overwrites the shared variables.
reset_values = {
0: (init_state_0.get_value(), init_cells_0.get_value()),
1: (init_state_1.get_value(), init_cells_1.get_value()),
2: (init_state_2.get_value(), init_cells_2.get_value())
}
# Compiled single-step sampling function; allow_input_downcast lets us feed
# plain numpy int arrays regardless of theano's configured index dtype.
gen_func = model.get_theano_function(allow_input_downcast=True)
In [6]:
# Invert the index -> character vocabulary so characters can be mapped back
# to network input indices, then build the helper that exposes the hidden
# states/cells produced while reading a sequence through the cost model.
char2ix = dict((character, index) for index, character in ix2char.items())
sc = StateComputer(nt.cost_model, char2ix)
In [7]:
def reset_generator():
    """Restore the trained initial state/cells of all three LSTM layers.

    Bug fix: the original body only *rebound local names*
    (``init_state_0 = reset_values[0][0]`` etc.), which never touched the
    shared variables — the function was a no-op and the generator kept the
    state left over from the previous run.  Mutating the shared variables
    via ``set_value`` is what the compiled function actually reads.
    """
    init_state_0.set_value(reset_values[0][0])
    init_cells_0.set_value(reset_values[0][1])
    init_state_1.set_value(reset_values[1][0])
    init_cells_1.set_value(reset_values[1][1])
    init_state_2.set_value(reset_values[2][0])
    init_cells_2.set_value(reset_values[2][1])
def generate_sequence(start, length):
    """Generate a string of ``length`` characters that begins with ``start``.

    If ``start`` has more than one character, the recurrent state is first
    warmed up by reading ``start[:-1]`` through the cost model; sampling then
    proceeds one character at a time from the last character of ``start``,
    with the sampled states/cells written back into the initial-state shared
    variables between steps.
    """
    if len(start) > 1:
        state_cell_dict = sc.read_single_sequence(start[:-1])
        # FIXME(review): all three layers are seeded from the SAME auxiliary
        # variables ('..._states' / '..._cells'), which presumably belong to
        # layer 0 only.  Blocks normally disambiguates duplicate names as
        # '..._states#1', '..._states#2' — confirm the keys returned by
        # StateComputer and seed each layer with its own state.
        init_state_0.set_value(state_cell_dict['sequencegenerator_cost_matrix_states'][-1])
        init_cells_0.set_value(state_cell_dict['sequencegenerator_cost_matrix_cells'][-1])
        init_state_1.set_value(state_cell_dict['sequencegenerator_cost_matrix_states'][-1])
        init_cells_1.set_value(state_cell_dict['sequencegenerator_cost_matrix_cells'][-1])
        init_state_2.set_value(state_cell_dict['sequencegenerator_cost_matrix_states'][-1])
        init_cells_2.set_value(state_cell_dict['sequencegenerator_cost_matrix_cells'][-1])
    seq = start
    # Shape (1, 1): one time step, batch size one — the last char of `start`.
    ix = array([[char2ix[start[-1]]]])
    # One new character per iteration, so len(seq) ends up == length.
    for i in range(length-len(start)):
        state_0, cells_0, state_1, cells_1, state_2, cells_2, ix, costs = gen_func(ix)
        # [0][0] strips the (n_steps, batch) leading axes — presumably both
        # are 1 here; carry the sampled state into the next single-step call.
        init_state_0.set_value(state_0[0][0])
        init_cells_0.set_value(cells_0[0][0])
        init_state_1.set_value(state_1[0][0])
        init_cells_1.set_value(cells_1[0][0])
        init_state_2.set_value(state_2[0][0])
        init_cells_2.set_value(cells_2[0][0])
        seq += ix2char[ix[0][0]]
    # Restore the trained initial values so the next call starts fresh.
    reset_generator()
    return seq
In [14]:
print(generate_sequence('if ', 700)) # good results for lengths around 500 - 1000
# OBSERVATION: I should have stripped comments from the training data; keeping
# them is like learning two languages at once, plus learning when each is
# appropriate. E.g. there are natural-language-like variable names / word
# sequences in the C sections, and C syntax inside comments. Basically the
# comments contaminate everything.
In [8]:
Out[8]: