In [1]:
require 'nngraph'
require 'torch'
require 'math'
require 'nn'
require 'graph'
OMP_NUM_THREADS=1 -- single thread for Torch threaded code and BLAS (an environment variable, normally set in the shell before starting torch)
cat data/input.txt | grep -v '[0-9]' | tr '[:upper:]\n' '[:lower:] ' | tr -d -c '[:digit:][:lower:]:;.?!)(, ' | tr -s " " > data/input_clean.txt && mv data/input_clean.txt data/input.txt
th prepare_data.lua -txt data/input.txt
timer: 1.0013580322266e-05
loading text file...
timer: 0.00018715858459473
creating vocabulary mapping...
timer: 0.00035405158996582
putting data into tensor...
saving two files...
Done in time (seconds): 0.0011570453643799
In [2]:
input_size = 5
rnn_size = 3
In [3]:
local inputs = {}
table.insert(inputs, nn.Identity()()) -- network input
table.insert(inputs, nn.Identity()()) -- c at time t-1
table.insert(inputs, nn.Identity()()) -- h at time t-1
local input = inputs[1]
local prev_c = inputs[2]
local prev_h = inputs[3]
local i2h = nn.Linear(input_size, 4 * rnn_size)(input) -- input to hidden
local h2h = nn.Linear(rnn_size, 4 * rnn_size)(prev_h) -- hidden to hidden
local preactivations = nn.CAddTable()({i2h, h2h}) -- i2h + h2h
-- gates
local pre_sigmoid_chunk = nn.Narrow(2, 1, 3 * rnn_size)(preactivations)
local all_gates = nn.Sigmoid()(pre_sigmoid_chunk)
-- input
local in_chunk = nn.Narrow(2, 3 * rnn_size + 1, rnn_size)(preactivations)
local in_transform = nn.Tanh()(in_chunk)
local in_gate = nn.Narrow(2, 1, rnn_size)(all_gates)
local forget_gate = nn.Narrow(2, rnn_size + 1, rnn_size)(all_gates)
local out_gate = nn.Narrow(2, 2 * rnn_size + 1, rnn_size)(all_gates)
-- previous cell state contribution
local c_forget = nn.CMulTable()({forget_gate, prev_c})
-- input contribution
local c_input = nn.CMulTable()({in_gate, in_transform})
-- next cell state
local next_c = nn.CAddTable()({
c_forget,
c_input
})
local c_transform = nn.Tanh()(next_c)
local next_h = nn.CMulTable()({out_gate, c_transform})
-- module outputs
outputs = {}
table.insert(outputs, next_c)
table.insert(outputs, next_h)
-- packs the graph into a convenient module with standard API (:forward(), :backward())
m = nn.gModule(inputs, outputs)
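A quick sanity check of the cell module m built above (a sketch; it assumes a mini-batch of a single row, with the input_size = 5 and rnn_size = 3 set in the previous cell):
x = torch.randn(1, input_size)
prev_c = torch.zeros(1, rnn_size)
prev_h = torch.zeros(1, rnn_size)
out = m:forward({x, prev_c, prev_h}) -- same order as the inputs table
print(out[1]) -- next cell state c_t, 1 x rnn_size
print(out[2]) -- next hidden state h_t, 1 x rnn_size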
In [54]:
require 'nn'
require 'nngraph'
LSTM = {}
function LSTM.create(input_size, rnn_size)
--------------------- input structure ---------------------
local inputs = {}
table.insert(inputs, nn.Identity()()) -- network input
table.insert(inputs, nn.Identity()()) -- c at time t-1
table.insert(inputs, nn.Identity()()) -- h at time t-1
local input = inputs[1]
local prev_c = inputs[2]
local prev_h = inputs[3]
--------------------- preactivations ----------------------
local i2h = nn.Linear(input_size, 4 * rnn_size)(input) -- input to hidden
local h2h = nn.Linear(rnn_size, 4 * rnn_size)(prev_h) -- hidden to hidden
local preactivations = nn.CAddTable()({i2h, h2h}) -- i2h + h2h
------------------ non-linear transforms ------------------
-- gates
local pre_sigmoid_chunk = nn.Narrow(2, 1, 3 * rnn_size)(preactivations)
local all_gates = nn.Sigmoid()(pre_sigmoid_chunk)
-- input
local in_chunk = nn.Narrow(2, 3 * rnn_size + 1, rnn_size)(preactivations)
local in_transform = nn.Tanh()(in_chunk)
---------------------- gate narrows -----------------------
local in_gate = nn.Narrow(2, 1, rnn_size)(all_gates)
local forget_gate = nn.Narrow(2, rnn_size + 1, rnn_size)(all_gates)
local out_gate = nn.Narrow(2, 2 * rnn_size + 1, rnn_size)(all_gates)
--------------------- next cell state ---------------------
local c_forget = nn.CMulTable()({forget_gate, prev_c}) -- previous cell state contribution
local c_input = nn.CMulTable()({in_gate, in_transform}) -- input contribution
local next_c = nn.CAddTable()({
c_forget,
c_input
})
-------------------- next hidden state --------------------
local c_transform = nn.Tanh()(next_c)
local next_h = nn.CMulTable()({out_gate, c_transform})
---[[ adding g for output
local lx = nn.Linear(rnn_size, input_size)(next_h)
local g = nn.SoftMax()(lx)
--]]
--------------------- output structure --------------------
local outputs = {}
table.insert(outputs, next_c)
table.insert(outputs, next_h)
---[[
table.insert(outputs, g)
--]]
-- packs the graph into a convenient module with standard API (:forward(), :backward())
return nn.gModule(inputs, outputs)
end
In [72]:
local input_size = 9
local latent_size = 5
layer = LSTM.create(input_size,latent_size)
nn_output = layer:forward({torch.randn(1,input_size), torch.randn(1,latent_size), torch.randn(1,latent_size)})
In [73]:
print(nn_output[1])
print(nn_output[2])
print(nn_output[3])
Out[73]:
In [19]:
tab={1,2,5,4,5}
for i,v in ipairs(tab) do
print(i,v)
end
Out[19]:
In [27]:
-- x = input data
-- h = latent (hidden) state
function create_g(dim_x, dim_h)
local input_h = nn.Identity()()
local lx = nn.Linear(dim_h, dim_x)(input_h)
local model_graph = nn.SoftMax()(lx)
return nn.gModule({input_h}, {model_graph})
end
function create_h(dim_x, dim_h)
local input_x = nn.Identity()()
local input_h = nn.Identity()()
local lx = nn.Linear(dim_x, dim_h)(input_x)
local lh = nn.Linear(dim_h, dim_h)(input_h)
local res = nn.CAddTable()({lx, lh})
local model_graph = nn.Tanh()(res)
return nn.gModule({input_h, input_x}, {model_graph})
end
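A minimal usage sketch for the two modules above (toy sizes and the names g_test/h_test are chosen only for illustration; note this version of create_h takes its inputs in the order {prev_h, x}):
dim_x, dim_h = 7, 4
g_test = create_g(dim_x, dim_h)
h_test = create_h(dim_x, dim_h)
print(g_test:forward(torch.randn(1, dim_h))) -- 1 x dim_x output distribution
print(h_test:forward({torch.randn(1, dim_h), torch.randn(1, dim_x)})) -- 1 x dim_h next hidden state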
In [43]:
x1 = nn.Identity()()
x2 = nn.Identity()()
a = nn.CMulTable()({x1,x2})
aa = nn.CAddTable()({a,x1})
m = nn.gModule({x1,x2},{aa})
In [44]:
m:forward({torch.Tensor{1,2},torch.Tensor{2,4}})
Out[44]:
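For reference, the expected output of the cell above is the elementwise product plus x1: {1*2 + 1, 2*4 + 2} = {3, 10}.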
In [8]:
local CharLMMinibatchLoader=require 'util.CharLMMinibatchLoader'
batch_size = 1; seq_length = 5
v=CharLMMinibatchLoader.create("data.t7","vocab.t7",batch_size, seq_length)
print(v)
print(v.x_batches[1])
print(v.y_batches[1])
Out[8]:
In [1]:
-- convert the ByteTensor batches to IntTensors
data = {}
data['x'] = {}
data['y'] = {}
print (data)
for i = 1, #v.x_batches do
data['x'][i] = v.x_batches[i]:int()
data['y'][i] = v.y_batches[i]:int()
end
-- convert a 1 x seq_length tensor of character indices into a table of one-hot vectors of dimension size_d
function from_seq_to_vecs (seq, map, size_d)
vecs = {}
for i = 1, seq:size(2) do
t = torch.Tensor(size_d):zero()
t[seq[1][i]] = 1
vecs[i] = t
end
return vecs
end
-- get the size of the character vocabulary
n=0; for k,_ in pairs(v.vocab_mapping) do n=n+1 end;
size_d = n
for i = 1, #v.x_batches do
data['x'][i] = from_seq_to_vecs(data['x'][i], v.vocab_mapping, size_d)
data['y'][i] = from_seq_to_vecs(data['y'][i], v.vocab_mapping, size_d)
end
Out[1]:
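A quick check of from_seq_to_vecs on toy values (a 1 x 3 index sequence over a 3-symbol vocabulary; the map argument is unused by the function):
toy = from_seq_to_vecs(torch.Tensor{{2, 1, 3}}, nil, 3)
print(toy[1]) -- one-hot vector 0 1 0
print(toy[3]) -- one-hot vector 0 0 1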
In [4]:
print (data['x'][1])
print (data['x'][2])
print(data['y'][1])
Out[4]:
In [5]:
--N = latent space size, n = input space size
function create_g(N,n)
local input_x = nn.Identity()()
local lx = nn.Linear(N, n)(input_x)
local model_graph = nn.SoftMax()(lx)
return nn.gModule({input_x}, {model_graph})
end
In [6]:
-- dim_x = input space size, dim_h = latent space size
function create_h(dim_x, dim_h)
local input_x = nn.Identity()()
local input_h = nn.Identity()()
local lx = nn.Linear(dim_x, dim_h)(input_x)
local lh = nn.Linear(dim_h, dim_h)(input_h)
local res = nn.CAddTable()({lx, lh})
local model_graph = nn.Tanh()(res)
return nn.gModule({input_x, input_h}, {model_graph})
end
In [7]:
function clone_many_times(net, T)
local clones = {}
local params, gradParams
if net.parameters then
params, gradParams = net:parameters()
if params == nil then
params = {}
end
end
local paramsNoGrad
if net.parametersNoGrad then
paramsNoGrad = net:parametersNoGrad()
end
local mem = torch.MemoryFile("w"):binary()
mem:writeObject(net)
for t = 1, T do
-- We need to use a new reader for each clone.
-- We don't want to use the pointers to already read objects.
local reader = torch.MemoryFile(mem:storage(), "r"):binary()
local clone = reader:readObject()
reader:close()
if net.parameters then
local cloneParams, cloneGradParams = clone:parameters()
local cloneParamsNoGrad
for i = 1, #params do
cloneParams[i]:set(params[i])
cloneGradParams[i]:set(gradParams[i])
end
if paramsNoGrad then
cloneParamsNoGrad = clone:parametersNoGrad()
for i =1,#paramsNoGrad do
cloneParamsNoGrad[i]:set(paramsNoGrad[i])
end
end
end
clones[t] = clone
collectgarbage()
end
mem:close()
return clones
end
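The clones returned above share parameter storage with the original module (each clone's tensors are :set() to the original's), so the unrolled network still trains a single set of weights. A quick check with a toy nn.Linear, purely for illustration:
lin = nn.Linear(2, 2)
copies = clone_many_times(lin, 3)
copies[1].weight:fill(0) -- writing through one clone...
print(lin.weight)        -- ...also zeroes the original: the storages are shared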
In [8]:
data['x'][1][1]:size(1)
Out[8]:
In [9]:
--model_utils=require 'model_utils'
-- create the h and g modules
n= data['x'][1][1]:size(1) -- input space size
N= 10 -- latent space size
print (n)
print (N)
local g = create_g(N, n)
local h = create_h(n, N)
graph.dot(g.fg, 'g_unit', 'g_unit')
graph.dot(h.fg, 'h_unit', 'h_unit')
T = 5 -- time steps: the window (sequence) length
local clones_g = clone_many_times(g, T)
local clones_h = clone_many_times(h, T)
function build_rnn(clones_g, clones_h)
local inputs = {nn.Identity()()} -- inputs[1] = initial hidden state h_0
local outputs = {}
local hnode
for t = 1, T do
inputs[t+1] = nn.Identity()() -- inputs[t+1] = x_t
if (t==1) then
hnode = clones_h[t]({inputs[t+1], inputs[1]}) -- h takes {x_t, prev_h}
else
hnode = clones_h[t]({inputs[t+1], hnode})
end
outputs[t] = clones_g[t](hnode) -- g maps the hidden state to an output distribution
end
return nn.gModule(inputs, outputs)
end
model = build_rnn(clones_g, clones_h)
graph.dot(model.fg, 'rnn', 'rnn')
--print (model)
Out[9]:
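A quick shape check of the unrolled model (a sketch; it reuses the globals n, N, T and model from the cell above and feeds 1-D tensors, as train_rnn does below):
ins = {torch.Tensor(N):zero()} -- ins[1] = initial hidden state h_0
for t = 1, T do ins[t+1] = torch.randn(n) end -- T dummy input vectors
outs = model:forward(ins)
print(#outs) -- T output distributions, one per time step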
In [13]:
--utils
-- append the entries of table2 after those of table1 (both array-like tables), mutating and returning table1
function concat_2tables(table1, table2)
print (table1)
len = 0; for k,_ in pairs(table1) do len=len+1 end;
for key, val in pairs(table2)do
table1[key+len] = val
end
return table1
end
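For example, with toy values:
print(concat_2tables({'h0'}, {'x1', 'x2'})) -- returns {'h0', 'x1', 'x2'}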
In [14]:
epsilon = 1e-3
loss = nn.ClassNLLCriterion()
function train_rnn(model, loss, data, epsilon)
nSeq=0; for k,_ in pairs(data['x']) do nSeq=nSeq+1 end;
print (nSeq)
--h_in = torch.Tensor(1,n):zero()
losses = nn.ParallelCriterion()
-- one NLL criterion per time step, each weighted 1/T
for i = 1, T do
losses:add(nn.ClassNLLCriterion(), 1/T)
end
for i = 1, nSeq do
model:zeroGradParameters()
h_in = torch.Tensor(N):zero()
print ("avant")
in_seq = concat_2tables({h_in}, data['x'][i])
--print (data[])
print (in_seq)
out = model:forward(in_seq)
--print (out)
-- TODO: g ends in a SoftMax, so out holds probabilities, while ClassNLLCriterion expects log-probabilities (and integer class targets rather than one-hot vectors)
print ("after forward")
err = losses:forward(out, data['y'][i]) -- error occurs here
delta = losses:backward(out, data['y'][i])
model:backward(in_seq, delta)
model:updateParameters(epsilon)
end
end
train_rnn(model, loss, data, epsilon)
In [12]:
function generate_seq(init_seq, model, I)
res = {}
-- put init_seq into res
-- TODO: convert init_seq here; for now a pre-transformed vector is assumed
input = init_seq -- transformed
for i = 1, I do
-- TODO: prepend the hidden-state vector here, as in train_rnn
h_in = torch.Tensor(N):zero()
output = model:forward(input)
-- then feed the output back in as the next input
input = output
res[i] = output[#output] -- keep the last element
end
return res
end