In [1]:
require 'nngraph'
require 'torch'
require 'math'
require 'nn'
require 'graph'
OMP_NUM_THREADS = 1 -- limit the OpenMP threads used by Torch's threaded code and BLAS (to actually take effect this must be exported as a shell environment variable before launching torch)

Loading the data

# clean the corpus; write to a temporary file first (the name is arbitrary), since redirecting back into data/input.txt while reading it would truncate it
grep -v '[0-9]' data/input.txt | tr '[:upper:]\n' '[:lower:] ' | tr -d -c '[:digit:][:lower:]:;.?!)(, ' | tr -s " " > data/input_clean.txt && mv data/input_clean.txt data/input.txt

magma@locust-ThinkPad-X1-Carbon:~/projects/m2/as/tme6/as| ⇒ th prepare_data.lua -txt data/input.txt
timer: 1.0013580322266e-05	loading text file...
timer: 0.00018715858459473	creating vocabulary mapping...
timer: 0.00035405158996582	putting data into tensor...
saving two files...
Done in time (seconds): 0.0011570453643799


In [2]:
input_size = 5
rnn_size = 3

In [3]:
local inputs = {}
table.insert(inputs, nn.Identity()())   -- network input
table.insert(inputs, nn.Identity()())   -- c at time t-1
table.insert(inputs, nn.Identity()())   -- h at time t-1
local input = inputs[1]
local prev_c = inputs[2]
local prev_h = inputs[3]


local i2h = nn.Linear(input_size, 4 * rnn_size)(input)  -- input to hidden
local h2h = nn.Linear(rnn_size, 4 * rnn_size)(prev_h)   -- hidden to hidden
local preactivations = nn.CAddTable()({i2h, h2h})       -- i2h + h2h


-- gates
local pre_sigmoid_chunk = nn.Narrow(2, 1, 3 * rnn_size)(preactivations)
local all_gates = nn.Sigmoid()(pre_sigmoid_chunk)

-- input
local in_chunk = nn.Narrow(2, 3 * rnn_size + 1, rnn_size)(preactivations)
local in_transform = nn.Tanh()(in_chunk)


-- split the sigmoid chunk into the input, forget and output gates
local in_gate = nn.Narrow(2, 1, rnn_size)(all_gates)
local forget_gate = nn.Narrow(2, rnn_size + 1, rnn_size)(all_gates)
local out_gate = nn.Narrow(2, 2 * rnn_size + 1, rnn_size)(all_gates)


-- previous cell state contribution
local c_forget = nn.CMulTable()({forget_gate, prev_c})
-- input contribution
local c_input = nn.CMulTable()({in_gate, in_transform})
-- next cell state
local next_c = nn.CAddTable()({
  c_forget,
  c_input
})


-- next hidden state
local c_transform = nn.Tanh()(next_c)
local next_h = nn.CMulTable()({out_gate, c_transform})


-- module outputs
outputs = {}
table.insert(outputs, next_c)
table.insert(outputs, next_h)

-- packs the graph into a convenient module with standard API (:forward(), :backward())
m =  nn.gModule(inputs, outputs)
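
A quick way to exercise the packed module (a minimal sketch, assuming the input_size = 5 and rnn_size = 3 defined above): feed it a random input together with zero initial cell and hidden states, and inspect the two outputs.

x0 = torch.randn(1, input_size)           -- dummy input at time t
c0 = torch.zeros(1, rnn_size)             -- cell state at time t-1
h0 = torch.zeros(1, rnn_size)             -- hidden state at time t-1
states = m:forward({x0, c0, h0})
print(states[1])                          -- next cell state c_t (1 x rnn_size)
print(states[2])                          -- next hidden state h_t (1 x rnn_size)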

In [54]:
require 'nn'
require 'nngraph'

LSTM = {}

function LSTM.create(input_size, rnn_size)
  --------------------- input structure ---------------------
  local inputs = {}
  table.insert(inputs, nn.Identity()())   -- network input
  table.insert(inputs, nn.Identity()())   -- c at time t-1
  table.insert(inputs, nn.Identity()())   -- h at time t-1
  local input = inputs[1]
  local prev_c = inputs[2]
  local prev_h = inputs[3]

  --------------------- preactivations ----------------------
  local i2h = nn.Linear(input_size, 4 * rnn_size)(input)   -- input to hidden
  local h2h = nn.Linear(rnn_size, 4 * rnn_size)(prev_h)    -- hidden to hidden
  local preactivations = nn.CAddTable()({i2h, h2h})        -- i2h + h2h

  ------------------ non-linear transforms ------------------
  -- gates
  local pre_sigmoid_chunk = nn.Narrow(2, 1, 3 * rnn_size)(preactivations)
  local all_gates = nn.Sigmoid()(pre_sigmoid_chunk)

  -- input
  local in_chunk = nn.Narrow(2, 3 * rnn_size + 1, rnn_size)(preactivations)
  local in_transform = nn.Tanh()(in_chunk)

  ---------------------- gate narrows -----------------------
  local in_gate = nn.Narrow(2, 1, rnn_size)(all_gates)
  local forget_gate = nn.Narrow(2, rnn_size + 1, rnn_size)(all_gates)
  local out_gate = nn.Narrow(2, 2 * rnn_size + 1, rnn_size)(all_gates)

  --------------------- next cell state ---------------------
  local c_forget = nn.CMulTable()({forget_gate, prev_c})  -- previous cell state contribution
  local c_input = nn.CMulTable()({in_gate, in_transform}) -- input contribution
  local next_c = nn.CAddTable()({
    c_forget,
    c_input
  })

  -------------------- next hidden state --------------------
  local c_transform = nn.Tanh()(next_c)
  local next_h = nn.CMulTable()({out_gate, c_transform})
    
  ---[[ adding g for output
  local lx = nn.Linear(rnn_size, input_size)(next_h)
  local g = nn.SoftMax()(lx)
  --]]

  --------------------- output structure --------------------
  outputs = {}
  table.insert(outputs, next_c)
  table.insert(outputs, next_h)
  ---[[
  table.insert(outputs, g)
  --]]
    
    
  -- packs the graph into a convenient module with standard API (:forward(), :backward())
  return nn.gModule(inputs, outputs)
end

In [72]:
local input_size = 9
local latent_size = 5
layer = LSTM.create(input_size,latent_size)
nn_output = layer:forward({torch.randn(1,input_size), torch.randn(1,latent_size), torch.randn(1,latent_size)})

In [73]:
print(nn_output[1])
print(nn_output[2])
print(nn_output[3])


Out[73]:
 0.1401  0.1003 -0.2103  1.2222  0.2659
[torch.DoubleTensor of size 1x5]

 0.0491  0.0778 -0.1089  0.7205  0.0457
[torch.DoubleTensor of size 1x5]

 0.1521  0.1253  0.0605  0.1003  0.0959  0.1637  0.0960  0.1277  0.0786
[torch.DoubleTensor of size 1x9]


In [19]:
tab={1,2,5,4,5}
for i,v in ipairs(tab) do
    print(i,v)
end


Out[19]:
1	1	
2	2	
3	5	
4	4	
5	5	

In [27]:
-- h = latent state
-- x = input data
function create_g(dim_x, dim_h)
    local input_h = nn.Identity()()
    local lx = nn.Linear(dim_h, dim_x)(input_h)
    local model_graph = nn.SoftMax()(lx)
    return nn.gModule({input_h}, {model_graph})
end

function create_h(dim_x, dim_h)
    local input_x = nn.Identity()()
    local input_h = nn.Identity()()

    local lx = nn.Linear(dim_x, dim_h)(input_x)
    local lh = nn.Linear(dim_h, dim_h)(input_h)

    local res = nn.CAddTable()({lx, lh})
    local model_graph = nn.Tanh()(res)

    return nn.gModule({input_h, input_x}, {model_graph})
end

In [43]:
x1 = nn.Identity()()
x2 = nn.Identity()()
a = nn.CMulTable()({x1,x2})
aa = nn.CAddTable()({a,x1})
m = nn.gModule({x1,x2},{aa})

In [44]:
m:forward({torch.Tensor{1,2},torch.Tensor{2,4}})


Out[44]:
  3
 10
[torch.DoubleTensor of size 2]


In [8]:
local CharLMMinibatchLoader=require 'util.CharLMMinibatchLoader'

batch_size = 1; seq_length = 5
v=CharLMMinibatchLoader.create("data.t7","vocab.t7",batch_size, seq_length)
print(v)
print(v.x_batches[1])
print(v.y_batches[1])


cannot open <data.t7> in mode r  at /Users/meat/torch/pkg/torch/lib/TH/THDiskFile.c:484
stack traceback:
	[C]: at 0x0ffb6380
	[C]: in function 'DiskFile'
	/Users/meat/torch/install/share/lua/5.1/torch/File.lua:309: in function 'load'
	./util/CharLMMinibatchLoader.lua:15: in function 'create'
	[string "local CharLMMinibatchLoader=require 'util.Cha..."]:4: in main chunk
	[C]: in function 'xpcall'
	/Users/meat/torch/install/share/lua/5.1/itorch/main.lua:179: in function </Users/meat/torch/install/share/lua/5.1/itorch/main.lua:143>
	/Users/meat/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	/Users/meat/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	/Users/meat/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	/Users/meat/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	/Users/meat/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	[string "arg={'/Users/meat/Library/Jupyter/runtime/ker..."]:1: in main chunk
Out[8]:
loading data files...	

Converting the byte tensors and one-hot encoding the characters


In [1]:
-- convert the loader's ByteTensors to IntTensors
data = {}
data['x'] = {}
data['y'] = {}
print (data)
for i = 1, #v.x_batches do
    data['x'][i] = v.x_batches[i]:int()
    data['y'][i] = v.y_batches[i]:int()
end

-- turn a 1 x seq_length tensor of character indices into a table of one-hot vectors of size size_d
function from_seq_to_vecs (seq, map, size_d)
    vecs = {}
    for i = 1, seq:size(2) do
        t = torch.Tensor(size_d):zero()
        t[seq[1][i]] = 1
        vecs[i] = t
    end
    return vecs
end

-- size of the dictionary (number of distinct characters in the vocabulary)
n=0; for k,_ in pairs(v.vocab_mapping) do n=n+1 end;
size_d = n
for i = 1, #v.x_batches do
    data['x'][i] = from_seq_to_vecs(data['x'][i], v.vocab_mapping, size_d)
    data['y'][i] = from_seq_to_vecs(data['y'][i], v.vocab_mapping, size_d)
end


Out[1]:
{
  y : table: 0x4054f1e8
  x : table: 0x40f895f8
}
[string "#convert bytensors to float..."]:6: attempt to index global 'v' (a nil value)
stack traceback:
	[string "#convert bytensors to float..."]:6: in main chunk
	[C]: in function 'xpcall'
	/home/magma/bin/torch/install/share/lua/5.1/itorch/main.lua:179: in function </home/magma/bin/torch/install/share/lua/5.1/itorch/main.lua:143>
	/home/magma/bin/torch/install/share/lua/5.1/lzmq/poller.lua:75: in function 'poll'
	...magma/bin/torch/install/share/lua/5.1/lzmq/impl/loop.lua:307: in function 'poll'
	...magma/bin/torch/install/share/lua/5.1/lzmq/impl/loop.lua:325: in function 'sleep_ex'
	...magma/bin/torch/install/share/lua/5.1/lzmq/impl/loop.lua:370: in function 'start'
	/home/magma/bin/torch/install/share/lua/5.1/itorch/main.lua:350: in main chunk
	[C]: in function 'require'
	(command line):1: in main chunk
	[C]: at 0x00405ab0

In [4]:
print (data['x'][1])

print (data['x'][2])

print(data['y'][1])


Out[4]:
{
  1 : DoubleTensor - size: 25
  2 : DoubleTensor - size: 25
  3 : DoubleTensor - size: 25
  4 : DoubleTensor - size: 25
  5 : DoubleTensor - size: 25
}
{
  1 : DoubleTensor - size: 25
  2 : DoubleTensor - size: 25
  3 : DoubleTensor - size: 25
  4 : DoubleTensor - size: 25
  5 : DoubleTensor - size: 25
}
{
  1 : DoubleTensor - size: 25
  2 : DoubleTensor - size: 25
  3 : DoubleTensor - size: 25
  4 : DoubleTensor - size: 25
  5 : DoubleTensor - size: 25
}
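
To go back from these one-hot vectors to characters (which will be needed later for generation), the loader's vocabulary can be inverted. A minimal sketch, assuming v.vocab_mapping maps each character to its integer index as in CharLMMinibatchLoader, and that v and data are still in scope:

ivocab = {}
for char, idx in pairs(v.vocab_mapping) do ivocab[idx] = char end
_, argmax = data['x'][1][1]:max(1)        -- position of the 1 in the one-hot vector
print(ivocab[argmax[1]])                  -- the corresponding character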

Definition of the function g

To begin, considering a latent space of dimension N and an input space of dimension n, define as a module (using nngraph) the function g_τ in the form of a linear function followed by a softmax.


In [5]:
-- N = latent space dimension, n = input space dimension
function create_g(N,n)
    local input_x = nn.Identity()()
    local lx = nn.Linear(N, n)(input_x)
    local model_graph = nn.SoftMax()(lx)
    return nn.gModule({input_x}, {model_graph})
end
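
A minimal sanity check (a sketch, using N = 10 and n = 25, the sizes used further down): the output of g should be a probability distribution over the n symbols.

g_test = create_g(10, 25)
p = g_test:forward(torch.randn(10))
print(p:size())                           -- 25
print(p:sum())                            -- 1 (softmax output)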

Definition of the function h

Define as a module the function h_θ of the form

    h_t = tanh(Θ_d h_{t-1} + Θ_i w_{t-1})        (2)

where w_{t-1} is the zero vector with a single value 1 at the position of the element read by the network (a one-hot encoding).


In [6]:
-- dim_x = input dimension (n), dim_h = latent dimension (N)
function create_h(dim_x, dim_h)
    local input_x = nn.Identity()()
    local input_h = nn.Identity()()

    local lx = nn.Linear(dim_x, dim_h)(input_x)
    local lh = nn.Linear(dim_h, dim_h)(input_h)
    
    local res = nn.CAddTable()({lx, lh})    
    local model_graph = nn.Tanh()(res)
    
    return nn.gModule({input_x, input_h}, {model_graph})
end
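
As a sketch (same sizes as above), one step of h_θ with a one-hot w_{t-1} and a zero previous state, matching equation (2):

h_test = create_h(25, 10)
w = torch.zeros(25); w[3] = 1             -- w_{t-1}: one-hot encoding of the character read
h_prev = torch.zeros(10)                  -- h_{t-1}
h_t = h_test:forward({w, h_prev})
print(h_t:size())                         -- 10-dimensional hidden state, values in (-1, 1)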

Cloning the modules over time


In [7]:
function clone_many_times(net, T)
    local clones = {}

    local params, gradParams
    if net.parameters then
        params, gradParams = net:parameters()
        if params == nil then
            params = {}
        end
    end

    local paramsNoGrad
    if net.parametersNoGrad then
        paramsNoGrad = net:parametersNoGrad()
    end

    local mem = torch.MemoryFile("w"):binary()
    mem:writeObject(net)

    for t = 1, T do
        -- We need to use a new reader for each clone.
        -- We don't want to use the pointers to already read objects.
        local reader = torch.MemoryFile(mem:storage(), "r"):binary()
        local clone = reader:readObject()
        reader:close()

        if net.parameters then
            local cloneParams, cloneGradParams = clone:parameters()
            local cloneParamsNoGrad
            for i = 1, #params do
                cloneParams[i]:set(params[i])
                cloneGradParams[i]:set(gradParams[i])
            end
            if paramsNoGrad then
                cloneParamsNoGrad = clone:parametersNoGrad()
                for i =1,#paramsNoGrad do
                    cloneParamsNoGrad[i]:set(paramsNoGrad[i])
                end
            end
        end

        clones[t] = clone
        collectgarbage()
    end

    mem:close()
    return clones
end
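
The clones returned by clone_many_times share their parameter (and gradient) storages with the original network, so backpropagation through time accumulates every time step's gradients into the same tensors. A quick check (a sketch on a small hypothetical module):

lin = nn.Linear(3, 2)
lin_clones = clone_many_times(lin, 2)
lin.weight:fill(0.5)                      -- modify the original parameters
print(lin_clones[1].weight[1][1], lin_clones[2].weight[1][1])   -- both print 0.5: storage is shared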

In [8]:
data['x'][1][1]:size(1)


Out[8]:
25	

In [9]:
--model_utils=require 'model_utils'

-- create the h and g modules
n = data['x'][1][1]:size(1) -- input space dimension (size of the vocabulary)
N = 10                      -- latent space dimension

print (n)
print (N)
local g = create_g(N, n)
local h = create_h(n, N)

graph.dot(g.fg, 'g_unit', 'g_unit')
graph.dot(h.fg, 'h_unit', 'h_unit')

T = 5 -- time steps: length of the window over the sequence

local clones_g = clone_many_times(g, T)
local clones_h = clone_many_times(h, T)

function build_rnn(clones_g, clones_h)
    local inputs = {nn.Identity()()}     -- inputs[1] : initial hidden state h_0
    local outputs = {}
    local hnode
    for t = 1, T do
        inputs[t+1] = nn.Identity()()    -- inputs[t+1] : one-hot input w_t
        -- h takes the current input and the previous hidden state...
        if (t==1) then
            hnode = clones_h[t]({inputs[t+1], inputs[1]})
        else
            hnode = clones_h[t]({inputs[t+1], hnode})
        end
        -- ...and g decodes the hidden state into a distribution over the vocabulary
        outputs[t] = clones_g[t](hnode)
    end

    return nn.gModule(inputs, outputs)
end
model = build_rnn(clones_g, clones_h)

graph.dot(model.fg, 'rnn', 'rnn')

--print (model)


Out[9]:
25	
10	
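
Before training, the unrolled model can be checked on dummy data (a sketch, assuming model, n, N and T from the cell above are still in scope): it takes h_0 plus T one-hot vectors and returns T distributions over the vocabulary.

h0 = torch.zeros(N)
dummy = {h0}
for t = 1, T do
    local w = torch.zeros(n); w[torch.random(n)] = 1   -- random one-hot character
    dummy[t+1] = w
end
outs = model:forward(dummy)
print(#outs, outs[1]:size())              -- T outputs, each of size n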

Train


In [13]:
--utils

-- append the elements of table2 after those of table1 (both are array-like tables)
function concat_2tables(table1, table2)
    local len = #table1
    for i, val in ipairs(table2) do
        table1[len + i] = val
    end
    return table1
end

In [14]:
epsilon = 1e-3
loss = nn.ClassNLLCriterion()
function train_rnn(model, loss, data, epsilon)
    local nSeq = 0; for k,_ in pairs(data['x']) do nSeq = nSeq + 1 end
    print (nSeq)

    -- one criterion per time step, each weighted by 1/T
    local losses = nn.ParallelCriterion()
    for t = 1, T do
        losses:add(nn.ClassNLLCriterion(), 1/T)
    end

    for i = 1, nSeq do
        model:zeroGradParameters()
        local h_in = torch.Tensor(N):zero()               -- initial hidden state h_0

        local in_seq = concat_2tables({h_in}, data['x'][i])
        local out = model:forward(in_seq)
        -- NB: ClassNLLCriterion expects log-probabilities and integer class targets;
        -- the model outputs softmax probabilities and data['y'][i] holds one-hot vectors,
        -- so both still have to be converted before this criterion can be applied
        local err = losses:forward(out, data['y'][i])
        local delta = losses:backward(out, data['y'][i])
        model:backward(in_seq, delta)
        model:updateParameters(epsilon)
    end
end
train_rnn(model, loss, data, epsilon)

In [12]:
function generate_seq(init_seq, model, I)
    local res = {}
    -- TODO: copy init_seq into res
    -- TODO: convert init_seq (characters) into one-hot vectors here;
    -- for now it is passed through as-is
    local input = init_seq
    for i = 1, I do
        -- TODO: prepend the initial hidden vector h_0 to the input here
        local h_in = torch.Tensor(N):zero()
        local output = model:forward(input)

        -- then feed the output back in as the next input
        input = output
        res[i] = output[#output]   -- keep the last element (prediction for the next character)
    end
    return res
end
