In [ ]:
from pycnn import *
## ==== Create a new computation graph
# (it is a singleton: there is only one at any point in time.
# renew_cg() clears the current one and starts a new one)
renew_cg()
## ==== Creating Expressions from user input / constants.
x = scalarInput(value)
v = vecInput(dimension)
v.set([1,2,3])
z = matInput(dim1, dim2)
# for example:
z1 = matInput(2, 2)
z1.set([1,2,3,4])
## ==== We can take the value of an expression.
# For complex expressions, this will run forward propagation.
print z.value()
print z.npvalue() # as numpy array
print v.vec_value() # as vector, if vector
print x.scalar_value() # as scalar, if scalar
print x.value() # choose the correct one
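# For example (a sketch, assuming v was created with dimension 3 and set to [1,2,3] above):
e_comp = tanh(v)          # a composite expression over v
print e_comp.npvalue()    # calling value()/npvalue() runs forward propagation through tanh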
## ==== Parameters
# Parameters are things we tune during training.
# Usually a matrix or a vector.
# First we create a model and add the parameters to it.
m = Model()
pW = m.add_parameters("W", (8,8)) # an 8x8 matrix
pb = m.add_parameters("b", 8)
# then we create an Expression out of the model's parameters
W = parameter(pW) # or W = parameter(m["W"])
b = parameter(pb)
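# For example (a sketch, assuming a hypothetical 8-dimensional input to match W and b):
x8 = vecInput(8)
x8.set([1,2,3,4,5,6,7,8])
e_affine = W * x8 + b    # an affine layer built from the parameter expressions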
## ===== Lookup parameters
# Similar to parameters, but represent a "lookup table"
# that maps numbers to vectors.
# These are used for embedding matrices.
# for example, this will have VOCAB_SIZE rows, each of DIM dimensions.
lp = m.add_lookup_parameters("lookup", (VOCAB_SIZE, DIM))
# lookup parameters can be initialized from an existing array, i.e:
# m["lookup"].init_from_array(wv)
e5 = lookup(lp, 5) # create an Expression from row 5.
e5c = lookup(lp, 5, update=False) # as before, but don't update when optimizing.
e5.set(9) # now the e5 expression contains row 9
e5c.set(9) # ditto
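# For example (a sketch, assuming word ids 1, 2, 3 exist in the lookup table):
sent = [1, 2, 3]
sent_embs = [lookup(lp, w) for w in sent]   # one DIM-dimensional Expression per word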
## ===== Combine expressions into complex expressions.
# Math
e = e1 + e2
e = e1 * e2 # for vectors/matrices: matrix multiplication (like e1.dot(e2) in numpy)
e = e1 - e2
e = -e1
e = dot_product(e1, e2)
e = cwise_multiply(e1, e2) # component-wise multiply (like e1*e2 in numpy)
e = cdiv(e1, e2) # component-wise divide
e = colwise_add(e1, e2) # column-wise addition (add vector e2 to each column of matrix e1)
# Matrix Shapes
e = reshape(e1, new_dimension)
e = transpose(e1)
# Per-element unary functions.
e = tanh(e1)
e = exp(e1)
e = log(e1)
e = logistic(e1) # Sigmoid(x)
e = rectify(e1) # Relu (= max(x,0))
e = softsign(e1) # x/(1+|x|)
# softmaxes
e = softmax(e1)
e = log_softmax(e1, restrict=[]) # restrict is a set of indices.
# if not empty, only entries in restrict are part
# of softmax computation, others get 0.
e = sum_cols(e1)
# Picking values from vector expressions
e = pick(e1, k) # k is unsigned integer, e1 is vector. return e1[k]
e = e1[k] # same
e = pickrange(e1, k, v) # like python's e1[k:v] for lists. e1 is an Expression, k,v integers.
e = e1[k:v] # same
e = pickneglogsoftmax(e1, k) # k is unsigned integer. equiv to: (pick(-log(softmax(e1)), k))
# Neural net stuff
e = noise(e1, stddev) # add noise to each element, drawn from a Gaussian with standard deviation stddev
e = dropout(e1, p) # apply dropout with probability p
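# For example (a sketch, reusing the hypothetical x8, W, b defined above):
h = tanh(W * x8 + b)       # a hidden layer
h_drop = dropout(h, 0.5)   # at training time, drop each element with probability 0.5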
# functions over lists of expressions
e = esum([e1, e2, ...]) # sum
e = average([e1, e2, ...]) # average
e = concatenate_cols([e1, e2, ...]) # e1, e2, ... are column vectors. returns a matrix (similar to np.hstack([e1, e2, ...]))
e = concatenate([e1, e2, ...]) # concatenate
e = affine_transform([e0,e1,e2, ...]) # e = e0 + ((e1*e2) + (e3*e4) ...)
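# For example (a sketch, reusing the hypothetical b, W, x8 from above):
e_fused = affine_transform([b, W, x8])   # same result as b + (W * x8)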
## Loss functions
e = squared_distance(e1, e2)
e = l1_distance(e1, e2)
e = huber_distance(e1, e2, c=1.345)
# e1 must be a scalar with a value between 0 and 1
# e2 (ty) must be a scalar with a value between 0 and 1
# e = -(ty * log(e1) + (1 - ty) * log(1 - e1))
e = binary_log_loss(e1, e2)
# e1 is row vector or scalar
# e2 is row vector or scalar
# m is number
# e = max(0, m - (e1 - e2))
e = pairwise_rank_loss(e1, e2, m=1.0)
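# For example (a sketch, assuming the scalar input x from above holds a real-valued score):
y_pred = logistic(x)        # prediction squashed into (0, 1)
ty = scalarInput(1.0)       # gold label, 0 or 1
e_bll = binary_log_loss(y_pred, ty)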
# Convolutions TODO
e = conv1d_narrow(e1, e2) #
e = conv1d_wide(e1, e2) #
e = kmax_pooling(e1, k) # kmax-pooling operation (Kalchbrenner et al 2014)
e = kmh_ngram(e1, k) #
e = fold_rows(e1, nrows=2) #
In [1]:
from pycnn import *
import numpy as np
# create model
m = Model()
# add parameters to model
m.add_parameters("W", (10,30))
m.add_parameters("b", 10)
m.add_lookup_parameters("lookup", (500, 10))
print "added"
# create trainer
trainer = SimpleSGDTrainer(m)
# L2 regularization and learning rate parameters can be passed to the trainer:
# alpha = 0.1    # learning rate
# lam = 0.0001   # regularization strength ("lambda" is a reserved word in Python)
# trainer = SimpleSGDTrainer(m, lam=lam, e0=alpha)
# function for graph creation
def create_network_return_loss(model, inputs, expected_output):
    """
    inputs is a list of numbers
    """
    renew_cg()
    W = parameter(model["W"])
    b = parameter(model["b"])
    lookup = model["lookup"]
    emb_vectors = [lookup[i] for i in inputs]
    net_input = concatenate(emb_vectors)
    net_output = softmax((W * net_input) + b)
    loss = -log(pick(net_output, expected_output))
    return loss
# function for prediction
def create_network_return_best(model, inputs):
    """
    inputs is a list of numbers
    """
    renew_cg()
    W = parameter(model["W"])
    b = parameter(model["b"])
    lookup = model["lookup"]
    emb_vectors = [lookup[i] for i in inputs]
    net_input = concatenate(emb_vectors)
    net_output = softmax((W * net_input) + b)
    return np.argmax(net_output.npvalue())  # convert to a numpy array before taking the argmax
# train network
for inp, lbl in ( ([1,2,3],1), ([3,2,4],2) ):
    print inp, lbl
    loss = create_network_return_loss(m, inp, lbl)
    print loss.value()  # need to run loss.value() for the forward prop
    loss.backward()
    trainer.update()
print create_network_return_best(m, [1,2,3])
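# A fuller training loop is a straightforward extension (a sketch; the list of
# (inputs, label) pairs below is made up for illustration):
train_data = [([1,2,3],1), ([3,2,4],2)]
for epoch in range(10):
    epoch_loss = 0.0
    for inp, lbl in train_data:
        loss = create_network_return_loss(m, inp, lbl)
        epoch_loss += loss.value()   # forward
        loss.backward()              # backward
        trainer.update()             # update parameters
    print "epoch %d loss: %.4f" % (epoch, epoch_loss)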