Expression building

(note: parts of this may use an older version of the API)


In [ ]:
from pycnn import *

## ==== Create a new computation graph
# (it is a singleton; there is only one at any given time.
# renew_cg() clears the current one and starts anew)
renew_cg()

## ==== Creating Expressions from user input / constants.
x = scalarInput(value)

v = vecInput(dimension)
v.set([1,2,3])

z = matInput(dim1, dim2)

# for example:
z1 = matInput(2, 2)
z1.set([1,2,3,4])

## ==== We can take the value of an expression. 
# For complex expressions, this will run forward propagation.
print z.value()    
print z.npvalue()      # as numpy array
print v.vec_value()    # as vector, if vector
print x.scalar_value() # as scalar, if scalar
print x.value()        # choose the correct one
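
# For example, reading values back out (a minimal sketch; vv is a new name
# introduced here, z1 was defined above):
vv = vecInput(3)
vv.set([1, 2, 3])
print vv.vec_value()   # expected: [1.0, 2.0, 3.0]
print z1.npvalue()     # the 2x2 matrix set above, as a numpy array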

## ==== Parameters
# Parameters are things we tune during training.
# Usually a matrix or a vector.

# First we create a model and add the parameters to it.
m = Model() 
pW = m.add_parameters("W", (8,8)) # an 8x8 matrix
pb = m.add_parameters("b", 8)

# then we create an Expression out of the model's parameters
W = parameter(pW) # or W = parameter(m["W"])
b = parameter(pb)
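
# For example, a single layer built from these parameter expressions
# (a minimal sketch; x8 is a new 8-dimensional input introduced here,
# matching the 8x8 W and 8-dimensional b above):
x8 = vecInput(8)
x8.set([1, 2, 3, 4, 5, 6, 7, 8])
y8 = tanh((W * x8) + b)   # an 8-dimensional expression
print y8.npvalue()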

## ===== Lookup parameters
# Similar to parameters, but represent a "lookup table"
# that maps numbers to vectors.
# These are used for embedding matrices.
# For example, this will have VOCAB_SIZE rows, each of DIM dimensions.
lp  = m.add_lookup_parameters("lookup", (VOCAB_SIZE, DIM))

# lookup parameters can be initialized from an existing array, i.e:
# m["lookup"].init_from_array(wv)

e5  = lookup(lp, 5)   # create an Expression from row 5.
e5c = lookup(lp, 5, update=False)  # as before, but don't update when optimizing.
e5.set(9)   # now the e5 expression contains row 9
e5c.set(9)  # ditto
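
# For example, embedding a short sequence of ids and averaging them
# (a sketch; assumes VOCAB_SIZE and DIM were given concrete values above
# and that the ids are smaller than VOCAB_SIZE):
word_ids = [1, 2, 3]
embs = [lookup(lp, i) for i in word_ids]
sent = average(embs)   # a single DIM-dimensional expression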

## ===== Combine expressions into complex expressions.

# Math 
e = e1 + e2   
e = e1 * e2   # for vectors/matrices: matrix multiplication (like e1.dot(e2) in numpy)
e = e1 - e2    
e = -e1 

e = dot_product(e1, e2)
e = cwise_multiply(e1, e2)  # component-wise multiply (like e1*e2 in numpy)
e = cdiv(e1, e2)            # component-wise divide
e = colwise_add(e1, e2)     # column-wise addition
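
# For example (a sketch with two concrete 3-vectors introduced here):
u1 = vecInput(3); u1.set([1, 2, 3])
u2 = vecInput(3); u2.set([4, 5, 6])
print dot_product(u1, u2).value()       # expected: 32.0
print cwise_multiply(u1, u2).npvalue()  # expected: 4, 10, 18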

# Matrix Shapes
e = reshape(e1, new_dimension)
e = transpose(e1)

# Per-element unary functions.
e = tanh(e1)      
e = exp(e1)
e = log(e1)
e = logistic(e1)   # Sigmoid(x)
e = rectify(e1)    # Relu (= max(x,0))
e = softsign(e1)    # x/(1+|x|)

# softmaxes
e = softmax(e1)
e = log_softmax(e1, restrict=[]) # restrict is a set of indices. 
                                 # if not empty, only entries in restrict are part 
                                 # of softmax computation, others get 0.


e = sum_cols(e1)


# Picking values from vector expressions
e = pick(e1, k)              # k is unsigned integer, e1 is vector. return e1[k]
e = e1[k]                    # same

e = pickrange(e1, k, v)      # like python's e1[k:v] for lists. e1 is an Expression, k,v integers.
e = e1[k:v]                  # same

e = pickneglogsoftmax(e1, k) # k is unsigned integer. equiv to: (pick(-log(softmax(e1)), k))
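
# For example, these two losses should be equivalent
# (a sketch; scores is a new 3-dimensional vector introduced here):
scores = vecInput(3)
scores.set([0.1, 0.2, 0.7])
loss_a = -log(pick(softmax(scores), 1))
loss_b = pickneglogsoftmax(scores, 1)
print loss_a.value(), loss_b.value()   # the two numbers should match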
                             

# Neural net stuff
noise(e1, stddev) # add noise to each element, drawn from a Gaussian with standard deviation stddev
dropout(e1, p)    # apply dropout with probability p 
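
# For example, dropout is typically applied to a hidden layer during training
# (a sketch reusing W, b and x8 from above; at prediction time one would
# typically skip the dropout call):
h = tanh((W * x8) + b)
h_drop = dropout(h, 0.5)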

# functions over lists of expressions
e = esum([e1, e2, ...])            # sum
e = average([e1, e2, ...])         # average
e = concatenate_cols([e1, e2, ...])  # e1, e2,.. are column vectors. returns a matrix. (similar to np.hstack([e1,e2,...]))
e = concatenate([e1, e2, ...])     # concatenate

e = affine_transform([e0,e1,e2, ...])  # e = e0 + ((e1*e2) + (e3*e4) ...) 
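
# For example, the familiar W*x + b can be written as an affine_transform
# (a sketch reusing W, b and x8 from above):
y_a = affine_transform([b, W, x8])
y_b = (W * x8) + b     # should give the same result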

## Loss functions
e = squared_distance(e1, e2)
e = l1_distance(e1, e2)
e = huber_distance(e1, e2, c=1.345)

# e1 must be a scalar that is a value between 0 and 1
# e2 (ty) must be a scalar that is a value between 0 and 1
# e = -(ty * log(e1) + (1 - ty) * log(1 - e1))
e = binary_log_loss(e1, e2)

# e1 is row vector or scalar
# e2 is row vector or scalar
# m is number
# e = max(0, m - (e1 - e2))
e = pairwise_rank_loss(e1, e2, m=1.0) 
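
# For example (a sketch with two concrete 3-vectors introduced here):
p1 = vecInput(3); p1.set([1, 2, 3])
p2 = vecInput(3); p2.set([1, 2, 5])
print squared_distance(p1, p2).value()   # expected: 4.0 (sum of squared differences)
print l1_distance(p1, p2).value()        # expected: 2.0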

# Convolutions    TODO
e = conv1d_narrow(e1, e2) #
e = conv1d_wide(e1, e2)   #
e = kmax_pooling(e1, k) #  kmax-pooling operation (Kalchbrenner et al 2014)
e = kmh_ngram(e1, k) # 
e = fold_rows(e1, nrows=2) #

Recipe


In [1]:
from pycnn import *
import numpy as np

# create model
m = Model()

# add parameters to model
m.add_parameters("W", (10,30))
m.add_parameters("b", 10)
m.add_lookup_parameters("lookup", (500, 10))
print "added"

# create trainer 
trainer = SimpleSGDTrainer(m)

# L2 regularization and learning rate parameters can be passed to the trainer:
# alpha = 0.1     # learning rate
# lam = 0.0001    # regularization strength ("lambda" is a reserved word in Python)
# trainer = SimpleSGDTrainer(m, lam=lam, e0=alpha)

# function for graph creation
def create_network_return_loss(model, inputs, expected_output):
    """
    inputs is a list of numbers
    """
    renew_cg()
    W = parameter(model["W"])
    b = parameter(model["b"])
    lookup = model["lookup"]
    emb_vectors = [lookup[i] for i in inputs]
    net_input = concatenate(emb_vectors)
    net_output = softmax( (W*net_input) + b)
    loss = -log(pick(net_output, expected_output))
    return loss

# function for prediction
def create_network_return_best(model, inputs):
    """
    inputs is a list of numbers
    """
    renew_cg()
    W = parameter(model["W"])
    b = parameter(model["b"])
    lookup = model["lookup"]
    emb_vectors = [lookup[i] for i in inputs]
    net_input = concatenate(emb_vectors)
    net_output = softmax( (W*net_input) + b)
    return np.argmax(net_output.npvalue())


# train network
for inp,lbl in ( ([1,2,3],1), ([3,2,4],2) ):
    print inp, lbl
    loss = create_network_return_loss(m, inp, lbl)
    print loss.value() # need to run loss.value() for the forward prop
    loss.backward()
    trainer.update()

print create_network_return_best(m, [1,2,3])


added
[1, 2, 3] 1
3.28097844124
[3, 2, 4] 2
1.66672432423
0
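
The recipe above makes a single pass over two toy examples. A real training run would typically iterate over the data for several epochs; a minimal sketch (training_data is a hypothetical list of (inputs, label) pairs like the tuples above):


In [ ]:
training_data = [([1,2,3],1), ([3,2,4],2)]
for epoch in range(10):
    for inp, lbl in training_data:
        loss = create_network_return_loss(m, inp, lbl)
        loss.value()      # run the forward pass
        loss.backward()   # run the backward pass
        trainer.update()  # update the parameters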
