In [28]:
import imaginet.task as task
task = reload(task)
import funktional.layer as layer
from funktional.layer import params
import funktional.util as util
from funktional.util import autoassign
import theano.tensor as T
import random
import numpy

In [2]:
from imaginet.simple_data import *
import imaginet.data_provider as dp

In [3]:
# Seed Python's `random` module and numpy's global RNG with the same value.
seed = 123
if seed is not None:
    for _seed_fn in (random.seed, numpy.random.seed):
        _seed_fn(seed)
# NOTE(review): `root` is an absolute path on one specific machine; adjust it
# before running elsewhere.
prov = dp.getDataProvider('flickr8k', root='/home/gchrupala/repos/reimaginet')
# The two datasets differ only in tokenizer and min_df; every other option is shared.
_shared_opts = dict(scale=True, batch_size=64, shuffle=True, limit=None)
data_c = SimpleData(prov, tokenize=characters, min_df=1, **_shared_opts)
data_w = SimpleData(prov, tokenize=words, min_df=10, **_shared_opts)


Could not read file /home/gchrupala/repos/reimaginet/data/flickr8k/dataset.ipa.jsonl.gz: IPA transcription not available

In [ ]:
import imaginet.corep
reload(imaginet.corep)

In [ ]:
# Construct the Corep object from the character-level and word-level datasets.
# The training cell further down reads corep.data_w / corep.Task_w and
# corep.data_c / corep.Task_c from it.
# NOTE(review): the six positional numbers are hyperparameters whose meaning is
# fixed by imaginet.corep.Corep's signature, which is not visible here; confirm
# against that definition before changing any of them.
corep = imaginet.corep.Corep(data_c, data_w, 128, 512, 512, 3, 1, 4096)

In [ ]:
from collections import Counter
import sys
def valid_loss(task, data):
    """Return the list of per-batch validation losses of `task` on `data`.

    Every item yielded by ``data.iter_valid_batches()`` is unpacked as a
    4-tuple. Its first two fields are handed to ``task.loss_test`` and the
    last two are ignored. Losses are returned in iteration order.
    """
    return [task.loss_test(inp, target_v)
            for inp, target_v, _, _ in data.iter_valid_batches()]

# Each entry pairs a label with (dataset, training task, running tally).
# The tally is a Counter holding the summed cost under 'cost' and the number of
# batches seen under 'N'. It is created once here, so the printed mean is
# cumulative across both epochs. Later cells index into this structure
# (things[i][1][2]), so its shape is kept as is.
things = [("word", (corep.data_w, corep.Task_w, Counter())),
          ("char", (corep.data_c, corep.Task_c, Counter())) ]
for epoch in range(1,3):
    for name, thing in things:
        # The task is bound to `trainer` rather than `task` so the loop does not
        # rebind the module imported as `task` at the top of the notebook.
        data, trainer, costs = thing
        # Iterate over the batches of the dataset that belongs to this entry.
        # The loop previously always read from data_c, so the "word" task was
        # fed character batches and the sample count below used another
        # dataset's batch_size.
        for j, item in enumerate(data.iter_train_batches(), 1):
            inp, target_v, _, _ = item
            cost = trainer.train(inp, target_v)
            costs.update({'cost': cost, 'N': 1})
            if j % 10 == 0:
                # Single pre-joined string: same output as the Python 2 print
                # statement with comma-separated items.
                print(" ".join(str(field) for field in
                               (epoch, name, j, j*data.batch_size, "train",
                                costs['cost']/costs['N'])))
                sys.stdout.flush()

In [ ]:
# Display the running cost Counter of the "word" entry:
# things[0] is ("word", (dataset, task, costs)), so [1][2] selects `costs`.
things[0][1][2]

In [ ]:
# Display the running cost Counter of the "char" entry:
# things[1] is ("char", (dataset, task, costs)), so [1][2] selects `costs`.
things[1][1][2]

In [ ]:
# Write the Corep object to disk through its own save method.
# NOTE(review): hard-coded location under /tmp, presumably scratch space; use a
# different path if the archive needs to be kept.
corep.save("/tmp/corep.zip")

In [54]:
import imaginet.defn.visual
reload(imaginet.defn.visual)


Out[54]:
<module 'imaginet.defn.visual' from '/exp2/gchrupala/repos/reimaginet/imaginet/defn/visual.py'>

In [55]:
# Hyperparameters handed to VisualModel as its second argument.
config = {'size_embed': 128, 'size': 256}
# The first argument reuses the scaler and batcher of the character-level dataset.
model = imaginet.defn.visual.VisualModel(
    {'scaler': data_c.scaler, 'batcher': data_c.batcher},
    config)

In [56]:
# Save the model right after construction; the archive is reloaded in the next
# cell to check the save/load round trip.
model.save("/tmp/fufi.zip")

In [57]:
# Load the archive back into a second object (`delmo`) so that its parameters
# can be compared with those of `model` further down.
delmo = imaginet.defn.visual.load("/tmp/fufi.zip")

In [ ]:
from collections import Counter
import sys
# Two passes (epochs 1 and 2) over the training batches of data_c. Every 10th
# batch prints the mean cost accumulated so far in the current epoch.
# NOTE(review): batches are drawn from data_c, while the sample count in the log
# line uses data.batch_size with data = model.dataset; confirm that both refer
# to the same dataset.
for epoch in range(1,3):
    costs = Counter()  # fresh tally per epoch: summed cost in 'cost', batch count in 'N'
    name = "phoneme"
    data = model.dataset
    task = model.Visual
    for j, (inp, target_v, _, _) in enumerate(data_c.iter_train_batches(), 1):
        cost = task.train(inp, target_v)
        costs.update({'cost': cost, 'N': 1})
        if j % 10 != 0:
            continue
        # Single pre-joined string: same output as the Python 2 print statement
        # with comma-separated items.
        print(" ".join(str(field) for field in
                       (epoch, name, j, j*data.batch_size, "train",
                        costs['cost']/costs['N'])))
        sys.stdout.flush()

In [62]:
# Round-trip check: for each parameter, print the fraction of entries that are
# equal between `model` and the reloaded `delmo` (1.0 means all entries match).
# NOTE(review): if the training cell above has been run on `model` in between,
# its parameters have presumably changed and values below 1.0 are expected.
_current = model.params()
_reloaded = delmo.params()
for _idx in range(len(_current)):
    print(numpy.mean(_current[_idx].get_value() == _reloaded[_idx].get_value()))


1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0

In [ ]: