In [1]:
import numpy
import imaginet.data_provider as dp
import imaginet.driver
from funktional.util import grouper
from imaginet.tokens import tokenize
from imaginet.models import predictor_v
from scipy.spatial.distance import cosine
from imaginet.simple_data import phonemes


Couldn't import dot_parser, loading of dot files will not be possible.
Using gpu device 0: Tesla K20m

In [6]:
# Data provider for the COCO dataset, rooted at the local reimaginet checkout.
prov = dp.getDataProvider('coco', root='/home/gchrupala/repos/reimaginet')
# Gather the sentences of the 'val' split followed by those of 'restval'.
sents = []
for split_name in ('val', 'restval'):
    sents.extend(prov.iterSentences(split=split_name))

In [6]:
import json
# Parse the metadata file, which holds one JSON document per line.
# The `with` block guarantees the file handle is closed once parsing is done;
# blank lines are skipped because json.loads would raise on them.
with open("crowd/job_886896_meta.json") as meta_file:
    rows = [json.loads(line) for line in meta_file if line.strip()]

In [52]:
def features(cocoid):
    """Look up the stored feature entry for an image id.

    Reads the module-level mapping `img_fs`, which must be defined before
    this function is called.

    Raises:
        KeyError: if `cocoid` is not present in `img_fs`.
    """
    # Explicit membership test first, so a missing id always surfaces as
    # KeyError(cocoid) regardless of how the mapping handles missing keys.
    if cocoid not in img_fs:
        raise KeyError(cocoid)
    return img_fs[cocoid]

def evaluate_old(M, rows):
    """Return the fraction of rows where the model's choice equals the recorded response.

    For each row the description text is tokenized, mapped and batched, and a
    vector is predicted from it. The candidate image whose features (as
    returned by `features`) have the smallest cosine distance to that vector
    is the model's choice, expressed as a 1-based position in the candidate
    list.

    Args:
        M: mapping with the keys 'batcher' (exposing `.mapper` and
            `.batch_inp`) and 'model' (passed to `predictor_v`). Other keys
            are ignored.
        rows: iterable of dicts providing row['data']['desc'],
            row['meta']['candidates'] and row['meta']['response'].

    Returns:
        The mean of the element-wise comparison between the model's choices
        and the recorded responses.
    """
    batcher = M['batcher']
    mapper = batcher.mapper
    predict_v = predictor_v(M['model'])

    def response(row):
        # One description at a time: the batch holds a single input, so the
        # prediction of interest is the first (only) row of the output.
        inputs = list(mapper.transform([tokenize(row['data']['desc'])]))
        pred = predict_v(batcher.batch_inp(inputs))[0]
        distances = [cosine(pred, features(cocoid))
                     for cocoid in row['meta']['candidates']]
        # +1 turns the 0-based argmin into the 1-based position that is
        # compared against row['meta']['response'].
        return 1 + numpy.argmin(distances)

    preds = numpy.array([response(row) for row in rows])
    target = numpy.array([row['meta']['response'] for row in rows])
    return numpy.mean(preds == target)

def evaluate(model, rows, prov=None, root='/home/gchrupala/repos/reimaginet'):
    """Return the fraction of rows where the model's choice equals the recorded response.

    Image features from the 'val', 'test' and 'restval' splits are scaled with
    the model's scaler and indexed by 'cocoid'; sentences are indexed by
    'sentid'. For each row, the sentence named by row['meta']['id'] is
    converted with `phonemes`, mapped and batched, and `model.Visual.predict`
    produces a vector. The candidate image whose scaled features have the
    smallest cosine distance to that vector is the model's choice, expressed
    as a 1-based position in the candidate list.

    Args:
        model: object exposing `.scaler`, `.batcher` (with `.mapper` and
            `.batch_inp`) and `.Visual.predict`.
        rows: iterable of dicts providing row['meta']['id'],
            row['meta']['candidates'] and row['meta']['response'].
        prov: optional data provider exposing `iterImages(split=...)`. When
            omitted, the 'coco' provider is created from `root`. Passing one
            in avoids re-creating the provider on every call.
        root: dataset root used only when `prov` is None.

    Returns:
        The mean of the element-wise comparison between the model's choices
        and the recorded responses.

    Raises:
        KeyError: if a row refers to a sentence id or candidate image id that
            is not present in the three splits.
    """
    scaler = model.scaler
    batcher = model.batcher
    mapper = batcher.mapper
    if prov is None:
        prov = dp.getDataProvider('coco', root=root)

    # The scaler belongs to the model, so the scaled feature table has to be
    # rebuilt for each model being evaluated.
    img_fs = {}
    sent_ids = {}
    for split in ('val', 'test', 'restval'):
        for img in prov.iterImages(split=split):
            img_fs[img['cocoid']] = scaler.transform([img['feat']])[0]
            for sent in img['sentences']:
                sent_ids[sent['sentid']] = sent

    def response(row):
        sent = sent_ids[row['meta']['id']]
        # One sentence at a time: the batch holds a single input, so the
        # prediction of interest is the first (only) row of the output.
        inputs = list(mapper.transform([phonemes(sent)]))
        pred = model.Visual.predict(batcher.batch_inp(inputs))[0]
        distances = [cosine(pred, img_fs[cocoid])
                     for cocoid in row['meta']['candidates']]
        # +1 turns the 0-based argmin into the 1-based position that is
        # compared against row['meta']['response'].
        return 1 + numpy.argmin(distances)

    preds = numpy.array([response(row) for row in rows])
    target = numpy.array([row['meta']['response'] for row in rows])
    return numpy.mean(preds == target)

In [53]:
import imaginet.defn.visual as visual
for i in range(1,7):
    modeldir = "/home/gchrupala/repos/reimaginet/run-1001-ipa/model.{}.zip".format(i)
    model = visual.load(path=modeldir)
    print i, evaluate(model, rows)


1 0.439882697947
2 0.674486803519
3 0.736070381232
4 0.765395894428
5 0.759530791789
6 0.74780058651

In [54]:
for i in range(7,10):
    modeldir = "/home/gchrupala/repos/reimaginet/run-1001-ipa/model.{}.zip".format(i)
    model = visual.load(path=modeldir)
    print i, evaluate(model, rows)


7 0.765395894428
8 0.765395894428
9 0.74780058651

In [50]:
import imaginet.defn.visual as visual
# Re-execute the module so that edits made to it since it was first imported
# in this kernel are picked up before loading.
reload(visual)
# Load the checkpoint numbered 9 of the run-1001-ipa run for interactive inspection.
modeldir = "/home/gchrupala/repos/reimaginet/run-1001-ipa/model.9.zip"
model = visual.load(path=modeldir)

In [51]:
# Convert the first sentence with phonemes() and run it through the model's
# mapper, the same preparation evaluate() applies to each row's sentence.
inputs = list(model.batcher.mapper.transform([phonemes(sents[0])]))

In [52]:
# Keep a handle on the model's Visual attribute (the object whose predict()
# is called in evaluate()).
task = model.Visual

In [53]:
print ''.join([ ph for ph in phonemes(sents[0]) if ph != '*' ])


ɐtʃaɪldhəʊldɪŋɐflaʊədʌmbɹɛləandpɛtɪŋɐjak

In [56]:
# Display the shape of what visual.states() returns for the first sentence.
# The recorded output is (47, 1024) -- presumably (time steps, hidden units);
# TODO confirm against visual.states.
visual.states(model, phonemes(sents[0])).shape


Out[56]:
(47, 1024)

In [59]:
# Scratch check: str.replace with an empty replacement removes every apostrophe.
"abc'def".replace("'","")


Out[59]:
'abcdef'

In [ ]: