In principle, this should work on a computer without a GPU. It will help if you have a lot of RAM.
Download http://grzegorz.chrupala.me/data/coco.zip and unzip it in the reimaginet/data/coco directory.
You should have the following files:
- dataset.json — MSCOCO sentences
- vgg_feats.mat — MSCOCO image vectors
- dataset.ipa.jsonl.gz — IPA transcriptions of MSCOCO sentences
(Copy pretrained models from yellow.uvt.nl:/home/gchrupala/repos/reimaginet/examples/vis
and yellow.uvt.nl:/home/gchrupala/repos/reimaginet/examples/vis/lm)
In [1]:
import imaginet.defn.visual as visual
import imaginet.task
In [2]:
# Load the pretrained visual model (phoneme-to-image-vector pathway).
model = imaginet.task.load(path="vis/model.10.zip")
In [8]:
reload(visual)  # Python 2 builtin; picks up any edits to imaginet.defn.visual
# Phoneme embedding matrix of the visual model; rows index IPA symbols.
emb = visual.embeddings(model)
print(emb.shape)
The table of IPA symbols corresponding to the 49 dimensions
In [9]:
# Mapping from embedding row index to IPA symbol.
symb = visual.symbols(model)
print " ".join(symb.values())  # Python 2 print statement
Let's display the embeddings projected to 2D via PCA
In [22]:
%pylab inline
In [23]:
# Project the visual model's phoneme embeddings to two dimensions with PCA
# and plot each IPA symbol at its projected coordinates.
# NOTE: `pca` and `xy` are reused by later cells, so keep these names.
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
xy = pca.fit_transform(emb)
pylab.rc('font', family='DejaVu Sans')
pylab.figure(figsize=(8,8))
pylab.scatter(xy[:,0], xy[:,1], alpha=0.1)
special = ["<BEG>","<END>", "<UNK>"]
for idx, label in symb.items():
    if label not in special:
        pylab.text(xy[idx,0], xy[idx,1], label)
In [24]:
import imaginet.defn.lm
reload(imaginet.defn.lm)
# Load the language-model variant and extract its phoneme embeddings.
model2 = imaginet.task.load("lm/model.10.zip")
emb2 = imaginet.defn.lm.embeddings(model2)
In [25]:
# Project the LM embeddings to 2D and plot, mirroring the visual-model plot.
pca2 = PCA(n_components=2)
# FIX: the original called pca.fit_transform here, leaving pca2 unused and
# silently refitting (clobbering) the visual-model PCA; use the fresh pca2.
xy = pca2.fit_transform(emb2)
pylab.rc('font', family='DejaVu Sans')
pylab.figure(figsize=(8,8))
pylab.scatter(xy[:,0], xy[:,1], alpha=0.1)
# Reuses symb from the visual model — presumably both models share the same
# symbol inventory; verify against the training setup.
for j,symb_j in symb.items():
    if symb_j not in ["<BEG>","<END>", "<UNK>"]:
        pylab.text(xy[j,0], xy[j,1], symb_j)
There is clear structure in the embeddings for the LM model.
In [7]:
from imaginet.data_provider import getDataProvider
# Adjust the root to point to the directory above data
# (i.e. the directory containing the coco.zip contents unpacked earlier).
prov = getDataProvider('coco', root="..")
In [8]:
# Materialize all validation-split sentences.
sents = list(prov.iterSentences(split='val'))
In [9]:
from imaginet.simple_data import phonemes
# Convert each sentence to its IPA phoneme sequence.
sents_ipa = [ phonemes(sent) for sent in sents ]
In [10]:
# Sentence-level representations from the visual model.
reps = imaginet.task.representation(model, sents_ipa)
In [30]:
from scipy.spatial.distance import cdist
# Pairwise cosine distances between all sentence representations.
distance = cdist(reps, reps, metric='cosine')
In [31]:
import numpy
# Print sentence k and its nearest neighbours under the given distance matrix.
# A line starts with ✔ when the neighbour describes the same image as the
# query sentence, ✘ otherwise.
def neighbors(k, distance=distance, size=5):
    # Index 0 of the argsort is k itself (self-distance 0), so it is skipped.
    # NOTE(review): [1:size] yields size-1 neighbours, so size=5 prints 4 —
    # confirm whether [1:size+1] was intended.
    nn = numpy.argsort(distance[k,:])[1:size]
    print sents[k]['raw'], ''.join(sents_ipa[k])
    for n in nn:
        print u"✔" if sents[n]['imgid']==sents[k]['imgid'] else u"✘", \
            sents[n]['raw'], ''.join(sents_ipa[n])
In [32]:
import random
In [33]:
# Show nearest neighbours for 10 random validation sentences.
random.seed(41)
for _ in range(10):
    # FIX: randint's upper bound is inclusive, so randint(0, len(sents))
    # could return len(sents) and raise IndexError; sample [0, len(sents)-1].
    neighbors(random.randint(0, len(sents) - 1), distance=distance)
    print
In [36]:
reload(visual)
# Collect per-sentence activations in chunks of 512 sentences to bound memory.
# ri[-1] takes the last element of each pile entry — presumably the
# final-timestep activations for all layers; confirm against imaginet.task.pile.
reps2 = []
for i in range(0,len(sents_ipa),512):
    r = [ ri[-1] for ri in imaginet.task.pile(model, sents_ipa[i:i+512], batch_size=256) ]
    reps2.extend(r)
In [37]:
reps2 = numpy.array(reps2)
# Pairwise cosine distances from the activations at index 1 and index 0 of
# axis 1 — presumably the second and first network layers; TODO confirm.
distance1 = cdist(reps2[:,1,:], reps2[:,1,:], metric='cosine')
distance0 = cdist(reps2[:,0,:], reps2[:,0,:], metric='cosine')
In [38]:
%reset_selective reps2 # Free memory
In [39]:
random.seed(41)
for _ in range(10):
i = random.randint(0, len(sents))
print "Layer 1"
neighbors(i, distance=distance0, size=2)
print "Layer 2"
neighbors(i, distance=distance1, size=2)
print "Layer 3"
neighbors(i, distance=distance, size=2)
print
In [11]:
import imaginet.tracer
In [12]:
# Tracer projects hidden-state trajectories for visualization.
tr = imaginet.tracer.Tracer()
In [13]:
# Fit the tracer's projection on the sentence representations.
tr.fit(reps)
In [14]:
# Variance explained by each component of the tracer's projection.
tr.proj.explained_variance_
Out[14]:
In [15]:
from subprocess import check_output

def espeak(words):
    """Transcribe *words* to a list of IPA phonemes via the espeak binary."""
    raw = check_output(["espeak", "-q", "--ipa=3",
                        '-v', 'en',
                        words]).decode('utf-8')
    return phon(raw)

def phon(inp):
    """Split an espeak --ipa=3 transcription into phonemes, dropping stress marks."""
    result = []
    for word in inp.split():
        for ph in word.split("_"):
            result.append(ph.replace(u"ˈ", ""))
    return result
In [19]:
%pylab inline --no-import-all
# Plot activation traces for a list of orthographic phrases: transcribe each
# to IPA with espeak, run the model, and draw the tracer's projection.
def trace(orths, tracer=tr, model=model, eos=True, size=(6,6)):
    ipas = [ espeak(orth) for orth in orths ]
    states = imaginet.task.states(model, ipas)
    pylab.figure(figsize=size)
    tracer.traces(ipas, states, eos=eos)
In [20]:
# Traces for four phrases with distinct visual referents.
trace(["A bowl of salad","A plate of pizza","A brown dog", "A black cat"])
In [21]:
# Traces for nested variants of the same noun phrase.
trace(["a cow", "a baby cow","a tiny baby cow"])
In [25]:
orths = ["A cow","A baby","A tiny"]
ipas = [ espeak(orth) for orth in orths ]
states = imaginet.task.states(model, ipas)
# Difference between the first state vectors of "A cow" and "A baby".
states[0][0] - states[1][0]
Out[25]:
In [23]:
# Traces for phrases sharing words in different arrangements.
trace(["some food on a table","a computer on a table","a table with food"])
pylab.axis('off')
Out[23]:
In [24]:
# Traces contrasting different contexts of "bear".
trace(["a bear in a cage", "a brown bear in the zoo","a teddy bear on a chair"])
In [ ]: