In principle, this should work on a computer without a GPU. It will help if you have a lot of RAM.
Download http://grzegorz.chrupala.me/data/coco.zip and unzip it in the reimaginet/data/coco directory.
You should have the following files:

- dataset.json: MSCOCO sentences
- vgg_feats.mat: MSCOCO image vectors
- dataset.ipa.jsonl.gz: IPA transcriptions of MSCOCO sentences

Download http://grzegorz.chrupala.me/data/model-ipa.zip and put it in the examples directory (same as this notebook).
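A quick sanity check like the following can confirm everything landed in the right place. This is a minimal sketch, not part of the original notebook; the relative paths assume this notebook sits in reimaginet/examples, matching the root=".." used further down.

import os.path
# Hypothetical check: verify the downloaded files are where the rest of the
# notebook expects them (notebook in reimaginet/examples, data in reimaginet/data/coco)
for path in ["../data/coco/dataset.json",
             "../data/coco/vgg_feats.mat",
             "../data/coco/dataset.ipa.jsonl.gz",
             "model-ipa.zip"]:
    print path, "OK" if os.path.exists(path) else "MISSING"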
In [1]:
import imaginet.task
In [2]:
model = imaginet.task.load(path="model-ipa.zip")
In [3]:
emb = imaginet.task.embeddings(model)
print(emb.shape)
The table of IPA symbols corresponding to the 49 rows of the embedding matrix:
In [5]:
symb = imaginet.task.symbols(model)
print " ".join(symb.values())
Let's display the embeddings projected to 2D via PCA
In [7]:
%pylab inline
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
xy = pca.fit_transform(emb)
In [8]:
pylab.rc('font', family='DejaVu Sans')
pylab.figure(figsize=(8,8))
pylab.scatter(xy[:,0], xy[:,1], alpha=0.1)
# Label each point with its IPA symbol
for j,symb_j in symb.items():
    pylab.text(xy[j,0], xy[j,1], symb_j)
Seems mostly random...
In [9]:
from imaginet.data_provider import getDataProvider
# Adjust the root to point to the directory above data
prov = getDataProvider('coco', root="..")
In [10]:
sents = list(prov.iterSentences(split='val'))
In [11]:
from imaginet.simple_data import phonemes
sents_ipa = [ phonemes(sent) for sent in sents ]
In [12]:
reps = imaginet.task.representation(model, sents_ipa)
In [13]:
from scipy.spatial.distance import cdist
distance = cdist(reps, reps, metric='cosine')
In [14]:
import numpy
def neighbors(k, distance=distance):
    # Show the query sentence and its 4 nearest neighbors by cosine distance
    # (index 0 is the sentence itself, so it is skipped)
    nn = numpy.argsort(distance[k,:])[1:5]
    print sents[k]['raw'], ''.join(sents_ipa[k])
    for n in nn:
        # ✔ if the neighbor describes the same image as the query, ✘ otherwise
        print u"✔" if sents[n]['imgid']==sents[k]['imgid'] else u"✘", \
            sents[n]['raw'], ''.join(sents_ipa[n])
In [15]:
import random
In [16]:
for _ in range(10):
    # randrange avoids the off-by-one of randint(0, len(sents))
    neighbors(random.randrange(len(sents)))
    print
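To turn the ✔/✘ marks into a single number, one can count how often a sentence's nearest neighbor describes the same image. A rough sanity check, sketched under the assumption that distance and sents are as defined above; it is illustrative, not a standard retrieval metric.

# Fraction of validation sentences whose nearest neighbor (excluding the
# sentence itself) comes from the same image
hits = sum(sents[numpy.argsort(distance[k,:])[1]]['imgid'] == sents[k]['imgid']
           for k in range(len(sents)))
print float(hits) / len(sents)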
In [17]:
import imaginet.tracer
reload(imaginet.tracer)
In [18]:
tr = imaginet.tracer.Tracer()
In [19]:
tr.fit(reps)
In [20]:
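# Variance explained by each axis of the tracer's 2D projection
# (assuming the Tracer fits an sklearn-style PCA on the representations)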
tr.proj.explained_variance_
In [21]:
from subprocess import check_output

def espeak(words):
    # Transcribe an English string to IPA with the espeak command-line tool
    return phon(check_output(["espeak", "-q", "--ipa",
                              '-v', 'en-us',
                              words]).decode('utf-8'))

def phon(inp):
    # Remove whitespace and split the transcription into individual IPA characters
    return list(''.join(inp.split()))
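As a quick check that the pipeline works (assuming the espeak binary is installed and on the PATH), transcribing a short phrase should yield a list of IPA characters; the exact symbols, including stress marks, depend on the espeak version and voice.

# Illustrative only: print the IPA transcription of one phrase
print ''.join(espeak("a brown dog"))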
In [22]:
def trace(orths, tracer=tr, model=model, eos=True, size=(6,6)):
    # Transcribe each phrase to IPA, compute the model's hidden-state sequences,
    # and plot their trajectories in the tracer's 2D projection
    ipas = [ espeak(orth) for orth in orths ]
    states = [ imaginet.task.states(model, ipa) for ipa in ipas ]
    pylab.figure(figsize=size)
    tracer.traces(ipas, states, eos=eos)
In [23]:
trace(["A bowl of salad","A plate of pizza","A brown dog", "A black cat"])
In [24]:
trace(["a girl skiing", "a girl wind surfing", "a girl water skiing",])
In [25]:
trace(["a cow", "a baby cow","a tiny baby cow"])
In [26]:
trace(["some food on a table","a computer on a table","a table with food"])
pylab.axis('off')
In [42]:
trace(["a bear in a cage", "a brown bear in the zoo","a teddy bear on a chair"])