In [1]:
import numpy
import imaginet.task as task
import imaginet.defn.audiovis_rhn as audiovis
In [2]:
# Load a pre-trained audio-visual RHN model (human speech MFCCs, Flickr8K)
model = task.load("/home/gchrupala/reimaginet/examples/audioviz/human-mfcc-rhn-flickr8k.zip")
In [3]:
import imaginet.data_provider as dp
# Flickr8K data provider with human speech MFCC features
prov = dp.getDataProvider('flickr8k', root='/home/gchrupala/reimaginet', audio_kind='human.max1K.accel3.ord.mfcc')
sent_val = list(prov.iterSentences(split='val'))
print(sent_val[0]['raw'])
print(sent_val[0]['audio'].shape)
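As a quick sanity check, here is a minimal sketch that summarizes the validation split, assuming (as the shape printed above suggests) that each sentence's 'audio' field is a 2-D array of MFCC frames by coefficients:

# Sketch: basic statistics over the validation utterances.
# Assumes each sent['audio'] is a (frames, coefficients) MFCC array.
lengths = [sent['audio'].shape[0] for sent in sent_val]
print("utterances:", len(sent_val))
print("mean frames per utterance:", numpy.mean(lengths))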
In [4]:
# Convert each utterance's MFCC features to float32 and encode them
# into embeddings with the loaded model
data = [numpy.asarray(sent['audio'], dtype='float32') for sent in sent_val]
embeddings = audiovis.encode_sentences(model, data)
In [5]:
embeddings.shape
Out[5]:
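Assuming encode_sentences returns a 2-D numpy array with one embedding vector per utterance, the following minimal sketch (plain numpy, not part of the imaginet API) shows how the embeddings could be used for utterance-to-utterance similarity:

# Sketch: cosine similarity between the first utterance and all others.
norms = numpy.linalg.norm(embeddings, axis=1, keepdims=True)
normalized = embeddings / norms
sims = normalized.dot(normalized[0])
print(numpy.argsort(-sims)[:5])  # indices of the 5 most similar utterances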
In [6]:
# Extract the hidden states of the recurrent layers for the same inputs
states = audiovis.layer_states(model, data)
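layer_states exposes the intermediate recurrent-layer activations. A minimal sketch for inspecting what came back, assuming (this is an assumption, not documented here) that it returns one activation array per input utterance:

# Sketch: inspect the shape of the returned activations for a few utterances.
for s in states[:3]:
    print(numpy.asarray(s).shape)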