In [1]:
import matplotlib.pyplot as plt
import cPickle as pickle
import tensorflow as tf
from core.solver import CaptioningSolver
from core.model import CaptionGenerator
from core.utils import load_coco_data
from core.bleu import evaluate

%matplotlib inline
plt.rcParams['figure.figsize'] = (8.0, 6.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

In [2]:
# Vocabulary mapping (word -> integer id) that was built from the training split.
with open('./data/train/word_to_idx.pkl', 'rb') as pkl_file:
    word_to_idx = pickle.load(pkl_file)

# Validation-split arrays (features, captions, file names, image indices) —
# shapes are printed by load_coco_data, see the cell output below.
data = load_coco_data(data_path='./data', split='val')


image_idxs <type 'numpy.ndarray'> (19589,) int32
file_names <type 'numpy.ndarray'> (4052,) <U51
features <type 'numpy.ndarray'> (4052, 196, 512) float32
captions <type 'numpy.ndarray'> (19589, 17) int32
Elapse time: 1.16

In [3]:
# Build the attention-based caption generator.  dim_feature=[196, 512] matches
# the (4052, 196, 512) feature arrays loaded above — presumably 196 spatial
# locations (14x14) with 512 channels each; TODO confirm against core.model.
model = CaptionGenerator(
    word_to_idx,
    dim_feature=[196, 512],
    dim_embed=512,
    dim_hidden=1500,
    n_time_step=16,          # captions are stored as length-17 sequences (see output above)
    prev2out=True,
    ctx2out=True,
    alpha_c=1.0,             # NOTE(review): looks like an attention-regularizer weight — confirm
    selector=True,
    dropout=True,
)

In [4]:
# Wrap the model in a solver.  NOTE(review): the same `data` object is passed
# twice (presumably train and val slots) — harmless here since this notebook
# only runs inference with `test_model`, but confirm against core.solver.
solver = CaptioningSolver(
    model, data, data,
    n_epochs=15,
    batch_size=128,
    update_rule='adam',
    learning_rate=0.0025,
    print_every=2000,
    save_every=1,
    image_path='./image/val2014_resized',
    pretrained_model=None,
    model_path='./model/lstm',
    test_model='./model/lstm3/model-18',   # checkpoint actually used for sampling below
    print_bleu=False,
    log_path='./log/',
)

In [7]:
# Sample captions for the validation split and save them — the cell output
# below shows example captions and the dump path ./data/val/val.candidate.captions.pkl.
solver.test(data, split='val')


Sampled Caption: a group of motorcycles parked next to each other .
Sampled Caption: a man riding a wave on top of a surfboard .
Sampled Caption: a large building with a clock on it .
Sampled Caption: a herd of sheep standing on top of a grass covered field .
Sampled Caption: a black and white photo of a man holding a tennis racket .
Sampled Caption: a man riding skis down a snow covered slope .
Sampled Caption: a man is standing in a bathroom with a toilet .
Sampled Caption: a person is flying a kite in a field .
Sampled Caption: a man is standing in a dark room with a black cat .
Sampled Caption: a giraffe standing in a field with a tall giraffe in the background .
Saved ./data/val/val.candidate.captions.pkl..

In [8]:
# Test-split arrays, same structure as the validation data (shapes printed below).
test = load_coco_data(data_path='./data', split='test')


image_idxs <type 'numpy.ndarray'> (19589,) int32
file_names <type 'numpy.ndarray'> (4048,) <U51
features <type 'numpy.ndarray'> (4048, 196, 512) float32
captions <type 'numpy.ndarray'> (19589, 17) int32
Elapse time: 1.15

In [13]:
# Enable variable reuse in the current TF1 variable scope before sampling again:
# presumably solver.test rebuilds the sampling graph, and without reuse the
# second build would raise on duplicate tf.get_variable calls — TODO confirm
# against core.solver.  Then sample and save captions for the test split.
tf.get_variable_scope().reuse_variables()
solver.test(test, split='test')


Sampled Caption: a sign that is on a pole in front of a building .
Sampled Caption: a train is traveling down the tracks in the city .
Sampled Caption: a giraffe standing in a field with tall grass .
Sampled Caption: a woman is holding a dog in a parking lot .
Sampled Caption: a woman standing in a kitchen with a refrigerator .
Sampled Caption: a polar bear is swimming in the water .
Sampled Caption: a man standing in front of a tv holding a wii remote .
Sampled Caption: a woman is sitting on a couch with a laptop .
Sampled Caption: a group of elephants standing in a fenced in area .
Sampled Caption: a man holding a cell phone in his hand .
Saved ./data/test/test.candidate.captions.pkl..

In [14]:
# Score the saved validation captions against references; prints BLEU-1..4,
# METEOR, ROUGE_L and CIDEr (see the cell output below).
evaluate(data_path='./data', split='val')


{'reflen': 43408, 'guess': [43989, 39937, 35885, 31833], 'testlen': 43989, 'correct': [29093, 12382, 5232, 2401]}
ratio: 1.01338462956
Bleu_1:	0.661369887927
Bleu_2:	0.452824472132
Bleu_3:	0.310364129211
Bleu_4:	0.217912572998
METEOR:	0.217213601321
ROUGE_L: 0.518324778652
CIDEr:	0.683173385807

In [15]:
# Same evaluation for the test split.
evaluate(data_path='./data', split='test')


{'reflen': 43314, 'guess': [43979, 39931, 35883, 31835], 'testlen': 43979, 'correct': [28846, 12119, 5085, 2243]}
ratio: 1.01535300365
Bleu_1:	0.65590395416
Bleu_2:	0.446168005543
Bleu_3:	0.30441521379
Bleu_4:	0.211145033206
METEOR:	0.215652854828
ROUGE_L: 0.513925691333
CIDEr:	0.657087635567