In [1]:

    
from examples.ImageCaption import *
from pyspark.sql import SQLContext
from pyspark import SparkConf,SparkContext
from itertools import izip_longest
from com.yahoo.ml.caffe.tools.DFConversions import *
from com.yahoo.ml.caffe.tools.Vocab import *
from com.yahoo.ml.caffe.Config import *
from com.yahoo.ml.caffe.CaffeOnSpark import *
from com.yahoo.ml.caffe.Config import *
from com.yahoo.ml.caffe.DataSource import *
import json

Create the input embedding DataFrame from the COCO dataset



In [2]:

    
# Build the training input for the captioning model:
#   1. load the COCO caption annotations into a DataFrame,
#   2. generate a vocabulary from its "caption" column,
#   3. combine the images and captions into the embedding DataFrame,
#   4. persist that DataFrame as Parquet.
# `sc` is not created anywhere in this notebook; presumably the PySpark
# kernel/shell provides it -- TODO confirm for your environment.
conv=DFConversions(sc)
# NOTE(review): the trailing 1 is presumably a partition count -- confirm against DFConversions.
df_image_caption=conv.Coco2ImageCaptionFile("/tmp/coco/annotations/captions_train2014.json",1)
vocab=Vocab(sc)
# NOTE(review): 8800 is presumably the vocabulary size -- confirm against Vocab.genFromData.
vocab.genFromData(df_image_caption,"caption",8800)
# NOTE(review): 20 is presumably the caption/embedding length -- confirm against ImageCaption2Embedding.
df_embedding = conv.ImageCaption2Embedding("/tmp/coco/images/train2014", df_image_caption, vocab,20)
# Overwrite any output left by an earlier run: with the default save mode the
# write raises when the target path already exists, so the cell could not be
# re-run.
df_embedding.write.mode("overwrite").parquet("/tmp/coco/parquet/df_embedded_train2014")

Train the model for images



In [2]:

    
# Train the image network through CaffeOnSpark.
# `cos` is kept as a notebook-level name because the LSTM training cell
# further down calls cos.train() on it again.
cos = CaffeOnSpark(sc)

# Every option is passed to Config as a string.
args = {
    'conf': 'CaffeOnSpark/data/bvlc_reference_solver.prototxt',
    'model': 'file:///tmp/coco/bvlc_reference_caffenet.caffemodel',
    'devices': '1',
    'clusterSize': '1',
}
cfg = Config(sc, args)

# NOTE(review): the True flag presumably selects the training data source -- confirm against DataSource.getSource.
dl_train_image = DataSource(sc).getSource(cfg, True)
cos.train(dl_train_image)

Train the LSTM model



In [ ]:

    
# Train the LSTM network. This cell reuses the `cos` object created in the
# image-training cell, so that cell has to run first.
# Every option is passed to Config as a string, including 'resize'.
# NOTE(review): 'weights' has no file:// scheme, unlike 'model' in the
# image-training cell -- confirm both resolve to the same file.
args = {
    'conf': 'CaffeOnSpark/data/lrcn_solver.prototxt',
    'model': 'file:///tmp/coco/parquet/lrcn_coco.model',
    'devices': '1',
    'clusterSize': '1',
    'weights': '/tmp/coco/bvlc_reference_caffenet.caffemodel',
    'resize': 'True',
}
cfg = Config(sc, args)

# NOTE(review): the True flag presumably selects the training data source -- confirm against DataSource.getSource.
dl_train_lstm = DataSource(sc).getSource(cfg, True)
cos.train(dl_train_lstm)

Test the model



In [4]:

    
# Build the evaluation input from the demo annotations and images, persist it
# as Parquet, then read it back as `df_embedded_input` for the cells below.
# `sc` and `sqlContext` are not created in this notebook; presumably the
# PySpark kernel/shell provides them -- TODO confirm for your environment.
conv=DFConversions(sc)
vocab=Vocab(sc)
# NOTE(review): the trailing 1 is presumably a partition count -- confirm against DFConversions.
df_image_caption_test=conv.Coco2ImageCaptionFile("/tmp/coco/annotations/captions_demo.json",1)
# NOTE(review): this generates a fresh vocabulary from the demo captions rather
# than reusing the one built from the training captions. The prediction cell
# passes a vocab.txt path to get_predictions instead -- confirm this is intended.
vocab.genFromData(df_image_caption_test,"caption",8800)
# Rebinds `df_embedding`; after this cell it refers to the demo embedding.
df_embedding = conv.ImageCaption2Embedding("/tmp/coco/images/demo/", df_image_caption_test, vocab,20)
# Overwrite any output left by an earlier run: with the default save mode the
# write raises when the target path already exists, so the cell could not be
# re-run.
df_embedding.write.mode("overwrite").parquet("/tmp/coco/parquet/df_embedded_test")
df_embedded_input = sqlContext.read.parquet("/tmp/coco/parquet/df_embedded_test")



In [17]:

    
# Paths handed to get_predictions in the next cell.
model = "/tmp/coco/parquet/lrcn_coco.model"
imagenet = "CaffeOnSpark/data/lstm_deploy.prototxt"
lstmnet = "CaffeOnSpark/data/lrcn_word_to_preds.deploy.prototxt"
# Rebinds `vocab`: it held a Vocab object in the previous cell and is a plain
# path string from here on.
vocab = "CaffeOnSpark/data/vocab.txt"

# Keep only the image fields nested under `data`, plus the row id.
df_images = df_embedded_input.select(
    "data.image",
    "data.height",
    "data.width",
    "id")



In [18]:

    
# Generate captions for the selected images, using the model, network
# definitions and vocabulary path configured in the previous cell.
df_results = get_predictions(
    sqlContext, df_images,
    model, imagenet, lstmnet, vocab)

# Print the first rows of the result (see the table rendered below).
df_results.show()









    



+------+--------------------+
|    id|          prediction|
+------+--------------------+
|674390|A snowboarder is ...|
|687894|A man riding a sk...|
|623893|A kitchen with a ...|
|598423|A man is doing a ...|
|632450|A woman is playin...|
|754187|A man is holding ...|
|706813|A zebra standing ...|
| 86667|A city street wit...|
|202165|A baseball player...|
| 83551|A woman is sittin...|
+------+--------------------+



In [19]:

    
# Pair every embedded input row with its predicted caption by matching ids.
df = df_embedded_input.join(
    df_results,
    df_embedded_input["id"] == df_results["id"])



In [20]:

    
# Display helpers are imported here rather than in the import cell at the top
# of the notebook; this wildcard import supplies show_captions.
from com.yahoo.ml.caffe.DisplayUtils import *
# Render the joined frame (inputs + predictions) as the table shown in Out[20].
show_captions(df)









    Out[20]:




Image Id Image Prediction
674390 A snowboarder is riding a trick on a snowy slope.
687894 A man riding a skateboard down a street.
623893 A kitchen with a white refrigerator and a stove.
598423 A man is doing a trick on a skateboard.
632450 A woman is playing tennis on a tennis court.
754187 A man is holding a picture of a man.
706813 A zebra standing in the grass with trees and trees.
86667 A city street with a clock tower in the background.
202165 A baseball player is holding a bat and a baseball bat.
83551 A woman is sitting on a table with a dog.



In [ ]:

Image Id	Image	Prediction
674390		A snowboarder is riding a trick on a snowy slope.
687894		A man riding a skateboard down a street.
623893		A kitchen with a white refrigerator and a stove.
598423		A man is doing a trick on a skateboard.
632450		A woman is playing tennis on a tennis court.
754187		A man is holding a picture of a man.
706813		A zebra standing in the grass with trees and trees.
86667		A city street with a clock tower in the background.
202165		A baseball player is holding a bat and a baseball bat.
83551		A woman is sitting on a table with a dog.