In [ ]:
import sys, os
import h2o
from h2o.estimators.deepwater import H2ODeepWaterEstimator
from requests import get
from IPython.display import Image, display, HTML
import numpy as np
## Connect to the H2O cluster; every later cell talks to it.
h2o.init()
## Abort with an explicit error when the Deep Water backend is missing.
## A bare `exit` (without a call) is only a name lookup and would let the
## notebook run on and fail later with a far less helpful message.
if not H2ODeepWaterEstimator.available():
    raise RuntimeError("H2O Deep Water is not available - this notebook cannot run without it")

In [ ]:
print("Downloading the model")
## Nothing is fetched by this cell - the archive has to be obtained and unpacked by hand:
## http://data.dmlc.ml/mxnet/models/imagenet/inception-bn_old.tar.gz
## !gunzip ...

print("Importing the model architecture for scoring in H2O")
## The model files are looked up in the current working directory.
model_dir = os.getcwd()
scoring_config = {
    ## no training - just load the state
    ## NOTE: training for this 3-class problem wouldn't work since the model has 1k classes
    "epochs": 0,
    ## mini-batch size is used for scoring
    "mini_batch_size": 32,
    ## all parameters below are needed
    "network": 'user',
    "network_definition_file": model_dir + "/Inception_BN-symbol.json",
    "network_parameters_file": model_dir + "/Inception_BN-0039.params",
    "mean_image_file": model_dir + "/mean_224.nd",
    "image_shape": [224,224],
    "channels": 3,
}
model = H2ODeepWaterEstimator(**scoring_config)

## Load the image list and peek at its first rows.
frame = h2o.import_file("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv")
preview = frame.head(5)
print(preview)

## Number of levels of column 1, the column that is passed as `y` below.
label_column = frame[1]
nclasses = label_column.nlevels()[0]

## must call train() to initialize the model, but it isn't training (epochs=0)
model.train(x=[0], y=1, training_frame=frame)

In [ ]:
## Deep features are read from the final layer before the Softmax.
## (Another layer that can be tried instead: "conv_5b_double_3x3_1_output")
feature_layer = "global_pool_output"
extracted_features = model.deepfeatures(frame, feature_layer)

print(extracted_features)

## Sanity check: the chosen layer must yield 1024 feature columns.
n_features = extracted_features.ncol
assert n_features == 1024

In [ ]:
## Cosine similarity (the "cosine" measure) of the first 5 images (queries)
## against all remaining images (references).
n_queries = 5
queries    = extracted_features[:n_queries,:]  ## small
references = extracted_features[n_queries:,:]  ## large

df = references.distance(queries, "cosine")

print(df)

## Expect one row per reference image and one column per query image.
n_rows, n_cols = df.shape
assert n_rows == 262
assert n_cols == 5

In [ ]:
## Show the image of row 0; the value in column 0 is handed to Image() as a file name.
query_path = frame[0,0]
display(Image(filename=query_path))

In [ ]:
## Rank every image by cosine similarity to the image in row 0 and show the top 3.
## The query row is itself part of `references`, so it is expected among the matches.
references = extracted_features[:,:]  ## large - all rows
queries    = extracted_features[0,:]  ## small - the single query row

## Similarity of each reference row to the query, flattened to 1-D for argsort.
df = references.distance(queries, "cosine").as_data_frame().values.ravel()
closest = np.argsort(df)[::-1]  ## row indices ordered from most to least similar
## The loop variable is deliberately not called `indx`: other cells use that name
## for the query row, and reusing it here would overwrite it.
## int() passes H2OFrame a plain Python integer rather than a numpy integer.
for match in closest[:3]:
    img = frame[int(match),0]
    display(Image(filename=img))

In [ ]:
## Pick row 102 as the query image and show it.
indx = 102
display(Image(filename=frame[indx,0]))

In [ ]:
## Rank every image by cosine similarity to the query row `indx` and show the top 3.
## The query row is itself part of `references`, so it is expected among the matches.
references = extracted_features[:,:]  ## large - all rows
queries    = extracted_features[indx,:]  ## small - the single query row

## Similarity of each reference row to the query, flattened to 1-D for argsort.
df = references.distance(queries, "cosine").as_data_frame().values.ravel()
closest = np.argsort(df)[::-1]  ## row indices ordered from most to least similar
## The loop variable must not be `indx`: this cell reads `indx` as the query row,
## so overwriting it would make a re-run of the cell use a different query.
## int() passes H2OFrame a plain Python integer rather than a numpy integer.
for match in closest[:3]:
    img = frame[int(match),0]
    display(Image(filename=img))

In [ ]:
## Pick row 200 as the query image and show it.
indx = 200
display(Image(filename=frame[indx,0]))

In [ ]:
## Rank every image by cosine similarity to the query row `indx` and show the top 3.
## The query row is itself part of `references`, so it is expected among the matches.
references = extracted_features[:,:]  ## large - all rows
queries    = extracted_features[indx,:]  ## small - the single query row

## Similarity of each reference row to the query, flattened to 1-D for argsort.
df = references.distance(queries, "cosine").as_data_frame().values.ravel()
closest = np.argsort(df)[::-1]  ## row indices ordered from most to least similar
## The loop variable must not be `indx`: this cell reads `indx` as the query row,
## so overwriting it would make a re-run of the cell use a different query.
## int() passes H2OFrame a plain Python integer rather than a numpy integer.
for match in closest[:3]:
    img = frame[int(match),0]
    display(Image(filename=img))

In [ ]: