In [ ]:
import sys, os
import h2o
from h2o.estimators.deepwater import H2ODeepWaterEstimator
from requests import get
from IPython.display import Image, display, HTML
import numpy as np
## Initialise the H2O session used by every cell below.
h2o.init()
## Abort when the Deep Water backend is not available. A bare `exit` is just a
## name lookup and terminates nothing, so the exit has to be an explicit call.
if not H2ODeepWaterEstimator.available():
    sys.exit("H2O Deep Water is not available; stopping.")
In [ ]:
## Nothing in this cell fetches the pre-trained network: the three model files
## are read from the current working directory. The archive is at
## http://data.dmlc.ml/mxnet/models/imagenet/inception-bn_old.tar.gz
## and must be unpacked by hand first (e.g. !gunzip ...).
print("Downloading the model")

print("Importing the model architecture for scoring in H2O")
model_dir = os.getcwd()
model = H2ODeepWaterEstimator(
    ## no training - just load the state.
    ## NOTE: training for this 3-class problem wouldn't work since the model
    ## has 1k classes.
    epochs=0,
    ## mini-batch size is used for scoring
    mini_batch_size=32,
    ## all parameters below are needed
    network='user',
    network_definition_file=model_dir + "/Inception_BN-symbol.json",
    network_parameters_file=model_dir + "/Inception_BN-0039.params",
    mean_image_file=model_dir + "/mean_224.nd",
    image_shape=[224, 224],
    channels=3,
)

## Load the image list; column 0 is used as the predictor and column 1 as the
## response in the train() call below.
frame = h2o.import_file("bigdata/laptop/deepwater/imagenet/cat_dog_mouse.csv")
preview = frame.head(5)
print(preview)

## Number of levels in column 1 (the response column).
nclasses = frame[1].nlevels()[0]

## train() must be called to initialize the model, but with epochs=0 it
## isn't training.
model.train(x=[0], y=1, training_frame=frame)
In [ ]:
## Extract deep features for every row of `frame` from the final layer before
## going into Softmax.
feature_layer = "global_pool_output"
## Alternative layer to try instead:
#feature_layer = "conv_5b_double_3x3_1_output"
extracted_features = model.deepfeatures(frame, feature_layer)
print(extracted_features)

## Sanity check: the chosen layer is expected to yield 1024 feature columns.
assert extracted_features.ncol == 1024
In [ ]:
## Compare the first 5 images (queries) against all remaining images
## (references) using H2O's "cosine" measure.
## NOTE(review): "cosine" is presumably the plain cosine similarity; H2O lists
## a separate "cosine_sq" measure for the squared form - confirm which one is
## intended here.
n_queries = 5
queries = extracted_features[:n_queries, :]     ## small
references = extracted_features[n_queries:, :]  ## large
df = references.distance(queries, "cosine")
print(df)

## One row per reference image, one column per query image.
assert df.shape[0] == 262
assert df.shape[1] == n_queries
In [ ]:
## Show the image whose path is stored in row 0, column 0 of `frame`.
img = frame[0, 0]
picture = Image(filename=img)
display(picture)
In [ ]:
## Score every image against image 0 and show the 3 highest-scoring ones.
## The references include row 0 itself, so the query image can appear among
## the results.
queries = extracted_features[0, :]     ## small
references = extracted_features[:, :]  ## large
scores = references.distance(queries, "cosine")
df = scores.as_data_frame().values.ravel()

## argsort is ascending; reversing puts the largest values first.
closest = np.argsort(df)[::-1]
top_matches = closest[:3]
for indx in top_matches:
    img = frame[indx, 0]
    display(Image(filename=img))
In [ ]:
## Pick row 102 as the next query image and show it.
indx = 102
img = frame[indx, 0]
picture = Image(filename=img)
display(picture)
In [ ]:
## Score every image against the query image selected by `indx` and show the
## 3 highest-scoring ones. The references include the query row itself, so the
## query image can appear among the results.
references = extracted_features[:, :]  ## large
queries = extracted_features[indx, :]  ## small
df = references.distance(queries, "cosine").as_data_frame().values.ravel()

## argsort is ascending; reversing puts the largest values first.
closest = np.argsort(df)[::-1]

## Use a dedicated loop variable: iterating with `indx` would overwrite the
## query index read above, so re-running this cell would query another image.
for neighbor_idx in closest[:3]:
    img = frame[neighbor_idx, 0]
    display(Image(filename=img))
In [ ]:
## Pick row 200 as the next query image and show it.
indx = 200
img = frame[indx, 0]
picture = Image(filename=img)
display(picture)
In [ ]:
## Score every image against the query image selected by `indx` and show the
## 3 highest-scoring ones. The references include the query row itself, so the
## query image can appear among the results.
references = extracted_features[:, :]  ## large
queries = extracted_features[indx, :]  ## small
df = references.distance(queries, "cosine").as_data_frame().values.ravel()

## argsort is ascending; reversing puts the largest values first.
closest = np.argsort(df)[::-1]

## Use a dedicated loop variable: iterating with `indx` would overwrite the
## query index read above, so re-running this cell would query another image.
for neighbor_idx in closest[:3]:
    img = frame[neighbor_idx, 0]
    display(Image(filename=img))
In [ ]: