Pick some random validation videos, download their features from the server and then use them to visualize the results.
In [1]:
import random
import os
import numpy as np
from work.dataset.activitynet import ActivityNetDataset

# Load the ActivityNet dataset description and sample 8 random validation videos.
dataset = ActivityNetDataset(
    videos_path='../dataset/videos.json',
    labels_path='../dataset/labels.txt'
)
validation_videos = dataset.get_subset_videos('validation')
videos = random.sample(validation_videos, 8)

# Fetch the features for each sampled video, downloading any file that is not
# already cached locally.
# NOTE(review): os.system + scp does no error checking; if the transfer fails,
# np.load below will raise on the missing file.
examples = []
for video in videos:
    features_path = os.path.join('../downloads/features/', video.features_file_name)
    if not os.path.isfile(features_path):
        os.system(
            'scp imatge:~/work/datasets/ActivityNet/v1.3/features/{} '
            '../downloads/features/'.format(video.features_file_name)
        )
    examples.append((video, np.load(features_path)))
Load the trained model with its weights
In [2]:
from keras.layers import Input, BatchNormalization, LSTM, TimeDistributed, Dense
from keras.models import Model

# Rebuild the network exactly as it was trained: one 4096-d feature vector per
# step at batch size 1, two stateful 512-unit LSTM layers, and a softmax over
# 201 outputs (200 activity classes plus background).
features_input = Input(batch_shape=(1, 1, 4096,), name='features')
normalized = BatchNormalization(mode=1)(features_input)
recurrent_1 = LSTM(512, return_sequences=True, stateful=True, name='lstm1')(normalized)
recurrent_2 = LSTM(512, return_sequences=True, stateful=True, name='lstm2')(recurrent_1)
softmax_output = TimeDistributed(Dense(201, activation='softmax'), name='fc')(recurrent_2)

# Restore the epoch-100 weight snapshot before compiling.
model = Model(input=features_input, output=softmax_output)
model.load_weights('../work/scripts/training/lstm_activity_classification/model_snapshot/lstm_activity_classification_02_e100.hdf5')
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
Extract the predictions for each video and print the top scores
In [3]:
# Run the network over every video. The LSTM layers are stateful, so the state
# is reset between videos to avoid carrying context from one clip to the next.
predictions = []
for video, features in examples:
    timesteps = features.shape[0]
    # Feed one 4096-d feature vector per timestep: shape (timesteps, 1, 4096).
    inputs = features.reshape((timesteps, 1, 4096))
    model.reset_states()
    probs = model.predict(inputs, batch_size=1).reshape(timesteps, 201)
    predictions.append((video, probs, np.argmax(probs, axis=1)))
Print the global classification results
In [9]:
from IPython.display import YouTubeVideo, display

for video, probs, _ in predictions:
    print('Video ID: {}\t\tGround truth: {}'.format(video.video_id, video.get_activity()))
    # Average the per-timestep probabilities, skip the background class (index
    # 0), and report the three best activities renormalized over the
    # non-background probability mass.
    mean_probs = np.mean(probs, axis=0)
    top_3 = np.argsort(mean_probs[1:])[::-1][:3] + 1
    top_scores = mean_probs[top_3] / np.sum(mean_probs[1:])
    for class_index, class_score in zip(top_3, top_scores):
        if class_score == 0.:
            continue
        print('{:.4f}\t{}'.format(class_score, dataset.labels[class_index][1]))
    # Embed the video itself so the prediction can be eyeballed.
    display(YouTubeVideo(video.video_id))
    print('\n')
Now show the temporal prediction for the activity happening in the video.
In [7]:
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
normalize = matplotlib.colors.Normalize(vmin=0, vmax=201)
for v, prediction, class_prediction in predictions:
v.get_video_instances(16, 0)
ground_truth = np.array([instance.output for instance in v.instances])
nb_instances = len(v.instances)
print('Video ID: {}\nMain Activity: {}'.format(v.video_id, v.get_activity()))
plt.figure(num=None, figsize=(18, 1), dpi=100)
plt.contourf(np.broadcast_to(ground_truth, (2, nb_instances)), norm=normalize, interpolation='nearest')
plt.title('Ground Truth')
plt.show()
plt.figure(num=None, figsize=(18, 1), dpi=100)
plt.contourf(np.broadcast_to(class_prediction, (2, nb_instances)), norm=normalize, interpolation='nearest')
plt.title('Prediction')
plt.show()
print('\n')
In [8]:
# Here the plotted values are probabilities / binary masks, so the scale is [0, 1].
normalize = matplotlib.colors.Normalize(vmin=0, vmax=1)
for v, prediction, class_prediction in predictions:
    v.get_video_instances(16, 0)
    ground_truth = np.array([instance.output for instance in v.instances])
    nb_instances = len(v.instances)
    output_index = dataset.get_output_index(v.label)
    print('Video ID: {}\nMain Activity: {}'.format(v.video_id, v.get_activity()))

    # Top-3 scoring over the non-background classes (same logic as the global
    # results cell above).
    class_means = np.mean(prediction, axis=0)
    top_3 = np.argsort(class_means[1:])[::-1][:3] + 1
    scores = class_means[top_3] / np.sum(class_means[1:])
    for index, score in zip(top_3, scores):
        if score == 0.:
            continue
        label = dataset.labels[index][1]
        print('{:.4f}\t{}'.format(score, label))

    # Ground truth scaled by the main-activity index — presumably each
    # instance.output is either 0 (background) or output_index, so this yields
    # a 0/1 mask; TODO confirm against ActivityNetDataset.
    # FIX (all three plots): dropped interpolation='nearest' — an imshow()
    # kwarg that is not valid for contourf() and raises on newer matplotlib.
    plt.figure(num=None, figsize=(18, 1), dpi=100)
    plt.contourf(np.broadcast_to(ground_truth/output_index, (2, nb_instances)), norm=normalize)
    plt.title('Ground Truth')
    plt.show()

    # Show only the positions that predicted the global ground truth category.
    temp = np.zeros((nb_instances))
    temp[class_prediction == output_index] = 1
    plt.figure(num=None, figsize=(18, 1), dpi=100)
    plt.contourf(np.broadcast_to(temp, (2, nb_instances)), norm=normalize)
    plt.title('Prediction of the ground truth class')
    plt.show()

    # Raw per-instance probability assigned to the ground-truth class.
    plt.figure(num=None, figsize=(18, 1), dpi=100)
    plt.contourf(np.broadcast_to(prediction[:, output_index], (2, nb_instances)), norm=normalize)
    plt.title('Probability for ground truth')
    plt.show()
    print('\n')
In [ ]: