Randomly select some validation videos, download their extracted features from the server, and then use them to visualize the results.


In [1]:
import os
import random
import subprocess

import numpy as np

from work.dataset.activitynet import ActivityNetDataset

# Local cache directory for the feature files and the remote directory they
# are copied from (assumes an ssh host alias named `imatge` is configured --
# TODO confirm).
features_dir = '../downloads/features/'
remote_features_dir = 'imatge:~/work/datasets/ActivityNet/v1.3/features/'

dataset = ActivityNetDataset(
    videos_path='../dataset/videos.json',
    labels_path='../dataset/labels.txt'
)
# Draw 8 validation videos at random; no seed is set, so the selection
# differs on every run.
videos = dataset.get_subset_videos('validation')
videos = random.sample(videos, 8)

# scp will not create the destination directory, so make sure it exists
# before the first download.
if not os.path.isdir(features_dir):
    os.makedirs(features_dir)

# Build the list of (video, features) pairs, downloading any feature file
# that is not cached locally yet.
examples = []
for v in videos:
    features_path = os.path.join(features_dir, v.features_file_name)
    if not os.path.isfile(features_path):
        # Argument list instead of a formatted shell string: the file name is
        # never interpreted by a local shell, and a failed copy raises
        # CalledProcessError here rather than surfacing later in np.load.
        subprocess.check_call(
            ['scp', remote_features_dir + v.features_file_name, features_dir]
        )
    features = np.load(features_path)
    examples.append((v, features))

Load the trained models with their weights.


In [2]:
from keras.layers import Input, BatchNormalization, LSTM, TimeDistributed, Dense, merge
from keras.models import Model

def _build_default_model():
    """Assemble the classifier that only receives the video features.

    The graph takes a batch of one sample with a single timestep of 4096
    values, normalizes it, runs it through two stateful 512-unit LSTMs and
    ends in a time-distributed softmax over 201 outputs.
    """
    features_in = Input(batch_shape=(1, 1, 4096,), name='features')
    hidden = BatchNormalization(mode=1)(features_in)
    # Two stacked recurrent layers; both keep their state between calls.
    for layer_name in ('lstm1', 'lstm2'):
        hidden = LSTM(512, return_sequences=True, stateful=True, name=layer_name)(hidden)
    scores = TimeDistributed(Dense(201, activation='softmax'), name='fc')(hidden)
    return Model(input=features_in, output=scores)


# Restore the trained weights, show the architecture and compile the model.
model_def = _build_default_model()
model_def.load_weights('../work/scripts/training/lstm_activity_classification/model_snapshot/lstm_activity_classification_02_e100.hdf5')
model_def.summary()
model_def.compile(loss='categorical_crossentropy', optimizer='rmsprop')


Using Theano backend.
____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
====================================================================================================
features (InputLayer)              (1, 1, 4096)        0                                            
____________________________________________________________________________________________________
batchnormalization_1 (BatchNormaliz(1, 1, 4096)        8192        features[0][0]                   
____________________________________________________________________________________________________
lstm1 (LSTM)                       (1, 1, 512)         9439232     batchnormalization_1[0][0]       
____________________________________________________________________________________________________
lstm2 (LSTM)                       (1, 1, 512)         2099200     lstm1[0][0]                      
____________________________________________________________________________________________________
fc (TimeDistributed)               (1, 1, 201)         103113      lstm2[0][0]                      
====================================================================================================
Total params: 11649737
____________________________________________________________________________________________________

In [3]:
def _build_feedback_model():
    """Assemble the classifier that also receives its previous output.

    Besides the 4096 feature values, the graph takes a second input of 202
    values named 'prev_output'. The normalized features and that second input
    are concatenated along the last axis before the two stateful 512-unit
    LSTMs and the time-distributed softmax over 201 outputs.
    """
    features_in = Input(batch_shape=(1, 1, 4096,), name='features')
    normalized = BatchNormalization()(features_in)
    prev_output_in = Input(batch_shape=(1, 1, 202,), name='prev_output')
    hidden = merge([normalized, prev_output_in], mode='concat', concat_axis=-1)
    # Two stacked recurrent layers; both keep their state between calls.
    for layer_name in ('lstm1', 'lstm2'):
        hidden = LSTM(512, return_sequences=True, stateful=True, name=layer_name)(hidden)
    scores = TimeDistributed(Dense(201, activation='softmax'), name='fc')(hidden)
    return Model(input=[features_in, prev_output_in], output=scores)


# Restore the trained weights, show the architecture and compile the model.
model_feed = _build_feedback_model()
model_feed.load_weights('../work/scripts/training/lstm_activity_classification_feedback/model_snapshot/lstm_activity_classification_feedback_02_e100.hdf5')
model_feed.summary()
model_feed.compile(loss='categorical_crossentropy', optimizer='rmsprop')


____________________________________________________________________________________________________
Layer (type)                       Output Shape        Param #     Connected to                     
====================================================================================================
features (InputLayer)              (1, 1, 4096)        0                                            
____________________________________________________________________________________________________
batchnormalization_2 (BatchNormaliz(1, 1, 4096)        8192        features[0][0]                   
____________________________________________________________________________________________________
prev_output (InputLayer)           (1, 1, 202)         0                                            
____________________________________________________________________________________________________
merge_1 (Merge)                    (1, 1, 4298)        0           batchnormalization_2[0][0]       
                                                                   prev_output[0][0]                
____________________________________________________________________________________________________
lstm1 (LSTM)                       (1, 1, 512)         9852928     merge_1[0][0]                    
____________________________________________________________________________________________________
lstm2 (LSTM)                       (1, 1, 512)         2099200     lstm1[0][0]                      
____________________________________________________________________________________________________
fc (TimeDistributed)               (1, 1, 201)         103113      lstm2[0][0]                      
====================================================================================================
Total params: 12063433
____________________________________________________________________________________________________

Extract the predictions for each video and print the scoring


In [4]:
def _run_default_model(feature_rows):
    """Run `model_def` over one video's features.

    Each row of `feature_rows` is fed as its own batch of a single timestep,
    after clearing the recurrent state left by the previous video.

    Returns a tuple `(probabilities, classes)`: the outputs reshaped to
    (rows, 201) and the argmax of each row.
    """
    row_count = feature_rows.shape[0]
    model_def.reset_states()
    probabilities = model_def.predict(
        feature_rows.reshape((row_count, 1, 4096)), batch_size=1
    ).reshape(row_count, 201)
    return probabilities, np.argmax(probabilities, axis=1)


# One (video, probabilities, predicted classes) triple per example.
predictions_def = [
    (video,) + _run_default_model(video_features)
    for video, video_features in examples
]


/Users/Alberto/Development/DeepLearning/frameworks/Keras/keras/keras/backend/theano_backend.py:514: UserWarning: theano.function was asked to create a function computing outputs given certain inputs, but the provided input variable at index 1 is not part of the computational graph needed to compute the outputs: keras_learning_phase.
To make this warning into an error, you can pass the parameter on_unused_input='raise' to theano.function. To disable it completely, use on_unused_input='ignore'.
  **kwargs)

In [5]:
def _run_feedback_model(feature_rows):
    """Run `model_feed` over one video's features, one row at a time.

    At every step the model receives the current feature row together with a
    202-value vector built from its own previous output. For the first step
    only the last slot (index 201) is set -- presumably the "no previous
    output" marker used at training time; confirm against the training script.

    Returns a tuple `(probabilities, classes)`: a (rows, 201) array with the
    output of every step and the argmax of each row.
    """
    row_count = feature_rows.shape[0]
    steps = feature_rows.reshape((row_count, 1, 4096))
    probabilities = np.zeros((row_count, 201))

    feedback = np.zeros((1, 1, 202))
    feedback[0, 0, 201] = 1
    model_feed.reset_states()
    for step in range(row_count):
        step_output = model_feed.predict_on_batch({
            'features': steps[step].reshape(1, 1, 4096),
            'prev_output': feedback,
        })
        probabilities[step, :] = step_output[0, :]
        # Next step's feedback: the 201 scores just produced, with the extra
        # last slot left at zero.
        feedback = np.zeros((1, 1, 202))
        feedback[0, 0, :201] = step_output[0, :]
    return probabilities, np.argmax(probabilities, axis=1)


# One (video, probabilities, predicted classes) triple per example.
predictions_feed = [
    (video,) + _run_feedback_model(video_features)
    for video, video_features in examples
]

Print the global classification results


In [6]:
from IPython.display import YouTubeVideo, display

def _print_top3(probabilities):
    """Print the three best classes of a video with their normalized scores.

    The scores of every step are averaged per class. Index 0 is left out of
    both the ranking and the normalization (presumably the background / no
    activity output -- confirm against the dataset labels). Classes whose
    normalized score is exactly zero are not printed.
    """
    mean_scores = np.mean(probabilities, axis=0)
    foreground = mean_scores[1:]
    # argsort is ascending: reverse it, keep three, and shift by one to undo
    # the slice that dropped index 0.
    best = np.argsort(foreground)[::-1][:3] + 1
    shares = mean_scores[best] / np.sum(foreground)
    for class_index, share in zip(best, shares):
        if share != 0.:
            print('{:.4f}\t{}'.format(share, dataset.labels[class_index][1]))


# For every video: header, top-3 of each model, and the embedded video player.
for (video, probs_default, _), (_, probs_feedback, _) in zip(predictions_def, predictions_feed):
    print('Video ID: {}\t\tMain Activity: {}'.format(video.video_id, video.get_activity()))
    for model_title, probs in (('Default Model', probs_default),
                               ('Model with Feedback', probs_feedback)):
        print(model_title)
        _print_top3(probs)
    display(YouTubeVideo(video.video_id))
    print('\n')


Video ID: AR6_PW1um-I		Main Activity: Rollerblading
Default Model
0.4854	Longboarding
0.4065	Rollerblading
0.0599	Elliptical trainer
Model with Feedback
0.9904	Longboarding
0.0085	Rollerblading
0.0010	Tennis serve with ball bouncing

Video ID: MsalIjwP3no		Main Activity: Plastering
Default Model
0.6589	Plastering
0.1985	Doing a powerbomb
0.0642	Painting fence
Model with Feedback
0.9476	Capoeira
0.0183	Plastering
0.0098	Futsal

Video ID: esNQZCjMZaM		Main Activity: Archery
Default Model
0.4258	Snatch
0.2648	Archery
0.1729	Doing fencing
Model with Feedback
0.3957	Archery
0.1905	Javelin throw
0.1217	Canoeing

Video ID: 9fw8ODTEso4		Main Activity: Beach soccer
Default Model
0.9987	Beach soccer
0.0004	Wrapping presents
0.0003	Futsal
Model with Feedback
0.6453	Playing beach volleyball
0.3536	Beach soccer
0.0006	Putting on shoes

Video ID: m1aF1CVo-s8		Main Activity: Elliptical trainer
Default Model
0.8472	Spinning
0.0785	Using the rowing machine
0.0417	Assembling bicycle
Model with Feedback
0.9968	Elliptical trainer
0.0012	Fixing bicycle
0.0007	Playing accordion

Video ID: GvJxJf4m6_M		Main Activity: Putting on shoes
Default Model
0.9638	Spinning
0.0211	Throwing darts
0.0122	Decorating the Christmas tree
Model with Feedback
0.5262	Elliptical trainer
0.1128	Playing drums
0.0904	Spinning

Video ID: 5E2OdhrgG8s		Main Activity: Playing harmonica
Default Model
0.4643	Mixing drinks
0.1946	Tango
0.0745	Drinking beer
Model with Feedback
0.7711	Playing harmonica
0.1098	Smoking a cigarette
0.0768	Playing violin

Video ID: oS7Twj3Pou0		Main Activity: Hopscotch
Default Model
0.5398	Fun sliding down
0.0506	Mooping floor
0.0498	Drinking coffee
Model with Feedback
0.1677	Walking the dog
0.1294	Playing saxophone
0.1041	Bathing dog

Now show the temporal predictions for the activity happening in the video.


In [7]:
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
# Colour scale shared by all strips in this cell: class indices in [0, 201].
normalize = matplotlib.colors.Normalize(vmin=0, vmax=201)


def _show_class_strip(values, title, width, norm):
    """Draw a 1-D sequence of per-instance values as a thin coloured strip.

    values -- 1-D array with one value per instance.
    title  -- figure title.
    width  -- number of instances; `values` must broadcast to (2, width).
    norm   -- matplotlib normalization mapping the values to colours.
    """
    plt.figure(num=None, figsize=(18, 1), dpi=100)
    # contourf needs 2-D data, so the sequence is duplicated into two rows.
    # The former `interpolation='nearest'` argument was dropped: contourf has
    # no such parameter (it belongs to imshow), so it never had any effect.
    plt.contourf(np.broadcast_to(values, (2, width)), norm=norm)
    plt.title(title)
    plt.show()


for prediction_d, prediction_f in zip(predictions_def, predictions_feed):
    v, _, class_prediction_d = prediction_d
    _, _, class_prediction_f = prediction_f
    # Presumably fills `v.instances` with 16-frame clips and no overlap --
    # TODO confirm against the dataset class.
    v.get_video_instances(16, 0)
    ground_truth = np.array([instance.output for instance in v.instances])
    nb_instances = len(v.instances)

    print('Video ID: {}\nMain Activity: {}'.format(v.video_id, v.get_activity()))
    # Same strip three times: annotation first, then each model's argmax.
    for values, title in ((ground_truth, 'Ground Truth'),
                          (class_prediction_d, 'Prediction Default Model'),
                          (class_prediction_f, 'Prediction Model with Feedback')):
        _show_class_strip(values, title, nb_instances, normalize)

    print('\n')


Video ID: AR6_PW1um-I
Main Activity: Rollerblading

Video ID: MsalIjwP3no
Main Activity: Plastering

Video ID: esNQZCjMZaM
Main Activity: Archery

Video ID: 9fw8ODTEso4
Main Activity: Beach soccer

Video ID: m1aF1CVo-s8
Main Activity: Elliptical trainer

Video ID: GvJxJf4m6_M
Main Activity: Putting on shoes

Video ID: 5E2OdhrgG8s
Main Activity: Playing harmonica

Video ID: oS7Twj3Pou0
Main Activity: Hopscotch


In [8]:
# Colour scale shared by all strips in this cell: values in [0, 1].
normalize = matplotlib.colors.Normalize(vmin=0, vmax=1)


def _show_score_strip(values, title, width, norm):
    """Draw a 1-D sequence of per-instance values as a thin coloured strip.

    values -- 1-D array with one value per instance.
    title  -- figure title.
    width  -- number of instances; `values` must broadcast to (2, width).
    norm   -- matplotlib normalization mapping the values to colours.
    """
    plt.figure(num=None, figsize=(18, 1), dpi=100)
    # contourf needs 2-D data, so the sequence is duplicated into two rows.
    # The former `interpolation='nearest'` argument was dropped: contourf has
    # no such parameter (it belongs to imshow), so it never had any effect.
    plt.contourf(np.broadcast_to(values, (2, width)), norm=norm)
    plt.title(title)
    plt.show()


for prediction_def, prediction_feed in zip(predictions_def, predictions_feed):
    v, prediction_d, class_prediction_d = prediction_def
    _, prediction_f, class_prediction_f = prediction_feed
    # Presumably fills `v.instances` with 16-frame clips and no overlap --
    # TODO confirm against the dataset class.
    v.get_video_instances(16, 0)
    ground_truth = np.array([instance.output for instance in v.instances])
    nb_instances = len(v.instances)
    output_index = dataset.get_output_index(v.label)

    print('Video ID: {}\nMain Activity: {}'.format(v.video_id, v.get_activity()))

    # Top-3 classes per model from the per-class mean over all instances.
    # Index 0 is excluded from the ranking and from the normalization.
    labels = ('Default Model', 'Model with Feedback')
    for prediction, label in zip((prediction_d, prediction_f), labels):
        print(label)
        class_means = np.mean(prediction, axis=0)
        top_3 = np.argsort(class_means[1:])[::-1][:3] + 1
        scores = class_means[top_3]/np.sum(class_means[1:])
        for index, score in zip(top_3, scores):
            if score == 0.:
                continue
            # Separate name so the model title in `label` is not overwritten.
            class_name = dataset.labels[index][1]
            print('{:.4f}\t{}'.format(score, class_name))

    # Dividing by the video's own output index rescales the annotated
    # instances to 1 so they fit the [0, 1] colour scale.
    _show_score_strip(ground_truth/output_index, 'Ground Truth', nb_instances, normalize)

    per_model = (
        ('Default model', prediction_d, class_prediction_d),
        ('Feedback model', prediction_f, class_prediction_f),
    )
    for model_name, probabilities, predicted_classes in per_model:
        # Mark only the positions where the argmax equals the video's class.
        hits = np.zeros((nb_instances))
        hits[predicted_classes == output_index] = 1
        _show_score_strip(
            hits,
            'Prediction of the ground truth class ({})'.format(model_name),
            nb_instances, normalize)

        # Raw probability assigned to the video's class at every position.
        _show_score_strip(
            probabilities[:, output_index],
            'Probability for ground truth ({})'.format(model_name),
            nb_instances, normalize)

    print('\n')


Video ID: AR6_PW1um-I
Main Activity: Rollerblading
Default Model
0.4854	Longboarding
0.4065	Rollerblading
0.0599	Elliptical trainer
Model with Feedback
0.9904	Longboarding
0.0085	Rollerblading
0.0010	Tennis serve with ball bouncing

Video ID: MsalIjwP3no
Main Activity: Plastering
Default Model
0.6589	Plastering
0.1985	Doing a powerbomb
0.0642	Painting fence
Model with Feedback
0.9476	Capoeira
0.0183	Plastering
0.0098	Futsal

Video ID: esNQZCjMZaM
Main Activity: Archery
Default Model
0.4258	Snatch
0.2648	Archery
0.1729	Doing fencing
Model with Feedback
0.3957	Archery
0.1905	Javelin throw
0.1217	Canoeing

Video ID: 9fw8ODTEso4
Main Activity: Beach soccer
Default Model
0.9987	Beach soccer
0.0004	Wrapping presents
0.0003	Futsal
Model with Feedback
0.6453	Playing beach volleyball
0.3536	Beach soccer
0.0006	Putting on shoes

Video ID: m1aF1CVo-s8
Main Activity: Elliptical trainer
Default Model
0.8472	Spinning
0.0785	Using the rowing machine
0.0417	Assembling bicycle
Model with Feedback
0.9968	Elliptical trainer
0.0012	Fixing bicycle
0.0007	Playing accordion

Video ID: GvJxJf4m6_M
Main Activity: Putting on shoes
Default Model
0.9638	Spinning
0.0211	Throwing darts
0.0122	Decorating the Christmas tree
Model with Feedback
0.5262	Elliptical trainer
0.1128	Playing drums
0.0904	Spinning

Video ID: 5E2OdhrgG8s
Main Activity: Playing harmonica
Default Model
0.4643	Mixing drinks
0.1946	Tango
0.0745	Drinking beer
Model with Feedback
0.7711	Playing harmonica
0.1098	Smoking a cigarette
0.0768	Playing violin

Video ID: oS7Twj3Pou0
Main Activity: Hopscotch
Default Model
0.5398	Fun sliding down
0.0506	Mooping floor
0.0498	Drinking coffee
Model with Feedback
0.1677	Walking the dog
0.1294	Playing saxophone
0.1041	Bathing dog


In [ ]: