In [4]:
import json
from os import listdir

from work.dataset.activitynet import ActivityNetDataset
from work.config import (STORED_FEATURES_PATH, STORED_VIDEOS_EXTENSION,
                         STORED_VIDEOS_PATH)

# Load the ActivityNet dataset description (video metadata plus label list).
print('Loading dataset')
dataset = ActivityNetDataset(
    videos_path='../dataset/videos.json',
    labels_path='../dataset/labels.txt'
)

# Collect the ids that have stored features: every `<id>.npy` file found in
# STORED_FEATURES_PATH contributes its name without the extension.
features_ids = [f[:-4] for f in listdir(STORED_FEATURES_PATH) if f.endswith('.npy')]
print('Videos already extracted its features: {} videos'.format(len(features_ids)))

# Drop every video that has no stored features file.
# A set gives constant-time membership tests instead of scanning the whole
# list once per video (assumes video_id is a hashable string, like the
# file-name stems it is compared against).
extracted_ids = set(features_ids)
to_remove = [video for video in dataset.videos if video.video_id not in extracted_ids]
# Removal is done in a second pass so dataset.videos is never mutated while
# it is being iterated.
for video in to_remove:
    dataset.videos.remove(video)
nb_videos = len(dataset.videos)
print('Total number of videos: {} videos'.format(nb_videos))

# Serialize the filtered dataset and write it to `videos.json` in the
# current working directory (not the `../dataset/videos.json` input file).
json_dataset = dataset.serialize()

with open('videos.json', 'w') as f:
    json.dump(json_dataset, f)


Loading dataset
Videos already downloaded: 19633 videos
Total number of videos: 19633 videos

In [ ]: