In [1]:
# Create a validation set of 20 images randomly selected from the holdout-set
# images whose saturation value is above SATURATION_THRESHOLD
import ujson, gzip, random

# Only images whose saturation value is strictly above this are eligible.
SATURATION_THRESHOLD = 0.1
# Maximum number of images kept in the validation set.
DATASET_SIZE = 20
# Fixed seed so that re-running this cell selects the same images every time
# (otherwise each run would draw a different set and the saved file would change).
RANDOM_SEED = 0

# The index file is a gzipped JSON object; the code below treats it as a
# mapping of image path -> saturation value.
with gzip.open('../dataset_indexes/imagenet_validation_256_saturation_values.json.gz', 'rt') as f:
    saturation_indexes = ujson.load(f)

# Keep (path, value) pairs above the threshold. Sorting gives the shuffle a
# deterministic starting order instead of relying on dict iteration order.
saturated_filepaths = sorted(
    (path, value)
    for path, value in saturation_indexes.items()
    if value > SATURATION_THRESHOLD
)

# Use a dedicated, seeded generator rather than the global `random` state so the
# selection is reproducible and does not depend on what other cells have run.
random.Random(RANDOM_SEED).shuffle(saturated_filepaths)

# Take the first DATASET_SIZE shuffled entries; if fewer candidates exist, the
# slice simply returns all of them.
validation_dataset = {path: value for path, value in saturated_filepaths[:DATASET_SIZE]}

print(validation_dataset)


{u'ILSVRC2012_val_00017716.JPEG': 0.2485, u'ILSVRC2012_val_00035387.JPEG': 0.4263, u'ILSVRC2012_val_00029389.JPEG': 0.1375, u'ILSVRC2012_val_00031453.JPEG': 0.5449, u'ILSVRC2012_val_00029758.JPEG': 0.4051, u'ILSVRC2012_val_00000849.JPEG': 0.14300000000000002, u'ILSVRC2012_val_00000006.JPEG': 0.1267, u'ILSVRC2012_val_00022202.JPEG': 0.139, u'ILSVRC2012_val_00033145.JPEG': 0.21880000000000002, u'ILSVRC2012_val_00016404.JPEG': 0.2252, u'ILSVRC2012_val_00022970.JPEG': 0.5043, u'ILSVRC2012_val_00014664.JPEG': 0.14650000000000002, u'ILSVRC2012_val_00009357.JPEG': 0.3609, u'ILSVRC2012_val_00015366.JPEG': 0.1985, u'ILSVRC2012_val_00006790.JPEG': 0.3098, u'ILSVRC2012_val_00014806.JPEG': 0.3839, u'ILSVRC2012_val_00044787.JPEG': 0.1333, u'ILSVRC2012_val_00048470.JPEG': 0.5348, u'ILSVRC2012_val_00010961.JPEG': 0.41550000000000004, u'ILSVRC2012_val_00033574.JPEG': 0.1749}

In [3]:
# Write the selected validation set (path -> saturation value) to disk as JSON
output_path = '../dataset_indexes/imagenet_human_validation_set.json'
with open(output_path, 'w') as out_file:
    out_file.write(ujson.dumps(validation_dataset))

In [ ]: