Copyright 2018 Google LLC
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
This notebook contains the code for computing the performance of the fixed strategies in various scenarios. The full experiment is described in Sec. 5.2 of the CVPR submission "Learning Intelligent Dialogs for Bounding Box Annotation". Note that this notebook does not reproduce that experiment exactly: the starting detector is too strong, there is no re-training, and only two iterations are performed.
In [0]:
from __future__ import division
from __future__ import print_function

import math

import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gym import spaces
from sklearn import model_selection, neural_network
from sklearn.neural_network import MLPClassifier

from third_party import np_box_ops

import annotator, detector, dialog, environment
To specify the experiment, define:
In [0]:
# desired quality: high (min_iou=0.7) and low (min_iou=0.5)
min_iou = 0.7 # @param ["0.5", "0.7"]
# drawing speed: high (time_draw=7) and low (time_draw=25)
time_draw = 7 # @param ["7", "25"]
Other parameters of the experiment
In [0]:
random_seed = 80590 # global variable that fixes the random seed everywhere for reproducibility of results
# what kind of features will be used to represent the state
# numerical values 1-20 correspond to a one-hot encoding of the class
predictive_fields = ['prediction_score', 'relative_size', 'avg_score', 'dif_avg_score', 'dif_max_score', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
time_verify = 1.8 # @param
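As a quick sanity check on these constants (an illustration, not part of the original experiment): a fixed strategy verifies up to k box proposals before falling back to manual drawing, so if each verification were accepted independently with probability p, its expected annotation time per box would be:
In [0]:
# Hypothetical helper for intuition only: expected time per box of a fixed
# strategy that runs up to `k` verifications (each accepted with probability
# `p`) and draws the box manually if all of them are rejected.
def expected_dialog_time(k, p, t_verify=time_verify, t_draw=time_draw):
  total = 0.0
  for i in range(k):
    # the (i+1)-th verification happens only if the first i were rejected
    total += t_verify * (1 - p) ** i
  # drawing happens only if all k verifications are rejected
  return total + t_draw * (1 - p) ** k

# e.g. with p = 0.5 and time_draw = 7, two verifications beat drawing directly
print(expected_dialog_time(2, 0.5))  # 4.45 < 7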
In [0]:
# Download ground truth:
# wget https://storage.googleapis.com/iad_pascal_annotations_and_detections/pascal_gt_for_iad.h5
# Download detections with features:
# wget https://storage.googleapis.com/iad_pascal_annotations_and_detections/pascal_proposals_plus_features_for_iad.h5
download_dir = ''
ground_truth = pd.read_hdf(download_dir + 'pascal_gt_for_iad.h5', 'ground_truth')
box_proposal_features = pd.read_hdf(download_dir + 'pascal_proposals_plus_features_for_iad.h5', 'box_proposal_features')
In [0]:
annotator_real = annotator.AnnotatorSimple(ground_truth, random_seed, time_verify, time_draw, min_iou)
In [0]:
# better to call this image_class_pairs later
image_class = ground_truth[['image_id', 'class_id']]
image_class = image_class.drop_duplicates()
Select the training and testing data according to the selected fold. We split all images into 10 approximately equal parts, and each fold includes these images together with all classes present in them.
In [0]:
unique_image = image_class['image_id'].drop_duplicates()
# divide the images into exponentially growing groups
im1 = unique_image.iloc[157]
im2 = unique_image.iloc[157+157]
im3 = unique_image.iloc[157+157+314]
im4 = unique_image.iloc[157+157+314+625]
im5 = unique_image.iloc[157+157+314+625+1253]
# image_class pairs groups are determined by the images in them
image_class_array = image_class.values[:,0]
in1 = np.searchsorted(image_class_array, im1, side='right')
in2 = np.searchsorted(image_class_array, im2, side='right')
in3 = np.searchsorted(image_class_array, im3, side='right')
in4 = np.searchsorted(image_class_array, im4, side='right')
in5 = np.searchsorted(image_class_array, im5, side='right')
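Note that np.searchsorted only returns correct boundaries when its first argument is sorted; a quick assertion (not in the original code) can guard the split:
In [0]:
# Optional sanity check: the group boundaries computed above are only valid
# if image_class is sorted by image_id.
assert (image_class_array[:-1] <= image_class_array[1:]).all(), (
    'image_class must be sorted by image_id for np.searchsorted to be valid')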
In [0]:
the_detector = detector.Detector(box_proposal_features, predictive_fields)
image_class_current = image_class.iloc[0:in1]
In [9]:
%output_height 300
env = environment.AnnotatingDataset(annotator_real, the_detector, image_class_current)
print('Running ', len(env.image_class), 'episodes with strategy X')
total_reward = 0
new_ground_truth_all = []
all_annotations = dict()
for i in range(len(env.image_class)):
  print('Episode ', i, end=': ')
  state = env.reset(current_index=i)
  agent = dialog.FixedDialog(0)
  done = False
  while not done:
    action = agent.get_next_action(state)
    if action == 0:
      print('V', end='')
    elif action == 1:
      print('D', end='')
    next_state, reward, done, coordinates = env.step(action)
    state = next_state
    total_reward += reward
  dataset_id = env.current_image
  # ground truth with which we will initialise the new user
  new_ground_truth = {}
  new_ground_truth['image_id'] = dataset_id
  new_ground_truth['class_id'] = env.current_class
  new_ground_truth['xmax'] = coordinates['xmax']
  new_ground_truth['xmin'] = coordinates['xmin']
  new_ground_truth['ymax'] = coordinates['ymax']
  new_ground_truth['ymin'] = coordinates['ymin']
  new_ground_truth_all.append(new_ground_truth)
  if dataset_id not in all_annotations:
    current_annotation = dict()
    current_annotation['boxes'] = np.array(
        [[coordinates['ymin'], coordinates['xmin'],
          coordinates['ymax'], coordinates['xmax']]], dtype=np.int32)
    current_annotation['box_labels'] = np.array([env.current_class])
    all_annotations[dataset_id] = current_annotation
  else:
    all_annotations[dataset_id]['boxes'] = np.append(
        all_annotations[dataset_id]['boxes'],
        np.array([[coordinates['ymin'], coordinates['xmin'],
                   coordinates['ymax'], coordinates['xmax']]], dtype=np.int32),
        axis=0)
    all_annotations[dataset_id]['box_labels'] = np.append(
        all_annotations[dataset_id]['box_labels'],
        np.array([env.current_class]))
  print()
print('total_reward = ', total_reward)
print('average episode reward = ', total_reward / len(env.image_class))
new_ground_truth_all = pd.DataFrame(new_ground_truth_all)
Starting from batch 3, the code above is simply repeated.
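Since the same episode loop reappears for every batch, it could be factored into a helper along the lines of the sketch below (illustration only; `env` and `make_agent` follow the interfaces used in this notebook, and the notebook itself keeps the explicit loops):
In [0]:
# Hypothetical refactoring, not used in the cells below: one shared episode
# loop. `make_agent` is any zero-argument callable returning a dialog agent,
# e.g. lambda: dialog.FixedDialog(0).
def run_annotation(env, make_agent):
  total_reward = 0
  new_ground_truth_all = []
  for i in range(len(env.image_class)):
    state = env.reset(current_index=i)
    agent = make_agent()
    done = False
    while not done:
      action = agent.get_next_action(state)
      state, reward, done, coordinates = env.step(action)
      total_reward += reward
    # record the final box of the episode as new ground truth
    new_ground_truth_all.append({
        'image_id': env.current_image, 'class_id': env.current_class,
        'xmin': coordinates['xmin'], 'xmax': coordinates['xmax'],
        'ymin': coordinates['ymin'], 'ymax': coordinates['ymax']})
  return total_reward, pd.DataFrame(new_ground_truth_all)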
In [0]:
ground_truth_new = pd.DataFrame(new_ground_truth_all)
annotator_new = annotator.AnnotatorSimple(ground_truth_new, random_seed, time_verify, time_draw, min_iou)
In [11]:
# @title Collect data for classifier
env = environment.AnnotatingDataset(annotator_new, the_detector, image_class_current)
print('Running ', len(env.image_class), 'episodes with strategy V3X')
%output_height 300
total_reward = 0
data_for_classifier = []
for i in range(len(env.image_class)):
  print(i, end=': ')
  agent = dialog.FixedDialog(3)
  state = env.reset(current_index=i)
  done = False
  while not done:
    action = agent.get_next_action(state)
    next_state, reward, done, _ = env.step(action)
    if action == 0:
      state_dict = dict(state)
      state_dict['is_accepted'] = done
      data_for_classifier.append(state_dict)
      print('V', end='')
    elif action == 1:
      print('D', end='')
    state = next_state
    total_reward += reward
  print()
print('Average episode reward = ', total_reward / len(env.image_class))
data_for_classifier = pd.DataFrame(data_for_classifier)
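Before fitting the classifier it can help to look at how balanced the collected is_accepted labels are, since a strongly skewed split makes the cross-validated log-loss harder to interpret (an optional check, not in the original notebook):
In [0]:
# Optional: fraction of accepted vs. rejected verifications in the data
# collected with the V3X strategy.
print(data_for_classifier['is_accepted'].value_counts(normalize=True))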
In [12]:
# @title Train classification model (might take some time)
# Alternative: fit a fixed architecture directly instead of cross-validating:
# model_mlp = neural_network.MLPClassifier(alpha=0.0001, activation='relu', hidden_layer_sizes=(50, 50, 50, 50, 50), random_state=602)
# model_for_agent = model_mlp.fit(data_for_classifier[predictive_fields], data_for_classifier['is_accepted'])
np.random.seed(random_seed) # for reproducibility of fitting the classifier and cross-validation
print('Cross-validating parameters\' values... This might take some time.')
# possible parameter values
parameters = {
    'hidden_layer_sizes': ((20, 20, 20), (50, 50, 50), (80, 80, 80),
                           (20, 20, 20, 20), (50, 50, 50, 50),
                           (80, 80, 80, 80), (20, 20, 20, 20, 20),
                           (50, 50, 50, 50, 50), (80, 80, 80, 80, 80)),
    'activation': ('logistic', 'relu'),
    'alpha': [0.0001, 0.001, 0.01],
}
model_mlp = neural_network.MLPClassifier()
# cross-validate parameters
grid_search = model_selection.GridSearchCV(model_mlp, parameters, scoring='neg_log_loss', refit=True)
grid_search.fit(data_for_classifier[predictive_fields], data_for_classifier['is_accepted'])
print('best score = ', grid_search.best_score_)
print('best parameters = ', grid_search.best_params_)
# use the model with the best parameters
model_for_agent = grid_search.best_estimator_
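The fitted model estimates the probability that verifying the current box proposal would succeed; dialog.DialogProb consumes it below through the standard scikit-learn interface, roughly as in this sketch (illustration only):
In [0]:
# Illustration only: acceptance probabilities for the states collected above;
# column 1 of predict_proba corresponds to the class True (accepted).
p_accept = model_for_agent.predict_proba(
    data_for_classifier[predictive_fields])[:, 1]
print(p_accept[:5])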
Now is the time to retrain the detector and obtain new box_proposal_features. This is not done in this notebook.
In [0]:
image_class_current = image_class.iloc[in1:in2]
the_detector = detector.Detector(box_proposal_features, predictive_fields)
agent = dialog.DialogProb(model_for_agent, annotator_real)
In [14]:
# @title Annotating data with intelligent dialog
env = environment.AnnotatingDataset(annotator_real, the_detector, image_class_current)
print('Running ', len(env.image_class), 'episodes with strategy IAD-Prob')
%output_height 300
print('intelligent dialog strategy')
total_reward = 0
# reset the ground truth because the user only needs to annotate the last 10% of data using the detector from the rest of the data
new_ground_truth_all = []
for i in range(len(env.image_class)):
  print(i, end=': ')
  state = env.reset(current_index=i)
  done = False
  while not done:
    action = agent.get_next_action(state)
    if action == 0:
      print('V', end='')
    elif action == 1:
      print('D', end='')
    next_state, reward, done, coordinates = env.step(action)
    state = next_state
    total_reward += reward
  dataset_id = env.current_image
  # ground truth with which we will initialise the new user
  new_ground_truth = {}
  new_ground_truth['image_id'] = dataset_id
  new_ground_truth['class_id'] = env.current_class
  new_ground_truth['xmax'] = coordinates['xmax']
  new_ground_truth['xmin'] = coordinates['xmin']
  new_ground_truth['ymax'] = coordinates['ymax']
  new_ground_truth['ymin'] = coordinates['ymin']
  new_ground_truth_all.append(new_ground_truth)
  if dataset_id not in all_annotations:
    current_annotation = dict()
    current_annotation['boxes'] = np.array(
        [[coordinates['ymin'], coordinates['xmin'],
          coordinates['ymax'], coordinates['xmax']]], dtype=np.int32)
    current_annotation['box_labels'] = np.array([env.current_class])
    all_annotations[dataset_id] = current_annotation
  else:
    all_annotations[dataset_id]['boxes'] = np.append(
        all_annotations[dataset_id]['boxes'],
        np.array([[coordinates['ymin'], coordinates['xmin'],
                   coordinates['ymax'], coordinates['xmax']]], dtype=np.int32),
        axis=0)
    all_annotations[dataset_id]['box_labels'] = np.append(
        all_annotations[dataset_id]['box_labels'],
        np.array([env.current_class]))
  print()
print('total_reward = ', total_reward)
print('average episode reward = ', total_reward / len(env.image_class))
new_ground_truth_all = pd.DataFrame(new_ground_truth_all)