Copyright 2019 Google LLC
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
This colab accompanies the paper "NAS-Bench-101: Towards Reproducible Neural Architecture Search" and the rest of the code at https://github.com/google-research/nasbench.
In this colab, we demonstrate how to use the dataset for simple benchmarking and analysis. The free, publicly available hosted Colab instances are sufficient to run it.
In [1]:
# Notebook setup cell: selects the TensorFlow major version, downloads the
# dataset file, installs the nasbench package and loads the dataset.
#
# NOTE: lines starting with `%` or `!` are notebook magics / shell escapes, so
# this cell only runs inside a notebook environment, not as a plain .py file.
#
# This code was written in TF 1.12 but should be supported all the way through
# TF 1.15. Untested in TF 2.0+.
%tensorflow_version 1.x
# Download the raw data into the current working directory. The default file
# only contains the 108-epoch data points; for the full dataset, uncomment the
# second download line (nasbench_full.tfrecord).
!curl -O https://storage.googleapis.com/nasbench/nasbench_only108.tfrecord
# !curl -O https://storage.googleapis.com/nasbench/nasbench_full.tfrecord
# Clone the code and install it (and its dependencies) from the local checkout.
!git clone https://github.com/google-research/nasbench
!pip install ./nasbench
# Initialize the NASBench object which parses the raw data into memory (this
# should only be run once as it takes up to a few minutes).
from nasbench import api
# Use nasbench_full.tfrecord for full dataset (run download command above).
# The resulting `nasbench` object is the one queried by the cells that follow.
nasbench = api.NASBench('nasbench_only108.tfrecord')
In [0]:
# Standard imports
import copy
import numpy as np
import matplotlib.pyplot as plt
import random
# Useful constants

# Labels for the fixed first/last vertices and for the selectable operations.
INPUT = 'input'
OUTPUT = 'output'
CONV3X3 = 'conv3x3-bn-relu'
CONV1X1 = 'conv1x1-bn-relu'
MAXPOOL3X3 = 'maxpool3x3'

NUM_VERTICES = 7
MAX_EDGES = 9
# Number of entries strictly above the diagonal of the adjacency matrix.
# Floor division keeps this count an int; true division would produce the
# float 21.0 on Python 3, which cannot be used as a size or a range bound.
EDGE_SPOTS = NUM_VERTICES * (NUM_VERTICES - 1) // 2  # Upper triangular matrix
OP_SPOTS = NUM_VERTICES - 2  # Input/output vertices are fixed
ALLOWED_OPS = [CONV3X3, CONV1X1, MAXPOOL3X3]
ALLOWED_EDGES = [0, 1]  # Binary adjacency matrix
In [3]:
# Look up an Inception-like cell in the dataset.
cell = api.ModelSpec(
    # One matrix row per vertex, in the same order as `ops` below.
    matrix=[
        [0, 1, 1, 1, 0, 1, 0],  # input layer
        [0, 0, 0, 0, 0, 0, 1],  # 1x1 conv
        [0, 0, 0, 0, 0, 0, 1],  # 3x3 conv
        [0, 0, 0, 0, 1, 0, 0],  # 5x5 conv (replaced by two 3x3's)
        [0, 0, 0, 0, 0, 0, 1],  # 5x5 conv (replaced by two 3x3's)
        [0, 0, 0, 0, 0, 0, 1],  # 3x3 max-pool
        [0, 0, 0, 0, 0, 0, 0],  # output layer
    ],
    # Operation placed at each vertex of the module.
    ops=[INPUT, CONV1X1, CONV3X3, CONV3X3, CONV3X3, MAXPOOL3X3, OUTPUT],
)

# Repeated queries may print different numbers: every cell was evaluated 3
# times at each epoch budget and a query samples one of those runs at random.
data = nasbench.query(cell)
for k, v in data.items():
    print('%s: %s' % (k, v))
In [0]:
def random_spec():
    """Keep sampling random cells until one passes `nasbench.is_valid`.

    Returns:
      The first sampled `api.ModelSpec` that the dataset reports as valid.
    """
    while True:
        # Draw a full random 0/1 matrix, then keep only the entries strictly
        # above the diagonal.
        adjacency = np.triu(
            np.random.choice(ALLOWED_EDGES, size=(NUM_VERTICES, NUM_VERTICES)),
            1)
        # Draw an op for every vertex; the first and last entries are then
        # overwritten with the fixed input/output labels.
        labels = np.random.choice(ALLOWED_OPS, size=(NUM_VERTICES)).tolist()
        labels[0], labels[-1] = INPUT, OUTPUT
        candidate = api.ModelSpec(matrix=adjacency, ops=labels)
        if not nasbench.is_valid(candidate):
            continue
        return candidate
def mutate_spec(old_spec, mutation_rate=1.0):
    """Return a valid spec obtained by randomly perturbing `old_spec`.

    Mutation is retried from a fresh copy of `old_spec` until the result passes
    `nasbench.is_valid`; `old_spec` itself is never modified.

    Args:
      old_spec: spec whose `original_matrix` / `original_ops` are copied and
        perturbed.
      mutation_rate: scales both the edge-flip and the op-resample probability.

    Returns:
      A new `api.ModelSpec` accepted by `nasbench.is_valid`.
    """
    # Every above-diagonal entry flips independently with this probability,
    # i.e. mutation_rate * (NUM_VERTICES - 1) / 2 flips in expectation.
    flip_prob = mutation_rate / NUM_VERTICES
    # Every non-input/output vertex is resampled independently with this
    # probability.
    resample_prob = mutation_rate / OP_SPOTS

    while True:
        matrix = copy.deepcopy(old_spec.original_matrix)
        ops = copy.deepcopy(old_spec.original_ops)

        for row in range(NUM_VERTICES - 1):
            for col in range(row + 1, NUM_VERTICES):
                if random.random() < flip_prob:
                    matrix[row, col] = 1 - matrix[row, col]

        for vertex in range(1, NUM_VERTICES - 1):
            if random.random() < resample_prob:
                # Always switch to a *different* op than the current one.
                alternatives = [op for op in nasbench.config['available_ops']
                                if op != ops[vertex]]
                ops[vertex] = random.choice(alternatives)

        mutant = api.ModelSpec(matrix, ops)
        if nasbench.is_valid(mutant):
            return mutant
def random_combination(iterable, sample_size):
    """Pick `sample_size` items at random, returned in their original order.

    This is a random selection from
    itertools.combinations(iterable, sample_size).

    Args:
      iterable: items to choose from; it is materialized into a tuple.
      sample_size: number of items to pick.

    Returns:
      A tuple with the chosen items, ordered as they appear in `iterable`.
    """
    items = tuple(iterable)
    # Sample positions (not items) so that sorting restores the input order.
    positions = random.sample(range(len(items)), sample_size)
    positions.sort()
    return tuple(map(items.__getitem__, positions))
def run_random_search(max_time_budget=5e6):
    """Perform one random-search roll-out until the time budget is exceeded.

    Args:
      max_time_budget: the roll-out stops after the first query that pushes the
        time reported by `nasbench.get_budget_counters()` above this value.

    Returns:
      A `(times, best_valids, best_tests)` tuple of equally long lists. Each
      list starts with 0.0 and gains one entry per queried spec: the time spent
      so far, the best validation accuracy so far, and the test accuracy of
      that best-by-validation spec.
    """
    nasbench.reset_budget_counters()
    times = [0.0]
    best_valids = [0.0]
    best_tests = [0.0]

    while True:
        result = nasbench.query(random_spec())

        # Model selection looks at validation accuracy only; the test accuracy
        # is merely carried along to compare search trajectories afterwards.
        improved = result['validation_accuracy'] > best_valids[-1]
        best_valids.append(
            result['validation_accuracy'] if improved else best_valids[-1])
        best_tests.append(
            result['test_accuracy'] if improved else best_tests[-1])

        elapsed, _ = nasbench.get_budget_counters()
        times.append(elapsed)
        if elapsed > max_time_budget:
            # Stop at the first query that exceeds the budget.
            return times, best_valids, best_tests
def run_evolution_search(max_time_budget=5e6,
                         population_size=50,
                         tournament_size=10,
                         mutation_rate=1.0):
    """Run a single roll-out of regularized evolution to a fixed time budget.

    The population is first seeded with `population_size` random specs. After
    that, each step draws a tournament from the population, mutates the member
    with the best validation accuracy, adds the mutant and removes the oldest
    individual.

    Args:
      max_time_budget: the roll-out stops after the first query that pushes the
        time reported by `nasbench.get_budget_counters()` above this value.
      population_size: number of individuals kept in the population.
      tournament_size: number of individuals drawn for each tournament.
      mutation_rate: forwarded to `mutate_spec`.

    Returns:
      A `(times, best_valids, best_tests)` tuple of equally long lists. Each
      list starts with 0.0 and gains one entry per queried spec.

    Raises:
      ValueError: if `tournament_size` is not in `[1, population_size]`. A
        tournament cannot be drawn in that case, so fail before any query is
        spent instead of after the whole seeding phase.
    """
    if not 1 <= tournament_size <= population_size:
        raise ValueError(
            'tournament_size must be between 1 and population_size, got '
            'tournament_size=%r and population_size=%r'
            % (tournament_size, population_size))

    nasbench.reset_budget_counters()
    times, best_valids, best_tests = [0.0], [0.0], [0.0]
    population = []  # (validation, spec) tuples, oldest first

    def evaluate(spec):
        """Queries `spec`, extends the trajectory, returns (valid_acc, time)."""
        data = nasbench.query(spec)
        time_spent, _ = nasbench.get_budget_counters()
        times.append(time_spent)
        # Select on validation accuracy only; test accuracy is just recorded
        # for the spec that is currently best by validation.
        if data['validation_accuracy'] > best_valids[-1]:
            best_valids.append(data['validation_accuracy'])
            best_tests.append(data['test_accuracy'])
        else:
            best_valids.append(best_valids[-1])
            best_tests.append(best_tests[-1])
        return data['validation_accuracy'], time_spent

    # For the first population_size individuals, seed the population with
    # randomly generated cells.
    for _ in range(population_size):
        spec = random_spec()
        valid_acc, time_spent = evaluate(spec)
        population.append((valid_acc, spec))
        if time_spent > max_time_budget:
            # The budget ran out while seeding: stop here rather than spending
            # another query (or drawing a tournament from a population that
            # may still be smaller than tournament_size).
            return times, best_valids, best_tests

    # After the population is seeded, proceed with evolving the population.
    while True:
        sample = random_combination(population, tournament_size)
        # sorted(...)[-1] keeps the original tie-break: among equal validation
        # accuracies the individual latest in the sample wins.
        best_spec = sorted(sample, key=lambda i: i[0])[-1][1]
        new_spec = mutate_spec(best_spec, mutation_rate)
        valid_acc, time_spent = evaluate(new_spec)

        # In regularized evolution, we kill the oldest individual in the
        # population.
        population.append((valid_acc, new_spec))
        population.pop(0)

        if time_spent > max_time_budget:
            break

    return times, best_valids, best_tests
In [5]:
# Perform 10 independent roll-outs of random search and of evolution search.
# This should take a few minutes. Without the precomputed dataset, each
# roll-out would have required days of compute to train and evaluate.
random_data, evolution_data = [], []
for repeat in range(10):
    print('Running repeat %d' % (repeat + 1))

    # Random-search trajectory for this repeat.
    times, best_valid, best_test = run_random_search()
    random_data.append((times, best_valid, best_test))

    # Evolution-search trajectory for this repeat.
    times, best_valid, best_test = run_evolution_search()
    evolution_data.append((times, best_valid, best_test))
In [6]:
# One wide figure with a panel per search method. Every roll-out contributes a
# red (validation) and a blue (test) best-so-far curve.
plt.figure(figsize=(20, 5))

# First panel: random search.
plt.subplot(1, 3, 1)
for times, best_valid, best_test in random_data:
    plt.plot(times, best_valid, color='red', alpha=0.5, label='valid')
    plt.plot(times, best_test, color='blue', alpha=0.5, label='test')
plt.xlabel('time spent (seconds)')
plt.ylabel('accuracy')
plt.ylim(0.92, 0.96)
plt.grid()
plt.title('Random search trajectories (red=validation, blue=test)')

# Second panel: evolution search, drawn with the same axes for comparison.
plt.subplot(1, 3, 2)
for times, best_valid, best_test in evolution_data:
    plt.plot(times, best_valid, color='red', alpha=0.5, label='valid')
    plt.plot(times, best_test, color='blue', alpha=0.5, label='test')
plt.xlabel('time spent (seconds)')
plt.ylabel('accuracy')
plt.ylim(0.92, 0.96)
plt.grid()
# Kept as the last statement so the value displayed by the cell is unchanged.
plt.title('Evolution search trajectories (red=validation, blue=test)')
Out[6]:
In [7]:
# Compare the mean test accuracy along with error bars.
def plot_data(data, color, label, gran=10000, max_budget=5000000):
    """Plot the mean test accuracy and a quartile band at fixed time steps.

    Every run is linearly interpolated onto the grid 0, gran, 2*gran, ... up to
    max_budget. The mean over runs is drawn as a line and the band between the
    lower and upper quartile run is shaded.

    Args:
      data: non-empty sequence of `(times, best_valids, best_tests)` tuples,
        one per run. `times` is assumed to be increasing and to start at 0, as
        produced by the search functions in this file.
      color: matplotlib color for the line and the band.
      label: legend label of the mean line.
      gran: spacing of the time grid.
      max_budget: last time covered by the grid.

    Raises:
      ValueError: if `data` is empty.
    """
    if not data:
        raise ValueError('data must contain at least one run')

    xs = np.arange(0, max_budget + 1, gran)
    # The first grid point (time 0) is pinned to 0.0 below, so only the later
    # points are interpolated. np.interp holds the last value for grid points
    # beyond the end of a run, so a run that stops before max_budget no longer
    # raises an IndexError.
    curves = np.array([np.interp(xs[1:], run[0], run[2]) for run in data])

    repeats = len(data)
    # Sorting along the run axis puts the values at each time step in
    # ascending order; the rows at 25% / 75% of the run count give the band.
    ordered = np.sort(curves, axis=0)

    def with_origin(values):
        """Prepends the fixed 0.0 value used for time 0."""
        return np.concatenate(([0.0], values))

    mean = with_origin(curves.mean(axis=0))
    per25 = with_origin(ordered[int(0.25 * repeats)])
    per75 = with_origin(ordered[int(0.75 * repeats)])

    plt.plot(xs, mean, color=color, label=label, linewidth=2)
    plt.fill_between(xs, per25, per75, alpha=0.1, linewidth=0, facecolor=color)
# Overlay both search methods on the current axes, random search first.
plot_data(random_data, color='red', label='random')
plot_data(evolution_data, color='blue', label='evolution')

# Axis decoration; the legend uses the labels passed to plot_data above.
plt.legend(loc='lower right')
plt.xlabel('total training time spent (seconds)')
plt.ylabel('accuracy')
plt.ylim(0.92, 0.95)
plt.grid()
For more information on using the dataset, see the API documentation at https://github.com/google-research/nasbench/blob/master/nasbench/api.py.