In [1]:
#@title Licensed under the Apache License, Version 2.0 & Creative Common licence 4.0
# EvoFlow and its tutorials are released under the Apache 2.0 licence
# its documentaton is licensed under the Creative Common licence 4.0
This notebook show how ot use EvoFlow to solve the well-known travelling salesman problem by showcasing how to find the best routes to visit European most populated cities.
EvoFlow, while heavily tested, is considered experimental - use at your own risks. Issues should be reported on [Github](https://github.com/google-research/evoflow/issues). For the rest: evoflow@google.com
In [2]:
# installing the latest version of evoflow
try:
import evoflow
except:
!pip install -U evoflow
In [2]:
import json
import numpy as np
import plotly.graph_objects as go
from collections import defaultdict
from tabulate import tabulate
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import tensorflow as tf
In [4]:
from evoflow.selection import SelectFittest
from evoflow.fitness import FitnessFunction
from evoflow.callbacks import Callback
from evoflow.population import uniform_population
from evoflow.ops import Input, RandomMutations1D, UniformCrossover1D, DualCrossover1D, SingleCrossover1D, Shuffle, Reverse1D
import evoflow.backend as B
from evoflow.engine import EvoFlow
In [5]:
#@title loading cities {display-mode: "form"}
NUM_CITIES = 20 #@param [10, 20, 50, 100, 200, 500, 1000]
NUM_CITIES = int(NUM_CITIES)
## loading cities
def load_cities(num_cities):
# get files
zip_fname = "tsp_%s.zip" % num_cities
origin = "https://storage.googleapis.com/evoflow/datasets/tsp/cities_%s.zip" % num_cities
download_path = tf.keras.utils.get_file(zip_fname, origin, extract=True)
# process city info
json_fname = "%s/cities_%s.json" % (download_path.replace(zip_fname, ''), num_cities)
cities = json.loads(open(json_fname).read())
### lookup table
idx2city = {}
for city in cities:
idx2city[city['idx']] = city
### create vizualization dictionary
chart_data = defaultdict(list)
lat_axis = [1000, -1000]
lon_axis = [1000, -1000]
for city in cities:
chart_data['lat'].append(city['lat'])
chart_data['lon'].append(city['lon'])
chart_data['name'].append(city['name'])
chart_data['population'].append(city['population'])
# bounding boxes
lat_axis[0] = min(city['lat'], lat_axis[0] - 0.1)
lat_axis[1] = max(city['lat'], lat_axis[1] + 0.1)
lon_axis[0] = min(city['lon'], lon_axis[0] - 0.1)
lon_axis[1] = max(city['lon'], lon_axis[1] + 0.1)
## loading distance matrix
distance_fname = "%sdistances_%s.npz" % (download_path.replace(zip_fname, ''), num_cities)
distances = np.load(distance_fname)['distances']
distances = distances.astype(B.intx())
if num_cities > 500:
print(num_cities, "cities loaded - solving is going to slow")
elif num_cities >= 100:
print(num_cities, "cities loaded - solving is going to be a little slow")
else:
print(num_cities, "cities loaded")
return cities, chart_data, distances, lat_axis, lon_axis, idx2city
cities, chart_data, distances, lat_axis, lon_axis, idx2city = load_cities(NUM_CITIES)
In [6]:
#@title Cities population breakdown {display-mode: "form"}
fig = go.Figure(go.Bar(y=chart_data['population'], x=chart_data['name']))
fig.update_layout(title = 'Cities population')
fig.show()
In [7]:
#@title Cities by countries breakdown {display-mode: "form"}
city_by_country = defaultdict(int)
for city in cities:
city_by_country[city['country_name']] += 1
fig = go.Figure(data=[go.Pie(
labels = list(city_by_country.keys()),
values = list(city_by_country.values()),
hole = .5,
textinfo = 'label+percent'
)])
fig.update_layout(title = 'Number of cities per country')
fig.show()
In [8]:
#@title Vizualizing cities on a map {display-mode: "form"}
# let's vizualize our cities on a map
fig = go.Figure(go.Scattergeo(
lat = chart_data['lat'],
lon = chart_data['lon'],
text = chart_data['name'],
marker_color= chart_data['population'],
))
fig.update_layout(
title = 'Most populated in-land European cities',
geo = go.layout.Geo(scope='europe', showframe = True, projection_type = 'mercator',
lonaxis_range= lon_axis, lataxis_range= lat_axis)
)
fig.show()
In [9]:
#@title Distance between cities heatmap {display-mode: "form"}
# sorting country by country for a more pleasing view
data = defaultdict(list)
for city in sorted(cities, key=lambda k: k['country_code']):
data['x'].append(city['name'])
data['y'].append(city['name'])
data['idx'].append(city['idx'])
# resort distances
for idx in data['idx']:
data['z'].append(np.take(distances[idx], data['idx']))
fig = go.Figure(data=go.Heatmap(x=data['x'], y=data['y'], z=data['z']))
fig.update_layout(title = 'Distances between cities sorted by country')
fig.show()
In [10]:
import tensorflow as tf
class TSPFitness(FitnessFunction):
def __init__(self, distances, num_cities, baseline_distance=0, penality=100000, **kwargs):
"""
Args:
penality (int): what value to add to routes that don't fit the requirements. Must be
larger than the
expected max_size as it to mark invalide ones.
baseline_distance (int): what is the travel distance we try to be beat. Will be shown in
the charts.
"""
self.num_cities = num_cities
self.distances = B.flatten(distances)
self.penality = penality
self.baseline_distance = int(baseline_distance)
super(TSPFitness, self).__init__(**kwargs)
def call(self, population, normalize=True):
"""
Parallel lookup and distance computation:
- multiply the tensor by population shifed by 1 which gives the id to lookup in the flat
distance array
- reduce_sum for the total distance
- 1/reduce_sum so fitness goes from 0 to 1
"""
self.print_debug('population', population)
shifted_population = B.roll(population, 1, axis=1)
self.print_debug('shifted populaiton', shifted_population)
idxs = (population * self.num_cities) + shifted_population
self.print_debug('distances idx', idxs)
distances = B.take(self.distances, idxs)
self.print_debug('distances', distances)
# total distance
total_distance = B.sum(distances, axis=1)
if self.baseline_distance:
min_distance = int(B.min(total_distance))
self.record_metric('EvoFlow', min_distance, group="Shortest route")
self.record_metric('Baseline', self.baseline_distance, group="Shortest route")
self.print_debug('fitness values', total_distance)
return total_distance
To compute a baseline of much distance will be traveled, we are going to draw 5000 random itineraries and takes the shortest one as our baseline. The more random examples are used to establish the baseline the better the baseline. You can try to experiment with this and visualizes how much the number of samples affect the baseline by changing the BASELINE_POPULATION_SIZE below.
In the travel salesman this problem you need population where each city is represented exactly once in each chromosome and the order of city is random. In EvoFlow this type of population is generated using the uniform_population()
function.
In [11]:
#@title Computing baseline using a random population {display-mode: "form"}
BASELINE_POPULATION_SIZE = 5000 #@param {type: "slider", min: 1000, max: 10000}
baseline_population = uniform_population((BASELINE_POPULATION_SIZE, NUM_CITIES))
# compute baseline distances using our fitness function
random_total_distances = TSPFitness(distances, NUM_CITIES, baseline_distance=42, debug=False).call(baseline_population)
# finding the shortest route
shortest_random_route_idx = B.bottom_k_indices(random_total_distances, 1)[0]
shortest_random_route = baseline_population[shortest_random_route_idx]
shortest_random_distance = int(random_total_distances[shortest_random_route_idx])
print("Shortest random route is ", shortest_random_distance , ' kms')
In [12]:
#@title Drawing best route on a map {display-mode: "form"}
def draw_route(route, idx2city, chart_data, title='', display_table=False):
# let's visualize our best random route
fig = go.Figure(go.Scattergeo(
lat = chart_data['lat'],
lon = chart_data['lon'],
text = chart_data['name'],
marker_color= chart_data['population'],
))
rows = []
initial_city = idx2city[int(route[0])]
total_distance = 0
for idx in range(len(route) - 1):
start_city = idx2city[int(route[idx])]
stop_city = idx2city[int(route[idx + 1])]
distance = distances[start_city['idx']][stop_city['idx']]
total_distance += distance
fig.add_trace(
go.Scattergeo(
lon = [start_city['lon'], stop_city['lon']],
lat = [start_city['lat'], stop_city['lat']],
mode = 'lines',
line = dict(width = 1,color = 'red'),
)
)
rows.append([start_city['name'], stop_city['name'], distance])
# last one
distance = distances[stop_city['idx']][initial_city['idx']]
total_distance += distance
rows.append([stop_city['name'], initial_city['name'], distance])
fig.add_trace(
go.Scattergeo(
lon = [stop_city['lon'], initial_city['lon']],
lat = [stop_city['lat'], initial_city['lat']],
mode = 'lines',
line = dict(width = 1,color = 'red'),
)
)
fig.update_layout(
title = '%s - Total distance %d kms' % (title, total_distance),
showlegend=False,
geo = go.layout.Geo(scope='europe', showframe = False, projection_type = 'mercator',
lonaxis_range=lon_axis, lataxis_range=lat_axis)
)
fig.show()
if display_table:
# FIXME show how the distance progress in chart -- will be prettier if we can animate all of it together
print(tabulate(rows, headers=['From', 'To', 'Distance traveled']))
draw_route(shortest_random_route, idx2city, chart_data, title='Shortest random route', display_table=False)
Here are a decriptions of the parameters you can tweak out of the box and some good heuristics to choose their values. Feel free to edit the code if you want to dig deeper :)
POPULATION_SIZE
: should be at least 10x the number of city. e.g 200 for 20 cities.GENERATIONS
: Should be about 5x the number of cities. Sometime it converge fasters or might requires more.
In [13]:
#@title Adjusting evoluation model parameters {display-mode: "form"}
POPULATION_SIZE = 200 #@param {type: "slider", min: 100, max: 5000}
GENERATIONS = 100 #@param {type: "slider", min: 20, max: 5000}
NUM_REVERSE_OPERATIONS = 3 #@param {type: "slider", min: 1, max: 10}
MAX_REVERSE_PROBABILITY = 0.3
REVERSE_POPULATION_FRACTION = 0.3
MIN_REVERSE_PROBABILITY = 0.1
SHUFFLE_POPULATION_FRACTION = 0.2
population = uniform_population((POPULATION_SIZE, NUM_CITIES))
reverse_probability_increment = MAX_REVERSE_PROBABILITY / NUM_REVERSE_OPERATIONS
reverse_probabilty = 1 - reverse_probability_increment
As the number of cities increase, we need to increase the depth of the model to reliably find better routes. To make this easy to experiment with, we are constructing the evolutionary model programtically by iteratively stacking Reverse1D()
operations with different max_reverse_size_probability
which controls how many cities are reversed at once. This ensure a wider diversity of the population and therefore a better convergence
In [14]:
# Evolution model
inputs = Input(shape=population.shape)
x = inputs
for idx in range(NUM_REVERSE_OPERATIONS):
x = Reverse1D(population_fraction=REVERSE_POPULATION_FRACTION, max_reverse_probability=reverse_probabilty)(x)
reverse_probabilty = max(reverse_probabilty - reverse_probability_increment, MIN_REVERSE_PROBABILITY)
x = Shuffle(population_fraction=SHUFFLE_POPULATION_FRACTION)(x)
outputs = x
ef = EvoFlow(inputs, outputs, debug=False)
ef.summary()
In [15]:
# note that here we use the select fitesst to select individual with the smallest fitness value (total travel distance)
evolution_strategy = SelectFittest(mode='min')
fitness_fn = TSPFitness(distances, NUM_CITIES, baseline_distance=shortest_random_distance)
ef.compile(evolution_strategy, fitness_fn)
results = ef.evolve(population, generations=GENERATIONS)
In [16]:
results.plot_fitness()
Out[16]:
In [17]:
# "Shortest route" is the name of the metric group we added to the fitness function
results.plot("Shortest route")
Out[17]:
In [18]:
route = results.get_populations()[0]
draw_route(route, idx2city, chart_data, title="Shortest GA route", display_table=False)