In [1]:
#@title Licensed under the Apache License, Version 2.0 & Creative Common licence 4.0
# EvoFlow and its tutorials are released under the Apache 2.0 licence
# its documentaton is licensed under the Creative Common licence 4.0

Solving Travelling salesman problem with EvoFlow

This notebook show how ot use EvoFlow to solve the well-known travelling salesman problem by showcasing how to find the best routes to visit European most populated cities.

EvoFlow, while heavily tested, is considered experimental - use at your own risks. Issues should be reported on [Github](https://github.com/google-research/evoflow/issues). For the rest: evoflow@google.com

Setup


In [2]:
# installing the latest version of evoflow
try:
    import evoflow
except:
    !pip install -U evoflow

In [2]:
import json
import numpy as np
import plotly.graph_objects as go
from collections import defaultdict
from tabulate import tabulate
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import tensorflow as tf

In [4]:
from evoflow.selection import SelectFittest
from evoflow.fitness import FitnessFunction
from evoflow.callbacks import Callback
from evoflow.population import uniform_population
from evoflow.ops import Input, RandomMutations1D, UniformCrossover1D, DualCrossover1D, SingleCrossover1D, Shuffle, Reverse1D
import evoflow.backend as B
from evoflow.engine import EvoFlow


GPUs Physical: 1 - Logical GPU:1
Using tensorflow backend

Loading and analyzing european cities data


In [5]:
#@title loading cities {display-mode: "form"}
NUM_CITIES = 20  #@param [10, 20, 50, 100, 200, 500, 1000]
NUM_CITIES = int(NUM_CITIES)
## loading cities
def load_cities(num_cities):

    
    # get files
    zip_fname = "tsp_%s.zip" % num_cities
    origin = "https://storage.googleapis.com/evoflow/datasets/tsp/cities_%s.zip" % num_cities
    download_path = tf.keras.utils.get_file(zip_fname, origin, extract=True)

    # process city info
    json_fname = "%s/cities_%s.json" % (download_path.replace(zip_fname, ''), num_cities)
    cities = json.loads(open(json_fname).read())

    ### lookup table
    idx2city = {}
    for city in cities:
        idx2city[city['idx']] = city

    ### create vizualization dictionary
    chart_data = defaultdict(list)
    lat_axis = [1000, -1000]
    lon_axis = [1000, -1000]
    for city in cities:
        chart_data['lat'].append(city['lat'])
        chart_data['lon'].append(city['lon'])
        chart_data['name'].append(city['name'])
        chart_data['population'].append(city['population'])

        # bounding boxes
        lat_axis[0] = min(city['lat'], lat_axis[0] - 0.1)
        lat_axis[1] = max(city['lat'], lat_axis[1] + 0.1)
        lon_axis[0] = min(city['lon'], lon_axis[0] - 0.1)
        lon_axis[1] = max(city['lon'], lon_axis[1] + 0.1)
        
    
    ## loading distance matrix
    distance_fname = "%sdistances_%s.npz" % (download_path.replace(zip_fname, ''), num_cities)
    distances = np.load(distance_fname)['distances']
    distances = distances.astype(B.intx())

    if num_cities > 500:
        print(num_cities, "cities loaded - solving is going to slow")
    elif num_cities >= 100:
        print(num_cities, "cities loaded - solving is going to be a little slow")
    else:
        print(num_cities, "cities loaded")
    
    return cities, chart_data, distances, lat_axis, lon_axis, idx2city


cities, chart_data, distances, lat_axis, lon_axis, idx2city = load_cities(NUM_CITIES)


20 cities loaded

In [6]:
#@title Cities population breakdown {display-mode: "form"}
fig = go.Figure(go.Bar(y=chart_data['population'], x=chart_data['name']))
fig.update_layout(title = 'Cities population')
fig.show()



In [7]:
#@title Cities by countries breakdown {display-mode: "form"}
city_by_country = defaultdict(int)
for city in cities:
    city_by_country[city['country_name']] += 1
fig = go.Figure(data=[go.Pie(
    labels = list(city_by_country.keys()), 
    values = list(city_by_country.values()),
    hole = .5,
    textinfo = 'label+percent'
)])

fig.update_layout(title = 'Number of cities per country')
fig.show()



In [8]:
#@title Vizualizing cities on a map {display-mode: "form"}
# let's vizualize our cities on a map
fig = go.Figure(go.Scattergeo(
    
        lat = chart_data['lat'],
        lon = chart_data['lon'],
        text = chart_data['name'],
        marker_color= chart_data['population'],
))
fig.update_layout(
        title = 'Most populated in-land European cities',
        geo = go.layout.Geo(scope='europe', showframe = True, projection_type = 'mercator',
                            lonaxis_range= lon_axis, lataxis_range= lat_axis)
    )
fig.show()



In [9]:
#@title Distance between cities heatmap {display-mode: "form"}

# sorting country by country for a more pleasing view
data = defaultdict(list)

for city in sorted(cities, key=lambda k: k['country_code']):
    data['x'].append(city['name'])
    data['y'].append(city['name'])
    data['idx'].append(city['idx'])
# resort distances
for idx in data['idx']:
    data['z'].append(np.take(distances[idx], data['idx']))

fig = go.Figure(data=go.Heatmap(x=data['x'], y=data['y'], z=data['z']))
fig.update_layout(title = 'Distances between cities sorted by country')
fig.show()


Define fitness function


In [10]:
import tensorflow as tf 
class TSPFitness(FitnessFunction):
    
    def __init__(self, distances, num_cities, baseline_distance=0, penality=100000, **kwargs):
        """
            Args:
                penality (int): what value to add to routes that don't fit the requirements. Must be
                larger than the 
                expected max_size as it to mark invalide ones.
                
                baseline_distance (int): what is the travel distance we try to be beat. Will be shown in
                the charts.
        """
        self.num_cities = num_cities
        self.distances = B.flatten(distances)
        self.penality = penality
        self.baseline_distance = int(baseline_distance)
        super(TSPFitness, self).__init__(**kwargs)
     
    
    def call(self, population, normalize=True):
        """            
            Parallel lookup and distance computation:
            - multiply the tensor by population shifed by 1 which gives the id to lookup in the flat
            distance array 
            - reduce_sum for the total distance
            - 1/reduce_sum so fitness goes from 0 to 1
        """
        
        self.print_debug('population', population)
        
        shifted_population =  B.roll(population, 1, axis=1)
        self.print_debug('shifted populaiton', shifted_population)
        
        idxs = (population * self.num_cities) + shifted_population
        self.print_debug('distances idx', idxs)
        
        distances = B.take(self.distances, idxs)
        self.print_debug('distances', distances)
        
        # total distance
        total_distance  = B.sum(distances, axis=1)
        
        if self.baseline_distance:
            min_distance = int(B.min(total_distance))
            self.record_metric('EvoFlow', min_distance, group="Shortest route")
            self.record_metric('Baseline', self.baseline_distance, group="Shortest route")
        
        self.print_debug('fitness values', total_distance)
        return total_distance

Computing baseline

To compute a baseline of much distance will be traveled, we are going to draw 5000 random itineraries and takes the shortest one as our baseline. The more random examples are used to establish the baseline the better the baseline. You can try to experiment with this and visualizes how much the number of samples affect the baseline by changing the BASELINE_POPULATION_SIZE below.

In the travel salesman this problem you need population where each city is represented exactly once in each chromosome and the order of city is random. In EvoFlow this type of population is generated using the uniform_population() function.


In [11]:
#@title Computing baseline using a random population {display-mode: "form"}
BASELINE_POPULATION_SIZE = 5000  #@param {type: "slider", min: 1000, max: 10000}
baseline_population  = uniform_population((BASELINE_POPULATION_SIZE, NUM_CITIES))


# compute baseline distances using our fitness function
random_total_distances = TSPFitness(distances, NUM_CITIES, baseline_distance=42, debug=False).call(baseline_population)

# finding the shortest route
shortest_random_route_idx = B.bottom_k_indices(random_total_distances, 1)[0]
shortest_random_route = baseline_population[shortest_random_route_idx]
shortest_random_distance = int(random_total_distances[shortest_random_route_idx])
print("Shortest random route is ", shortest_random_distance , ' kms')


Shortest random route is  12841  kms

Visualizing the best random route

Let's visualize our best route to ensure it indeed go through all cities and how entangled it look like. Obviously the more cities need to be traveled the more the random route will look bad.


In [12]:
#@title Drawing best route on a map {display-mode: "form"}
def draw_route(route, idx2city, chart_data, title='', display_table=False):
    # let's visualize our best random route
    fig = go.Figure(go.Scattergeo(
            lat = chart_data['lat'],
            lon = chart_data['lon'],
            text = chart_data['name'],
            marker_color= chart_data['population'],
    ))

    rows = []
    initial_city = idx2city[int(route[0])]
    total_distance = 0
    for idx in range(len(route) - 1):
        start_city = idx2city[int(route[idx])]
        stop_city = idx2city[int(route[idx + 1])]
        distance = distances[start_city['idx']][stop_city['idx']]
        total_distance += distance
        fig.add_trace(
            go.Scattergeo(
                lon = [start_city['lon'], stop_city['lon']],
                lat = [start_city['lat'], stop_city['lat']],
                mode = 'lines',
                line = dict(width = 1,color = 'red'),
            )
        )

        rows.append([start_city['name'], stop_city['name'], distance])

    # last one
    distance = distances[stop_city['idx']][initial_city['idx']]
    total_distance += distance
    rows.append([stop_city['name'], initial_city['name'], distance])
    fig.add_trace(
            go.Scattergeo(
                lon = [stop_city['lon'], initial_city['lon']],
                lat = [stop_city['lat'], initial_city['lat']],
                mode = 'lines',
                line = dict(width = 1,color = 'red'),
            )
        )


    fig.update_layout(
            title = '%s - Total distance %d kms'  % (title, total_distance),
            showlegend=False,
            geo = go.layout.Geo(scope='europe', showframe = False, projection_type = 'mercator',
                                lonaxis_range=lon_axis, lataxis_range=lat_axis)
        )
    fig.show()
    if display_table:
        # FIXME show how the distance progress in chart -- will be prettier if we can animate all of it together
        print(tabulate(rows, headers=['From', 'To', 'Distance traveled']))
draw_route(shortest_random_route, idx2city, chart_data, title='Shortest random route', display_table=False)


Building the evolutionary model

Here are a decriptions of the parameters you can tweak out of the box and some good heuristics to choose their values. Feel free to edit the code if you want to dig deeper :)

  • POPULATION_SIZE: should be at least 10x the number of city. e.g 200 for 20 cities.
  • GENERATIONS: Should be about 5x the number of cities. Sometime it converge fasters or might requires more.

In [13]:
#@title Adjusting evoluation model parameters {display-mode: "form"}

POPULATION_SIZE = 200  #@param {type: "slider", min: 100, max: 5000}
GENERATIONS = 100  #@param {type: "slider", min: 20, max: 5000}
NUM_REVERSE_OPERATIONS = 3 #@param {type: "slider", min: 1, max: 10}
MAX_REVERSE_PROBABILITY = 0.3
REVERSE_POPULATION_FRACTION = 0.3
MIN_REVERSE_PROBABILITY = 0.1

SHUFFLE_POPULATION_FRACTION = 0.2

population  = uniform_population((POPULATION_SIZE, NUM_CITIES))
reverse_probability_increment = MAX_REVERSE_PROBABILITY / NUM_REVERSE_OPERATIONS
reverse_probabilty = 1 - reverse_probability_increment

Evolutionary model programatic construction

As the number of cities increase, we need to increase the depth of the model to reliably find better routes. To make this easy to experiment with, we are constructing the evolutionary model programtically by iteratively stacking Reverse1D() operations with different max_reverse_size_probability which controls how many cities are reversed at once. This ensure a wider diversity of the population and therefore a better convergence


In [14]:
# Evolution model
inputs = Input(shape=population.shape)
x = inputs
for idx in range(NUM_REVERSE_OPERATIONS):
    x = Reverse1D(population_fraction=REVERSE_POPULATION_FRACTION, max_reverse_probability=reverse_probabilty)(x)
    reverse_probabilty = max(reverse_probabilty - reverse_probability_increment, MIN_REVERSE_PROBABILITY)
    
x = Shuffle(population_fraction=SHUFFLE_POPULATION_FRACTION)(x)
outputs = x
ef = EvoFlow(inputs, outputs, debug=False)
ef.summary()


OP (type)                     Output Shape    Inputs
----------------------------  --------------  ----------------
input_BA4B74 (Input)          (200, 20)
reverse1d_61F72B (Reverse1D)  []              input_BA4B74
reverse1d_37787E (Reverse1D)  []              reverse1d_61F72B
reverse1d_665AC7 (Reverse1D)  []              reverse1d_37787E
shuffle_A4D8BD (Shuffle)      []              reverse1d_665AC7

Running Evolution


In [15]:
# note that here we use the select fitesst to select individual with the smallest fitness value (total travel distance)
evolution_strategy = SelectFittest(mode='min')
fitness_fn = TSPFitness(distances, NUM_CITIES, baseline_distance=shortest_random_distance)
ef.compile(evolution_strategy, fitness_fn)
results = ef.evolve(population, generations=GENERATIONS)



Results


In [16]:
results.plot_fitness()


Out[16]:

In [17]:
# "Shortest route" is the name of the metric group we added to the fitness function
results.plot("Shortest route")


Out[17]:

In [18]:
route  = results.get_populations()[0]
draw_route(route, idx2city, chart_data, title="Shortest GA route", display_table=False)