The Euclidean Stacker-Crane Problem (ESCP) is a generalization of the Euclidean Travelling Salesman Problem. In the ESCP we are given pickup-delivery pairs and must choose delivery-to-pickup links that join the pairs into a tour of minimal length. The SPLICE algorithm was proven to be almost-surely asymptotically optimal when pickups and deliveries are each sampled from their respective distributions.
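As a minimal illustration of the objective (the helper below is hypothetical, not part of the pqt/splice/asplice/plg modules imported later): given the fixed pickup-to-delivery edges and a chosen delivery-to-pickup assignment, the tour cost is the total length of both edge sets.
In [ ]:
import math

def tour_cost(pd_edges, dp_links):
    # pd_edges: pickup -> delivery (fixed by the problem instance)
    # dp_links: delivery -> next pickup (the assignment we choose)
    dist = lambda a, b: math.hypot(a[0] - b[0], a[1] - b[1])
    return (sum(dist(p, d) for p, d in pd_edges.items())
            + sum(dist(d, p) for d, p in dp_links.items()))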
The SPLICE algorithm, however, relies on a Euclidean bipartite matching between all pairs, an O(n^3) operation (though sophisticated approximations exist). Here we test two algorithms that we propose, PLG and ASPLICE, each of which relies on a probabilistic quad tree (PQT): a quad tree that stops decomposing once (number of points in cell)/(total points) < p_hat holds for every cell. Intuitively, this keeps the cells small, so a delivery linked to a pickup in the same cell is linked over a short distance.
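A rough sketch of the PQT idea (illustrative only; the actual decomposition is provided by the PQTDecomposition class imported below): split the unit square into quadrants recursively until every leaf cell holds fewer than a p_hat fraction of the points.
In [ ]:
def pqt_leaves(points, n_total, p_hat, x0=0., y0=0., side=1., depth=0, max_depth=12):
    # Stop splitting once this cell holds < p_hat of all points
    # (the depth cap guards against coincident points)
    if len(points) < p_hat * n_total or depth >= max_depth:
        return [((x0, y0, side), points)]
    half = side / 2.
    leaves = []
    for dx in (0., half):
        for dy in (0., half):
            # Points exactly on the outer boundary are ignored in this sketch
            quad = [p for p in points
                    if x0 + dx <= p[0] < x0 + dx + half
                    and y0 + dy <= p[1] < y0 + dy + half]
            leaves += pqt_leaves(quad, n_total, p_hat,
                                 x0 + dx, y0 + dy, half, depth + 1, max_depth)
    return leaves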
In PLG we simply follow each pickup-delivery pair: if the delivery falls in a cell containing an unmatched pickup, we link to that pickup; otherwise we connect to an arbitrary unmatched pickup anywhere. When the pickup and delivery distributions are identical, this is almost-surely asymptotically near-optimal, meaning we can choose p_hat so that, as the number of points goes to infinity, the ratio of PLG cost to optimal cost approaches some (1+eps), where eps may be made as small as desired. The algorithm runs in linear time. For non-identical distributions it is still a constant-factor approximation, with a factor that depends on how similar the distributions are (in both Wasserstein and total-variation distance).
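A sketch of the PLG linking rule over such a PQT (again illustrative; the imported plg_alg is the real implementation, and subtour merging is omitted here). The cell_of helper, mapping a point to its leaf cell, is assumed to be supplied by the quad tree.
In [ ]:
from collections import defaultdict

def plg_links(pd_edges, cell_of):
    # cell_of: point -> id of its PQT leaf (assumed given)
    free = set(pd_edges)              # pickups that nothing links to yet
    by_cell = defaultdict(list)
    for p in pd_edges:
        by_cell[cell_of(p)].append(p)
    dp_links = {}
    for p, d in pd_edges.items():
        bucket = by_cell[cell_of(d)]
        while bucket and bucket[-1] not in free:
            bucket.pop()              # lazily discard pickups already taken
        q = bucket.pop() if bucket else free.pop()  # local if possible, else any
        free.discard(q)
        dp_links[d] = q
    return dp_links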
The ASPLICE algorithm is functionally similar to PLG, except that the connections between cells are not arbitrary. It first makes all delivery-to-pickup links possible within cells, then assigns the excess deliveries and pickups by solving a Transportation Problem between cells, and finally merges the resulting subtours. ASPLICE is guaranteed to outperform PLG in probability, and it inherits the analysis of SPLICE, which it approximates. The key computational difference is that solving the Euclidean bipartite matching problem on all points is far more expensive than solving the Transportation Problem on the per-cell excesses.
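The transportation step can be sketched with a generic LP solver (this mirrors the "LP solver" remark below but is not the asplice implementation; transport_plan is a hypothetical name). Given per-cell surpluses of deliveries (supplies) and pickups (demands) and a matrix of inter-cell distances, minimize the total shipped distance; the totals balance because within-cell matching leaves equal numbers of excess deliveries and pickups overall.
In [ ]:
import numpy as np
from scipy.optimize import linprog

def transport_plan(supply, demand, cost):
    # supply[i]: excess deliveries in cell i; demand[j]: excess pickups in cell j
    # cost[i][j]: distance between the two cells (e.g. centroid distance)
    # Feasible because sum(supply) == sum(demand)
    m, k = len(supply), len(demand)
    c = np.asarray(cost, dtype=float).ravel()
    A_eq = []
    for i in range(m):                     # ship out all of cell i's surplus
        row = np.zeros(m * k); row[i*k:(i+1)*k] = 1.; A_eq.append(row)
    for j in range(k):                     # fill all of cell j's demand
        col = np.zeros(m * k); col[j::k] = 1.; A_eq.append(col)
    res = linprog(c, A_eq=np.vstack(A_eq), b_eq=list(supply) + list(demand))
    return res.x.reshape(m, k)             # x[i, j]: links to route from cell i to j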
The SPLICE algorithm is impractically slow beyond roughly 250 pairs (over a minute per instance), whereas ASPLICE, even using a generic LP solver (far from the fastest approach), handles over 1000 pairs with ease.
In [46]:
# Load modules
from __future__ import print_function  # must precede other imports
import sys
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
import pandas as pd
from pandas import DataFrame
import time
import random
from pqt import PQTDecomposition
from splice import splice_alg
from asplice import asplice_alg
from plg import plg_alg
In [2]:
def run_experiments(gen_pd_edges, n_pairs_li, n_reps, p_hat_li, verbose=False, **kwargs):
    """
    Parameters:
        gen_pd_edges - function that generates n pickup-delivery pairs
        n_pairs_li   - a list of the numbers of pairs to generate
        n_reps       - the number of repetitions of the experiment for
                       each number of pairs
        p_hat_li     - a list of p_hat values for the PQT decomposition
        verbose      - whether or not to print progress to stdout
        (any extra keyword arguments, e.g. save_path, are ignored)
    """
data = defaultdict(list)
    # Helper that appends one (key, value) pair into the data dict
    # (Python 2 tuple-parameter unpacking in the signature)
    def add_datum_kv((k, v)):
        data[k].append(v)
        return (k, v)
# Run experiment
for n_pairs in n_pairs_li:
for rep in xrange(n_reps):
            if verbose:
                print("Number of pairs: {} Rep: {} at ({})".format(
                    n_pairs, rep,
                    time.strftime("%H:%M:%S", time.gmtime())))
                sys.stdout.flush()
# Generate pairs
pd_edges = gen_pd_edges(n_pairs)
time_stamp = int(time.time()*1000)
# Run SPLICE
start_time = time.clock()
_, splice_cost = splice_alg(pd_edges)
splice_runtime = time.clock() - start_time
splice_datum = {'alg': splice_alg.__name__,
'timestamp': time_stamp,
'n_pairs': n_pairs,
'cost': splice_cost,
'p_hat': float('inf'),
'alg_runtime': splice_runtime,
'pqt_runtime': None,
'rep': rep}
# Add datum
map(add_datum_kv, splice_datum.iteritems())
for p_hat in p_hat_li:
# Generate PQT
start_time = time.clock()
pqt = PQTDecomposition().from_points(pd_edges.keys(),
p_hat=p_hat)
pqt_runtime = time.clock() - start_time
# Run ASPLICE
start_time = time.clock()
_, asplice_cost = asplice_alg(pd_edges, pqt=pqt)
asplice_runtime = time.clock() - start_time
# Add datum
asplice_datum = {'alg': asplice_alg.__name__,
'timestamp': time_stamp,
'n_pairs': n_pairs,
'cost': asplice_cost,
'p_hat': p_hat,
'alg_runtime': asplice_runtime,
'pqt_runtime': pqt_runtime,
'rep': rep}
map(add_datum_kv, asplice_datum.iteritems())
# Run PLG
start_time = time.clock()
_, plg_cost = plg_alg(pd_edges, pqt=pqt)
plg_runtime = time.clock() - start_time
# Add datum
plg_datum = {'alg': plg_alg.__name__,
'timestamp': time_stamp,
'n_pairs': n_pairs,
'cost': plg_cost,
'p_hat': p_hat,
'alg_runtime': plg_runtime,
'pqt_runtime': pqt_runtime,
'rep': rep}
map(add_datum_kv, plg_datum.iteritems())
return DataFrame(data)
In [259]:
def gen_pd_edges(gen_p_fn, gen_d_fn, n_pairs=50):
    # Generates n_pairs random pickup-delivery pairs from the two distributions.
    # Pickups are dict keys, so gen_p_fn must return a hashable type
    # (a tuple, not a list or np.array). Duplicate keys would silently drop
    # pairs, but occur with probability zero for continuous distributions.
    return {gen_p_fn(): gen_d_fn()
            for i in xrange(n_pairs)}
def gen_pt_gmm(means, covs, mix_weights):
    # Rejection-sample from the Gaussian mixture until the point
    # lands in the unit square
    while True:
        # Which Gaussian to sample from
        ridx = np.random.choice(len(mix_weights), p=mix_weights)
        # Sample point
        pt = np.random.multivariate_normal(means[ridx], covs[ridx])
        # Only accept the point if it lies in the unit square
        if 0. <= pt[0] <= 1. and 0. <= pt[1] <= 1.:
            return tuple(pt)
In [267]:
# Define GMMs
gmm1_params = { 'means': [[0.5, 0.5],
[0.8,0.1]],
'covs': [[[0.01, 0.], [0., 0.01]],
[[0.015, 0.], [0., 0.02]]],
'mix_weights': [0.6, 0.4]
}
gmm2_params = { 'means': [[0.51, 0.55],
[0.78, 0.10]],
'covs': [[[0.01, 0.00], [0.00, 0.01]],
[[0.015, 0.00], [0.00, 0.02]]],
'mix_weights': [0.6, 0.4]
}
gmm3_params = { 'means': [[0.21, 0.30],
[0.78, 0.84],
[0.78, 0.10]],
'covs': [[[0.01, 0.00], [0.00, 0.01]],
[[0.015, 0.00], [0.00, 0.02]],
[[0.015, 0.00], [0.00, 0.02]]],
'mix_weights': [0.5, 0.3, 0.2]
}
In [268]:
# Setup gen_pd functions
def gen_pt_unif():
return (random.random(), random.random())
def gen_pd_unif(n_pairs):
return gen_pd_edges(gen_pt_unif, gen_pt_unif, n_pairs)
def gen_pt_gmm1():
return gen_pt_gmm(**gmm1_params)
def gen_pt_gmm2():
return gen_pt_gmm(**gmm2_params)
def gen_pt_gmm3():
return gen_pt_gmm(**gmm3_params)
def gen_pd_gmm_close(n_pairs):
return gen_pd_edges(gen_pt_gmm1, gen_pt_gmm2, n_pairs)
def gen_pd_gmm_far(n_pairs):
return gen_pd_edges(gen_pt_gmm1, gen_pt_gmm3, n_pairs)
In [279]:
# Setup parameters
experiment1 = {
'n_pairs_li': list(xrange(10, 100, 3)) \
+ list(xrange(100, 150, 5)) \
+ list(xrange(150, 200, 10)),
'p_hat_li': [0.01, 0.1],
'n_reps': 3,
'gen_pd_edges': gen_pd_unif,
'save_path': "results/comparison/uniform/",
'name': "uni",
'verbose': True
}
experiment2 = {
'n_pairs_li': list(xrange(10, 100, 3)) \
+ list(xrange(100, 150, 5)) \
+ list(xrange(150, 200, 10)),
'p_hat_li': [0.01, 0.1],
'n_reps': 3,
'gen_pd_edges': gen_pd_gmm_close,
'save_path': "results/comparison/gmm_close/",
'name': "close",
'verbose': True
}
experiment3 = {
'n_pairs_li': list(xrange(10, 100, 3)) \
+ list(xrange(100, 150, 5)) \
+ list(xrange(150, 200, 10)),
'p_hat_li': [0.01, 0.1],
'n_reps': 3,
'gen_pd_edges': gen_pd_gmm_far,
'save_path': "results/comparison/gmm_far/",
'name': "far",
'verbose': True
}
In [270]:
# Choose the experiment
experiment = experiment1
In [278]:
save_path = "{}scatter_{}.pdf" \
.format(experiment['save_path'],
experiment['name'])
fig, ax = plt.subplots()
pds = experiment['gen_pd_edges'](1000)
# Pickups in red, deliveries in blue
ax.scatter(*zip(*pds.keys()), color='r', edgecolors='none', s=2)
ax.scatter(*zip(*pds.values()), color='b', edgecolors='none', s=2)
ax.set_xlim([0.,1.])
ax.set_ylim([0.,1.])
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.savefig(save_path, bbox_inches='tight')
In [272]:
# Run the experiment
df = run_experiments(**experiment)
In [273]:
# Show the last 6 rows of results
df.tail(6)
Out[273]:
In [274]:
save_path = "{}avg_cost_{}.pdf" \
.format(experiment['save_path'],
experiment['name'])
with plt.style.context('seaborn-white'):
mean_df = df.groupby(['alg', 'p_hat', 'n_pairs']) \
[['cost','alg_runtime','pqt_runtime']] \
.mean() \
.add_prefix('mean_') \
.reset_index()
fig, ax = plt.subplots()
#ax.set_yscale('log')
group_plts = mean_df.groupby(['alg', 'p_hat'])
for i,(name, group) in enumerate(group_plts):
alg,p_hat = name
if alg == splice_alg.__name__:
plt.plot(group['n_pairs'],
group['mean_cost'],
label=r"SPLICE")#,
#color=cmap(i / float(len(group_plts))))
elif alg == asplice_alg.__name__:
plt.plot(group['n_pairs'],
group['mean_cost'],
label=r"ASPLICE $\hat{{p}}={:.3}$".format(p_hat))#,
#color=cmap(i / float(len(group_plts))))
elif alg == plg_alg.__name__:
plt.plot(group['n_pairs'],
group['mean_cost'],
label=r"PLG $\hat{{p}}={:.3}$".format(p_hat))#,
#color=cmap(i / float(len(group_plts))))
ax.set_xlabel("Number of pd Pairs", fontsize=15)
ax.set_ylabel("Cost", fontsize=15)
ax.set_title('Algorithm Cost vs Number of Pairs')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5),
fancybox=True, shadow=True)
plt.savefig(save_path, bbox_inches='tight')
In [275]:
save_path = "{}avg_runtime_{}.pdf" \
.format(experiment['save_path'],
experiment['name'])
with plt.style.context('seaborn-white'):
fig, ax = plt.subplots()
ax.set_yscale('log')
group_plts = mean_df.groupby(['alg', 'p_hat'])
cmap = mpl.cm.autumn
for i,(name, group) in enumerate(group_plts):
alg,p_hat = name
if alg == splice_alg.__name__:
plt.plot(group['n_pairs'],
group['mean_alg_runtime'],
label=r"SPLICE")
#color=cmap(i / float(len(group_plts))))
elif alg == asplice_alg.__name__:
plt.plot(group['n_pairs'],
group['mean_alg_runtime'],
label=r"ASPLICE $\hat{{p}}={:.3}$".format(p_hat),
linestyle="-")
#color=cmap(i / float(len(group_plts))))
elif alg == plg_alg.__name__:
plt.plot(group['n_pairs'],
group['mean_alg_runtime'],
label=r"PLG $\hat{{p}}={:.3}$".format(p_hat),
linestyle="-")
#color=cmap(i / float(len(group_plts))))
ax.set_xlabel("Number of pd Pairs", fontsize=15)
ax.set_ylabel("Time (s)", fontsize=15)
ax.set_title('Algorithm Runtime vs Number of Pairs')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5),
fancybox=True, shadow=True)
plt.savefig(save_path, bbox_inches='tight')
In [276]:
grouped = df.groupby(['alg','p_hat'])
# Extract splice costs
splice_costs = df[df['alg'] == splice_alg.__name__].cost
splice_costs[:5]
Out[276]:
In [277]:
save_path = "{}avg_ratios_{}.pdf" \
.format(experiment['save_path'],
experiment['name'])
with plt.style.context('seaborn-white'):
fig, ax = plt.subplots()
for i,(key, group) in enumerate(grouped):
alg, p_hat = key
# Compute ratio of alg cost to splice_cost
cost_ratios = group.cost.values / splice_costs.values
# Compute the mean of ratios for each rep
mean_cost_ratios = np.mean(cost_ratios.reshape(-1, experiment['n_reps']), axis=1)
        if alg == splice_alg.__name__:
            # Skip plotting SPLICE (its ratio to itself is always 1)
            continue
        elif alg == asplice_alg.__name__:
            plt.plot(experiment['n_pairs_li'],
                     mean_cost_ratios,
                     label=r"ASPLICE $\hat{{p}}={:.3}$".format(p_hat),
                     linestyle="-")
        elif alg == plg_alg.__name__:
            plt.plot(experiment['n_pairs_li'],
                     mean_cost_ratios,
                     label=r"PLG $\hat{{p}}={:.3}$".format(p_hat),
                     linestyle="-")
ax.set_xlabel("Number of pd Pairs", fontsize=15)
ax.set_ylabel("Mean Ratio to SPLICE", fontsize=15)
ax.set_title('Mean Cost Ratio vs Number of Pairs')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5),
fancybox=True, shadow=True)
ax.grid(True)
plt.savefig(save_path, bbox_inches='tight')
Here the mean cost ratios show that ASPLICE stays very close to SPLICE (ratio near 1) across the tested numbers of pairs.