In [ ]:
# GNU LESSER GENERAL PUBLIC LICENSE
# Version 3, 29 June 2007
# Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
# Everyone is permitted to copy and distribute verbatim copies
# of this license document, but changing it is not allowed.
In [1]:
import IPython.display as IPd
import os
path = os.path.dirname(".")
graphics_path = path + "Graphics/"
data_path_in = path + "Data_In/"
data_path_out = path + "Data_Out/"
session = IPd.Image(graphics_path+"/logo.png")
session
Out[1]:
The Transportation Problem, which is also known as the Transportation Simplex or the
Transshipment Problem, seeks to allocate supply to demand while minimizing transportation
costs and was formally described by Hitchcock (1941). Supply ($n$) and demand ($m$) are
generally represented as unit weights of decision variables at facilities along a network with
the time or distance between nodes representing the cost of transporting one unit from a
supply node to a demand node. These costs are stored in an $n$ x $m$ cost matrix. The
Transportation Problem has continued to garner attention through to the present in multiple
academic disciplines with Spatial Optimization in Geography being one of these at the
forefront. As the processing power of computers has improved over the years, the capabilities
of mathematical programming software to solve these problems have also improved.
However, one of the main roadblocks is the need to store the ever-larger cost and decision
variable matrices in memory while building and solving the optimization model. This paper
seeks to provide a solution to the issue, not by modifying the structure of the problem or
proposing a new algorithm, but by tackling the memory bottleneck with out-of-core storage.
This will be attempted in an object-oriented programming language environment primarily
utilizing two Python
packages: dask
(a parallel computing library) and pulp
(an integer and
mixed-integer programing library).
\text{min} & \displaystyle \sum{i \in I} \sum{j \in J} c{ij} sd{ij}&&&&& \ \text{s.t.} & \displaystyle \sum{j \in J} sd{ij} = Si &\text{or}&\sum{j \in J} sd_{ij} >= Si& \text{or}&\sum{j \in J} sd_{ij} <= Si& \forall i \in I \ & \displaystyle \sum{i \in I} sd_{ij} = Dj &\text{or}&\sum{i \in I} sd_{ij} <= Dj& \text{or}&\sum{i \in I} sd_{ij} >= Dj& \forall j \in J \ & \displaystyle sd{ij} \geq 0 &&&&& \forall i \in I, \forall j \in J \ \end{array} $$
\text{where}&\ & \scriptsize{i = \text{a specific origin}} \ & \scriptsize{j = \text{a specific destination}} \ & \scriptsize{I = \text{the set of origins}} \ & \scriptsize{J = \text{the set of destinations}} \ & \scriptsize{a{i} = \text{demand weight at each node}} \ & \scriptsize{c{ij} = \text{travel costs between nodes}} \ & \scriptsize{sd{ij} = \text{shipping decision variable from } i \text{ to } j} \ & \scriptsize{S{i} = \text{units of supply from facility } i} \ & \scriptsize{D_{j} = \text{units of demand by facility } j} \ \end{array} $$
Originally published:
python
packages like dask
In [6]:
import dask
import geopandas as gpd
import pysal as ps
import dask.array as da
import gurobi as grb
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap
import numpy as np
import operator
import pandas as pd
import psutil
import platform
from shapely.geometry import Point
from shapely.geometry import LineString
import sys
import time
% matplotlib inline
GB = 1073741824.0005517 # Bytes to gigabytes
mile = 0.000621371 # meters to miles
operators = {"==": operator.eq, "<=": operator.le, ">=": operator.ge} # mathematical operators dictionary
methods = {"auto":-1, "primal simplex":0, "dual simplex":1, # algorithm dictionary
"barrier":2, "concurrent":3, "deterministic concurrent":4}
In [7]:
def transportation_simplex(r_dim, c_dim, seed=1, relaxed=False, simulated_costmatrix=None,
costmatrix=None, simulated_sd="lowhigh", s=None, d=None,
write_lp=True, name=None, output=1, mipfocus=2,
method=methods["auto"], timelimit=None):
"""
Solve the Transportation Simplex in Gurobi
Parameters
==========
r_dim int
Row dimension of the cost matrix.
c_dim int
Column dimension of the cost matrix.
seed int
Seed for psuedo-random number generation.
Default is 1.
relaxed bool
Relaxation of the supply and demand constraints.
Default is False.
simulated_costmatrix list, tuple, numpy.ndarray
Low and high for pseudo-random integer generation
for cost matrix from each i to each j. Default
is None.
costmatrix numpy.ndarray
Calculated cost matrix from PySAL.Network object.
Default is None.
simulated_sd str
Simulated supply and demand vectors.
`balanced`, `lowhigh`, `highlow`. Default is `lowhigh`.
s list, tuple, array
Empirical supply. Default is None.
d list, tuple, array
Empirical demand. Default is None.
write_lp bool
Write out a linear programming file (True) in
IBM Cplex format. Directory path must be a predefined
variable. Default is True.
name str
Name of the problem for model object and LP file.
Default is None.
output int
Print out solution progress `1`.
Hide solution progress `0`. Default in `1`.
mipfocus int
Solution focus. `1` for focus on feasibility. `2`
for focus on proven optimality. `3` for focus on moving
the best objective bound. Default is `2`.
method str
Algorithms defined in the `methods` dictionary.
Gurobi takes an int. Default is
>>> {"auto":-1, "primal simplex":0,
"dual simplex":1, "barrier":2,
"concurrent":3, "deterministic concurrent":4}
timelimit int
Time limit in seconds to solve the Transportation Problem.
Default is None.
Returns
=======
decisions pandas.DataFrame
Dataframe of decision variables and values
cm numpy.array
`r_dim` x `c_dim` array of i,j costs
total_time float
Total time to instantiate model and solve
"""
def _get_supply_constrs(m, oper="=="):
"""
Parameters
==========
m gurobi.Model
Instantiated mathematical programming model
oper str
Mathematical operator as decided by the relaxation constraint and
sums of supply/demand. Default is `==`.
Returns
=======
transportation problem supply constraints
"""
oper = operators[oper]
m.addConstrs((oper(decision_variables.sum(origin,'*')\
- supply[origin], 0)\
for origin in row_range),
"Supply")
return m
def _get_demand_constrs(m, oper="=="):
"""
Parameters
==========
m gurobi.Model
Instantiated mathematical programming model
oper str
Mathematical operator as decided by the relaxation constraint and
sums of supply/demand. Default is `==`.
Returns
=======
transportation problem demand constraints
"""
oper = operators[oper]
m.addConstrs((oper(decision_variables.sum('*',destination)\
- demand[destination], 0)\
for destination in column_range),
"Demand")
return m
time_start = time.time()
# Set Seed for Simulated Data Generation
np.random.seed(seed)
# Set Range of Matrix Dimensions
row_range, column_range = range(r_dim), range(c_dim)
# Emprical Cost Matrix
if costmatrix != None:
cm = costmatrix
# Simulated Cost Matrix
if simulated_costmatrix:
cm = np.random.randint(simulated_costmatrix[0],
simulated_costmatrix[1],
(r_dim,c_dim)).astype(float)
# Emprical Supply and Demand
if s:
supply = s
if d:
demand = d
# Simulated Supply and Demand
if simulated_sd:
sl, sh, dl, dh = 10, 100, 10, 100
if simulated_sd == "balanced":
supply = np.random.randint(sl, sh, (r_dim,1)).astype(float)
equal_sum = False
while not equal_sum:
demand = np.random.randint(dl, dh, (c_dim,1)).astype(float)
if demand.sum() == supply.sum():
equal_sum = True
if simulated_sd == "lowhigh":
supply = np.random.randint(sl,sh*2,(r_dim,1)).astype(float)
demand = np.random.randint(dl,dh,(c_dim,1)).astype(float)
if simulated_sd == "highlow":
dl, dh, sl, sh = sl, sh, dl, dh
supply = np.random.randint(sl,sh,(r_dim,1)).astype(float)
demand = np.random.randint(dl,dh*2,(c_dim,1)).astype(float)
# Instantiate Model and Set Parameters
transportation_simplex = grb.Model(name)
transportation_simplex.Params.OutputFlag = output
transportation_simplex.setParam('MIPFocus', mipfocus)
transportation_simplex.Params.method = method
if timelimit:
transportation_simplex.setParam('TimeLimit', timelimit)
# Set Decision Variables
decision_variables = transportation_simplex.addVars(r_dim,
c_dim,
vtype=grb.GRB.CONTINUOUS,
obj=cm,
name="sd")
# Set Supply and Demand Constraints
if not relaxed:
_get_supply_constrs(transportation_simplex)
_get_demand_constrs(transportation_simplex)
if relaxed and supply.sum() >= demand.sum():
_get_supply_constrs(transportation_simplex, oper="<=")
_get_demand_constrs(transportation_simplex, oper=">=")
if relaxed and supply.sum() <= demand.sum():
_get_supply_constrs(transportation_simplex, oper=">=")
_get_demand_constrs(transportation_simplex, oper="<=")
# Set Objective Function
cm_dict = {(row,column):cm[row][column] for row in row_range\
for column in column_range}
transportation_simplex.setObjective(decision_variables.prod(cm_dict))
# Solve
transportation_simplex.optimize()
# Write LP Function
if write_lp:
transportation_simplex.write(data_path_out+name+".lp")
# Record Solution Variables
try:
decisions = pd.DataFrame([[var.VarName, var.X]\
for var in transportation_simplex.getVars()\
if var.X > 0], columns=['Decision_Variable_Name',
'Decision_Variable_Value'])
except:
decisions = pd.DataFrame()
total_time = round(time.time() - time_start, 5)/60.
return transportation_simplex, decisions, cm, total_time
In [8]:
def solution_info(problem, variables, matrix, time_):
"""
Parameter
=========
problem gurobi.Model
Gurobi model object
variables pandas.DataFrame
Dataframe of decision variables and values
matrix array
Cost matrix from supply to demand locations
time_ float
Total time for model build and solve
Returns
=======
Printed solution information
"""
problem_size = sys.getsizeof(problem)/GB
matrix_size = sys.getsizeof(matrix)/GB
print "*************************************************************"
print "Model Size (GB):\t\t\t", problem_size
print "Cost Matrix Size (GB):\t\t\t", matrix_size
print "Real Time Build & Solve (min):\t\t", time_
print "*************************************************************"
print variables.head()
In [9]:
m, sv, cm, rt = transportation_simplex(2, 2, seed=2, relaxed=True, simulated_costmatrix=[1,10],
simulated_sd="lowhigh", name="TransportationProblem_2",
output=1, mipfocus=2, method=methods["primal simplex"])
solution_info(m, sv, cm, rt)
In [10]:
m, sv, cm, rt = transportation_simplex(20, 20, seed=2, relaxed=False, simulated_costmatrix=[1,10],
simulated_sd="balanced", name="TransportationProblem_20",
output=0,mipfocus=2, method=methods["primal simplex"])
solution_info(m, sv, cm, rt)
In [11]:
m, sv, cm, rt = transportation_simplex(200, 200, seed=2, relaxed=True, simulated_costmatrix=[1,10],
simulated_sd="highlow", name="TransportationProblem_200",
output=0, mipfocus=2, method=methods["primal simplex"])
solution_info(m, sv, cm, rt)
In [5]:
m, sv, cm, rt = transportation_simplex(2000, 2000, seed=2, relaxed=True, simulated_costmatrix=[1,10],
simulated_sd="lowhigh", name="TransportationProblem_2000",
output=0, mipfocus=2, method=methods["primal simplex"],
write_lp=False, timelimit=10)
solution_info(m, sv, cm, rt)
In [12]:
FLTallWav = IPd.Image(graphics_path+"/FLTallWav.png",
width=1200,
height=600)
FLTallWav
Out[12]:
In [13]:
def get_streets_buffer(street_file, buff=50):
"""
Parameters
==========
street_file geopandas.GeoDataFrame
GeoDataFrame of a shapefile representing a road network.
buff int or float
Desired buffer distance. Default is 50 (meters).
Returns
=======
Single polygon of the unioned street buffers.
"""
b1 = street_file.buffer(buff) #Buffer
ub = b1.unary_union #Buffer Union
b2 = gpd.GeoSeries(ub)
out_buff = gpd.GeoDataFrame(b2, crs=street_file.crs, columns=['geometry'])
return out_buff
In [14]:
def simulated_geo_points(in_buff, area, needed=20, seed=187, to_file=None):
"""
Parameters
==========
in_buff geopandas.GeoDataFrame
GeoDataFrame of a single polygon of the unioned street buffers.
area list or tuple
Bounding box coordinates of the area.
needed int
Number of points in the buffer. Default is 20.
seed int
Seed for psuedo-random number generation.
Default is 187.
to_file str
File name for write out.
Returns
=======
Points within the buffer.
"""
simulated_points_list = []
simulated_points_all = False
np.random.seed(seed)
while simulated_points_all == False:
x = np.random.uniform(area[0], area[2], 1)
y = np.random.uniform(area[1], area[3], 1)
point = Point(x,y)
if in_buff.intersects(point)[0]:
simulated_points_list.append(point)
if len(simulated_points_list) == needed:
simulated_points_all = True
simulated_points = gpd.GeoDataFrame(simulated_points_list,
columns=['geometry'],
crs=in_buff.crs)
if to_file:
simulated_points.to_file(data_path_out+to_file+".shp")
return simulated_points
In [15]:
in_file = data_path_in+'WAVERLY/WAVERLY.shp'
ntw_W = ps.Network(in_file)
shp_W = ps.open(in_file)
geo_W = gpd.read_file(in_file)
streets_buffer = get_streets_buffer(geo_W, 60)
supply_nodes_needed = 100
demand_nodes_needed = 100
simulated_supply_nodes = simulated_geo_points(streets_buffer.geometry,
shp_W.bbox,
needed=supply_nodes_needed,
to_file="simulated_supply_nodes")
simulated_demand_nodes = simulated_geo_points(streets_buffer.geometry,
shp_W.bbox,
needed=demand_nodes_needed,
seed=226,
to_file="simulated_demand_nodes")
In [16]:
mpl.rcParams['figure.figsize'] = 15,15
base = streets_buffer.plot(cmap=ListedColormap('y'),
linewidth=.25,
alpha=.5)
geo_W.plot(ax=base,
linewidth=2,
cmap=ListedColormap('k'))
simulated_supply_nodes.plot(ax=base,
markersize=5,
cmap=ListedColormap('red'))
simulated_demand_nodes.plot(ax=base,
markersize=5,
cmap=ListedColormap('blue'))
base.set_title("Waverly Hills", size=20)
plt.arrow(621500, 165750, 0.0, 250, width=30, head_width=100,
head_length=75, fc='k', ec='k',alpha=0.75)
plt.annotate('N', xy=(621575, 166000), fontstyle='italic', fontsize='xx-large',
fontweight='heavy', alpha=0.75)
plt.annotate('<| ~ .25 miles |>', xy=(621850, 166250),
fontstyle='italic', fontsize='large', alpha=0.95)
street_buffer = mpatches.Patch([], [], color='y', linewidth=2, alpha=.5, label='Street Buffer (60m)')
streets = mlines.Line2D([], [], color='k', linewidth=2, alpha=1, label='Streets')
supply = mlines.Line2D([], [], color='red', marker='o', linewidth=0,
alpha=1, label='Supply Locations')
demand = mlines.Line2D([], [], color='blue', marker='o', linewidth=0,
alpha=1, label='Demand Locations')
plt.legend(handles=[street_buffer,streets,supply,demand],
loc='upper left',
fancybox=True,
framealpha=.85)
Out[16]:
In [17]:
# Snap, Create frames of points
ntw_W.snapobservations(data_path_out+'simulated_supply_nodes.shp',
'supply', attribute=True)
ntw_W.snapobservations(data_path_out+'simulated_demand_nodes.shp',
'demand', attribute=True)
snapped_supply_list = [Point(j[0], j[1])\
for i,j in ntw_W.pointpatterns['supply'].snapped_coordinates.iteritems()]
snapped_supply = gpd.GeoDataFrame(snapped_supply_list, columns=["geometry"])
snapped_demand_list = [Point(j[0], j[1])\
for i,j in ntw_W.pointpatterns['demand'].snapped_coordinates.iteritems()]
snapped_demand = gpd.GeoDataFrame(snapped_demand_list, columns=["geometry"])
In [18]:
mpl.rcParams['figure.figsize'] = 15,15
base = streets_buffer.plot(cmap=ListedColormap('y'),
linewidth=.25,
alpha=.5)
geo_W.plot(ax=base,
linewidth=1,
cmap=ListedColormap('k'))
simulated_supply_nodes.plot(ax=base,
markersize=5,
cmap=ListedColormap('red'),
alpha=.25)
simulated_demand_nodes.plot(ax=base,
markersize=5,
cmap=ListedColormap('blue'),
alpha=.25)
snapped_supply.plot(ax=base,
markersize=5,
cmap=ListedColormap('red'))
snapped_demand.plot(ax=base,
markersize=5,
cmap=ListedColormap('blue'))
base.set_title("Waverly Hills", size=20)
plt.arrow(621500, 165650, 0.0, 250, width=30, head_width=100,
head_length=75, fc='k', ec='k',alpha=0.75)
plt.annotate('N', xy=(621575, 165900), fontstyle='italic', fontsize='xx-large',
fontweight='heavy', alpha=0.75)
plt.annotate('<| ~ .25 miles |>', xy=(622200, 166250),
fontstyle='italic', fontsize='large', alpha=0.95)
street_buffer = mpatches.Patch([], [], color='y', linewidth=2, alpha=.5, label='Street Buffer (60m)')
streets = mlines.Line2D([], [], color='k', linewidth=2, alpha=1, label='Streets')
supply = mlines.Line2D([], [], color='red', marker='o', linewidth=0,
alpha=.5, label='Supply Locations')
demand = mlines.Line2D([], [], color='blue', marker='o', linewidth=0,
alpha=.5, label='Demand Locations')
supply_snapped = mlines.Line2D([], [], color='red', marker='o', linewidth=0,
alpha=1, label='Snapped Supply Locations')
demand_snapped = mlines.Line2D([], [], color='blue', marker='o', linewidth=0,
alpha=1, label='Snapped Demand Locations')
plt.legend(handles=[street_buffer,streets,supply,demand,supply_snapped,demand_snapped],
loc='upper left',
fancybox=True,
framealpha=.85)
Out[18]:
In [19]:
All_Neigh_Dist = ntw_W.allneighbordistances(sourcepattern=ntw_W.pointpatterns['supply'],
destpattern=ntw_W.pointpatterns['demand']) # in meters
All_Dist_MILES = All_Neigh_Dist * mile
In [20]:
m, sv, cm, rt = transportation_simplex(len(All_Dist_MILES), len(All_Dist_MILES[0]), seed=2,
relaxed=True, costmatrix=All_Dist_MILES, simulated_sd="lowhigh",
name="TransportationProblem_WaverlyHills", output=1, mipfocus=2,
method=methods["primal simplex"])
solution_info(m, sv, cm, rt)
In [21]:
dimensions = 4000
dimensions_range = range(50,dimensions,50)
numpy_arrays = [np.zeros([d,d]) for d in dimensions_range]
array_range = range(len(numpy_arrays))
dask_arrays = [da.from_array(numpy_arrays[array], chunks=array+1) for array in array_range]
size_dataframe = pd.DataFrame()
size_dataframe["Dimensions"] = dimensions_range
size_dataframe["Numpy"] = [sys.getsizeof(numpy_arrays[array])*GB for array in array_range]
size_dataframe["Dask"] = [sys.getsizeof(dask_arrays[array])*GB for array in array_range]
size_dataframe.head()
Out[21]:
In [22]:
mpl.rcParams['figure.figsize'] = 8,8
base = size_dataframe.plot("Dimensions", "Numpy")
size_dataframe.plot("Dimensions", "Dask", ax=base)
base.set_title("RAM Disparity: Numpy vs. Dask", size=20)
base.set_xlabel('Cost Matrix Dimensions', size=20)
base.set_ylabel('RAM (GB)', size=20)
Out[22]:
In [23]:
spec_index = ['Python version', 'Python compiler', 'Gurobi version',\
'NumPy version', 'Pandas version', 'Dask version', 'GeoPandas version',\
'PySAL version', 'Platform', 'Cores available']
spec_info = [platform.python_version(), platform.python_compiler(),\
grb.gurobi.version(), np.__version__, pd.__version__,\
dask.__version__, gpd.__version__, ps.version,\
platform.platform(), psutil.cpu_count()]
system_specs = pd.DataFrame(index=spec_index)
system_specs['System & Version Specs'] = spec_info
system_specs
Out[23]: