In [1]:
# GNU LESSER GENERAL PUBLIC LICENSE
# Version 3, 29 June 2007
# Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
# Everyone is permitted to copy and distribute verbatim copies
# of this license document, but changing it is not allowed.
In [2]:
import IPython.display as IPd
import os
path = os.path.dirname(".")
graphics_path = path + "Graphics/"
data_path_in = path + "Data_In/"
data_path_out = path + "Data_Out/"
session = IPd.Image(graphics_path+"/logo.png")
session
Out[2]:
The Transportation Problem, which is also known as the Transportation Simplex or the Transshipment Problem, seeks to allocate supply to demand while minimizing transportation costs and was formally described by Hitchcock (1941). Supply ($n$) and demand ($m$) are generally represented as unit weights of decision variables at facilities along a network with the time or distance between nodes representing the cost of transporting one unit from a supply node to a demand node. These costs are stored in an $n$ x $m$ cost matrix. The Transportation Problem has continued to garner attention through to the present in multiple academic disciplines with Spatial Optimization in Geography being one of these at the forefront. As the processing power of computers has improved over the years, the capabilities of mathematical programming software to solve these problems have also improved. However, one of the main roadblocks is the need to store the ever-larger cost and decision variable matrices in memory while building and solving the optimization model. This paper seeks to provide a solution to the issue, not by modifying the structure of the problem or proposing a new algorithm, but by tackling the memory bottleneck with out-of-core storage. This will be attempted in an object-oriented programming language environment primarily utilizing two Python packages: dask (a parallel computing library) and pulp (an integer and mixed-integer programing library).
\text{min} & \displaystyle \sum{i \in I} \sum{j \in J} c{ij} sd{ij}&&&&& \ \text{s.t.} & \displaystyle \sum{j \in J} sd{ij} = Si &\text{or}&\sum{j \in J} sd_{ij} >= Si& \text{or}&\sum{j \in J} sd_{ij} <= Si& \forall i \in I \ & \displaystyle \sum{i \in I} sd_{ij} = Dj &\text{or}&\sum{i \in I} sd_{ij} <= Dj& \text{or}&\sum{i \in I} sd_{ij} >= Dj& \forall j \in J \ & \displaystyle sd{ij} \geq 0 &&&&& \forall i \in I, \forall j \in J \ \end{array} $$
\text{where}&\ & \scriptsize{i = \text{a specific origin}} \ & \scriptsize{j = \text{a specific destination}} \ & \scriptsize{I = \text{the set of origins}} \ & \scriptsize{J = \text{the set of destinations}} \ & \scriptsize{a{i} = \text{demand weight at each node}} \ & \scriptsize{c{ij} = \text{travel costs between nodes}} \ & \scriptsize{sd{ij} = \text{shipping decision variable from } i \text{ to } j} \ & \scriptsize{S{i} = \text{units of supply from facility } i} \ & \scriptsize{D_{j} = \text{units of demand by facility } j} \ \end{array} $$
Originally published:
Adapted from:
python
packages like dask
In [3]:
import dask
import geopandas as gpd
import pysal as ps
import dask.array as da
import gurobi as grb
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import numpy as np
import operator
import pandas as pd
import psutil
import platform
from shapely.geometry import Point
from shapely.geometry import LineString
import sys
import time
% matplotlib inline
mpl.rcParams['figure.figsize'] = 15,15
GB = 1073741824.0005517 # Bytes to gigabytes
mile = 0.000621371 # meters to miles
operators = {"==": operator.eq, "<=": operator.le, ">=": operator.ge} # mathematical operators dictionary
methods = {"auto":-1, "primal simplex":0, "dual simplex":1, # algorithm dictionary
"barrier":2, "concurrent":3, "deterministic concurrent":4}
In [4]:
def transportation_simplex(r_dim, c_dim, seed=1, relaxed=False, simulated_costmatrix=None,
costmatrix=None,
simulated_sd="lowhigh", write_lp=True, name=None, output=1, mipfocus=2,
method=methods["auto"], timelimit=None):
"""
Solve the Transportation Simplex in Gurobi
Parameters
==========
r_dim int
Row dimension of the cost matrix.
c_dim int
Column dimension of the cost matrix.
seed int
Seed for psuedo-random number generation.
relaxed bool
Relaxation of the supply and demand constraints.
Default is False.
simulated_costmatrix list, tuple
Low and high for pseudo-random integer generation
for cost matrix from each i to each j. Default
is [1,10].
simulated_sd str
Simulated supply and demand vectors.
`balanced`, `lowhigh`, `highlow`. Default is `lowhigh`.
write_lp bool
Write out a linear programming file (True) in
IBM Cplex format. Directory path must be a predefined
variable. Default is True.
name str
Name of the problem for model object and LP file.
Default is None.
output int
Print out solution progress `1`.
Hide solution progress `0`. Default in `1`.
mipfocus int
Solution focus. `1` for focus on feasibility. `2`
for focus on proven optimality. `3` for focus on moving
the best objective bound. Default is `2`.
method str
Algorithms defined in the `methods` dictionary.
Gurobi takes an int. Default is
>>> {"auto":-1, "primal simplex":0,
"dual simplex":1, "barrier":2,
"concurrent":3, "deterministic concurrent":4}
timelimit int
Time limit in seconds to solve the Transportation Problem.
Default is None.
Returns
=======
decisions pandas.DataFrame
Dataframe of decision variables and values
cm numpy.array
`r_dim` x `c_dim` array of i,j costs
total_time float
Total time to instantiate model and solve
"""
def _get_supply_constrs(m, oper="=="):
"""
Parameters
==========
m gurobi.Model
Instantiated mathematical programming model
oper str
Mathematical operator as decided by the relaxation constraint and
sums of supply/demand. Default is `==`.
Returns
=======
transportation problem supply constraints
"""
oper = operators[oper]
m.addConstrs((oper(decision_variables.sum(origin,'*')\
- supply[origin], 0)\
for origin in row_range),
"Supply")
return m
def _get_demand_constrs(m, oper="=="):
"""
Parameters
==========
m gurobi.Model
Instantiated mathematical programming model
oper str
Mathematical operator as decided by the relaxation constraint and
sums of supply/demand. Default is `==`.
Returns
=======
transportation problem demand constraints
"""
oper = operators[oper]
m.addConstrs((oper(decision_variables.sum('*',destination)\
- demand[destination], 0)\
for destination in column_range),
"Demand")
return m
time_start = time.time()
np.random.seed(seed)
row_range, column_range = range(r_dim), range(c_dim)
if costmatrix != None:
cm = costmatrix
if simulated_costmatrix:
cm = np.random.randint(simulated_costmatrix[0],
simulated_costmatrix[1],
(r_dim,c_dim)).astype(float)
if not simulated_sd:
pass
if simulated_sd:
sl, sh, dl, dh = 10, 100, 10, 100
if simulated_sd == "balanced":
supply = np.random.randint(sl, sh, (r_dim,1)).astype(float)
equal_sum = False
while not equal_sum:
demand = np.random.randint(dl, dh, (c_dim,1)).astype(float)
if demand.sum() == supply.sum():
equal_sum = True
if simulated_sd == "lowhigh":
supply = np.random.randint(sl,sh*2,(r_dim,1)).astype(float)
demand = np.random.randint(dl,dh,(c_dim,1)).astype(float)
if simulated_sd == "highlow":
dl, dh, sl, sh = sl, sh, dl, dh
supply = np.random.randint(sl,sh,(r_dim,1)).astype(float)
demand = np.random.randint(dl,dh*2,(c_dim,1)).astype(float)
var_names = pd.DataFrame(np.array(["sd["+str(r)+","+str(c)+"]"\
for r in row_range\
for c in column_range]).reshape(r_dim,c_dim),
index=['s'+str(r) for r in row_range],
columns=['d'+str(c) for c in column_range])
transportation_simplex = grb.Model(name)
transportation_simplex.Params.OutputFlag = output
transportation_simplex.setParam('MIPFocus', mipfocus)
transportation_simplex.Params.method = method
transportation_simplex.setParam('TimeLimit', timelimit)
decision_variables = transportation_simplex.addVars(r_dim,
c_dim,
vtype=grb.GRB.CONTINUOUS,
obj=cm,
name="sd")
# Set Supply and Demand Constraints
if not relaxed:
_get_supply_constrs(transportation_simplex)
_get_demand_constrs(transportation_simplex)
if relaxed and supply.sum() >= demand.sum():
_get_supply_constrs(transportation_simplex, oper="<=")
_get_demand_constrs(transportation_simplex, oper=">=")
if relaxed and supply.sum() <= demand.sum():
_get_supply_constrs(transportation_simplex, oper=">=")
_get_demand_constrs(transportation_simplex, oper="<=")
# Set Objective Function
"""
transportation_simplex.setObjective(grb.quicksum(decision_variables[origin,destination].Obj\
* decision_variables[origin,destination]
for origin in row_range
for destination in column_range))
"""
cm_dict = {(row,column):cm[row][column] for row in row_range\
for column in column_range}
transportation_simplex.setObjective(decision_variables.prod(cm_dict))
# Solve
transportation_simplex.optimize()
# Write LP Function
if write_lp:
transportation_simplex.write(data_path_out+name+".lp")
# Record Solution Variables
try:
decisions = pd.DataFrame([[var.VarName, var.X]\
for var in transportation_simplex.getVars()\
if var.X > 0], columns=['Decision_Variable_Name',
'Decision_Variable_Value'])
except:
decisions = pd.DataFrame()
total_time = round(time.time() - time_start, 5)/60.
return decisions, cm, total_time
In [5]:
def solution_info(variables, matrix, time_):
"""
Parameter
=========
variables
matrix
time_
Returns
=======
"""
matrix_size = sys.getsizeof(matrix)/GB
print "*************************************************************"
print "Cost Matrix Size (GB):\t\t\t", matrix_size
print "Real Time Build & Solve (sec):\t\t", time_
print "*************************************************************"
print variables.head()
In [6]:
sv, cm, rt = transportation_simplex(2, 2, seed=2,
relaxed=True,
simulated_costmatrix=[1,10],
simulated_sd="lowhigh",
name="TransportationProblem",
output=1,
mipfocus=2,
method=methods["primal simplex"],
timelimit=1)
solution_info(sv, cm, rt)
In [7]:
sv, cm, rt = transportation_simplex(20, 20, seed=2,
relaxed=False,
simulated_costmatrix=[1,10],
simulated_sd="balanced",
name="TransportationProblem",
output=0,
mipfocus=2,
method=methods["primal simplex"],
timelimit=1)
solution_info(sv, cm, rt)
In [8]:
sv, cm, rt = transportation_simplex(200, 200, seed=2,
relaxed=True,
simulated_costmatrix=[1,10],
simulated_sd="highlow",
name="TransportationProblem",
output=0,
mipfocus=2,
method=methods["primal simplex"],
timelimit=1)
solution_info(sv, cm, rt)
In [9]:
sv, cm, rt = transportation_simplex(2000, 2000, seed=2,
relaxed=True,
simulated_costmatrix=[1,10],
simulated_sd="lowhigh",
name="TransportationProblem",
output=0,
mipfocus=2,
method=methods["primal simplex"],
timelimit=1)
solution_info(sv, cm, rt)
In [ ]:
In [ ]:
In [ ]:
In [27]:
cm_sizes = []
iter_times = []
iterations = range(2,1200,100)
for dim in iterations:
sv, cm, rt = transportation_simplex(dim, dim, seed=2,
relaxed=True,
simulated_costmatrix=[1,10],
simulated_sd="lowhigh",
name="TransportationProblem",
output=0,
mipfocus=2,
method=methods["primal simplex"],
timelimit=1)
cm_size = sys.getsizeof(cm)/GB
cm_sizes.append(cm_size)
iter_times.append(rt)
plotter = pd.DataFrame()
plotter['dims'] = iterations
plotter['cm_size'] = cm_sizes
plotter['iter_time'] = iter_times
fig = plt.figure()
ax = fig.add_subplot(111,alpha=.15)
AX = ax.scatter(plotter.dims,
plotter.iter_time,
c=plotter.cm_size*100000000,
cmap="hot_r",
s=plotter.cm_size*100000000,
edgecolor="k")
fig.colorbar(AX)
ax.set_title("RAM Growth")
ax.set_xlabel('Cost Matrix Dimensions')
ax.set_ylabel('Time')
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [11]:
FLTallWav = IPd.Image(graphics_path+"/FLTallWav.png", width=1200, height=600)
FLTallWav
Out[11]:
In [12]:
def get_streets_buffer(street_file, buff=50):
"""
"""
b1 = street_file.buffer(buff) #Buffer
ub = b1.unary_union #Buffer Union
b2 = gpd.GeoSeries(ub)
out_buff = gpd.GeoDataFrame(b2, crs=street_file.crs, columns=['geometry'])
return out_buff
In [13]:
def simulated_geo_points(in_buff, area, needed=20, seed=187, to_file=None):
"""
"""
simulated_points_list = []
simulated_points_all = False
np.random.seed(seed)
while simulated_points_all == False:
x = np.random.uniform(area[0], area[2], 1)
y = np.random.uniform(area[1], area[3], 1)
point = Point(x,y)
if in_buff.intersects(point)[0]:
simulated_points_list.append(point)
if len(simulated_points_list) == needed:
simulated_points_all = True
simulated_points = gpd.GeoDataFrame(simulated_points_list,
columns=['geometry'],
crs=in_buff.crs)
if to_file:
simulated_points.to_file(data_path_out+to_file+".shp")
return simulated_points
In [14]:
in_file = data_path_in+'WAVERLY/WAVERLY.shp'
ntw_W = ps.Network(in_file)
shp_W = ps.open(in_file)
geo_W = gpd.read_file(in_file)
streets_buffer = get_streets_buffer(geo_W, 75)
supply_nodes_needed = 100
demand_nodes_needed = 100
simulated_supply_nodes = simulated_geo_points(streets_buffer.geometry,
shp_W.bbox,
needed=supply_nodes_needed,
to_file="simulated_supply_nodes")
simulated_demand_nodes = simulated_geo_points(streets_buffer.geometry,
shp_W.bbox,
needed=demand_nodes_needed,
seed=226,
to_file="simulated_demand_nodes")
In [15]:
base = streets_buffer.plot(cmap=ListedColormap('y'),
linewidth=.25,
alpha=.5)
geo_W.plot(ax=base,
linewidth=2,
cmap=ListedColormap('k'))
simulated_supply_nodes.plot(ax=base,
markersize=5,
cmap=ListedColormap('red'))
simulated_demand_nodes.plot(ax=base,
markersize=5,
cmap=ListedColormap('blue'))
Out[15]:
In [17]:
# Snap, Create frames of points, and visualize connections
ntw_W.snapobservations(data_path_out+'simulated_supply_nodes.shp',
'supply', attribute=True)
ntw_W.snapobservations(data_path_out+'simulated_demand_nodes.shp',
'demand', attribute=True)
snapped_supply_list = [Point(j[0], j[1])\
for i,j in ntw_W.pointpatterns['supply'].snapped_coordinates.iteritems()]
snapped_supply = gpd.GeoDataFrame(snapped_supply_list, columns=["geometry"])
snapped_demand_list = [Point(j[0], j[1])\
for i,j in ntw_W.pointpatterns['demand'].snapped_coordinates.iteritems()]
snapped_demand = gpd.GeoDataFrame(snapped_demand_list, columns=["geometry"])
r_supply = range(supply_nodes_needed)
supply_connectors = [LineString((snapped_supply.geometry[p],simulated_supply_nodes.geometry[p]))\
for p in r_supply]
supply_connectors = gpd.GeoDataFrame(supply_connectors, columns=["geometry"])
r_demand = range(demand_nodes_needed)
demand_connectors = [LineString((snapped_demand.geometry[p], simulated_demand_nodes.geometry[p]))\
for p in r_demand]
demand_connectors = gpd.GeoDataFrame(demand_connectors, columns=["geometry"])
In [25]:
base = streets_buffer.plot(cmap=ListedColormap('y'),
linewidth=.25,
alpha=.5)
geo_W.plot(ax=base,
linewidth=1,
cmap=ListedColormap('k'))
simulated_supply_nodes.plot(ax=base,
markersize=5,
cmap=ListedColormap('red'),
alpha=.25)
simulated_demand_nodes.plot(ax=base,
markersize=5,
cmap=ListedColormap('blue'),
alpha=.25)
snapped_supply.plot(ax=base,
markersize=5,
cmap=ListedColormap('red'))
snapped_demand.plot(ax=base,
markersize=5,
cmap=ListedColormap('blue'))
supply_connectors.plot(ax=base,
linewidth=2,
cmap=ListedColormap('k'))
demand_connectors.plot(ax=base,
linewidth=2,
cmap=ListedColormap('k'))
Out[25]:
In [26]:
All_Neigh_Dist = ntw_W.allneighbordistances(sourcepattern=ntw_W.pointpatterns['supply'],
destpattern=ntw_W.pointpatterns['demand']) # in meters
All_Dist_MILES = All_Neigh_Dist * mile
In [20]:
sv, cm, rt = transportation_simplex(len(All_Dist_MILES), len(All_Dist_MILES[0]), seed=2,
relaxed=True,
costmatrix=All_Dist_MILES,
simulated_sd="lowhigh",
name="TransportationProblem",
output=1,
mipfocus=2,
method=methods["primal simplex"],
timelimit=1)
solution_info(sv, cm, rt)
In [21]:
dimensions = 500
dimensions_range = range(2,dimensions,50)
numpy_arrays = [np.zeros([d,d]) for d in dimensions_range]
array_range = range(len(numpy_arrays))
dask_arrays = [da.from_array(numpy_arrays[array], chunks=array+1) for array in array_range]
size_dataframe = pd.DataFrame()
size_dataframe["Dimensions"] = dimensions_range
size_dataframe["Numpy"] = [sys.getsizeof(numpy_arrays[array])*GB for array in array_range]
size_dataframe["Dask"] = [sys.getsizeof(dask_arrays[array])*GB for array in array_range]
size_dataframe.head()
Out[21]:
In [22]:
base = size_dataframe.plot("Dimensions", "Numpy")
size_dataframe.plot("Dimensions", "Dask", ax=base)
base.set_title("Ram Disparity: Numpy vs. Dask")
base.set_xlabel('Cost Matrix Dimensions')
base.set_ylabel('RAM (GB)')
Out[22]:
In [ ]:
In [ ]:
In [23]:
spec_index = ['Python version', 'Python compiler', 'Gurobi version', 'PuLP version',\
'NumPy version', 'Pandas version', 'Dask version', 'GeoPandas version',\
'PySAl version', 'Platform', \
'Cores available', 'CPU Time %']
spec_info = [platform.python_version(), platform.python_compiler(),\
grb.gurobi.version(), pulp.VERSION,\
np.__version__, pd.__version__, dask.__version__,\
gpd.__version__, ps.version,\
platform.platform(), psutil.cpu_count(),\
psutil.cpu_times_percent(),]
system_specs = pd.DataFrame(index=spec_index)
system_specs['System & Version Specs'] = spec_info
system_specs