In [3]:
import numpy as np
In [149]:
x = np.arange(0.,80.,0.01)
In [150]:
y = np.sin(x)
In [151]:
%pylab inline
import pylab as plb
Populating the interactive namespace from numpy and matplotlib
In [152]:
plb.plot(x,y)
Out[152]:
[<matplotlib.lines.Line2D at 0x10b05eed0>]
In [153]:
y_rand = y + 0.1 * np.random.randn(len(y))
In [154]:
plb.plot(x,y_rand)
Out[154]:
[<matplotlib.lines.Line2D at 0x10b0de090>]
In [155]:
from scipy import optimize
In [71]:
help(optimize)
Help on package scipy.optimize in scipy:
NAME
scipy.optimize
FILE
/Users/mlightma/anaconda/lib/python2.7/site-packages/scipy/optimize/__init__.py
DESCRIPTION
=====================================================
Optimization and root finding (:mod:`scipy.optimize`)
=====================================================
.. currentmodule:: scipy.optimize
Optimization
============
General-purpose
---------------
.. autosummary::
:toctree: generated/
minimize - Unified interface for minimizers of multivariate functions
fmin - Nelder-Mead Simplex algorithm
fmin_powell - Powell's (modified) level set method
fmin_cg - Non-linear (Polak-Ribiere) conjugate gradient algorithm
fmin_bfgs - Quasi-Newton method (Broydon-Fletcher-Goldfarb-Shanno)
fmin_ncg - Line-search Newton Conjugate Gradient
leastsq - Minimize the sum of squares of M equations in N unknowns
Constrained (multivariate)
--------------------------
.. autosummary::
:toctree: generated/
fmin_l_bfgs_b - Zhu, Byrd, and Nocedal's constrained optimizer
fmin_tnc - Truncated Newton code
fmin_cobyla - Constrained optimization by linear approximation
fmin_slsqp - Minimization using sequential least-squares programming
nnls - Linear least-squares problem with non-negativity constraint
Global
------
.. autosummary::
:toctree: generated/
anneal - Simulated annealing
basinhopping - Basinhopping stochastic optimizer
brute - Brute force searching optimizer
Scalar function minimizers
--------------------------
.. autosummary::
:toctree: generated/
minimize_scalar - Unified interface for minimizers of univariate functions
fminbound - Bounded minimization of a scalar function
brent - 1-D function minimization using Brent method
golden - 1-D function minimization using Golden Section method
bracket - Bracket a minimum, given two starting points
Rosenbrock function
-------------------
.. autosummary::
:toctree: generated/
rosen - The Rosenbrock function.
rosen_der - The derivative of the Rosenbrock function.
rosen_hess - The Hessian matrix of the Rosenbrock function.
rosen_hess_prod - Product of the Rosenbrock Hessian with a vector.
Fitting
=======
.. autosummary::
:toctree: generated/
curve_fit -- Fit curve to a set of points
Root finding
============
Scalar functions
----------------
.. autosummary::
:toctree: generated/
brentq - quadratic interpolation Brent method
brenth - Brent method, modified by Harris with hyperbolic extrapolation
ridder - Ridder's method
bisect - Bisection method
newton - Secant method or Newton's method
Fixed point finding:
.. autosummary::
:toctree: generated/
fixed_point - Single-variable fixed-point solver
Multidimensional
----------------
General nonlinear solvers:
.. autosummary::
:toctree: generated/
root - Unified interface for nonlinear solvers of multivariate functions
fsolve - Non-linear multi-variable equation solver
broyden1 - Broyden's first method
broyden2 - Broyden's second method
Large-scale nonlinear solvers:
.. autosummary::
:toctree: generated/
newton_krylov
anderson
Simple iterations:
.. autosummary::
:toctree: generated/
excitingmixing
linearmixing
diagbroyden
:mod:`Additional information on the nonlinear solvers <scipy.optimize.nonlin>`
Utility Functions
=================
.. autosummary::
:toctree: generated/
line_search - Return a step that satisfies the strong Wolfe conditions
check_grad - Check the supplied derivative using finite differences
show_options - Show specific options optimization solvers
PACKAGE CONTENTS
_basinhopping
_cobyla
_lbfgsb
_minimize
_minpack
_nnls
_root
_slsqp
_trustregion
_trustregion_dogleg
_trustregion_ncg
_tstutils
_zeros
anneal
cobyla
lbfgsb
linesearch
minpack
minpack2
moduleTNC
nnls
nonlin
optimize
setup
slsqp
tnc
zeros
CLASSES
__builtin__.dict(__builtin__.object)
scipy.optimize.optimize.Result
exceptions.UserWarning(exceptions.Warning)
scipy.optimize.optimize.OptimizeWarning
class OptimizeWarning(exceptions.UserWarning)
| Method resolution order:
| OptimizeWarning
| exceptions.UserWarning
| exceptions.Warning
| exceptions.Exception
| exceptions.BaseException
| __builtin__.object
|
| Data descriptors defined here:
|
| __weakref__
| list of weak references to the object (if defined)
|
| ----------------------------------------------------------------------
| Methods inherited from exceptions.UserWarning:
|
| __init__(...)
| x.__init__(...) initializes x; see help(type(x)) for signature
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from exceptions.UserWarning:
|
| __new__ = <built-in method __new__ of type object>
| T.__new__(S, ...) -> a new object with type S, a subtype of T
|
| ----------------------------------------------------------------------
| Methods inherited from exceptions.BaseException:
|
| __delattr__(...)
| x.__delattr__('name') <==> del x.name
|
| __getattribute__(...)
| x.__getattribute__('name') <==> x.name
|
| __getitem__(...)
| x.__getitem__(y) <==> x[y]
|
| __getslice__(...)
| x.__getslice__(i, j) <==> x[i:j]
|
| Use of negative indices is not supported.
|
| __reduce__(...)
|
| __repr__(...)
| x.__repr__() <==> repr(x)
|
| __setattr__(...)
| x.__setattr__('name', value) <==> x.name = value
|
| __setstate__(...)
|
| __str__(...)
| x.__str__() <==> str(x)
|
| __unicode__(...)
|
| ----------------------------------------------------------------------
| Data descriptors inherited from exceptions.BaseException:
|
| __dict__
|
| args
|
| message
class Result(__builtin__.dict)
| Represents the optimization result.
|
| Attributes
| ----------
| x : ndarray
| The solution of the optimization.
| success : bool
| Whether or not the optimizer exited successfully.
| status : int
| Termination status of the optimizer. Its value depends on the
| underlying solver. Refer to `message` for details.
| message : str
| Description of the cause of the termination.
| fun, jac, hess, hess_inv : ndarray
| Values of objective function, Jacobian, Hessian or its inverse (if
| available). The Hessians may be approximations, see the documentation
| of the function in question.
| nfev, njev, nhev : int
| Number of evaluations of the objective functions and of its
| Jacobian and Hessian.
| nit : int
| Number of iterations performed by the optimizer.
| maxcv : float
| The maximum constraint violation.
|
| Notes
| -----
| There may be additional attributes not listed above depending of the
| specific solver. Since this class is essentially a subclass of dict
| with attribute accessors, one can see which attributes are available
| using the `keys()` method.
|
| Method resolution order:
| Result
| __builtin__.dict
| __builtin__.object
|
| Methods defined here:
|
| __delattr__ = __delitem__(...)
| x.__delitem__(y) <==> del x[y]
|
| __getattr__(self, name)
|
| __repr__(self)
|
| __setattr__ = __setitem__(...)
| x.__setitem__(i, y) <==> x[i]=y
|
| ----------------------------------------------------------------------
| Data descriptors defined here:
|
| __dict__
| dictionary for instance variables (if defined)
|
| __weakref__
| list of weak references to the object (if defined)
|
| ----------------------------------------------------------------------
| Methods inherited from __builtin__.dict:
|
| __cmp__(...)
| x.__cmp__(y) <==> cmp(x,y)
|
| __contains__(...)
| D.__contains__(k) -> True if D has a key k, else False
|
| __delitem__(...)
| x.__delitem__(y) <==> del x[y]
|
| __eq__(...)
| x.__eq__(y) <==> x==y
|
| __ge__(...)
| x.__ge__(y) <==> x>=y
|
| __getattribute__(...)
| x.__getattribute__('name') <==> x.name
|
| __getitem__(...)
| x.__getitem__(y) <==> x[y]
|
| __gt__(...)
| x.__gt__(y) <==> x>y
|
| __init__(...)
| x.__init__(...) initializes x; see help(type(x)) for signature
|
| __iter__(...)
| x.__iter__() <==> iter(x)
|
| __le__(...)
| x.__le__(y) <==> x<=y
|
| __len__(...)
| x.__len__() <==> len(x)
|
| __lt__(...)
| x.__lt__(y) <==> x<y
|
| __ne__(...)
| x.__ne__(y) <==> x!=y
|
| __setitem__(...)
| x.__setitem__(i, y) <==> x[i]=y
|
| __sizeof__(...)
| D.__sizeof__() -> size of D in memory, in bytes
|
| clear(...)
| D.clear() -> None. Remove all items from D.
|
| copy(...)
| D.copy() -> a shallow copy of D
|
| fromkeys(...)
| dict.fromkeys(S[,v]) -> New dict with keys from S and values equal to v.
| v defaults to None.
|
| get(...)
| D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.
|
| has_key(...)
| D.has_key(k) -> True if D has a key k, else False
|
| items(...)
| D.items() -> list of D's (key, value) pairs, as 2-tuples
|
| iteritems(...)
| D.iteritems() -> an iterator over the (key, value) items of D
|
| iterkeys(...)
| D.iterkeys() -> an iterator over the keys of D
|
| itervalues(...)
| D.itervalues() -> an iterator over the values of D
|
| keys(...)
| D.keys() -> list of D's keys
|
| pop(...)
| D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
| If key is not found, d is returned if given, otherwise KeyError is raised
|
| popitem(...)
| D.popitem() -> (k, v), remove and return some (key, value) pair as a
| 2-tuple; but raise KeyError if D is empty.
|
| setdefault(...)
| D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D
|
| update(...)
| D.update([E, ]**F) -> None. Update D from dict/iterable E and F.
| If E present and has a .keys() method, does: for k in E: D[k] = E[k]
| If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v
| In either case, this is followed by: for k in F: D[k] = F[k]
|
| values(...)
| D.values() -> list of D's values
|
| viewitems(...)
| D.viewitems() -> a set-like object providing a view on D's items
|
| viewkeys(...)
| D.viewkeys() -> a set-like object providing a view on D's keys
|
| viewvalues(...)
| D.viewvalues() -> an object providing a view on D's values
|
| ----------------------------------------------------------------------
| Data and other attributes inherited from __builtin__.dict:
|
| __hash__ = None
|
| __new__ = <built-in method __new__ of type object>
| T.__new__(S, ...) -> a new object with type S, a subtype of T
FUNCTIONS
anderson(F, xin, iter=None, alpha=None, w0=0.01, M=5, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, **kw)
Find a root of a function, using (extended) Anderson mixing.
The Jacobian is formed by for a 'best' solution in the space
spanned by last `M` vectors. As a result, only a MxM matrix
inversions and MxN multiplications are required. [Ey]_
Parameters
----------
F : function(x) -> f
Function whose root to find; should take and return an array-like
object.
x0 : array_like
Initial guess for the solution
alpha : float, optional
Initial guess for the Jacobian is (-1/alpha).
M : float, optional
Number of previous vectors to retain. Defaults to 5.
w0 : float, optional
Regularization parameter for numerical stability.
Compared to unity, good values of the order of 0.01.
iter : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
verbose : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
f_tol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
f_rtol : float, optional
Relative tolerance for the residual. If omitted, not used.
x_tol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
x_rtol : float, optional
Relative minimum step size. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in the
direction given by the Jacobian approximation. Defaults to 'armijo'.
callback : function, optional
Optional callback function. It is called on every iteration as
``callback(x, f)`` where `x` is the current solution and `f`
the corresponding residual.
Returns
-------
sol : ndarray
An array (of similar array type as `x0`) containing the final solution.
Raises
------
NoConvergence
When a solution was not found.
References
----------
.. [Ey] V. Eyert, J. Comp. Phys., 124, 271 (1996).
anneal(func, x0, args=(), schedule='fast', full_output=0, T0=None, Tf=1e-12, maxeval=None, maxaccept=None, maxiter=400, boltzmann=1.0, learn_rate=0.5, feps=1e-06, quench=1.0, m=1.0, n=1.0, lower=-100, upper=100, dwell=50, disp=True)
Minimize a function using simulated annealing.
Uses simulated annealing, a random algorithm that uses no derivative
information from the function being optimized. Other names for this
family of approaches include: "Monte Carlo", "Metropolis",
"Metropolis-Hastings", `etc`. They all involve (a) evaluating the
objective function on a random set of points, (b) keeping those that
pass their randomized evaluation critera, (c) cooling (`i.e.`,
tightening) the evaluation critera, and (d) repeating until their
termination critera are met. In practice they have been used mainly in
discrete rather than in continuous optimization.
Available annealing schedules are 'fast', 'cauchy' and 'boltzmann'.
Parameters
----------
func : callable
The objective function to be minimized. Must be in the form
`f(x, *args)`, where `x` is the argument in the form of a 1-D array
and `args` is a tuple of any additional fixed parameters needed to
completely specify the function.
x0: 1-D array
An initial guess at the optimizing argument of `func`.
args : tuple, optional
Any additional fixed parameters needed to completely
specify the objective function.
schedule : str, optional
The annealing schedule to use. Must be one of 'fast', 'cauchy' or
'boltzmann'. See `Notes`.
full_output : bool, optional
If `full_output`, then return all values listed in the Returns
section. Otherwise, return just the `xmin` and `status` values.
T0 : float, optional
The initial "temperature". If None, then estimate it as 1.2 times
the largest cost-function deviation over random points in the
box-shaped region specified by the `lower, upper` input parameters.
Tf : float, optional
Final goal temperature. Cease iterations if the temperature
falls below `Tf`.
maxeval : int, optional
Cease iterations if the number of function evaluations exceeds
`maxeval`.
maxaccept : int, optional
Cease iterations if the number of points accepted exceeds `maxaccept`.
See `Notes` for the probabilistic acceptance criteria used.
maxiter : int, optional
Cease iterations if the number of cooling iterations exceeds `maxiter`.
learn_rate : float, optional
Scale constant for tuning the probabilistc acceptance criteria.
boltzmann : float, optional
Boltzmann constant in the probabilistic acceptance criteria
(increase for less stringent criteria at each temperature).
feps : float, optional
Cease iterations if the relative errors in the function value over the
last four coolings is below `feps`.
quench, m, n : floats, optional
Parameters to alter the `fast` simulated annealing schedule.
See `Notes`.
lower, upper : floats or 1-D arrays, optional
Lower and upper bounds on the argument `x`. If floats are provided,
they apply to all components of `x`.
dwell : int, optional
The number of times to execute the inner loop at each value of the
temperature. See `Notes`.
disp : bool, optional
Print a descriptive convergence message if True.
Returns
-------
xmin : ndarray
The point where the lowest function value was found.
Jmin : float
The objective function value at `xmin`.
T : float
The temperature at termination of the iterations.
feval : int
Number of function evaluations used.
iters : int
Number of cooling iterations used.
accept : int
Number of tests accepted.
status : int
A code indicating the reason for termination:
- 0 : Points no longer changing.
- 1 : Cooled to final temperature.
- 2 : Maximum function evaluations reached.
- 3 : Maximum cooling iterations reached.
- 4 : Maximum accepted query locations reached.
- 5 : Final point not the minimum amongst encountered points.
See Also
--------
basinhopping : another (more performant) global optimizer
brute : brute-force global optimizer
Notes
-----
Simulated annealing is a random algorithm which uses no derivative
information from the function being optimized. In practice it has
been more useful in discrete optimization than continuous
optimization, as there are usually better algorithms for continuous
optimization problems.
Some experimentation by trying the different temperature
schedules and altering their parameters is likely required to
obtain good performance.
The randomness in the algorithm comes from random sampling in numpy.
To obtain the same results you can call `numpy.random.seed` with the
same seed immediately before calling `anneal`.
We give a brief description of how the three temperature schedules
generate new points and vary their temperature. Temperatures are
only updated with iterations in the outer loop. The inner loop is
over loop over ``xrange(dwell)``, and new points are generated for
every iteration in the inner loop. Whether the proposed new points
are accepted is probabilistic.
For readability, let ``d`` denote the dimension of the inputs to func.
Also, let ``x_old`` denote the previous state, and ``k`` denote the
iteration number of the outer loop. All other variables not
defined below are input variables to `anneal` itself.
In the 'fast' schedule the updates are::
u ~ Uniform(0, 1, size = d)
y = sgn(u - 0.5) * T * ((1 + 1/T)**abs(2*u - 1) - 1.0)
xc = y * (upper - lower)
x_new = x_old + xc
c = n * exp(-n * quench)
T_new = T0 * exp(-c * k**quench)
In the 'cauchy' schedule the updates are::
u ~ Uniform(-pi/2, pi/2, size=d)
xc = learn_rate * T * tan(u)
x_new = x_old + xc
T_new = T0 / (1 + k)
In the 'boltzmann' schedule the updates are::
std = minimum(sqrt(T) * ones(d), (upper - lower) / (3*learn_rate))
y ~ Normal(0, std, size = d)
x_new = x_old + learn_rate * y
T_new = T0 / log(1 + k)
References
----------
[1] P. J. M. van Laarhoven and E. H. L. Aarts, "Simulated Annealing: Theory
and Applications", Kluwer Academic Publishers, 1987.
[2] W.H. Press et al., "Numerical Recipies: The Art of Scientific Computing",
Cambridge U. Press, 1987.
Examples
--------
*Example 1.* We illustrate the use of `anneal` to seek the global minimum
of a function of two variables that is equal to the sum of a positive-
definite quadratic and two deep "Gaussian-shaped" craters. Specifically,
define the objective function `f` as the sum of three other functions,
``f = f1 + f2 + f3``. We suppose each of these has a signature
``(z, *params)``, where ``z = (x, y)``, ``params``, and the functions are
as defined below.
>>> params = (2, 3, 7, 8, 9, 10, 44, -1, 2, 26, 1, -2, 0.5)
>>> def f1(z, *params):
... x, y = z
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
... return (a * x**2 + b * x * y + c * y**2 + d*x + e*y + f)
>>> def f2(z, *params):
... x, y = z
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
... return (-g*np.exp(-((x-h)**2 + (y-i)**2) / scale))
>>> def f3(z, *params):
... x, y = z
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
... return (-j*np.exp(-((x-k)**2 + (y-l)**2) / scale))
>>> def f(z, *params):
... x, y = z
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
... return f1(z, *params) + f2(z, *params) + f3(z, *params)
>>> x0 = np.array([2., 2.]) # Initial guess.
>>> from scipy import optimize
>>> np.random.seed(555) # Seeded to allow replication.
>>> res = optimize.anneal(f, x0, args=params, schedule='boltzmann',
full_output=True, maxiter=500, lower=-10,
upper=10, dwell=250, disp=True)
Warning: Maximum number of iterations exceeded.
>>> res[0] # obtained minimum
array([-1.03914194, 1.81330654])
>>> res[1] # function value at minimum
-3.3817...
So this run settled on the point [-1.039, 1.813] with a minimum function
value of about -3.382. The final temperature was about 212. The run used
125301 function evaluations, 501 iterations (including the initial guess as
a iteration), and accepted 61162 points. The status flag of 3 also
indicates that `maxiter` was reached.
This problem's true global minimum lies near the point [-1.057, 1.808]
and has a value of about -3.409. So these `anneal` results are pretty
good and could be used as the starting guess in a local optimizer to
seek a more exact local minimum.
*Example 2.* To minimize the same objective function using
the `minimize` approach, we need to (a) convert the options to an
"options dictionary" using the keys prescribed for this method,
(b) call the `minimize` function with the name of the method (which
in this case is 'Anneal'), and (c) take account of the fact that
the returned value will be a `Result` object (`i.e.`, a dictionary,
as defined in `optimize.py`).
All of the allowable options for 'Anneal' when using the `minimize`
approach are listed in the ``myopts`` dictionary given below, although
in practice only the non-default values would be needed. Some of their
names differ from those used in the `anneal` approach. We can proceed
as follows:
>>> myopts = {
'schedule' : 'boltzmann', # Non-default value.
'maxfev' : None, # Default, formerly `maxeval`.
'maxiter' : 500, # Non-default value.
'maxaccept' : None, # Default value.
'ftol' : 1e-6, # Default, formerly `feps`.
'T0' : None, # Default value.
'Tf' : 1e-12, # Default value.
'boltzmann' : 1.0, # Default value.
'learn_rate' : 0.5, # Default value.
'quench' : 1.0, # Default value.
'm' : 1.0, # Default value.
'n' : 1.0, # Default value.
'lower' : -10, # Non-default value.
'upper' : +10, # Non-default value.
'dwell' : 250, # Non-default value.
'disp' : True # Default value.
}
>>> from scipy import optimize
>>> np.random.seed(777) # Seeded to allow replication.
>>> res2 = optimize.minimize(f, x0, args=params, method='Anneal',
options=myopts)
Warning: Maximum number of iterations exceeded.
>>> res2
status: 3
success: False
accept: 61742
nfev: 125301
T: 214.20624873839623
fun: -3.4084065576676053
x: array([-1.05757366, 1.8071427 ])
message: 'Maximum cooling iterations reached'
nit: 501
approx_fprime(xk, f, epsilon, *args)
Finite-difference approximation of the gradient of a scalar function.
Parameters
----------
xk : array_like
The coordinate vector at which to determine the gradient of `f`.
f : callable
The function of which to determine the gradient (partial derivatives).
Should take `xk` as first argument, other arguments to `f` can be
supplied in ``*args``. Should return a scalar, the value of the
function at `xk`.
epsilon : array_like
Increment to `xk` to use for determining the function gradient.
If a scalar, uses the same finite difference delta for all partial
derivatives. If an array, should contain one value per element of
`xk`.
\*args : args, optional
Any other arguments that are to be passed to `f`.
Returns
-------
grad : ndarray
The partial derivatives of `f` to `xk`.
See Also
--------
check_grad : Check correctness of gradient function against approx_fprime.
Notes
-----
The function gradient is determined by the forward finite difference
formula::
f(xk[i] + epsilon[i]) - f(xk[i])
f'[i] = ---------------------------------
epsilon[i]
The main use of `approx_fprime` is in scalar function optimizers like
`fmin_bfgs`, to determine numerically the Jacobian of a function.
Examples
--------
>>> from scipy import optimize
>>> def func(x, c0, c1):
... "Coordinate vector `x` should be an array of size two."
... return c0 * x[0]**2 + c1*x[1]**2
>>> x = np.ones(2)
>>> c0, c1 = (1, 200)
>>> eps = np.sqrt(np.finfo(np.float).eps)
>>> optimize.approx_fprime(x, func, [eps, np.sqrt(200) * eps], c0, c1)
array([ 2. , 400.00004198])
basinhopping(func, x0, niter=100, T=1.0, stepsize=0.5, minimizer_kwargs=None, take_step=None, accept_test=None, callback=None, interval=50, disp=False, niter_success=None)
Find the global minimum of a function using the basin-hopping algorithm
.. versionadded:: 0.12.0
Parameters
----------
func : callable ``f(x, *args)``
Function to be optimized. ``args`` can be passed as an optional item
in the dict ``minimizer_kwargs``
x0 : ndarray
Initial guess.
niter : integer, optional
The number of basin hopping iterations
T : float, optional
The "temperature" parameter for the accept or reject criterion. Higher
"temperatures" mean that larger jumps in function value will be
accepted. For best results ``T`` should be comparable to the
separation
(in function value) between local minima.
stepsize : float, optional
initial step size for use in the random displacement.
minimizer_kwargs : dict, optional
Extra keyword arguments to be passed to the minimizer
``scipy.optimize.minimize()`` Some important options could be:
method : str
The minimization method (e.g. ``"L-BFGS-B"``)
args : tuple
Extra arguments passed to the objective function (``func``) and
its derivatives (Jacobian, Hessian).
take_step : callable ``take_step(x)``, optional
Replace the default step taking routine with this routine. The default
step taking routine is a random displacement of the coordinates, but
other step taking algorithms may be better for some systems.
``take_step`` can optionally have the attribute ``take_step.stepsize``.
If this attribute exists, then ``basinhopping`` will adjust
``take_step.stepsize`` in order to try to optimize the global minimum
search.
accept_test : callable, ``accept_test(f_new=f_new, x_new=x_new, f_old=fold, x_old=x_old)``, optional
Define a test which will be used to judge whether or not to accept the
step. This will be used in addition to the Metropolis test based on
"temperature" ``T``. The acceptable return values are True,
False, or ``"force accept"``. If the latter, then this will
override any other tests in order to accept the step. This can be
used, for example, to forcefully escape from a local minimum that
``basinhopping`` is trapped in.
callback : callable, ``callback(x, f, accept)``, optional
A callback function which will be called for all minimum found. ``x``
and ``f`` are the coordinates and function value of the trial minima,
and ``accept`` is whether or not that minima was accepted. This can be
used, for example, to save the lowest N minima found. Also,
``callback`` can be used to specify a user defined stop criterion by
optionally returning True to stop the ``basinhopping`` routine.
interval : integer, optional
interval for how often to update the ``stepsize``
disp : bool, optional
Set to True to print status messages
niter_success : integer, optional
Stop the run if the global minimum candidate remains the same for this
number of iterations.
Returns
-------
res : Result
The optimization result represented as a ``Result`` object. Important
attributes are: ``x`` the solution array, ``fun`` the value of the
function at the solution, and ``message`` which describes the cause of
the termination. See `Result` for a description of other attributes.
See Also
--------
minimize :
The local minimization function called once for each basinhopping step.
``minimizer_kwargs`` is passed to this routine.
Notes
-----
Basin-hopping is a stochastic algorithm which attempts to find the global
minimum of a smooth scalar function of one or more variables [1]_ [2]_ [3]_
[4]_. The algorithm in its current form was described by David Wales and
Jonathan Doye [2]_ http://www-wales.ch.cam.ac.uk/.
The algorithm is iterative with each cycle composed of the following
features
1) random perturbation of the coordinates
2) local minimization
3) accept or reject the new coordinates based on the minimized function
value
The acceptance test used here is the Metropolis criterion of standard Monte
Carlo algorithms, although there are many other possibilities [3]_.
This global minimization method has been shown to be extremely efficient
for a wide variety of problems in physics and chemistry. It is
particularly useful when the function has many minima separated by large
barriers. See the Cambridge Cluster Database
http://www-wales.ch.cam.ac.uk/CCD.html for databases of molecular systems
that have been optimized primarily using basin-hopping. This database
includes minimization problems exceeding 300 degrees of freedom.
See the free software program GMIN (http://www-wales.ch.cam.ac.uk/GMIN) for
a Fortran implementation of basin-hopping. This implementation has many
different variations of the procedure described above, including more
advanced step taking algorithms and alternate acceptance criterion.
For stochastic global optimization there is no way to determine if the true
global minimum has actually been found. Instead, as a consistency check,
the algorithm can be run from a number of different random starting points
to ensure the lowest minimum found in each example has converged to the
global minimum. For this reason ``basinhopping`` will by default simply
run for the number of iterations ``niter`` and return the lowest minimum
found. It is left to the user to ensure that this is in fact the global
minimum.
Choosing ``stepsize``: This is a crucial parameter in ``basinhopping`` and
depends on the problem being solved. Ideally it should be comparable to
the typical separation between local minima of the function being
optimized. ``basinhopping`` will, by default, adjust ``stepsize`` to find
an optimal value, but this may take many iterations. You will get quicker
results if you set a sensible value for ``stepsize``.
Choosing ``T``: The parameter ``T`` is the temperature used in the
metropolis criterion. Basinhopping steps are accepted with probability
``1`` if ``func(xnew) < func(xold)``, or otherwise with probability::
exp( -(func(xnew) - func(xold)) / T )
So, for best results, ``T`` should to be comparable to the typical
difference in function value between between local minima
References
----------
.. [1] Wales, David J. 2003, Energy Landscapes, Cambridge University Press,
Cambridge, UK.
.. [2] Wales, D J, and Doye J P K, Global Optimization by Basin-Hopping and
the Lowest Energy Structures of Lennard-Jones Clusters Containing up to
110 Atoms. Journal of Physical Chemistry A, 1997, 101, 5111.
.. [3] Li, Z. and Scheraga, H. A., Monte Carlo-minimization approach to the
multiple-minima problem in protein folding, Proc. Natl. Acad. Sci. USA,
1987, 84, 6611.
.. [4] Wales, D. J. and Scheraga, H. A., Global optimization of clusters,
crystals, and biomolecules, Science, 1999, 285, 1368.
Examples
--------
The following example is a one-dimensional minimization problem, with many
local minima superimposed on a parabola.
>>> func = lambda x: cos(14.5 * x - 0.3) + (x + 0.2) * x
>>> x0=[1.]
Basinhopping, internally, uses a local minimization algorithm. We will use
the parameter ``minimizer_kwargs`` to tell basinhopping which algorithm to
use and how to set up that minimizer. This parameter will be passed to
``scipy.optimize.minimize()``.
>>> minimizer_kwargs = {"method": "BFGS"}
>>> ret = basinhopping(func, x0, minimizer_kwargs=minimizer_kwargs,
... niter=200)
>>> print("global minimum: x = %.4f, f(x0) = %.4f" % (ret.x, ret.fun))
global minimum: x = -0.1951, f(x0) = -1.0009
Next consider a two-dimensional minimization problem. Also, this time we
will use gradient information to significantly speed up the search.
>>> def func2d(x):
... f = cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] +
... 0.2) * x[0]
... df = np.zeros(2)
... df[0] = -14.5 * sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2
... df[1] = 2. * x[1] + 0.2
... return f, df
We'll also use a different local minimization algorithm. Also we must tell
the minimizer that our function returns both energy and gradient (jacobian)
>>> minimizer_kwargs = {"method":"L-BFGS-B", "jac":True}
>>> x0 = [1.0, 1.0]
>>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
... niter=200)
>>> print("global minimum: x = [%.4f, %.4f], f(x0) = %.4f" % (ret.x[0],
... ret.x[1],
... ret.fun))
global minimum: x = [-0.1951, -0.1000], f(x0) = -1.0109
Here is an example using a custom step taking routine. Imagine you want
the first coordinate to take larger steps then the rest of the coordinates.
This can be implemented like so:
>>> class MyTakeStep(object):
... def __init__(self, stepsize=0.5):
... self.stepsize = stepsize
... def __call__(self, x):
... s = self.stepsize
... x[0] += np.random.uniform(-2.*s, 2.*s)
... x[1:] += np.random.uniform(-s, s, x[1:].shape)
... return x
Since ``MyTakeStep.stepsize`` exists basinhopping will adjust the magnitude
of ``stepsize`` to optimize the search. We'll use the same 2-D function as
before
>>> mytakestep = MyTakeStep()
>>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
... niter=200, take_step=mytakestep)
>>> print("global minimum: x = [%.4f, %.4f], f(x0) = %.4f" % (ret.x[0],
... ret.x[1],
... ret.fun))
global minimum: x = [-0.1951, -0.1000], f(x0) = -1.0109
Now let's do an example using a custom callback function which prints the
value of every minimum found
>>> def print_fun(x, f, accepted):
... print("at minima %.4f accepted %d" % (f, int(accepted)))
We'll run it for only 10 basinhopping steps this time.
>>> np.random.seed(1)
>>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
... niter=10, callback=print_fun)
at minima 0.4159 accepted 1
at minima -0.9073 accepted 1
at minima -0.1021 accepted 1
at minima -0.1021 accepted 1
at minima 0.9102 accepted 1
at minima 0.9102 accepted 1
at minima 2.2945 accepted 0
at minima -0.1021 accepted 1
at minima -1.0109 accepted 1
at minima -1.0109 accepted 1
The minima at -1.0109 is actually the global minimum, found already on the
8th iteration.
Now let's implement bounds on the problem using a custom ``accept_test``:
>>> class MyBounds(object):
... def __init__(self, xmax=[1.1,1.1], xmin=[-1.1,-1.1] ):
... self.xmax = np.array(xmax)
... self.xmin = np.array(xmin)
... def __call__(self, **kwargs):
... x = kwargs["x_new"]
... tmax = bool(np.all(x <= self.xmax))
... tmin = bool(np.all(x >= self.xmin))
... return tmax and tmin
>>> mybounds = MyBounds()
>>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
... niter=10, accept_test=mybounds)
bisect(f, a, b, args=(), xtol=1e-12, rtol=4.4408920985006262e-16, maxiter=100, full_output=False, disp=True)
Find root of a function within an interval.
Basic bisection routine to find a zero of the function `f` between the
arguments `a` and `b`. `f(a)` and `f(b)` can not have the same signs.
Slow but sure.
Parameters
----------
f : function
Python function returning a number. `f` must be continuous, and
f(a) and f(b) must have opposite signs.
a : number
One end of the bracketing interval [a,b].
b : number
The other end of the bracketing interval [a,b].
xtol : number, optional
The routine converges when a root is known to lie within `xtol` of the
value return. Should be >= 0. The routine modifies this to take into
account the relative precision of doubles.
rtol : number, optional
The routine converges when a root is known to lie within `rtol` times
the value returned of the value returned. Should be >= 0. Defaults to
``np.finfo(float).eps * 2``.
maxiter : number, optional
if convergence is not achieved in `maxiter` iterations, and error is
raised. Must be >= 0.
args : tuple, optional
containing extra arguments for the function `f`.
`f` is called by ``apply(f, (x)+args)``.
full_output : bool, optional
If `full_output` is False, the root is returned. If `full_output` is
True, the return value is ``(x, r)``, where x is the root, and r is
a `RootResults` object.
disp : bool, optional
If True, raise RuntimeError if the algorithm didn't converge.
Returns
-------
x0 : float
Zero of `f` between `a` and `b`.
r : RootResults (present if ``full_output = True``)
Object containing information about the convergence. In particular,
``r.converged`` is True if the routine converged.
See Also
--------
brentq, brenth, bisect, newton
fixed_point : scalar fixed-point finder
fsolve : n-dimensional root-finding
bracket(func, xa=0.0, xb=1.0, args=(), grow_limit=110.0, maxiter=1000)
Bracket the minimum of the function.
Given a function and distinct initial points, search in the
downhill direction (as defined by the initital points) and return
new points xa, xb, xc that bracket the minimum of the function
f(xa) > f(xb) < f(xc). It doesn't always mean that obtained
solution will satisfy xa<=x<=xb
Parameters
----------
func : callable f(x,*args)
Objective function to minimize.
xa, xb : float, optional
Bracketing interval. Defaults `xa` to 0.0, and `xb` to 1.0.
args : tuple, optional
Additional arguments (if present), passed to `func`.
grow_limit : float, optional
Maximum grow limit. Defaults to 110.0
maxiter : int, optional
Maximum number of iterations to perform. Defaults to 1000.
Returns
-------
xa, xb, xc : float
Bracket.
fa, fb, fc : float
Objective function values in bracket.
funcalls : int
Number of function evaluations made.
brent(func, args=(), brack=None, tol=1.48e-08, full_output=0, maxiter=500)
Given a function of one-variable and a possible bracketing interval,
return the minimum of the function isolated to a fractional precision of
tol.
Parameters
----------
func : callable f(x,*args)
Objective function.
args
Additional arguments (if present).
brack : tuple
Triple (a,b,c) where (a<b<c) and func(b) <
func(a),func(c). If bracket consists of two numbers (a,c)
then they are assumed to be a starting interval for a
downhill bracket search (see `bracket`); it doesn't always
mean that the obtained solution will satisfy a<=x<=c.
tol : float
Stop if between iteration change is less than `tol`.
full_output : bool
If True, return all output args (xmin, fval, iter,
funcalls).
maxiter : int
Maximum number of iterations in solution.
Returns
-------
xmin : ndarray
Optimum point.
fval : float
Optimum value.
iter : int
Number of iterations.
funcalls : int
Number of objective function evaluations made.
See also
--------
minimize_scalar: Interface to minimization algorithms for scalar
univariate functions. See the 'Brent' `method` in particular.
Notes
-----
Uses inverse parabolic interpolation when possible to speed up
convergence of golden section method.
brenth(f, a, b, args=(), xtol=1e-12, rtol=4.4408920985006262e-16, maxiter=100, full_output=False, disp=True)
Find root of f in [a,b].
A variation on the classic Brent routine to find a zero of the function f
between the arguments a and b that uses hyperbolic extrapolation instead of
inverse quadratic extrapolation. There was a paper back in the 1980's ...
f(a) and f(b) can not have the same signs. Generally on a par with the
brent routine, but not as heavily tested. It is a safe version of the
secant method that uses hyperbolic extrapolation. The version here is by
Chuck Harris.
Parameters
----------
f : function
Python function returning a number. f must be continuous, and f(a) and
f(b) must have opposite signs.
a : number
One end of the bracketing interval [a,b].
b : number
The other end of the bracketing interval [a,b].
xtol : number, optional
The routine converges when a root is known to lie within xtol of the
value return. Should be >= 0. The routine modifies this to take into
account the relative precision of doubles.
rtol : number, optional
The routine converges when a root is known to lie within `rtol` times
the value returned of the value returned. Should be >= 0. Defaults to
``np.finfo(float).eps * 2``.
maxiter : number, optional
if convergence is not achieved in maxiter iterations, and error is
raised. Must be >= 0.
args : tuple, optional
containing extra arguments for the function `f`.
`f` is called by ``apply(f, (x)+args)``.
full_output : bool, optional
If `full_output` is False, the root is returned. If `full_output` is
True, the return value is ``(x, r)``, where `x` is the root, and `r` is
a RootResults object.
disp : bool, optional
If True, raise RuntimeError if the algorithm didn't converge.
Returns
-------
x0 : float
Zero of `f` between `a` and `b`.
r : RootResults (present if ``full_output = True``)
Object containing information about the convergence. In particular,
``r.converged`` is True if the routine converged.
See Also
--------
fmin, fmin_powell, fmin_cg,
fmin_bfgs, fmin_ncg : multivariate local optimizers
leastsq : nonlinear least squares minimizer
fmin_l_bfgs_b, fmin_tnc, fmin_cobyla : constrained multivariate optimizers
anneal, brute : global optimizers
fminbound, brent, golden, bracket : local scalar minimizers
fsolve : n-dimensional root-finding
brentq, brenth, ridder, bisect, newton : one-dimensional root-finding
fixed_point : scalar fixed-point finder
brentq(f, a, b, args=(), xtol=1e-12, rtol=4.4408920985006262e-16, maxiter=100, full_output=False, disp=True)
Find a root of a function in given interval.
Return float, a zero of `f` between `a` and `b`. `f` must be a continuous
function, and [a,b] must be a sign changing interval.
Description:
Uses the classic Brent (1973) method to find a zero of the function `f` on
the sign changing interval [a , b]. Generally considered the best of the
rootfinding routines here. It is a safe version of the secant method that
uses inverse quadratic extrapolation. Brent's method combines root
bracketing, interval bisection, and inverse quadratic interpolation. It is
sometimes known as the van Wijngaarden-Deker-Brent method. Brent (1973)
claims convergence is guaranteed for functions computable within [a,b].
[Brent1973]_ provides the classic description of the algorithm. Another
description can be found in a recent edition of Numerical Recipes, including
[PressEtal1992]_. Another description is at
http://mathworld.wolfram.com/BrentsMethod.html. It should be easy to
understand the algorithm just by reading our code. Our code diverges a bit
from standard presentations: we choose a different formula for the
extrapolation step.
Parameters
----------
f : function
Python function returning a number. f must be continuous, and f(a) and
f(b) must have opposite signs.
a : number
One end of the bracketing interval [a,b].
b : number
The other end of the bracketing interval [a,b].
xtol : number, optional
The routine converges when a root is known to lie within xtol of the
value return. Should be >= 0. The routine modifies this to take into
account the relative precision of doubles.
rtol : number, optional
The routine converges when a root is known to lie within `rtol` times
the value returned of the value returned. Should be >= 0. Defaults to
``np.finfo(float).eps * 2``.
maxiter : number, optional
if convergence is not achieved in maxiter iterations, and error is
raised. Must be >= 0.
args : tuple, optional
containing extra arguments for the function `f`.
`f` is called by ``apply(f, (x)+args)``.
full_output : bool, optional
If `full_output` is False, the root is returned. If `full_output` is
True, the return value is ``(x, r)``, where `x` is the root, and `r` is
a RootResults object.
disp : bool, optional
If True, raise RuntimeError if the algorithm didn't converge.
Returns
-------
x0 : float
Zero of `f` between `a` and `b`.
r : RootResults (present if ``full_output = True``)
Object containing information about the convergence. In particular,
``r.converged`` is True if the routine converged.
See Also
--------
multivariate local optimizers
`fmin`, `fmin_powell`, `fmin_cg`, `fmin_bfgs`, `fmin_ncg`
nonlinear least squares minimizer
`leastsq`
constrained multivariate optimizers
`fmin_l_bfgs_b`, `fmin_tnc`, `fmin_cobyla`
global optimizers
`anneal`, `basinhopping`, `brute`
local scalar minimizers
`fminbound`, `brent`, `golden`, `bracket`
n-dimensional root-finding
`fsolve`
one-dimensional root-finding
`brentq`, `brenth`, `ridder`, `bisect`, `newton`
scalar fixed-point finder
`fixed_point`
Notes
-----
`f` must be continuous. f(a) and f(b) must have opposite signs.
References
----------
.. [Brent1973]
Brent, R. P.,
*Algorithms for Minimization Without Derivatives*.
Englewood Cliffs, NJ: Prentice-Hall, 1973. Ch. 3-4.
.. [PressEtal1992]
Press, W. H.; Flannery, B. P.; Teukolsky, S. A.; and Vetterling, W. T.
*Numerical Recipes in FORTRAN: The Art of Scientific Computing*, 2nd ed.
Cambridge, England: Cambridge University Press, pp. 352-355, 1992.
Section 9.3: "Van Wijngaarden-Dekker-Brent Method."
broyden1(F, xin, iter=None, alpha=None, reduction_method='restart', max_rank=None, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, **kw)
Find a root of a function, using Broyden's first Jacobian approximation.
This method is also known as \"Broyden's good method\".
Parameters
----------
F : function(x) -> f
Function whose root to find; should take and return an array-like
object.
x0 : array_like
Initial guess for the solution
alpha : float, optional
Initial guess for the Jacobian is ``(-1/alpha)``.
reduction_method : str or tuple, optional
Method used in ensuring that the rank of the Broyden matrix
stays low. Can either be a string giving the name of the method,
or a tuple of the form ``(method, param1, param2, ...)``
that gives the name of the method and values for additional parameters.
Methods available:
- ``restart``: drop all matrix columns. Has no extra parameters.
- ``simple``: drop oldest matrix column. Has no extra parameters.
- ``svd``: keep only the most significant SVD components.
Takes an extra parameter, ``to_retain`, which determines the
number of SVD components to retain when rank reduction is done.
Default is ``max_rank - 2``.
max_rank : int, optional
Maximum rank for the Broyden matrix.
Default is infinity (ie., no rank reduction).
iter : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
verbose : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
f_tol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
f_rtol : float, optional
Relative tolerance for the residual. If omitted, not used.
x_tol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
x_rtol : float, optional
Relative minimum step size. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in the
direction given by the Jacobian approximation. Defaults to 'armijo'.
callback : function, optional
Optional callback function. It is called on every iteration as
``callback(x, f)`` where `x` is the current solution and `f`
the corresponding residual.
Returns
-------
sol : ndarray
An array (of similar array type as `x0`) containing the final solution.
Raises
------
NoConvergence
When a solution was not found.
Notes
-----
This algorithm implements the inverse Jacobian Quasi-Newton update
.. math:: H_+ = H + (dx - H df) dx^\dagger H / ( dx^\dagger H df)
which corresponds to Broyden's first Jacobian update
.. math:: J_+ = J + (df - J dx) dx^\dagger / dx^\dagger dx
References
----------
.. [vR] B.A. van der Rotten, PhD thesis,
\"A limited memory Broyden method to solve high-dimensional
systems of nonlinear equations\". Mathematisch Instituut,
Universiteit Leiden, The Netherlands (2003).
http://www.math.leidenuniv.nl/scripties/Rotten.pdf
broyden2(F, xin, iter=None, alpha=None, reduction_method='restart', max_rank=None, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, **kw)
Find a root of a function, using Broyden's second Jacobian approximation.
This method is also known as "Broyden's bad method".
Parameters
----------
F : function(x) -> f
Function whose root to find; should take and return an array-like
object.
x0 : array_like
Initial guess for the solution
alpha : float, optional
Initial guess for the Jacobian is ``(-1/alpha)``.
reduction_method : str or tuple, optional
Method used in ensuring that the rank of the Broyden matrix
stays low. Can either be a string giving the name of the method,
or a tuple of the form ``(method, param1, param2, ...)``
that gives the name of the method and values for additional parameters.
Methods available:
- ``restart``: drop all matrix columns. Has no extra parameters.
- ``simple``: drop oldest matrix column. Has no extra parameters.
- ``svd``: keep only the most significant SVD components.
Takes an extra parameter, ``to_retain`, which determines the
number of SVD components to retain when rank reduction is done.
Default is ``max_rank - 2``.
max_rank : int, optional
Maximum rank for the Broyden matrix.
Default is infinity (ie., no rank reduction).
iter : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
verbose : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
f_tol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
f_rtol : float, optional
Relative tolerance for the residual. If omitted, not used.
x_tol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
x_rtol : float, optional
Relative minimum step size. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in the
direction given by the Jacobian approximation. Defaults to 'armijo'.
callback : function, optional
Optional callback function. It is called on every iteration as
``callback(x, f)`` where `x` is the current solution and `f`
the corresponding residual.
Returns
-------
sol : ndarray
An array (of similar array type as `x0`) containing the final solution.
Raises
------
NoConvergence
When a solution was not found.
Notes
-----
This algorithm implements the inverse Jacobian Quasi-Newton update
.. math:: H_+ = H + (dx - H df) df^\dagger / ( df^\dagger df)
corresponding to Broyden's second method.
References
----------
.. [vR] B.A. van der Rotten, PhD thesis,
"A limited memory Broyden method to solve high-dimensional
systems of nonlinear equations". Mathematisch Instituut,
Universiteit Leiden, The Netherlands (2003).
http://www.math.leidenuniv.nl/scripties/Rotten.pdf
brute(func, ranges, args=(), Ns=20, full_output=0, finish=<function fmin>, disp=False)
Minimize a function over a given range by brute force.
Uses the "brute force" method, i.e. computes the function's value
at each point of a multidimensional grid of points, to find the global
minimum of the function.
Parameters
----------
func : callable
The objective function to be minimized. Must be in the
form ``f(x, *args)``, where ``x`` is the argument in
the form of a 1-D array and ``args`` is a tuple of any
additional fixed parameters needed to completely specify
the function.
ranges : tuple
Each component of the `ranges` tuple must be either a
"slice object" or a range tuple of the form ``(low, high)``.
The program uses these to create the grid of points on which
the objective function will be computed. See `Note 2` for
more detail.
args : tuple, optional
Any additional fixed parameters needed to completely specify
the function.
Ns : int, optional
Number of grid points along the axes, if not otherwise
specified. See `Note2`.
full_output : bool, optional
If True, return the evaluation grid and the objective function's
values on it.
finish : callable, optional
An optimization function that is called with the result of brute force
minimization as initial guess. `finish` should take the initial guess
as positional argument, and take `args`, `full_output` and `disp`
as keyword arguments. Use None if no "polishing" function is to be
used. See Notes for more details.
disp : bool, optional
Set to True to print convergence messages.
Returns
-------
x0 : ndarray
A 1-D array containing the coordinates of a point at which the
objective function had its minimum value. (See `Note 1` for
which point is returned.)
fval : float
Function value at the point `x0`.
grid : tuple
Representation of the evaluation grid. It has the same
length as `x0`. (Returned when `full_output` is True.)
Jout : ndarray
Function values at each point of the evaluation
grid, `i.e.`, ``Jout = func(*grid)``. (Returned
when `full_output` is True.)
See Also
--------
anneal : Another approach to seeking the global minimum of
multivariate, multimodal functions.
Notes
-----
*Note 1*: The program finds the gridpoint at which the lowest value
of the objective function occurs. If `finish` is None, that is the
point returned. When the global minimum occurs within (or not very far
outside) the grid's boundaries, and the grid is fine enough, that
point will be in the neighborhood of the gobal minimum.
However, users often employ some other optimization program to
"polish" the gridpoint values, `i.e.`, to seek a more precise
(local) minimum near `brute's` best gridpoint.
The `brute` function's `finish` option provides a convenient way to do
that. Any polishing program used must take `brute's` output as its
initial guess as a positional argument, and take `brute's` input values
for `args` and `full_output` as keyword arguments, otherwise an error
will be raised.
`brute` assumes that the `finish` function returns a tuple in the form:
``(xmin, Jmin, ... , statuscode)``, where ``xmin`` is the minimizing value
of the argument, ``Jmin`` is the minimum value of the objective function,
"..." may be some other returned values (which are not used by `brute`),
and ``statuscode`` is the status code of the `finish` program.
Note that when `finish` is not None, the values returned are those
of the `finish` program, *not* the gridpoint ones. Consequently,
while `brute` confines its search to the input grid points,
the `finish` program's results usually will not coincide with any
gridpoint, and may fall outside the grid's boundary.
*Note 2*: The grid of points is a `numpy.mgrid` object.
For `brute` the `ranges` and `Ns` inputs have the following effect.
Each component of the `ranges` tuple can be either a slice object or a
two-tuple giving a range of values, such as (0, 5). If the component is a
slice object, `brute` uses it directly. If the component is a two-tuple
range, `brute` internally converts it to a slice object that interpolates
`Ns` points from its low-value to its high-value, inclusive.
Examples
--------
We illustrate the use of `brute` to seek the global minimum of a function
of two variables that is given as the sum of a positive-definite
quadratic and two deep "Gaussian-shaped" craters. Specifically, define
the objective function `f` as the sum of three other functions,
``f = f1 + f2 + f3``. We suppose each of these has a signature
``(z, *params)``, where ``z = (x, y)``, and ``params`` and the functions
are as defined below.
>>> params = (2, 3, 7, 8, 9, 10, 44, -1, 2, 26, 1, -2, 0.5)
>>> def f1(z, *params):
... x, y = z
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
... return (a * x**2 + b * x * y + c * y**2 + d*x + e*y + f)
>>> def f2(z, *params):
... x, y = z
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
... return (-g*np.exp(-((x-h)**2 + (y-i)**2) / scale))
>>> def f3(z, *params):
... x, y = z
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
... return (-j*np.exp(-((x-k)**2 + (y-l)**2) / scale))
>>> def f(z, *params):
... x, y = z
... a, b, c, d, e, f, g, h, i, j, k, l, scale = params
... return f1(z, *params) + f2(z, *params) + f3(z, *params)
Thus, the objective function may have local minima near the minimum
of each of the three functions of which it is composed. To
use `fmin` to polish its gridpoint result, we may then continue as
follows:
>>> rranges = (slice(-4, 4, 0.25), slice(-4, 4, 0.25))
>>> from scipy import optimize
>>> resbrute = optimize.brute(f, rranges, args=params, full_output=True,
finish=optimize.fmin)
>>> resbrute[0] # global minimum
array([-1.05665192, 1.80834843])
>>> resbrute[1] # function value at global minimum
-3.4085818767
Note that if `finish` had been set to None, we would have gotten the
gridpoint [-1.0 1.75] where the rounded function value is -2.892.
check_grad(func, grad, x0, *args)
Check the correctness of a gradient function by comparing it against a
(forward) finite-difference approximation of the gradient.
Parameters
----------
func : callable func(x0,*args)
Function whose derivative is to be checked.
grad : callable grad(x0, *args)
Gradient of `func`.
x0 : ndarray
Points to check `grad` against forward difference approximation of grad
using `func`.
args : \*args, optional
Extra arguments passed to `func` and `grad`.
Returns
-------
err : float
The square root of the sum of squares (i.e. the 2-norm) of the
difference between ``grad(x0, *args)`` and the finite difference
approximation of `grad` using func at the points `x0`.
See Also
--------
approx_fprime
Notes
-----
The step size used for the finite difference approximation is
`sqrt(numpy.finfo(float).eps)`, which is approximately 1.49e-08.
Examples
--------
>>> def func(x): return x[0]**2 - 0.5 * x[1]**3
>>> def grad(x): return [2 * x[0], -1.5 * x[1]**2]
>>> check_grad(func, grad, [1.5, -1.5])
2.9802322387695312e-08
curve_fit(f, xdata, ydata, p0=None, sigma=None, **kw)
Use non-linear least squares to fit a function, f, to data.
Assumes ``ydata = f(xdata, *params) + eps``
Parameters
----------
f : callable
The model function, f(x, ...). It must take the independent
variable as the first argument and the parameters to fit as
separate remaining arguments.
xdata : An N-length sequence or an (k,N)-shaped array
for functions with k predictors.
The independent variable where the data is measured.
ydata : N-length sequence
The dependent data --- nominally f(xdata, ...)
p0 : None, scalar, or M-length sequence
Initial guess for the parameters. If None, then the initial
values will all be 1 (if the number of parameters for the function
can be determined using introspection, otherwise a ValueError
is raised).
sigma : None or N-length sequence
If not None, this vector will be used as relative weights in the
least-squares problem.
Returns
-------
popt : array
Optimal values for the parameters so that the sum of the squared error
of ``f(xdata, *popt) - ydata`` is minimized
pcov : 2d array
The estimated covariance of popt. The diagonals provide the variance
of the parameter estimate.
See Also
--------
leastsq
Notes
-----
The algorithm uses the Levenberg-Marquardt algorithm through `leastsq`.
Additional keyword arguments are passed directly to that algorithm.
Examples
--------
>>> import numpy as np
>>> from scipy.optimize import curve_fit
>>> def func(x, a, b, c):
... return a*np.exp(-b*x) + c
>>> x = np.linspace(0,4,50)
>>> y = func(x, 2.5, 1.3, 0.5)
>>> yn = y + 0.2*np.random.normal(size=len(x))
>>> popt, pcov = curve_fit(func, x, yn)
diagbroyden(F, xin, iter=None, alpha=None, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, **kw)
Find a root of a function, using diagonal Broyden Jacobian approximation.
The Jacobian approximation is derived from previous iterations, by
retaining only the diagonal of Broyden matrices.
.. warning::
This algorithm may be useful for specific problems, but whether
it will work may depend strongly on the problem.
Parameters
----------
F : function(x) -> f
Function whose root to find; should take and return an array-like
object.
x0 : array_like
Initial guess for the solution
alpha : float, optional
Initial guess for the Jacobian is (-1/alpha).
iter : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
verbose : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
f_tol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
f_rtol : float, optional
Relative tolerance for the residual. If omitted, not used.
x_tol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
x_rtol : float, optional
Relative minimum step size. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in the
direction given by the Jacobian approximation. Defaults to 'armijo'.
callback : function, optional
Optional callback function. It is called on every iteration as
``callback(x, f)`` where `x` is the current solution and `f`
the corresponding residual.
Returns
-------
sol : ndarray
An array (of similar array type as `x0`) containing the final solution.
Raises
------
NoConvergence
When a solution was not found.
excitingmixing(F, xin, iter=None, alpha=None, alphamax=1.0, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, **kw)
Find a root of a function, using a tuned diagonal Jacobian approximation.
The Jacobian matrix is diagonal and is tuned on each iteration.
.. warning::
This algorithm may be useful for specific problems, but whether
it will work may depend strongly on the problem.
Parameters
----------
F : function(x) -> f
Function whose root to find; should take and return an array-like
object.
x0 : array_like
Initial guess for the solution
alpha : float, optional
Initial Jacobian approximation is (-1/alpha).
alphamax : float, optional
The entries of the diagonal Jacobian are kept in the range
``[alpha, alphamax]``.
iter : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
verbose : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
f_tol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
f_rtol : float, optional
Relative tolerance for the residual. If omitted, not used.
x_tol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
x_rtol : float, optional
Relative minimum step size. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in the
direction given by the Jacobian approximation. Defaults to 'armijo'.
callback : function, optional
Optional callback function. It is called on every iteration as
``callback(x, f)`` where `x` is the current solution and `f`
the corresponding residual.
Returns
-------
sol : ndarray
An array (of similar array type as `x0`) containing the final solution.
Raises
------
NoConvergence
When a solution was not found.
fixed_point(func, x0, args=(), xtol=1e-08, maxiter=500)
Find a fixed point of the function.
Given a function of one or more variables and a starting point, find a
fixed-point of the function: i.e. where ``func(x0) == x0``.
Parameters
----------
func : function
Function to evaluate.
x0 : array_like
Fixed point of function.
args : tuple, optional
Extra arguments to `func`.
xtol : float, optional
Convergence tolerance, defaults to 1e-08.
maxiter : int, optional
Maximum number of iterations, defaults to 500.
Notes
-----
Uses Steffensen's Method using Aitken's ``Del^2`` convergence acceleration.
See Burden, Faires, "Numerical Analysis", 5th edition, pg. 80
Examples
--------
>>> from scipy import optimize
>>> def func(x, c1, c2):
.... return np.sqrt(c1/(x+c2))
>>> c1 = np.array([10,12.])
>>> c2 = np.array([3, 5.])
>>> optimize.fixed_point(func, [1.2, 1.3], args=(c1,c2))
array([ 1.4920333 , 1.37228132])
fmin(func, x0, args=(), xtol=0.0001, ftol=0.0001, maxiter=None, maxfun=None, full_output=0, disp=1, retall=0, callback=None)
Minimize a function using the downhill simplex algorithm.
This algorithm only uses function values, not derivatives or second
derivatives.
Parameters
----------
func : callable func(x,*args)
The objective function to be minimized.
x0 : ndarray
Initial guess.
args : tuple, optional
Extra arguments passed to func, i.e. ``f(x,*args)``.
callback : callable, optional
Called after each iteration, as callback(xk), where xk is the
current parameter vector.
xtol : float, optional
Relative error in xopt acceptable for convergence.
ftol : number, optional
Relative error in func(xopt) acceptable for convergence.
maxiter : int, optional
Maximum number of iterations to perform.
maxfun : number, optional
Maximum number of function evaluations to make.
full_output : bool, optional
Set to True if fopt and warnflag outputs are desired.
disp : bool, optional
Set to True to print convergence messages.
retall : bool, optional
Set to True to return list of solutions at each iteration.
Returns
-------
xopt : ndarray
Parameter that minimizes function.
fopt : float
Value of function at minimum: ``fopt = func(xopt)``.
iter : int
Number of iterations performed.
funcalls : int
Number of function calls made.
warnflag : int
1 : Maximum number of function evaluations made.
2 : Maximum number of iterations reached.
allvecs : list
Solution at each iteration.
See also
--------
minimize: Interface to minimization algorithms for multivariate
functions. See the 'Nelder-Mead' `method` in particular.
Notes
-----
Uses a Nelder-Mead simplex algorithm to find the minimum of function of
one or more variables.
This algorithm has a long history of successful use in applications.
But it will usually be slower than an algorithm that uses first or
second derivative information. In practice it can have poor
performance in high-dimensional problems and is not robust to
minimizing complicated functions. Additionally, there currently is no
complete theory describing when the algorithm will successfully
converge to the minimum, or how fast it will if it does.
References
----------
.. [1] Nelder, J.A. and Mead, R. (1965), "A simplex method for function
minimization", The Computer Journal, 7, pp. 308-313
.. [2] Wright, M.H. (1996), "Direct Search Methods: Once Scorned, Now
Respectable", in Numerical Analysis 1995, Proceedings of the
1995 Dundee Biennial Conference in Numerical Analysis, D.F.
Griffiths and G.A. Watson (Eds.), Addison Wesley Longman,
Harlow, UK, pp. 191-208.
fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-05, norm=inf, epsilon=1.4901161193847656e-08, maxiter=None, full_output=0, disp=1, retall=0, callback=None)
Minimize a function using the BFGS algorithm.
Parameters
----------
f : callable f(x,*args)
Objective function to be minimized.
x0 : ndarray
Initial guess.
fprime : callable f'(x,*args), optional
Gradient of f.
args : tuple, optional
Extra arguments passed to f and fprime.
gtol : float, optional
Gradient norm must be less than gtol before succesful termination.
norm : float, optional
Order of norm (Inf is max, -Inf is min)
epsilon : int or ndarray, optional
If fprime is approximated, use this value for the step size.
callback : callable, optional
An optional user-supplied function to call after each
iteration. Called as callback(xk), where xk is the
current parameter vector.
maxiter : int, optional
Maximum number of iterations to perform.
full_output : bool, optional
If True,return fopt, func_calls, grad_calls, and warnflag
in addition to xopt.
disp : bool, optional
Print convergence message if True.
retall : bool, optional
Return a list of results at each iteration if True.
Returns
-------
xopt : ndarray
Parameters which minimize f, i.e. f(xopt) == fopt.
fopt : float
Minimum value.
gopt : ndarray
Value of gradient at minimum, f'(xopt), which should be near 0.
Bopt : ndarray
Value of 1/f''(xopt), i.e. the inverse hessian matrix.
func_calls : int
Number of function_calls made.
grad_calls : int
Number of gradient calls made.
warnflag : integer
1 : Maximum number of iterations exceeded.
2 : Gradient and/or function calls not changing.
allvecs : list
Results at each iteration. Only returned if retall is True.
See also
--------
minimize: Interface to minimization algorithms for multivariate
functions. See the 'BFGS' `method` in particular.
Notes
-----
Optimize the function, f, whose gradient is given by fprime
using the quasi-Newton method of Broyden, Fletcher, Goldfarb,
and Shanno (BFGS)
References
----------
Wright, and Nocedal 'Numerical Optimization', 1999, pg. 198.
fmin_cg(f, x0, fprime=None, args=(), gtol=1e-05, norm=inf, epsilon=1.4901161193847656e-08, maxiter=None, full_output=0, disp=1, retall=0, callback=None)
Minimize a function using a nonlinear conjugate gradient algorithm.
Parameters
----------
f : callable, ``f(x, *args)``
Objective function to be minimized. Here `x` must be a 1-D array of
the variables that are to be changed in the search for a minimum, and
`args` are the other (fixed) parameters of `f`.
x0 : ndarray
A user-supplied initial estimate of `xopt`, the optimal value of `x`.
It must be a 1-D array of values.
fprime : callable, ``fprime(x, *args)``, optional
A function that returns the gradient of `f` at `x`. Here `x` and `args`
are as described above for `f`. The returned value must be a 1-D array.
Defaults to None, in which case the gradient is approximated
numerically (see `epsilon`, below).
args : tuple, optional
Parameter values passed to `f` and `fprime`. Must be supplied whenever
additional fixed parameters are needed to completely specify the
functions `f` and `fprime`.
gtol : float, optional
Stop when the norm of the gradient is less than `gtol`.
norm : float, optional
Order to use for the norm of the gradient
(``-np.Inf`` is min, ``np.Inf`` is max).
epsilon : float or ndarray, optional
Step size(s) to use when `fprime` is approximated numerically. Can be a
scalar or a 1-D array. Defaults to ``sqrt(eps)``, with eps the
floating point machine precision. Usually ``sqrt(eps)`` is about
1.5e-8.
maxiter : int, optional
Maximum number of iterations to perform. Default is ``200 * len(x0)``.
full_output : bool, optional
If True, return `fopt`, `func_calls`, `grad_calls`, and `warnflag` in
addition to `xopt`. See the Returns section below for additional
information on optional return values.
disp : bool, optional
If True, return a convergence message, followed by `xopt`.
retall : bool, optional
If True, add to the returned values the results of each iteration.
callback : callable, optional
An optional user-supplied function, called after each iteration.
Called as ``callback(xk)``, where ``xk`` is the current value of `x0`.
Returns
-------
xopt : ndarray
Parameters which minimize f, i.e. ``f(xopt) == fopt``.
fopt : float, optional
Minimum value found, f(xopt). Only returned if `full_output` is True.
func_calls : int, optional
The number of function_calls made. Only returned if `full_output`
is True.
grad_calls : int, optional
The number of gradient calls made. Only returned if `full_output` is
True.
warnflag : int, optional
Integer value with warning status, only returned if `full_output` is
True.
0 : Success.
1 : The maximum number of iterations was exceeded.
2 : Gradient and/or function calls were not changing. May indicate
that precision was lost, i.e., the routine did not converge.
allvecs : list of ndarray, optional
List of arrays, containing the results at each iteration.
Only returned if `retall` is True.
See Also
--------
minimize : common interface to all `scipy.optimize` algorithms for
unconstrained and constrained minimization of multivariate
functions. It provides an alternative way to call
``fmin_cg``, by specifying ``method='CG'``.
Notes
-----
This conjugate gradient algorithm is based on that of Polak and Ribiere
[1]_.
Conjugate gradient methods tend to work better when:
1. `f` has a unique global minimizing point, and no local minima or
other stationary points,
2. `f` is, at least locally, reasonably well approximated by a
quadratic function of the variables,
3. `f` is continuous and has a continuous gradient,
4. `fprime` is not too large, e.g., has a norm less than 1000,
5. The initial guess, `x0`, is reasonably close to `f` 's global
minimizing point, `xopt`.
References
----------
.. [1] Wright & Nocedal, "Numerical Optimization", 1999, pp. 120-122.
Examples
--------
Example 1: seek the minimum value of the expression
``a*u**2 + b*u*v + c*v**2 + d*u + e*v + f`` for given values
of the parameters and an initial guess ``(u, v) = (0, 0)``.
>>> args = (2, 3, 7, 8, 9, 10) # parameter values
>>> def f(x, *args):
... u, v = x
... a, b, c, d, e, f = args
... return a*u**2 + b*u*v + c*v**2 + d*u + e*v + f
>>> def gradf(x, *args):
... u, v = x
... a, b, c, d, e, f = args
... gu = 2*a*u + b*v + d # u-component of the gradient
... gv = b*u + 2*c*v + e # v-component of the gradient
... return np.asarray((gu, gv))
>>> x0 = np.asarray((0, 0)) # Initial guess.
>>> from scipy import optimize
>>> res1 = optimize.fmin_cg(f, x0, fprime=gradf, args=args)
>>> print 'res1 = ', res1
Optimization terminated successfully.
Current function value: 1.617021
Iterations: 2
Function evaluations: 5
Gradient evaluations: 5
res1 = [-1.80851064 -0.25531915]
Example 2: solve the same problem using the `minimize` function.
(This `myopts` dictionary shows all of the available options,
although in practice only non-default values would be needed.
The returned value will be a dictionary.)
>>> opts = {'maxiter' : None, # default value.
... 'disp' : True, # non-default value.
... 'gtol' : 1e-5, # default value.
... 'norm' : np.inf, # default value.
... 'eps' : 1.4901161193847656e-08} # default value.
>>> res2 = optimize.minimize(f, x0, jac=gradf, args=args,
... method='CG', options=opts)
Optimization terminated successfully.
Current function value: 1.617021
Iterations: 2
Function evaluations: 5
Gradient evaluations: 5
>>> res2.x # minimum found
array([-1.80851064 -0.25531915])
fmin_cobyla(func, x0, cons, args=(), consargs=None, rhobeg=1.0, rhoend=0.0001, iprint=1, maxfun=1000, disp=None)
Minimize a function using the Constrained Optimization BY Linear
Approximation (COBYLA) method. This method wraps a FORTRAN
implentation of the algorithm.
Parameters
----------
func : callable
Function to minimize. In the form func(x, \*args).
x0 : ndarray
Initial guess.
cons : sequence
Constraint functions; must all be ``>=0`` (a single function
if only 1 constraint). Each function takes the parameters `x`
as its first argument.
args : tuple
Extra arguments to pass to function.
consargs : tuple
Extra arguments to pass to constraint functions (default of None means
use same extra arguments as those passed to func).
Use ``()`` for no extra arguments.
rhobeg :
Reasonable initial changes to the variables.
rhoend :
Final accuracy in the optimization (not precisely guaranteed). This
is a lower bound on the size of the trust region.
iprint : {0, 1, 2, 3}
Controls the frequency of output; 0 implies no output. Deprecated.
disp : {0, 1, 2, 3}
Over-rides the iprint interface. Preferred.
maxfun : int
Maximum number of function evaluations.
Returns
-------
x : ndarray
The argument that minimises `f`.
See also
--------
minimize: Interface to minimization algorithms for multivariate
functions. See the 'COBYLA' `method` in particular.
Notes
-----
This algorithm is based on linear approximations to the objective
function and each constraint. We briefly describe the algorithm.
Suppose the function is being minimized over k variables. At the
jth iteration the algorithm has k+1 points v_1, ..., v_(k+1),
an approximate solution x_j, and a radius RHO_j.
(i.e. linear plus a constant) approximations to the objective
function and constraint functions such that their function values
agree with the linear approximation on the k+1 points v_1,.., v_(k+1).
This gives a linear program to solve (where the linear approximations
of the constraint functions are constrained to be non-negative).
However the linear approximations are likely only good
approximations near the current simplex, so the linear program is
given the further requirement that the solution, which
will become x_(j+1), must be within RHO_j from x_j. RHO_j only
decreases, never increases. The initial RHO_j is rhobeg and the
final RHO_j is rhoend. In this way COBYLA's iterations behave
like a trust region algorithm.
Additionally, the linear program may be inconsistent, or the
approximation may give poor improvement. For details about
how these issues are resolved, as well as how the points v_i are
updated, refer to the source code or the references below.
References
----------
Powell M.J.D. (1994), "A direct search optimization method that models
the objective and constraint functions by linear interpolation.", in
Advances in Optimization and Numerical Analysis, eds. S. Gomez and
J-P Hennart, Kluwer Academic (Dordrecht), pp. 51-67
Powell M.J.D. (1998), "Direct search algorithms for optimization
calculations", Acta Numerica 7, 287-336
Powell M.J.D. (2007), "A view of algorithms for optimization without
derivatives", Cambridge University Technical Report DAMTP 2007/NA03
Examples
--------
Minimize the objective function f(x,y) = x*y subject
to the constraints x**2 + y**2 < 1 and y > 0::
>>> def objective(x):
... return x[0]*x[1]
...
>>> def constr1(x):
... return 1 - (x[0]**2 + x[1]**2)
...
>>> def constr2(x):
... return x[1]
...
>>> fmin_cobyla(objective, [0.0, 0.1], [constr1, constr2], rhoend=1e-7)
Normal return from subroutine COBYLA
NFVALS = 64 F =-5.000000E-01 MAXCV = 1.998401E-14
X =-7.071069E-01 7.071067E-01
array([-0.70710685, 0.70710671])
The exact solution is (-sqrt(2)/2, sqrt(2)/2).
fmin_l_bfgs_b(func, x0, fprime=None, args=(), approx_grad=0, bounds=None, m=10, factr=10000000.0, pgtol=1e-05, epsilon=1e-08, iprint=-1, maxfun=15000, maxiter=15000, disp=None, callback=None)
Minimize a function func using the L-BFGS-B algorithm.
Parameters
----------
func : callable f(x,*args)
Function to minimise.
x0 : ndarray
Initial guess.
fprime : callable fprime(x,*args)
The gradient of `func`. If None, then `func` returns the function
value and the gradient (``f, g = func(x, *args)``), unless
`approx_grad` is True in which case `func` returns only ``f``.
args : sequence
Arguments to pass to `func` and `fprime`.
approx_grad : bool
Whether to approximate the gradient numerically (in which case
`func` returns only the function value).
bounds : list
``(min, max)`` pairs for each element in ``x``, defining
the bounds on that parameter. Use None for one of ``min`` or
``max`` when there is no bound in that direction.
m : int
The maximum number of variable metric corrections
used to define the limited memory matrix. (The limited memory BFGS
method does not store the full hessian but uses this many terms in an
approximation to it.)
factr : float
The iteration stops when
``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
where ``eps`` is the machine precision, which is automatically
generated by the code. Typical values for `factr` are: 1e12 for
low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
high accuracy.
pgtol : float
The iteration will stop when
``max{|proj g_i | i = 1, ..., n} <= pgtol``
where ``pg_i`` is the i-th component of the projected gradient.
epsilon : float
Step size used when `approx_grad` is True, for numerically
calculating the gradient
iprint : int
Controls the frequency of output. ``iprint < 0`` means no output;
``iprint == 0`` means write messages to stdout; ``iprint > 1`` in
addition means write logging information to a file named
``iterate.dat`` in the current working directory.
disp : int, optional
If zero, then no output. If a positive number, then this over-rides
`iprint` (i.e., `iprint` gets the value of `disp`).
maxfun : int
Maximum number of function evaluations.
maxiter : int
Maximum number of iterations.
callback : callable, optional
Called after each iteration, as ``callback(xk)``, where ``xk`` is the
current parameter vector.
Returns
-------
x : array_like
Estimated position of the minimum.
f : float
Value of `func` at the minimum.
d : dict
Information dictionary.
* d['warnflag'] is
- 0 if converged,
- 1 if too many function evaluations or too many iterations,
- 2 if stopped for another reason, given in d['task']
* d['grad'] is the gradient at the minimum (should be 0 ish)
* d['funcalls'] is the number of function calls made.
* d['nit'] is the number of iterations.
See also
--------
minimize: Interface to minimization algorithms for multivariate
functions. See the 'L-BFGS-B' `method` in particular.
Notes
-----
License of L-BFGS-B (FORTRAN code):
The version included here (in fortran code) is 3.0
(released April 25, 2011). It was written by Ciyou Zhu, Richard Byrd,
and Jorge Nocedal <nocedal@ece.nwu.edu>. It carries the following
condition for use:
This software is freely available, but we expect that all publications
describing work using this software, or all commercial products using it,
quote at least one of the references given below. This software is released
under the BSD License.
References
----------
* R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
Constrained Optimization, (1995), SIAM Journal on Scientific and
Statistical Computing, 16, 5, pp. 1190-1208.
* C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
FORTRAN routines for large scale bound constrained optimization (1997),
ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
* J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
FORTRAN routines for large scale bound constrained optimization (2011),
ACM Transactions on Mathematical Software, 38, 1.
fmin_ncg(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-05, epsilon=1.4901161193847656e-08, maxiter=None, full_output=0, disp=1, retall=0, callback=None)
Unconstrained minimization of a function using the Newton-CG method.
Parameters
----------
f : callable ``f(x, *args)``
Objective function to be minimized.
x0 : ndarray
Initial guess.
fprime : callable ``f'(x, *args)``
Gradient of f.
fhess_p : callable ``fhess_p(x, p, *args)``, optional
Function which computes the Hessian of f times an
arbitrary vector, p.
fhess : callable ``fhess(x, *args)``, optional
Function to compute the Hessian matrix of f.
args : tuple, optional
Extra arguments passed to f, fprime, fhess_p, and fhess
(the same set of extra arguments is supplied to all of
these functions).
epsilon : float or ndarray, optional
If fhess is approximated, use this value for the step size.
callback : callable, optional
An optional user-supplied function which is called after
each iteration. Called as callback(xk), where xk is the
current parameter vector.
avextol : float, optional
Convergence is assumed when the average relative error in
the minimizer falls below this amount.
maxiter : int, optional
Maximum number of iterations to perform.
full_output : bool, optional
If True, return the optional outputs.
disp : bool, optional
If True, print convergence message.
retall : bool, optional
If True, return a list of results at each iteration.
Returns
-------
xopt : ndarray
Parameters which minimize f, i.e. ``f(xopt) == fopt``.
fopt : float
Value of the function at xopt, i.e. ``fopt = f(xopt)``.
fcalls : int
Number of function calls made.
gcalls : int
Number of gradient calls made.
hcalls : int
Number of hessian calls made.
warnflag : int
Warnings generated by the algorithm.
1 : Maximum number of iterations exceeded.
allvecs : list
The result at each iteration, if retall is True (see below).
See also
--------
minimize: Interface to minimization algorithms for multivariate
functions. See the 'Newton-CG' `method` in particular.
Notes
-----
Only one of `fhess_p` or `fhess` need to be given. If `fhess`
is provided, then `fhess_p` will be ignored. If neither `fhess`
nor `fhess_p` is provided, then the hessian product will be
approximated using finite differences on `fprime`. `fhess_p`
must compute the hessian times an arbitrary vector. If it is not
given, finite-differences on `fprime` are used to compute
it.
Newton-CG methods are also called truncated Newton methods. This
function differs from scipy.optimize.fmin_tnc because
1. scipy.optimize.fmin_ncg is written purely in python using numpy
and scipy while scipy.optimize.fmin_tnc calls a C function.
2. scipy.optimize.fmin_ncg is only for unconstrained minimization
while scipy.optimize.fmin_tnc is for unconstrained minimization
or box constrained minimization. (Box constraints give
lower and upper bounds for each variable seperately.)
References
----------
Wright & Nocedal, 'Numerical Optimization', 1999, pg. 140.
fmin_powell(func, x0, args=(), xtol=0.0001, ftol=0.0001, maxiter=None, maxfun=None, full_output=0, disp=1, retall=0, callback=None, direc=None)
Minimize a function using modified Powell's method. This method
only uses function values, not derivatives.
Parameters
----------
func : callable f(x,*args)
Objective function to be minimized.
x0 : ndarray
Initial guess.
args : tuple, optional
Extra arguments passed to func.
callback : callable, optional
An optional user-supplied function, called after each
iteration. Called as ``callback(xk)``, where ``xk`` is the
current parameter vector.
direc : ndarray, optional
Initial direction set.
xtol : float, optional
Line-search error tolerance.
ftol : float, optional
Relative error in ``func(xopt)`` acceptable for convergence.
maxiter : int, optional
Maximum number of iterations to perform.
maxfun : int, optional
Maximum number of function evaluations to make.
full_output : bool, optional
If True, fopt, xi, direc, iter, funcalls, and
warnflag are returned.
disp : bool, optional
If True, print convergence messages.
retall : bool, optional
If True, return a list of the solution at each iteration.
Returns
-------
xopt : ndarray
Parameter which minimizes `func`.
fopt : number
Value of function at minimum: ``fopt = func(xopt)``.
direc : ndarray
Current direction set.
iter : int
Number of iterations.
funcalls : int
Number of function calls made.
warnflag : int
Integer warning flag:
1 : Maximum number of function evaluations.
2 : Maximum number of iterations.
allvecs : list
List of solutions at each iteration.
See also
--------
minimize: Interface to unconstrained minimization algorithms for
multivariate functions. See the 'Powell' `method` in particular.
Notes
-----
Uses a modification of Powell's method to find the minimum of
a function of N variables. Powell's method is a conjugate
direction method.
The algorithm has two loops. The outer loop
merely iterates over the inner loop. The inner loop minimizes
over each current direction in the direction set. At the end
of the inner loop, if certain conditions are met, the direction
that gave the largest decrease is dropped and replaced with
the difference between the current estiamted x and the estimated
x from the beginning of the inner-loop.
The technical conditions for replacing the direction of greatest
increase amount to checking that
1. No further gain can be made along the direction of greatest increase
from that iteration.
2. The direction of greatest increase accounted for a large sufficient
fraction of the decrease in the function value from that iteration of
the inner loop.
References
----------
Powell M.J.D. (1964) An efficient method for finding the minimum of a
function of several variables without calculating derivatives,
Computer Journal, 7 (2):155-162.
Press W., Teukolsky S.A., Vetterling W.T., and Flannery B.P.:
Numerical Recipes (any edition), Cambridge University Press
fmin_slsqp(func, x0, eqcons=[], f_eqcons=None, ieqcons=[], f_ieqcons=None, bounds=[], fprime=None, fprime_eqcons=None, fprime_ieqcons=None, args=(), iter=100, acc=1e-06, iprint=1, disp=None, full_output=0, epsilon=1.4901161193847656e-08)
Minimize a function using Sequential Least SQuares Programming
Python interface function for the SLSQP Optimization subroutine
originally implemented by Dieter Kraft.
Parameters
----------
func : callable f(x,*args)
Objective function.
x0 : 1-D ndarray of float
Initial guess for the independent variable(s).
eqcons : list
A list of functions of length n such that
eqcons[j](x,*args) == 0.0 in a successfully optimized
problem.
f_eqcons : callable f(x,*args)
Returns a 1-D array in which each element must equal 0.0 in a
successfully optimized problem. If f_eqcons is specified,
eqcons is ignored.
ieqcons : list
A list of functions of length n such that
ieqcons[j](x,*args) >= 0.0 in a successfully optimized
problem.
f_ieqcons : callable f(x,*args)
Returns a 1-D ndarray in which each element must be greater or
equal to 0.0 in a successfully optimized problem. If
f_ieqcons is specified, ieqcons is ignored.
bounds : list
A list of tuples specifying the lower and upper bound
for each independent variable [(xl0, xu0),(xl1, xu1),...]
Infinite values will be interpreted as large floating values.
fprime : callable `f(x,*args)`
A function that evaluates the partial derivatives of func.
fprime_eqcons : callable `f(x,*args)`
A function of the form `f(x, *args)` that returns the m by n
array of equality constraint normals. If not provided,
the normals will be approximated. The array returned by
fprime_eqcons should be sized as ( len(eqcons), len(x0) ).
fprime_ieqcons : callable `f(x,*args)`
A function of the form `f(x, *args)` that returns the m by n
array of inequality constraint normals. If not provided,
the normals will be approximated. The array returned by
fprime_ieqcons should be sized as ( len(ieqcons), len(x0) ).
args : sequence
Additional arguments passed to func and fprime.
iter : int
The maximum number of iterations.
acc : float
Requested accuracy.
iprint : int
The verbosity of fmin_slsqp :
* iprint <= 0 : Silent operation
* iprint == 1 : Print summary upon completion (default)
* iprint >= 2 : Print status of each iterate and summary
disp : int
Over-rides the iprint interface (preferred).
full_output : bool
If False, return only the minimizer of func (default).
Otherwise, output final objective function and summary
information.
epsilon : float
The step size for finite-difference derivative estimates.
Returns
-------
out : ndarray of float
The final minimizer of func.
fx : ndarray of float, if full_output is true
The final value of the objective function.
its : int, if full_output is true
The number of iterations.
imode : int, if full_output is true
The exit mode from the optimizer (see below).
smode : string, if full_output is true
Message describing the exit mode from the optimizer.
See also
--------
minimize: Interface to minimization algorithms for multivariate
functions. See the 'SLSQP' `method` in particular.
Notes
-----
Exit modes are defined as follows ::
-1 : Gradient evaluation required (g & a)
0 : Optimization terminated successfully.
1 : Function evaluation required (f & c)
2 : More equality constraints than independent variables
3 : More than 3*n iterations in LSQ subproblem
4 : Inequality constraints incompatible
5 : Singular matrix E in LSQ subproblem
6 : Singular matrix C in LSQ subproblem
7 : Rank-deficient equality constraint subproblem HFTI
8 : Positive directional derivative for linesearch
9 : Iteration limit exceeded
Examples
--------
Examples are given :ref:`in the tutorial <tutorial-sqlsp>`.
fmin_tnc(func, x0, fprime=None, args=(), approx_grad=0, bounds=None, epsilon=1e-08, scale=None, offset=None, messages=15, maxCGit=-1, maxfun=None, eta=-1, stepmx=0, accuracy=0, fmin=0, ftol=-1, xtol=-1, pgtol=-1, rescale=-1, disp=None, callback=None)
Minimize a function with variables subject to bounds, using
gradient information in a truncated Newton algorithm. This
method wraps a C implementation of the algorithm.
Parameters
----------
func : callable ``func(x, *args)``
Function to minimize. Must do one of:
1. Return f and g, where f is the value of the function and g its
gradient (a list of floats).
2. Return the function value but supply gradient function
seperately as `fprime`.
3. Return the function value and set ``approx_grad=True``.
If the function returns None, the minimization
is aborted.
x0 : array_like
Initial estimate of minimum.
fprime : callable ``fprime(x, *args)``
Gradient of `func`. If None, then either `func` must return the
function value and the gradient (``f,g = func(x, *args)``)
or `approx_grad` must be True.
args : tuple
Arguments to pass to function.
approx_grad : bool
If true, approximate the gradient numerically.
bounds : list
(min, max) pairs for each element in x0, defining the
bounds on that parameter. Use None or +/-inf for one of
min or max when there is no bound in that direction.
epsilon : float
Used if approx_grad is True. The stepsize in a finite
difference approximation for fprime.
scale : array_like
Scaling factors to apply to each variable. If None, the
factors are up-low for interval bounded variables and
1+|x| for the others. Defaults to None.
offset : array_like
Value to substract from each variable. If None, the
offsets are (up+low)/2 for interval bounded variables
and x for the others.
messages :
Bit mask used to select messages display during
minimization values defined in the MSGS dict. Defaults to
MGS_ALL.
disp : int
Integer interface to messages. 0 = no message, 5 = all messages
maxCGit : int
Maximum number of hessian*vector evaluations per main
iteration. If maxCGit == 0, the direction chosen is
-gradient if maxCGit < 0, maxCGit is set to
max(1,min(50,n/2)). Defaults to -1.
maxfun : int
Maximum number of function evaluation. if None, maxfun is
set to max(100, 10*len(x0)). Defaults to None.
eta : float
Severity of the line search. if < 0 or > 1, set to 0.25.
Defaults to -1.
stepmx : float
Maximum step for the line search. May be increased during
call. If too small, it will be set to 10.0. Defaults to 0.
accuracy : float
Relative precision for finite difference calculations. If
<= machine_precision, set to sqrt(machine_precision).
Defaults to 0.
fmin : float
Minimum function value estimate. Defaults to 0.
ftol : float
Precision goal for the value of f in the stoping criterion.
If ftol < 0.0, ftol is set to 0.0 defaults to -1.
xtol : float
Precision goal for the value of x in the stopping
criterion (after applying x scaling factors). If xtol <
0.0, xtol is set to sqrt(machine_precision). Defaults to
-1.
pgtol : float
Precision goal for the value of the projected gradient in
the stopping criterion (after applying x scaling factors).
If pgtol < 0.0, pgtol is set to 1e-2 * sqrt(accuracy).
Setting it to 0.0 is not recommended. Defaults to -1.
rescale : float
Scaling factor (in log10) used to trigger f value
rescaling. If 0, rescale at each iteration. If a large
value, never rescale. If < 0, rescale is set to 1.3.
callback : callable, optional
Called after each iteration, as callback(xk), where xk is the
current parameter vector.
Returns
-------
x : ndarray
The solution.
nfeval : int
The number of function evaluations.
rc : int
Return code as defined in the RCSTRINGS dict.
See also
--------
minimize: Interface to minimization algorithms for multivariate
functions. See the 'TNC' `method` in particular.
Notes
-----
The underlying algorithm is truncated Newton, also called
Newton Conjugate-Gradient. This method differs from
scipy.optimize.fmin_ncg in that
1. It wraps a C implementation of the algorithm
2. It allows each variable to be given an upper and lower bound.
The algorithm incoporates the bound constraints by determining
the descent direction as in an unconstrained truncated Newton,
but never taking a step-size large enough to leave the space
of feasible x's. The algorithm keeps track of a set of
currently active constraints, and ignores them when computing
the minimum allowable step size. (The x's associated with the
active constraint are kept fixed.) If the maximum allowable
step size is zero then a new constraint is added. At the end
of each iteration one of the constraints may be deemed no
longer active and removed. A constraint is considered
no longer active is if it is currently active
but the gradient for that variable points inward from the
constraint. The specific constraint removed is the one
associated with the variable of largest index whose
constraint is no longer active.
References
----------
Wright S., Nocedal J. (2006), 'Numerical Optimization'
Nash S.G. (1984), "Newton-Type Minimization Via the Lanczos Method",
SIAM Journal of Numerical Analysis 21, pp. 770-778
fminbound(func, x1, x2, args=(), xtol=1e-05, maxfun=500, full_output=0, disp=1)
Bounded minimization for scalar functions.
Parameters
----------
func : callable f(x,*args)
Objective function to be minimized (must accept and return scalars).
x1, x2 : float or array scalar
The optimization bounds.
args : tuple, optional
Extra arguments passed to function.
xtol : float, optional
The convergence tolerance.
maxfun : int, optional
Maximum number of function evaluations allowed.
full_output : bool, optional
If True, return optional outputs.
disp : int, optional
If non-zero, print messages.
0 : no message printing.
1 : non-convergence notification messages only.
2 : print a message on convergence too.
3 : print iteration results.
Returns
-------
xopt : ndarray
Parameters (over given interval) which minimize the
objective function.
fval : number
The function value at the minimum point.
ierr : int
An error flag (0 if converged, 1 if maximum number of
function calls reached).
numfunc : int
The number of function calls made.
See also
--------
minimize_scalar: Interface to minimization algorithms for scalar
univariate functions. See the 'Bounded' `method` in particular.
Notes
-----
Finds a local minimizer of the scalar function `func` in the
interval x1 < xopt < x2 using Brent's method. (See `brent`
for auto-bracketing).
fsolve(func, x0, args=(), fprime=None, full_output=0, col_deriv=0, xtol=1.49012e-08, maxfev=0, band=None, epsfcn=None, factor=100, diag=None)
Find the roots of a function.
Return the roots of the (non-linear) equations defined by
``func(x) = 0`` given a starting estimate.
Parameters
----------
func : callable ``f(x, *args)``
A function that takes at least one (possibly vector) argument.
x0 : ndarray
The starting estimate for the roots of ``func(x) = 0``.
args : tuple, optional
Any extra arguments to `func`.
fprime : callable(x), optional
A function to compute the Jacobian of `func` with derivatives
across the rows. By default, the Jacobian will be estimated.
full_output : bool, optional
If True, return optional outputs.
col_deriv : bool, optional
Specify whether the Jacobian function computes derivatives down
the columns (faster, because there is no transpose operation).
xtol : float
The calculation will terminate if the relative error between two
consecutive iterates is at most `xtol`.
maxfev : int, optional
The maximum number of calls to the function. If zero, then
``100*(N+1)`` is the maximum where N is the number of elements
in `x0`.
band : tuple, optional
If set to a two-sequence containing the number of sub- and
super-diagonals within the band of the Jacobi matrix, the
Jacobi matrix is considered banded (only for ``fprime=None``).
epsfcn : float, optional
A suitable step length for the forward-difference
approximation of the Jacobian (for ``fprime=None``). If
`epsfcn` is less than the machine precision, it is assumed
that the relative errors in the functions are of the order of
the machine precision.
factor : float, optional
A parameter determining the initial step bound
(``factor * || diag * x||``). Should be in the interval
``(0.1, 100)``.
diag : sequence, optional
N positive entries that serve as a scale factors for the
variables.
Returns
-------
x : ndarray
The solution (or the result of the last iteration for
an unsuccessful call).
infodict : dict
A dictionary of optional outputs with the keys:
``nfev``
number of function calls
``njev``
number of Jacobian calls
``fvec``
function evaluated at the output
``fjac``
the orthogonal matrix, q, produced by the QR
factorization of the final approximate Jacobian
matrix, stored column wise
``r``
upper triangular matrix produced by QR factorization
of the same matrix
``qtf``
the vector ``(transpose(q) * fvec)``
ier : int
An integer flag. Set to 1 if a solution was found, otherwise refer
to `mesg` for more information.
mesg : str
If no solution is found, `mesg` details the cause of failure.
See Also
--------
root : Interface to root finding algorithms for multivariate
functions. See the 'hybr' `method` in particular.
Notes
-----
``fsolve`` is a wrapper around MINPACK's hybrd and hybrj algorithms.
golden(func, args=(), brack=None, tol=1.4901161193847656e-08, full_output=0)
Return the minimum of a function of one variable.
Given a function of one variable and a possible bracketing interval,
return the minimum of the function isolated to a fractional precision of
tol.
Parameters
----------
func : callable func(x,*args)
Objective function to minimize.
args : tuple
Additional arguments (if present), passed to func.
brack : tuple
Triple (a,b,c), where (a<b<c) and func(b) <
func(a),func(c). If bracket consists of two numbers (a,
c), then they are assumed to be a starting interval for a
downhill bracket search (see `bracket`); it doesn't always
mean that obtained solution will satisfy a<=x<=c.
tol : float
x tolerance stop criterion
full_output : bool
If True, return optional outputs.
See also
--------
minimize_scalar: Interface to minimization algorithms for scalar
univariate functions. See the 'Golden' `method` in particular.
Notes
-----
Uses analog of bisection method to decrease the bracketed
interval.
leastsq(func, x0, args=(), Dfun=None, full_output=0, col_deriv=0, ftol=1.49012e-08, xtol=1.49012e-08, gtol=0.0, maxfev=0, epsfcn=None, factor=100, diag=None)
Minimize the sum of squares of a set of equations.
::
x = arg min(sum(func(y)**2,axis=0))
y
Parameters
----------
func : callable
should take at least one (possibly length N vector) argument and
returns M floating point numbers.
x0 : ndarray
The starting estimate for the minimization.
args : tuple
Any extra arguments to func are placed in this tuple.
Dfun : callable
A function or method to compute the Jacobian of func with derivatives
across the rows. If this is None, the Jacobian will be estimated.
full_output : bool
non-zero to return all optional outputs.
col_deriv : bool
non-zero to specify that the Jacobian function computes derivatives
down the columns (faster, because there is no transpose operation).
ftol : float
Relative error desired in the sum of squares.
xtol : float
Relative error desired in the approximate solution.
gtol : float
Orthogonality desired between the function vector and the columns of
the Jacobian.
maxfev : int
The maximum number of calls to the function. If zero, then 100*(N+1) is
the maximum where N is the number of elements in x0.
epsfcn : float
A suitable step length for the forward-difference approximation of the
Jacobian (for Dfun=None). If epsfcn is less than the machine precision,
it is assumed that the relative errors in the functions are of the
order of the machine precision.
factor : float
A parameter determining the initial step bound
(``factor * || diag * x||``). Should be in interval ``(0.1, 100)``.
diag : sequence
N positive entries that serve as a scale factors for the variables.
Returns
-------
x : ndarray
The solution (or the result of the last iteration for an unsuccessful
call).
cov_x : ndarray
Uses the fjac and ipvt optional outputs to construct an
estimate of the jacobian around the solution. None if a
singular matrix encountered (indicates very flat curvature in
some direction). This matrix must be multiplied by the
residual variance to get the covariance of the
parameter estimates -- see curve_fit.
infodict : dict
a dictionary of optional outputs with the key s:
``nfev``
The number of function calls
``fvec``
The function evaluated at the output
``fjac``
A permutation of the R matrix of a QR
factorization of the final approximate
Jacobian matrix, stored column wise.
Together with ipvt, the covariance of the
estimate can be approximated.
``ipvt``
An integer array of length N which defines
a permutation matrix, p, such that
fjac*p = q*r, where r is upper triangular
with diagonal elements of nonincreasing
magnitude. Column j of p is column ipvt(j)
of the identity matrix.
``qtf``
The vector (transpose(q) * fvec).
mesg : str
A string message giving information about the cause of failure.
ier : int
An integer flag. If it is equal to 1, 2, 3 or 4, the solution was
found. Otherwise, the solution was not found. In either case, the
optional output variable 'mesg' gives more information.
Notes
-----
"leastsq" is a wrapper around MINPACK's lmdif and lmder algorithms.
cov_x is a Jacobian approximation to the Hessian of the least squares
objective function.
This approximation assumes that the objective function is based on the
difference between some observed target data (ydata) and a (non-linear)
function of the parameters `f(xdata, params)` ::
func(params) = ydata - f(xdata, params)
so that the objective function is ::
min sum((ydata - f(xdata, params))**2, axis=0)
params
line_search = line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None, old_old_fval=None, args=(), c1=0.0001, c2=0.9, amax=50)
Find alpha that satisfies strong Wolfe conditions.
Parameters
----------
f : callable f(x,*args)
Objective function.
myfprime : callable f'(x,*args)
Objective function gradient.
xk : ndarray
Starting point.
pk : ndarray
Search direction.
gfk : ndarray, optional
Gradient value for x=xk (xk being the current parameter
estimate). Will be recomputed if omitted.
old_fval : float, optional
Function value for x=xk. Will be recomputed if omitted.
old_old_fval : float, optional
Function value for the point preceding x=xk
args : tuple, optional
Additional arguments passed to objective function.
c1 : float, optional
Parameter for Armijo condition rule.
c2 : float, optional
Parameter for curvature condition rule.
Returns
-------
alpha0 : float
Alpha for which ``x_new = x0 + alpha * pk``.
fc : int
Number of function evaluations made.
gc : int
Number of gradient evaluations made.
Notes
-----
Uses the line search algorithm to enforce strong Wolfe
conditions. See Wright and Nocedal, 'Numerical Optimization',
1999, pg. 59-60.
For the zoom phase it uses an algorithm by [...].
linearmixing(F, xin, iter=None, alpha=None, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, **kw)
Find a root of a function, using a scalar Jacobian approximation.
.. warning::
This algorithm may be useful for specific problems, but whether
it will work may depend strongly on the problem.
Parameters
----------
F : function(x) -> f
Function whose root to find; should take and return an array-like
object.
x0 : array_like
Initial guess for the solution
alpha : float, optional
The Jacobian approximation is (-1/alpha).
iter : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
verbose : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
f_tol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
f_rtol : float, optional
Relative tolerance for the residual. If omitted, not used.
x_tol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
x_rtol : float, optional
Relative minimum step size. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in the
direction given by the Jacobian approximation. Defaults to 'armijo'.
callback : function, optional
Optional callback function. It is called on every iteration as
``callback(x, f)`` where `x` is the current solution and `f`
the corresponding residual.
Returns
-------
sol : ndarray
An array (of similar array type as `x0`) containing the final solution.
Raises
------
NoConvergence
When a solution was not found.
minimize(fun, x0, args=(), method='BFGS', jac=None, hess=None, hessp=None, bounds=None, constraints=(), tol=None, callback=None, options=None)
Minimization of scalar function of one or more variables.
.. versionadded:: 0.11.0
Parameters
----------
fun : callable
Objective function.
x0 : ndarray
Initial guess.
args : tuple, optional
Extra arguments passed to the objective function and its
derivatives (Jacobian, Hessian).
method : str, optional
Type of solver. Should be one of
- 'Nelder-Mead'
- 'Powell'
- 'CG'
- 'BFGS'
- 'Newton-CG'
- 'Anneal'
- 'L-BFGS-B'
- 'TNC'
- 'COBYLA'
- 'SLSQP'
- 'dogleg'
- 'trust-ncg'
jac : bool or callable, optional
Jacobian of objective function. Only for CG, BFGS, Newton-CG,
dogleg, trust-ncg.
If `jac` is a Boolean and is True, `fun` is assumed to return the
value of Jacobian along with the objective function. If False, the
Jacobian will be estimated numerically.
`jac` can also be a callable returning the Jacobian of the
objective. In this case, it must accept the same arguments as `fun`.
hess, hessp : callable, optional
Hessian of objective function or Hessian of objective function
times an arbitrary vector p. Only for Newton-CG,
dogleg, trust-ncg.
Only one of `hessp` or `hess` needs to be given. If `hess` is
provided, then `hessp` will be ignored. If neither `hess` nor
`hessp` is provided, then the hessian product will be approximated
using finite differences on `jac`. `hessp` must compute the Hessian
times an arbitrary vector.
bounds : sequence, optional
Bounds for variables (only for L-BFGS-B, TNC and SLSQP).
``(min, max)`` pairs for each element in ``x``, defining
the bounds on that parameter. Use None for one of ``min`` or
``max`` when there is no bound in that direction.
constraints : dict or sequence of dict, optional
Constraints definition (only for COBYLA and SLSQP).
Each constraint is defined in a dictionary with fields:
type : str
Constraint type: 'eq' for equality, 'ineq' for inequality.
fun : callable
The function defining the constraint.
jac : callable, optional
The Jacobian of `fun` (only for SLSQP).
args : sequence, optional
Extra arguments to be passed to the function and Jacobian.
Equality constraint means that the constraint function result is to
be zero whereas inequality means that it is to be non-negative.
Note that COBYLA only supports inequality constraints.
tol : float, optional
Tolerance for termination. For detailed control, use solver-specific
options.
options : dict, optional
A dictionary of solver options. All methods accept the following
generic options:
maxiter : int
Maximum number of iterations to perform.
disp : bool
Set to True to print convergence messages.
For method-specific options, see `show_options('minimize', method)`.
callback : callable, optional
Called after each iteration, as ``callback(xk)``, where ``xk`` is the
current parameter vector.
Returns
-------
res : Result
The optimization result represented as a ``Result`` object.
Important attributes are: ``x`` the solution array, ``success`` a
Boolean flag indicating if the optimizer exited successfully and
``message`` which describes the cause of the termination. See
`Result` for a description of other attributes.
See also
--------
minimize_scalar: Interface to minimization algorithms for scalar
univariate functions.
Notes
-----
This section describes the available solvers that can be selected by the
'method' parameter. The default method is *BFGS*.
**Unconstrained minimization**
Method *Nelder-Mead* uses the Simplex algorithm [1]_, [2]_. This
algorithm has been successful in many applications but other algorithms
using the first and/or second derivatives information might be preferred
for their better performances and robustness in general.
Method *Powell* is a modification of Powell's method [3]_, [4]_ which
is a conjugate direction method. It performs sequential one-dimensional
minimizations along each vector of the directions set (`direc` field in
`options` and `info`), which is updated at each iteration of the main
minimization loop. The function need not be differentiable, and no
derivatives are taken.
Method *CG* uses a nonlinear conjugate gradient algorithm by Polak and
Ribiere, a variant of the Fletcher-Reeves method described in [5]_ pp.
120-122. Only the first derivatives are used.
Method *BFGS* uses the quasi-Newton method of Broyden, Fletcher,
Goldfarb, and Shanno (BFGS) [5]_ pp. 136. It uses the first derivatives
only. BFGS has proven good performance even for non-smooth
optimizations. This method also returns an approximation of the Hessian
inverse, stored as `hess_inv` in the Result object.
Method *Newton-CG* uses a Newton-CG algorithm [5]_ pp. 168 (also known
as the truncated Newton method). It uses a CG method to the compute the
search direction. See also *TNC* method for a box-constrained
minimization with a similar algorithm.
Method *Anneal* uses simulated annealing, which is a probabilistic
metaheuristic algorithm for global optimization. It uses no derivative
information from the function being optimized.
Method *dogleg* uses the dog-leg trust-region algorithm [5]_
for unconstrained minimization. This algorithm requires the gradient
and Hessian; furthermore the Hessian is required to be positive definite.
Method *trust-ncg* uses the Newton conjugate gradient trust-region
algorithm [5]_ for unconstrained minimization. This algorithm requires
the gradient and either the Hessian or a function that computes the
product of the Hessian with a given vector.
**Constrained minimization**
Method *L-BFGS-B* uses the L-BFGS-B algorithm [6]_, [7]_ for bound
constrained minimization.
Method *TNC* uses a truncated Newton algorithm [5]_, [8]_ to minimize a
function with variables subject to bounds. This algorithm uses
gradient information; it is also called Newton Conjugate-Gradient. It
differs from the *Newton-CG* method described above as it wraps a C
implementation and allows each variable to be given upper and lower
bounds.
Method *COBYLA* uses the Constrained Optimization BY Linear
Approximation (COBYLA) method [9]_, [10]_, [11]_. The algorithm is
based on linear approximations to the objective function and each
constraint. The method wraps a FORTRAN implementation of the algorithm.
Method *SLSQP* uses Sequential Least SQuares Programming to minimize a
function of several variables with any combination of bounds, equality
and inequality constraints. The method wraps the SLSQP Optimization
subroutine originally implemented by Dieter Kraft [12]_. Note that the
wrapper handles infinite values in bounds by converting them into large
floating values.
References
----------
.. [1] Nelder, J A, and R Mead. 1965. A Simplex Method for Function
Minimization. The Computer Journal 7: 308-13.
.. [2] Wright M H. 1996. Direct search methods: Once scorned, now
respectable, in Numerical Analysis 1995: Proceedings of the 1995
Dundee Biennial Conference in Numerical Analysis (Eds. D F
Griffiths and G A Watson). Addison Wesley Longman, Harlow, UK.
191-208.
.. [3] Powell, M J D. 1964. An efficient method for finding the minimum of
a function of several variables without calculating derivatives. The
Computer Journal 7: 155-162.
.. [4] Press W, S A Teukolsky, W T Vetterling and B P Flannery.
Numerical Recipes (any edition), Cambridge University Press.
.. [5] Nocedal, J, and S J Wright. 2006. Numerical Optimization.
Springer New York.
.. [6] Byrd, R H and P Lu and J. Nocedal. 1995. A Limited Memory
Algorithm for Bound Constrained Optimization. SIAM Journal on
Scientific and Statistical Computing 16 (5): 1190-1208.
.. [7] Zhu, C and R H Byrd and J Nocedal. 1997. L-BFGS-B: Algorithm
778: L-BFGS-B, FORTRAN routines for large scale bound constrained
optimization. ACM Transactions on Mathematical Software 23 (4):
550-560.
.. [8] Nash, S G. Newton-Type Minimization Via the Lanczos Method.
1984. SIAM Journal of Numerical Analysis 21: 770-778.
.. [9] Powell, M J D. A direct search optimization method that models
the objective and constraint functions by linear interpolation.
1994. Advances in Optimization and Numerical Analysis, eds. S. Gomez
and J-P Hennart, Kluwer Academic (Dordrecht), 51-67.
.. [10] Powell M J D. Direct search algorithms for optimization
calculations. 1998. Acta Numerica 7: 287-336.
.. [11] Powell M J D. A view of algorithms for optimization without
derivatives. 2007.Cambridge University Technical Report DAMTP
2007/NA03
.. [12] Kraft, D. A software package for sequential quadratic
programming. 1988. Tech. Rep. DFVLR-FB 88-28, DLR German Aerospace
Center -- Institute for Flight Mechanics, Koln, Germany.
Examples
--------
Let us consider the problem of minimizing the Rosenbrock function. This
function (and its respective derivatives) is implemented in `rosen`
(resp. `rosen_der`, `rosen_hess`) in the `scipy.optimize`.
>>> from scipy.optimize import minimize, rosen, rosen_der
A simple application of the *Nelder-Mead* method is:
>>> x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
>>> res = minimize(rosen, x0, method='Nelder-Mead')
>>> res.x
[ 1. 1. 1. 1. 1.]
Now using the *BFGS* algorithm, using the first derivative and a few
options:
>>> res = minimize(rosen, x0, method='BFGS', jac=rosen_der,
... options={'gtol': 1e-6, 'disp': True})
Optimization terminated successfully.
Current function value: 0.000000
Iterations: 52
Function evaluations: 64
Gradient evaluations: 64
>>> res.x
[ 1. 1. 1. 1. 1.]
>>> print res.message
Optimization terminated successfully.
>>> res.hess
[[ 0.00749589 0.01255155 0.02396251 0.04750988 0.09495377]
[ 0.01255155 0.02510441 0.04794055 0.09502834 0.18996269]
[ 0.02396251 0.04794055 0.09631614 0.19092151 0.38165151]
[ 0.04750988 0.09502834 0.19092151 0.38341252 0.7664427 ]
[ 0.09495377 0.18996269 0.38165151 0.7664427 1.53713523]]
Next, consider a minimization problem with several constraints (namely
Example 16.4 from [5]_). The objective function is:
>>> fun = lambda x: (x[0] - 1)**2 + (x[1] - 2.5)**2
There are three constraints defined as:
>>> cons = ({'type': 'ineq', 'fun': lambda x: x[0] - 2 * x[1] + 2},
... {'type': 'ineq', 'fun': lambda x: -x[0] - 2 * x[1] + 6},
... {'type': 'ineq', 'fun': lambda x: -x[0] + 2 * x[1] + 2})
And variables must be positive, hence the following bounds:
>>> bnds = ((0, None), (0, None))
The optimization problem is solved using the SLSQP method as:
>>> res = minimize(fun, (2, 0), method='SLSQP', bounds=bnds,
... constraints=cons)
It should converge to the theoretical solution (1.4 ,1.7).
minimize_scalar(fun, bracket=None, bounds=None, args=(), method='brent', tol=None, options=None)
Minimization of scalar function of one variable.
.. versionadded:: 0.11.0
Parameters
----------
fun : callable
Objective function.
Scalar function, must return a scalar.
bracket : sequence, optional
For methods 'brent' and 'golden', `bracket` defines the bracketing
interval and can either have three items `(a, b, c)` so that `a < b
< c` and `fun(b) < fun(a), fun(c)` or two items `a` and `c` which
are assumed to be a starting interval for a downhill bracket search
(see `bracket`); it doesn't always mean that the obtained solution
will satisfy `a <= x <= c`.
bounds : sequence, optional
For method 'bounded', `bounds` is mandatory and must have two items
corresponding to the optimization bounds.
args : tuple, optional
Extra arguments passed to the objective function.
method : str, optional
Type of solver. Should be one of
- 'Brent'
- 'Bounded'
- 'Golden'
tol : float, optional
Tolerance for termination. For detailed control, use solver-specific
options.
options : dict, optional
A dictionary of solver options.
xtol : float
Relative error in solution `xopt` acceptable for
convergence.
maxiter : int
Maximum number of iterations to perform.
disp : bool
Set to True to print convergence messages.
Returns
-------
res : Result
The optimization result represented as a ``Result`` object.
Important attributes are: ``x`` the solution array, ``success`` a
Boolean flag indicating if the optimizer exited successfully and
``message`` which describes the cause of the termination. See
`Result` for a description of other attributes.
See also
--------
minimize: Interface to minimization algorithms for scalar multivariate
functions.
Notes
-----
This section describes the available solvers that can be selected by the
'method' parameter. The default method is *Brent*.
Method *Brent* uses Brent's algorithm to find a local minimum.
The algorithm uses inverse parabolic interpolation when possible to
speed up convergence of the golden section method.
Method *Golden* uses the golden section search technique. It uses
analog of the bisection method to decrease the bracketed interval. It
is usually preferable to use the *Brent* method.
Method *Bounded* can perform bounded minimization. It uses the Brent
method to find a local minimum in the interval x1 < xopt < x2.
Examples
--------
Consider the problem of minimizing the following function.
>>> def f(x):
... return (x - 2) * x * (x + 2)**2
Using the *Brent* method, we find the local minimum as:
>>> from scipy.optimize import minimize_scalar
>>> res = minimize_scalar(f)
>>> res.x
1.28077640403
Using the *Bounded* method, we find a local minimum with specified
bounds as:
>>> res = minimize_scalar(f, bounds=(-3, -1), method='bounded')
>>> res.x
-2.0000002026
newton(func, x0, fprime=None, args=(), tol=1.48e-08, maxiter=50, fprime2=None)
Find a zero using the Newton-Raphson or secant method.
Find a zero of the function `func` given a nearby starting point `x0`.
The Newton-Raphson method is used if the derivative `fprime` of `func`
is provided, otherwise the secant method is used. If the second order
derivate `fprime2` of `func` is provided, parabolic Halley's method
is used.
Parameters
----------
func : function
The function whose zero is wanted. It must be a function of a
single variable of the form f(x,a,b,c...), where a,b,c... are extra
arguments that can be passed in the `args` parameter.
x0 : float
An initial estimate of the zero that should be somewhere near the
actual zero.
fprime : function, optional
The derivative of the function when available and convenient. If it
is None (default), then the secant method is used.
args : tuple, optional
Extra arguments to be used in the function call.
tol : float, optional
The allowable error of the zero value.
maxiter : int, optional
Maximum number of iterations.
fprime2 : function, optional
The second order derivative of the function when available and
convenient. If it is None (default), then the normal Newton-Raphson
or the secant method is used. If it is given, parabolic Halley's
method is used.
Returns
-------
zero : float
Estimated location where function is zero.
See Also
--------
brentq, brenth, ridder, bisect
fsolve : find zeroes in n dimensions.
Notes
-----
The convergence rate of the Newton-Raphson method is quadratic,
the Halley method is cubic, and the secant method is
sub-quadratic. This means that if the function is well behaved
the actual error in the estimated zero is approximately the square
(cube for Halley) of the requested tolerance up to roundoff
error. However, the stopping criterion used here is the step size
and there is no guarantee that a zero has been found. Consequently
the result should be verified. Safer algorithms are brentq,
brenth, ridder, and bisect, but they all require that the root
first be bracketed in an interval where the function changes
sign. The brentq algorithm is recommended for general use in one
dimensional problems when such an interval has been found.
newton_krylov(F, xin, iter=None, rdiff=None, method='lgmres', inner_maxiter=20, inner_M=None, outer_k=10, verbose=False, maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None, tol_norm=None, line_search='armijo', callback=None, **kw)
Find a root of a function, using Krylov approximation for inverse Jacobian.
This method is suitable for solving large-scale problems.
Parameters
----------
F : function(x) -> f
Function whose root to find; should take and return an array-like
object.
x0 : array_like
Initial guess for the solution
rdiff : float, optional
Relative step size to use in numerical differentiation.
method : {'lgmres', 'gmres', 'bicgstab', 'cgs', 'minres'} or function
Krylov method to use to approximate the Jacobian.
Can be a string, or a function implementing the same interface as
the iterative solvers in `scipy.sparse.linalg`.
The default is `scipy.sparse.linalg.lgmres`.
inner_M : LinearOperator or InverseJacobian
Preconditioner for the inner Krylov iteration.
Note that you can use also inverse Jacobians as (adaptive)
preconditioners. For example,
>>> jac = BroydenFirst()
>>> kjac = KrylovJacobian(inner_M=jac.inverse).
If the preconditioner has a method named 'update', it will be called
as ``update(x, f)`` after each nonlinear step, with ``x`` giving
the current point, and ``f`` the current function value.
inner_tol, inner_maxiter, ...
Parameters to pass on to the \"inner\" Krylov solver.
See `scipy.sparse.linalg.gmres` for details.
outer_k : int, optional
Size of the subspace kept across LGMRES nonlinear iterations.
See `scipy.sparse.linalg.lgmres` for details.
iter : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
verbose : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
f_tol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
f_rtol : float, optional
Relative tolerance for the residual. If omitted, not used.
x_tol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
x_rtol : float, optional
Relative minimum step size. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in the
direction given by the Jacobian approximation. Defaults to 'armijo'.
callback : function, optional
Optional callback function. It is called on every iteration as
``callback(x, f)`` where `x` is the current solution and `f`
the corresponding residual.
Returns
-------
sol : ndarray
An array (of similar array type as `x0`) containing the final solution.
Raises
------
NoConvergence
When a solution was not found.
See Also
--------
scipy.sparse.linalg.gmres
scipy.sparse.linalg.lgmres
Notes
-----
This function implements a Newton-Krylov solver. The basic idea is
to compute the inverse of the Jacobian with an iterative Krylov
method. These methods require only evaluating the Jacobian-vector
products, which are conveniently approximated by numerical
differentiation:
.. math:: J v \approx (f(x + \omega*v/|v|) - f(x)) / \omega
Due to the use of iterative matrix inverses, these methods can
deal with large nonlinear problems.
Scipy's `scipy.sparse.linalg` module offers a selection of Krylov
solvers to choose from. The default here is `lgmres`, which is a
variant of restarted GMRES iteration that reuses some of the
information obtained in the previous Newton steps to invert
Jacobians in subsequent steps.
For a review on Newton-Krylov methods, see for example [KK]_,
and for the LGMRES sparse inverse method, see [BJM]_.
References
----------
.. [KK] D.A. Knoll and D.E. Keyes, J. Comp. Phys. 193, 357 (2003).
.. [BJM] A.H. Baker and E.R. Jessup and T. Manteuffel,
SIAM J. Matrix Anal. Appl. 26, 962 (2005).
nnls(A, b)
Solve ``argmin_x || Ax - b ||_2`` for ``x>=0``. This is a wrapper
for a FORTAN non-negative least squares solver.
Parameters
----------
A : ndarray
Matrix ``A`` as shown above.
b : ndarray
Right-hand side vector.
Returns
-------
x : ndarray
Solution vector.
rnorm : float
The residual, ``|| Ax-b ||_2``.
Notes
-----
The FORTRAN code was published in the book below. The algorithm
is an active set method. It solves the KKT (Karush-Kuhn-Tucker)
conditions for the non-negative least squares problem.
References
----------
Lawson C., Hanson R.J., (1987) Solving Least Squares Problems, SIAM
ridder(f, a, b, args=(), xtol=1e-12, rtol=4.4408920985006262e-16, maxiter=100, full_output=False, disp=True)
Find a root of a function in an interval.
Parameters
----------
f : function
Python function returning a number. f must be continuous, and f(a) and
f(b) must have opposite signs.
a : number
One end of the bracketing interval [a,b].
b : number
The other end of the bracketing interval [a,b].
xtol : number, optional
The routine converges when a root is known to lie within xtol of the
value return. Should be >= 0. The routine modifies this to take into
account the relative precision of doubles.
rtol : number, optional
The routine converges when a root is known to lie within `rtol` times
the value returned of the value returned. Should be >= 0. Defaults to
``np.finfo(float).eps * 2``.
maxiter : number, optional
if convergence is not achieved in maxiter iterations, and error is
raised. Must be >= 0.
args : tuple, optional
containing extra arguments for the function `f`.
`f` is called by ``apply(f, (x)+args)``.
full_output : bool, optional
If `full_output` is False, the root is returned. If `full_output` is
True, the return value is ``(x, r)``, where `x` is the root, and `r` is
a RootResults object.
disp : bool, optional
If True, raise RuntimeError if the algorithm didn't converge.
Returns
-------
x0 : float
Zero of `f` between `a` and `b`.
r : RootResults (present if ``full_output = True``)
Object containing information about the convergence.
In particular, ``r.converged`` is True if the routine converged.
See Also
--------
brentq, brenth, bisect, newton : one-dimensional root-finding
fixed_point : scalar fixed-point finder
Notes
-----
Uses [Ridders1979]_ method to find a zero of the function `f` between the
arguments `a` and `b`. Ridders' method is faster than bisection, but not
generally as fast as the Brent rountines. [Ridders1979]_ provides the
classic description and source of the algorithm. A description can also be
found in any recent edition of Numerical Recipes.
The routine used here diverges slightly from standard presentations in
order to be a bit more careful of tolerance.
References
----------
.. [Ridders1979]
Ridders, C. F. J. "A New Algorithm for Computing a
Single Root of a Real Continuous Function."
IEEE Trans. Circuits Systems 26, 979-980, 1979.
root(fun, x0, args=(), method='hybr', jac=None, tol=None, callback=None, options=None)
Find a root of a vector function.
.. versionadded:: 0.11.0
Parameters
----------
fun : callable
A vector function to find a root of.
x0 : ndarray
Initial guess.
args : tuple, optional
Extra arguments passed to the objective function and its Jacobian.
method : str, optional
Type of solver. Should be one of
- 'hybr'
- 'lm'
- 'broyden1'
- 'broyden2'
- 'anderson'
- 'linearmixing'
- 'diagbroyden'
- 'excitingmixing'
- 'krylov'
jac : bool or callable, optional
If `jac` is a Boolean and is True, `fun` is assumed to return the
value of Jacobian along with the objective function. If False, the
Jacobian will be estimated numerically.
`jac` can also be a callable returning the Jacobian of `fun`. In
this case, it must accept the same arguments as `fun`.
tol : float, optional
Tolerance for termination. For detailed control, use solver-specific
options.
callback : function, optional
Optional callback function. It is called on every iteration as
``callback(x, f)`` where `x` is the current solution and `f`
the corresponding residual. For all methods but 'hybr' and 'lm'.
options : dict, optional
A dictionary of solver options. E.g. `xtol` or `maxiter`, see
``show_options('root', method)`` for details.
Returns
-------
sol : Result
The solution represented as a ``Result`` object.
Important attributes are: ``x`` the solution array, ``success`` a
Boolean flag indicating if the algorithm exited successfully and
``message`` which describes the cause of the termination. See
`Result` for a description of other attributes.
Notes
-----
This section describes the available solvers that can be selected by the
'method' parameter. The default method is *hybr*.
Method *hybr* uses a modification of the Powell hybrid method as
implemented in MINPACK [1]_.
Method *lm* solves the system of nonlinear equations in a least squares
sense using a modification of the Levenberg-Marquardt algorithm as
implemented in MINPACK [1]_.
Methods *broyden1*, *broyden2*, *anderson*, *linearmixing*,
*diagbroyden*, *excitingmixing*, *krylov* are inexact Newton methods,
with backtracking or full line searches [2]_. Each method corresponds
to a particular Jacobian approximations. See `nonlin` for details.
- Method *broyden1* uses Broyden's first Jacobian approximation, it is
known as Broyden's good method.
- Method *broyden2* uses Broyden's second Jacobian approximation, it
is known as Broyden's bad method.
- Method *anderson* uses (extended) Anderson mixing.
- Method *Krylov* uses Krylov approximation for inverse Jacobian. It
is suitable for large-scale problem.
- Method *diagbroyden* uses diagonal Broyden Jacobian approximation.
- Method *linearmixing* uses a scalar Jacobian approximation.
- Method *excitingmixing* uses a tuned diagonal Jacobian
approximation.
.. warning::
The algorithms implemented for methods *diagbroyden*,
*linearmixing* and *excitingmixing* may be useful for specific
problems, but whether they will work may depend strongly on the
problem.
References
----------
.. [1] More, Jorge J., Burton S. Garbow, and Kenneth E. Hillstrom.
1980. User Guide for MINPACK-1.
.. [2] C. T. Kelley. 1995. Iterative Methods for Linear and Nonlinear
Equations. Society for Industrial and Applied Mathematics.
<http://www.siam.org/books/kelley/>
Examples
--------
The following functions define a system of nonlinear equations and its
jacobian.
>>> def fun(x):
... return [x[0] + 0.5 * (x[0] - x[1])**3 - 1.0,
... 0.5 * (x[1] - x[0])**3 + x[1]]
>>> def jac(x):
... return np.array([[1 + 1.5 * (x[0] - x[1])**2,
... -1.5 * (x[0] - x[1])**2],
... [-1.5 * (x[1] - x[0])**2,
... 1 + 1.5 * (x[1] - x[0])**2]])
A solution can be obtained as follows.
>>> from scipy import optimize
>>> sol = optimize.root(fun, [0, 0], jac=jac, method='hybr')
>>> sol.x
array([ 0.8411639, 0.1588361])
rosen(x)
The Rosenbrock function.
The function computed is::
sum(100.0*(x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0
Parameters
----------
x : array_like
1-D array of points at which the Rosenbrock function is to be computed.
Returns
-------
f : float
The value of the Rosenbrock function.
See Also
--------
rosen_der, rosen_hess, rosen_hess_prod
rosen_der(x)
The derivative (i.e. gradient) of the Rosenbrock function.
Parameters
----------
x : array_like
1-D array of points at which the derivative is to be computed.
Returns
-------
rosen_der : (N,) ndarray
The gradient of the Rosenbrock function at `x`.
See Also
--------
rosen, rosen_hess, rosen_hess_prod
rosen_hess(x)
The Hessian matrix of the Rosenbrock function.
Parameters
----------
x : array_like
1-D array of points at which the Hessian matrix is to be computed.
Returns
-------
rosen_hess : ndarray
The Hessian matrix of the Rosenbrock function at `x`.
See Also
--------
rosen, rosen_der, rosen_hess_prod
rosen_hess_prod(x, p)
Product of the Hessian matrix of the Rosenbrock function with a vector.
Parameters
----------
x : array_like
1-D array of points at which the Hessian matrix is to be computed.
p : array_like
1-D array, the vector to be multiplied by the Hessian matrix.
Returns
-------
rosen_hess_prod : ndarray
The Hessian matrix of the Rosenbrock function at `x` multiplied
by the vector `p`.
See Also
--------
rosen, rosen_der, rosen_hess
show_options(solver, method=None)
Show documentation for additional options of optimization solvers.
These are method-specific options that can be supplied through the
``options`` dict.
Parameters
----------
solver : str
Type of optimization solver. One of {`minimize`, `root`}.
method : str, optional
If not given, shows all methods of the specified solver. Otherwise,
show only the options for the specified method. Valid values
corresponds to methods' names of respective solver (e.g. 'BFGS' for
'minimize').
Notes
-----
** minimize options
* BFGS options:
gtol : float
Gradient norm must be less than `gtol` before successful
termination.
norm : float
Order of norm (Inf is max, -Inf is min).
eps : float or ndarray
If `jac` is approximated, use this value for the step size.
* Nelder-Mead options:
xtol : float
Relative error in solution `xopt` acceptable for convergence.
ftol : float
Relative error in ``fun(xopt)`` acceptable for convergence.
maxfev : int
Maximum number of function evaluations to make.
* Newton-CG options:
xtol : float
Average relative error in solution `xopt` acceptable for
convergence.
eps : float or ndarray
If `jac` is approximated, use this value for the step size.
* CG options:
gtol : float
Gradient norm must be less than `gtol` before successful
termination.
norm : float
Order of norm (Inf is max, -Inf is min).
eps : float or ndarray
If `jac` is approximated, use this value for the step size.
* Powell options:
xtol : float
Relative error in solution `xopt` acceptable for convergence.
ftol : float
Relative error in ``fun(xopt)`` acceptable for convergence.
maxfev : int
Maximum number of function evaluations to make.
direc : ndarray
Initial set of direction vectors for the Powell method.
* Anneal options:
ftol : float
Relative error in ``fun(x)`` acceptable for convergence.
schedule : str
Annealing schedule to use. One of: 'fast', 'cauchy' or
'boltzmann'.
T0 : float
Initial Temperature (estimated as 1.2 times the largest
cost-function deviation over random points in the range).
Tf : float
Final goal temperature.
maxfev : int
Maximum number of function evaluations to make.
maxaccept : int
Maximum changes to accept.
boltzmann : float
Boltzmann constant in acceptance test (increase for less
stringent test at each temperature).
learn_rate : float
Scale constant for adjusting guesses.
quench, m, n : float
Parameters to alter fast_sa schedule.
lower, upper : float or ndarray
Lower and upper bounds on `x`.
dwell : int
The number of times to search the space at each temperature.
* L-BFGS-B options:
ftol : float
The iteration stops when ``(f^k -
f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
gtol : float
The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
<= gtol`` where ``pg_i`` is the i-th component of the
projected gradient.
maxcor : int
The maximum number of variable metric corrections used to
define the limited memory matrix. (The limited memory BFGS
method does not store the full hessian but uses this many terms
in an approximation to it.)
maxiter : int
Maximum number of function evaluations.
* TNC options:
ftol : float
Precision goal for the value of f in the stoping criterion.
If ftol < 0.0, ftol is set to 0.0 defaults to -1.
xtol : float
Precision goal for the value of x in the stopping
criterion (after applying x scaling factors). If xtol <
0.0, xtol is set to sqrt(machine_precision). Defaults to
-1.
gtol : float
Precision goal for the value of the projected gradient in
the stopping criterion (after applying x scaling factors).
If gtol < 0.0, gtol is set to 1e-2 * sqrt(accuracy).
Setting it to 0.0 is not recommended. Defaults to -1.
scale : list of floats
Scaling factors to apply to each variable. If None, the
factors are up-low for interval bounded variables and
1+|x] fo the others. Defaults to None
offset : float
Value to substract from each variable. If None, the
offsets are (up+low)/2 for interval bounded variables
and x for the others.
maxCGit : int
Maximum number of hessian*vector evaluations per main
iteration. If maxCGit == 0, the direction chosen is
-gradient if maxCGit < 0, maxCGit is set to
max(1,min(50,n/2)). Defaults to -1.
maxiter : int
Maximum number of function evaluation. if None, `maxiter` is
set to max(100, 10*len(x0)). Defaults to None.
eta : float
Severity of the line search. if < 0 or > 1, set to 0.25.
Defaults to -1.
stepmx : float
Maximum step for the line search. May be increased during
call. If too small, it will be set to 10.0. Defaults to 0.
accuracy : float
Relative precision for finite difference calculations. If
<= machine_precision, set to sqrt(machine_precision).
Defaults to 0.
minfev : float
Minimum function value estimate. Defaults to 0.
rescale : float
Scaling factor (in log10) used to trigger f value
rescaling. If 0, rescale at each iteration. If a large
value, never rescale. If < 0, rescale is set to 1.3.
* COBYLA options:
tol : float
Final accuracy in the optimization (not precisely guaranteed).
This is a lower bound on the size of the trust region.
rhobeg : float
Reasonable initial changes to the variables.
maxfev : int
Maximum number of function evaluations.
* SLSQP options:
ftol : float
Precision goal for the value of f in the stopping criterion.
eps : float
Step size used for numerical approximation of the jacobian.
maxiter : int
Maximum number of iterations.
* dogleg options:
initial_trust_radius : float
Initial trust-region radius.
max_trust_radius : float
Maximum value of the trust-region radius. No steps that are longer
than this value will be proposed.
eta : float
Trust region related acceptance stringency for proposed steps.
gtol : float
Gradient norm must be less than `gtol` before successful
termination.
* trust-ncg options:
see dogleg options.
** root options
* hybrd options:
col_deriv : bool
Specify whether the Jacobian function computes derivatives down
the columns (faster, because there is no transpose operation).
xtol : float
The calculation will terminate if the relative error between
two consecutive iterates is at most `xtol`.
maxfev : int
The maximum number of calls to the function. If zero, then
``100*(N+1)`` is the maximum where N is the number of elements
in `x0`.
band : sequence
If set to a two-sequence containing the number of sub- and
super-diagonals within the band of the Jacobi matrix, the
Jacobi matrix is considered banded (only for ``fprime=None``).
epsfcn : float
A suitable step length for the forward-difference approximation
of the Jacobian (for ``fprime=None``). If `epsfcn` is less than
the machine precision, it is assumed that the relative errors
in the functions are of the order of the machine precision.
factor : float
A parameter determining the initial step bound (``factor * ||
diag * x||``). Should be in the interval ``(0.1, 100)``.
diag : sequence
N positive entries that serve as a scale factors for the
variables.
* LM options:
col_deriv : bool
non-zero to specify that the Jacobian function computes derivatives
down the columns (faster, because there is no transpose operation).
ftol : float
Relative error desired in the sum of squares.
xtol : float
Relative error desired in the approximate solution.
gtol : float
Orthogonality desired between the function vector and the columns
of the Jacobian.
maxiter : int
The maximum number of calls to the function. If zero, then
100*(N+1) is the maximum where N is the number of elements in x0.
epsfcn : float
A suitable step length for the forward-difference approximation of
the Jacobian (for Dfun=None). If epsfcn is less than the machine
precision, it is assumed that the relative errors in the functions
are of the order of the machine precision.
factor : float
A parameter determining the initial step bound
(``factor * || diag * x||``). Should be in interval ``(0.1, 100)``.
diag : sequence
N positive entries that serve as a scale factors for the variables.
* Broyden1 options:
nit : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
disp : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
ftol : float, optional
Relative tolerance for the residual. If omitted, not used.
fatol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
xtol : float, optional
Relative minimum step size. If omitted, not used.
xatol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in
the direction given by the Jacobian approximation. Defaults to
'armijo'.
jac_options : dict, optional
Options for the respective Jacobian approximation.
alpha : float, optional
Initial guess for the Jacobian is (-1/alpha).
reduction_method : str or tuple, optional
Method used in ensuring that the rank of the Broyden
matrix stays low. Can either be a string giving the
name of the method, or a tuple of the form ``(method,
param1, param2, ...)`` that gives the name of the
method and values for additional parameters.
Methods available:
- ``restart``: drop all matrix columns. Has no
extra parameters.
- ``simple``: drop oldest matrix column. Has no
extra parameters.
- ``svd``: keep only the most significant SVD
components.
Extra parameters:
- ``to_retain`: number of SVD components to
retain when rank reduction is done.
Default is ``max_rank - 2``.
max_rank : int, optional
Maximum rank for the Broyden matrix.
Default is infinity (ie., no rank reduction).
* Broyden2 options:
nit : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
disp : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
ftol : float, optional
Relative tolerance for the residual. If omitted, not used.
fatol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
xtol : float, optional
Relative minimum step size. If omitted, not used.
xatol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in
the direction given by the Jacobian approximation. Defaults to
'armijo'.
jac_options : dict, optional
Options for the respective Jacobian approximation.
alpha : float, optional
Initial guess for the Jacobian is (-1/alpha).
reduction_method : str or tuple, optional
Method used in ensuring that the rank of the Broyden
matrix stays low. Can either be a string giving the
name of the method, or a tuple of the form ``(method,
param1, param2, ...)`` that gives the name of the
method and values for additional parameters.
Methods available:
- ``restart``: drop all matrix columns. Has no
extra parameters.
- ``simple``: drop oldest matrix column. Has no
extra parameters.
- ``svd``: keep only the most significant SVD
components.
Extra parameters:
- ``to_retain`: number of SVD components to
retain when rank reduction is done.
Default is ``max_rank - 2``.
max_rank : int, optional
Maximum rank for the Broyden matrix.
Default is infinity (ie., no rank reduction).
* Anderson options:
nit : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
disp : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
ftol : float, optional
Relative tolerance for the residual. If omitted, not used.
fatol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
xtol : float, optional
Relative minimum step size. If omitted, not used.
xatol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in
the direction given by the Jacobian approximation. Defaults to
'armijo'.
jac_options : dict, optional
Options for the respective Jacobian approximation.
alpha : float, optional
Initial guess for the Jacobian is (-1/alpha).
M : float, optional
Number of previous vectors to retain. Defaults to 5.
w0 : float, optional
Regularization parameter for numerical stability.
Compared to unity, good values of the order of 0.01.
* LinearMixing options:
nit : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
disp : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
ftol : float, optional
Relative tolerance for the residual. If omitted, not used.
fatol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
xtol : float, optional
Relative minimum step size. If omitted, not used.
xatol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in
the direction given by the Jacobian approximation. Defaults to
'armijo'.
jac_options : dict, optional
Options for the respective Jacobian approximation.
alpha : float, optional
initial guess for the jacobian is (-1/alpha).
* DiagBroyden options:
nit : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
disp : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
ftol : float, optional
Relative tolerance for the residual. If omitted, not used.
fatol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
xtol : float, optional
Relative minimum step size. If omitted, not used.
xatol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in
the direction given by the Jacobian approximation. Defaults to
'armijo'.
jac_options : dict, optional
Options for the respective Jacobian approximation.
alpha : float, optional
initial guess for the jacobian is (-1/alpha).
* ExcitingMixing options:
nit : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
disp : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
ftol : float, optional
Relative tolerance for the residual. If omitted, not used.
fatol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
xtol : float, optional
Relative minimum step size. If omitted, not used.
xatol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in
the direction given by the Jacobian approximation. Defaults to
'armijo'.
jac_options : dict, optional
Options for the respective Jacobian approximation.
alpha : float, optional
Initial Jacobian approximation is (-1/alpha).
alphamax : float, optional
The entries of the diagonal Jacobian are kept in the range
``[alpha, alphamax]``.
* Krylov options:
nit : int, optional
Number of iterations to make. If omitted (default), make as many
as required to meet tolerances.
disp : bool, optional
Print status to stdout on every iteration.
maxiter : int, optional
Maximum number of iterations to make. If more are needed to
meet convergence, `NoConvergence` is raised.
ftol : float, optional
Relative tolerance for the residual. If omitted, not used.
fatol : float, optional
Absolute tolerance (in max-norm) for the residual.
If omitted, default is 6e-6.
xtol : float, optional
Relative minimum step size. If omitted, not used.
xatol : float, optional
Absolute minimum step size, as determined from the Jacobian
approximation. If the step size is smaller than this, optimization
is terminated as successful. If omitted, not used.
tol_norm : function(vector) -> scalar, optional
Norm to use in convergence check. Default is the maximum norm.
line_search : {None, 'armijo' (default), 'wolfe'}, optional
Which type of a line search to use to determine the step size in
the direction given by the Jacobian approximation. Defaults to
'armijo'.
jac_options : dict, optional
Options for the respective Jacobian approximation.
rdiff : float, optional
Relative step size to use in numerical differentiation.
method : {'lgmres', 'gmres', 'bicgstab', 'cgs', 'minres'} or
function
Krylov method to use to approximate the Jacobian.
Can be a string, or a function implementing the same
interface as the iterative solvers in
`scipy.sparse.linalg`.
The default is `scipy.sparse.linalg.lgmres`.
inner_M : LinearOperator or InverseJacobian
Preconditioner for the inner Krylov iteration.
Note that you can use also inverse Jacobians as (adaptive)
preconditioners. For example,
>>> jac = BroydenFirst()
>>> kjac = KrylovJacobian(inner_M=jac.inverse).
If the preconditioner has a method named 'update', it will
be called as ``update(x, f)`` after each nonlinear step,
with ``x`` giving the current point, and ``f`` the current
function value.
inner_tol, inner_maxiter, ...
Parameters to pass on to the "inner" Krylov solver.
See `scipy.sparse.linalg.gmres` for details.
outer_k : int, optional
Size of the subspace kept across LGMRES nonlinear
iterations.
See `scipy.sparse.linalg.lgmres` for details.
DATA
__all__ = ['OptimizeWarning', 'Result', 'absolute_import', 'anderson',...
absolute_import = _Feature((2, 5, 0, 'alpha', 1), (3, 0, 0, 'alpha', 0...
division = _Feature((2, 2, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0), 8192...
print_function = _Feature((2, 6, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0)...
In [26]:
help(optimize.fmin)
Help on function fmin in module scipy.optimize.optimize:
fmin(func, x0, args=(), xtol=0.0001, ftol=0.0001, maxiter=None, maxfun=None, full_output=0, disp=1, retall=0, callback=None)
Minimize a function using the downhill simplex algorithm.
This algorithm only uses function values, not derivatives or second
derivatives.
Parameters
----------
func : callable func(x,*args)
The objective function to be minimized.
x0 : ndarray
Initial guess.
args : tuple, optional
Extra arguments passed to func, i.e. ``f(x,*args)``.
callback : callable, optional
Called after each iteration, as callback(xk), where xk is the
current parameter vector.
xtol : float, optional
Relative error in xopt acceptable for convergence.
ftol : number, optional
Relative error in func(xopt) acceptable for convergence.
maxiter : int, optional
Maximum number of iterations to perform.
maxfun : number, optional
Maximum number of function evaluations to make.
full_output : bool, optional
Set to True if fopt and warnflag outputs are desired.
disp : bool, optional
Set to True to print convergence messages.
retall : bool, optional
Set to True to return list of solutions at each iteration.
Returns
-------
xopt : ndarray
Parameter that minimizes function.
fopt : float
Value of function at minimum: ``fopt = func(xopt)``.
iter : int
Number of iterations performed.
funcalls : int
Number of function calls made.
warnflag : int
1 : Maximum number of function evaluations made.
2 : Maximum number of iterations reached.
allvecs : list
Solution at each iteration.
See also
--------
minimize: Interface to minimization algorithms for multivariate
functions. See the 'Nelder-Mead' `method` in particular.
Notes
-----
Uses a Nelder-Mead simplex algorithm to find the minimum of function of
one or more variables.
This algorithm has a long history of successful use in applications.
But it will usually be slower than an algorithm that uses first or
second derivative information. In practice it can have poor
performance in high-dimensional problems and is not robust to
minimizing complicated functions. Additionally, there currently is no
complete theory describing when the algorithm will successfully
converge to the minimum, or how fast it will if it does.
References
----------
.. [1] Nelder, J.A. and Mead, R. (1965), "A simplex method for function
minimization", The Computer Journal, 7, pp. 308-313
.. [2] Wright, M.H. (1996), "Direct Search Methods: Once Scorned, Now
Respectable", in Numerical Analysis 1995, Proceedings of the
1995 Dundee Biennial Conference in Numerical Analysis, D.F.
Griffiths and G.A. Watson (Eds.), Addison Wesley Longman,
Harlow, UK, pp. 191-208.
In [31]:
def f_test(x):
return np.dot(x,x)
In [156]:
optimize.fmin(f_test, np.array([1,2]), xtol = 1.E-10)
Optimization terminated successfully.
Current function value: 0.000000
Iterations: 85
Function evaluations: 164
Out[156]:
array([ 3.31355231e-11, -1.82983207e-11])
In [30]:
# (y_rand - A * sin(b*x))**2
In [34]:
y_rand_cut = y_rand[:10]
In [35]:
x_cut = x[:10]
In [157]:
def objective_func(A):
return sum((y_rand - A[0] * sin(A[1] * x))**2)
In [176]:
A_fit = optimize.fmin_cg(objective_func, np.array([1,0.9]))
Warning: Desired error not necessarily achieved due to precision loss.
Current function value: 84.718225
Iterations: 1
Function evaluations: 32
Gradient evaluations: 5
In [177]:
A_fit
Out[177]:
array([ 0.96965634, 1.00018575])
In [178]:
objective_func(A_fit)
Out[178]:
84.718225273137918
In [179]:
plb.plot(x, y_rand, 'b', linestyle = ':')
plb.plot(x, A_fit[0] * sin(A_fit[1] * x), 'r')
Out[179]:
[<matplotlib.lines.Line2D at 0x10b1d18d0>]
In [ ]:
# Exercise:
# Generalize the code in fitting_sine_wave.py
# to generate sample data with a phase shift
#
# y = Amp * sin(freq * (x + delta))
#
# and to fit to a curve of this form. In other
# words, you will have three parameters to be
# optimized: Amp, freq, delta, rather than just
# the first two.
Content source: mattphotonman/SWC-Yale-06-20-2014
Similar notebooks: