<h2 align="center" style="color:gray">Mike McKerns</h2>
<h3 align="center" style="color:gray">California Institute of Technology; the UQ Foundation</h3>
In [2]:
%%file global_search.py
"""
Uses Ensemble Solvers to provide 'pseudo-global' search.
"""
# the ensemble solvers
from mystic.solvers import BuckshotSolver, LatticeSolver
# the local solvers
from mystic.solvers import PowellDirectionalSolver, NelderMeadSimplexSolver
# cost function
from mystic.models import griewangk as model
# if available, use a multiprocessing worker pool
try:
from pathos.multiprocessing import ProcessingPool as Pool
except ImportError:
from mystic.python import PythonSerial as Pool
# tools
from mystic.termination import VTR, ChangeOverGeneration as COG
from mystic.termination import NormalizedChangeOverGeneration as NCOG
from mystic.monitors import LoggingMonitor, VerboseMonitor, Monitor
def solve(cost, bounds, sprayer, seeker, stop, npts, _map, id=None, disp=True):
    # configure monitor
    stepmon = LoggingMonitor(1)
    # get dimensions
    ndim = len(bounds)
    _min, _max = zip(*bounds)
    # solve with ensemble solver
    solver = sprayer(ndim, npts)
    solver.id = id
    solver.SetNestedSolver(seeker)
    solver.SetMapper(_map)
    solver.SetGenerationMonitor(stepmon)
    solver.SetStrictRanges(min=_min, max=_max)
    solver.Solve(cost, stop, disp=disp)
    return solver

def print_results(solver, tol=8):
    for _solver in solver._allSolvers:
        bestSol = tuple(round(s, tol) for s in _solver.bestSolution)
        bestRes = round(_solver.bestEnergy, tol)
        print (bestSol, bestRes)

def memoize(solver, archive, tol=1):
    from klepto import inf_cache
    from klepto.keymaps import keymap
    km = keymap()
    ca = inf_cache(tol=tol, ignore=('**','out'), cache=archive, keymap=km)
    @ca
    def memo(*args, **kwds):
        return kwds['out']
    for _solver in solver._allSolvers:
        bestSol = tuple(_solver.bestSolution)
        bestRes = float(_solver.bestEnergy)
        memo(*bestSol, out=bestRes)
    return memo

if __name__ == '__main__':
    try:
        from pathos.helpers import freeze_support
        freeze_support()
    except ImportError:
        pass
    tol = 8    # rounding precision
    ndim = 2   # model dimensionality
    bounds = ndim * [(-9.5,9.5)]  # griewangk
    npts = 25  # number of solvers
    sprayer = BuckshotSolver
    seeker = PowellDirectionalSolver
    stop = NCOG(1e-4)
    mpmap = Pool().map
    disp = False  # print optimization summary
    retry = 1 #9  # max consecutive iteration retries without a cache 'miss'
    from klepto.archives import dir_archive
    ar_name = '__%s_%sD_cache__' % (model.im_class.__name__, ndim)
    archive = dir_archive(ar_name, serialized=True, cached=False)
    count = 0 if retry else -1 #XXX: 'rerun' much shorter... unless clear
    sid = 0  # keep track of which solver is which across multiple runs
    while retry > count:  # stop after retry consecutive runs with no new results
        _size = -1
        size = osize = len(archive) #XXX: better compare 'size' or 'len(vals)'?
        while size > _size:  # stop if no new results
            solver = solve(model, bounds, sprayer, seeker, stop, npts, mpmap, sid, disp)
            sid += len(solver._allSolvers)
            # print_results(solver, tol=tol)
            _size = size
            info = memoize(solver, archive, tol=1).info()
            print info
            size = info.size
        if size == osize: count = count + 1
        else: count = 0
    #NOTE: len(size) = # of dirs; len(vals) = # unique dirs
    vals = archive.values()
    _min = min(vals)
    _nmin = len([v for v in vals if round(v, tol) == round(_min, tol)])
    print "min: %s (count=%s)" % (_min, _nmin)
    print "pts: %s (values=%s, size=%s)" % (len(vals), len(set(vals)), len(archive))
In [4]:
%%file global_search_max.py
"""
Uses Ensemble Solvers to provide 'pseudo-global' search.
"""
# the ensemble solvers
from mystic.solvers import BuckshotSolver, LatticeSolver
# the local solvers
from mystic.solvers import PowellDirectionalSolver, NelderMeadSimplexSolver
# cost function
from mystic.models import griewangk as model
invmodel = lambda *args, **kwds: -model(*args, **kwds)
# if available, use a multiprocessing worker pool
try:
from pathos.multiprocessing import ProcessingPool as Pool
except ImportError:
from mystic.python import PythonSerial as Pool
# tools
from mystic.termination import VTR, ChangeOverGeneration as COG
from mystic.termination import NormalizedChangeOverGeneration as NCOG
from mystic.monitors import LoggingMonitor, VerboseMonitor, Monitor
def solve(cost, bounds, sprayer, seeker, stop, npts, _map, id=None, disp=True):
    # configure monitor
    stepmon = LoggingMonitor(1, filename='inv.txt')
    # get dimensions
    ndim = len(bounds)
    _min, _max = zip(*bounds)
    # solve with ensemble solver
    solver = sprayer(ndim, npts)
    solver.id = id
    solver.SetNestedSolver(seeker)
    solver.SetMapper(_map)
    solver.SetGenerationMonitor(stepmon)
    solver.SetStrictRanges(min=_min, max=_max)
    solver.Solve(cost, stop, disp=disp)
    return solver

def print_results(solver, tol=8):
    for _solver in solver._allSolvers:
        bestSol = tuple(round(s, tol) for s in _solver.bestSolution)
        bestRes = round(_solver.bestEnergy, tol)
        print (bestSol, bestRes)

def memoize(solver, archive, tol=1):
    from klepto import inf_cache
    from klepto.keymaps import keymap
    km = keymap()
    ca = inf_cache(tol=tol, ignore=('**','out'), cache=archive, keymap=km)
    @ca
    def memo(*args, **kwds):
        return kwds['out']
    for _solver in solver._allSolvers:
        bestSol = tuple(_solver.bestSolution)
        bestRes = float(_solver.bestEnergy)
        memo(*bestSol, out=bestRes)
    return memo

if __name__ == '__main__':
    try:
        from pathos.helpers import freeze_support
        freeze_support()
    except ImportError:
        pass
    tol = 8    # rounding precision
    ndim = 2   # model dimensionality
    bounds = ndim * [(-9.5,9.5)]  # griewangk
    npts = 25  # number of solvers
    sprayer = BuckshotSolver
    seeker = PowellDirectionalSolver
    stop = NCOG(1e-4)
    mpmap = Pool().map
    disp = False  # print optimization summary
    retry = 1 #9  # max consecutive iteration retries without a cache 'miss'
    from klepto.archives import dir_archive
    ar_name = '__%s_%sD_invcache__' % (model.im_class.__name__, ndim)
    archive = dir_archive(ar_name, serialized=True, cached=False)
    count = 0 if retry else -1 #XXX: 'rerun' much shorter... unless clear
    sid = 0  # keep track of which solver is which across multiple runs
    while retry > count:  # stop after retry consecutive runs with no new results
        _size = -1
        size = osize = len(archive) #XXX: better compare 'size' or 'len(vals)'?
        while size > _size:  # stop if no new results
            solver = solve(invmodel, bounds, sprayer, seeker, stop, npts, mpmap, sid, disp)
            sid += len(solver._allSolvers)
            # print_results(solver, tol=tol)
            _size = size
            info = memoize(solver, archive, tol=1).info()
            print info
            size = info.size
        if size == osize: count = count + 1
        else: count = 0
    #NOTE: len(size) = # of dirs; len(vals) = # unique dirs
    vals = archive.values()
    _min = min(vals)
    _nmin = len([v for v in vals if round(v, tol) == round(_min, tol)])
    print "max: %s (count=%s)" % (-_min, _nmin)
    print "pts: %s (values=%s, size=%s)" % (len(vals), len(set(vals)), len(archive))
In [1]:
%%file interp.py
import sys
import numpy as np
from scipy.interpolate import Rbf, griddata
# http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
from mystic.munge import *
from mystic.models import griewangk as model
i,xy,z = logfile_reader('log.txt')
i,ixy,iz = logfile_reader('inv.txt')
#xy,z = ixy,iz
del i
#############
shift = 0
scale = 0
N = 10000.
M = 200
args = {
'smooth': 0,
'function': 'thin_plate',
}
#############
xyz = np.vstack((np.array(xy).T,z))
ixyz = np.vstack((np.array(ixy).T,iz))
ixyz[-1,:] = -ixyz[-1]
del ixy,iz
xyz = np.hstack((xyz, ixyz))
x = xyz.T[:,0]
y = xyz.T[:,1]
z = xyz.T[:,2]
#HACK: remove any duplicate points by adding noise
_x = x + np.random.normal(scale=1e-8, size=x.shape)
_y = y + np.random.normal(scale=1e-8, size=y.shape)
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
from matplotlib import cm
figure = plt.figure()
kwds = {'projection':'3d'}
ax = figure.gca(**kwds)
ax.autoscale(tight=True)
if len(z) > N:
    N = max(int(round(len(z)/N)), 1)
    print "for speed, sampling {} down to {}".format(len(z), len(z)/N)
    x, _x, y, _y, z = x[::N], _x[::N], y[::N], _y[::N], z[::N]
# ax.plot(x, y, z, 'ko', linewidth=2, markersize=4)
# plt.show()
# sys.exit(0)
f = Rbf(_x, _y, z, **args)
M = complex('{}j'.format(M))
xgrid,ygrid = np.mgrid[x.min():x.max():M, y.min():y.max():M]
z_ = f(xgrid, ygrid)
mz = np.argmin(z)
xx,yy = x[mz],y[mz]
print "min: {}; min@f: {}".format(z[mz], f(xx,yy))
mz = np.argmax(z)
xx,yy = x[mz],y[mz]
print "max: {}; max@f: {}".format(z[mz], f(xx,yy))
# scaling used by model plotter
if scale:
    if shift:
        z_ = np.asarray(z_)+shift
        z = np.asarray(z)+shift
    z_ = np.log(4*np.asarray(z_)*scale+1)+2
    z = np.log(4*np.asarray(z)*scale+1)+2
density = 9
d = max(11 - density, 1)
ax.plot_wireframe(xgrid, ygrid, z_, rstride=d, cstride=d)
#ax.plot_surface(xgrid, ygrid, z_, rstride=d, cstride=d, cmap=cm.jet, linewidth=0, antialiased=False)
ax.plot(x, y, z, 'ko', linewidth=2, markersize=4)
plt.show()
figure.savefig('griewangk.png')
import klepto
try:
    from klepto.archives import file_archive
    archive = file_archive('models.pkl', serialized=True, cached=False)
    archive[model.im_class.__name__.lower()] = f
except Exception:
    print "serialization failed"
In [2]:
%matplotlib inline
In [3]:
!python2.7 -c "from mystic.models import griewangk as model; print model.__doc__"
In [4]:
import mystic
import mystic.models
mystic.model_plotter(mystic.models.griewangk, bounds="-10:10:.1, -10:10:.1", depth=True)
In [5]:
!python2.7 global_search.py
In [6]:
mystic.model_plotter(mystic.models.griewangk, 'log.txt', bounds="-10:10:.1, -10:10:.1")
In [7]:
!python2.7 global_search_max.py
In [8]:
!python2.7 interp.py
In [9]:
from IPython.display import Image
Image(filename='griewangk.png')
Out[9]:
In [10]:
import klepto.archives as karch
models = karch.file_archive('models.pkl', serialized=True, cached=False)
griewangk = models['griewangk']
print griewangk(0,0)
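As a quick sanity check (a sketch, not part of the original run), the stored Rbf surrogate can be compared against the true model at a few points; agreement is only expected within the sampled bounds. Note the true model takes a coordinate list, while the surrogate takes separate arguments:
from mystic.models import griewangk as truth
for x, y in [(0., 0.), (1., 1.), (-5., 5.)]:
    print (x, y), truth([x, y]), griewangk(x, y)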
In [12]:
import os
os.chdir('branins')
In [8]:
!python2.7 interp.py
In [13]:
Image(filename='branins.png')
Out[13]:
In [14]:
os.chdir('../ackley')
In [10]:
!python2.7 interp.py
In [15]:
Image(filename='ackley.png')
Out[15]:
In [11]:
os.chdir('..')
basic use (direct access to archive)
In [1]:
from klepto.archives import sqltable_archive
d = sqltable_archive(cached=False)
#from klepto.archives import sql_archive
#d = sql_archive('mysql://user:pass@localhost/defaultdb', cached=False)
d['1'] = 1
d['2'] = 2
d['max'] = max
squared = lambda x:x**2
d['squared'] = squared
# max(squared(2), 1)
print d['max'](d['squared'](d['2']), d['1'])
In [2]:
from klepto.archives import dir_archive
def check_numpy(archive):
    import numpy as np
    d = archive
    x = np.array([1,2,3,4,5])
    y = np.arange(1000)
    t = np.dtype([('int',np.int),('float32',np.float32)])
    d['a'] = x
    d['b'] = y
    d['c'] = np.inf
    d['d'] = np.ptp
    d['e'] = t
    assert all(d['a'] == x)
    assert all(d['b'] == y)
    assert d['c'] == np.inf
    assert d['d'](x) == np.ptp(x)
    assert d['e'] == t
    return
archive = dir_archive(cached=False)
check_numpy(archive)
archive = dir_archive(cached=False,fast=True)
check_numpy(archive)
archive = dir_archive(cached=False,compression=3)
check_numpy(archive)
archive = dir_archive(cached=False,memmode='r+')
check_numpy(archive)
archive = dir_archive(cached=False,serialized=False)
check_numpy(archive)
from pox import rmtree
rmtree('memo')
print "done"
alternate archive backends
In [3]:
import klepto.archives
print '\n'.join([i for i in dir(klepto.archives) if not i.startswith('_') and i.endswith('archive')])
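For instance, any of the listed backends can be instantiated directly (a minimal sketch; the sql backends need extra dependencies, and 'demo' here is just an illustrative directory name):
from klepto.archives import dict_archive, dir_archive
mem = dict_archive(cached=False)          # in-memory archive
mem['x'] = 1
disk = dir_archive('demo', cached=False)  # one file per entry, in directory 'demo'
disk['x'] = 1
assert mem['x'] == disk['x']
from pox import rmtree
rmtree('demo')  # clean up the on-disk archive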
using a cache layer
In [4]:
from klepto.archives import file_archive
original = file_archive('foo')
print "empty:", original
original['a'] = 1
original['b'] = 2
print "a,b in cache:", original
print "not in archive:", original.archive
original.dump()
print "a,b in archive:", original.archive
In [5]:
print original.keys()
print original.values()
print original.items()
original.setdefault('c',3)
print "a,b,c in cache:", original
print "c not in archive:", original.archive
In [6]:
duplicate = file_archive('foo')
print "new empty:", duplicate
duplicate.load()
print "load from file:", duplicate
original.dump()
duplicate.load()
print "c from original:", duplicate
In [7]:
duplicate['d'] = 4
print "d in cache:", duplicate.archive
duplicate.sync()
print "sync'd:", duplicate.archive
print "d in archive:", original.archive
print "d not in cache:", original
original.sync()
print "sync'd:", original
In [8]:
print '\n'.join([i for i in dir(duplicate) if not i.startswith("_")])
In [9]:
!rm -rf foo
basic use (caching, looking up stored values)
In [10]:
import dill
import klepto
@klepto.lru_cache()
def squared(x):
    return x**2
print squared(2)
print squared(4)
print squared(6)
print squared
print squared.__cache__()
_s = dill.loads(dill.dumps(squared))
assert _s.lookup(4) == 16
assert squared.__cache__() == _s.__cache__()
In [11]:
from klepto.safe import inf_cache as memoized
from klepto.archives import sql_archive
@memoized(cache=sql_archive())
def add(x,y):
    return x+y
add(1,2)
add(1,2)
add(1,3)
print add.info()
@memoized(cache=add.__cache__())
def add(x,y):
    return x+y
add(1,2)
add(2,2)
add(1,3)
print "cache hit:", add.info().hit
print "cache miss:", add.info().miss
masking inputs
In [12]:
from klepto import lru_cache as memoize
from klepto.keymaps import hashmap
hasher = hashmap(algorithm='md5')
class Adder(object):
    """A simple class with a memoized method"""
    @memoize(keymap=hasher, ignore=('self','**'))
    def __call__(self, x, *args, **kwds):
        debug = kwds.get('debug', False)
        if debug:
            print ('debug:', x, args, kwds)
        return sum((x,)+args)
    add = __call__
add = Adder()
assert add(2,0) == 2
assert add(2,0,z=4) == 2 # cached (ignore z)
assert add(2,0,debug=False) == 2 # cached (ignore debug)
assert add(1,2,debug=False) == 3
assert add(1,2,debug=True) == 3 # cached (ignore debug)
assert add(4) == 4
assert add(x=4) == 4 # cached
plus = Adder()
assert plus(4,debug=True) == 4 # cached (ignore debug)
assert plus(2,0,3) == 5
assert plus(2,0,4) == 6
info = add.__call__.info()
assert info.hit == 5
assert info.miss == 5
cache = add.__call__.__cache__()
assert sorted(cache.values()) == [2,3,4,5,6]
# careful... remember we have self as first argument
key = add.__call__.key(add,2,0)
assert cache[key] == add.__call__.__wrapped__(add,2,0)
assert cache[key] == add.__call__.lookup(add,2,0)
print "OK"
In [13]:
print key
print cache[key]
print add.__call__.__wrapped__
In [14]:
from klepto import inf_cache
class _Foo(object):
    @inf_cache(ignore='self')
    def bar(self, x,y):
        return x+y
_fu = _Foo()
_fu.bar(1,2)
_fu.bar(2,2)
_fu.bar(2,3)
_fu.bar(1,2)
assert len(_fu.bar.__cache__()) == 3
assert _fu.bar.__cache__()[_fu.bar.key(_fu,1,2)] == 3
assert _fu.bar.lookup(_fu,1,2) == 3
print "OK"
In [15]:
@memoize(keymap=hasher, ignore=('self','**'))
def _add(x, *args, **kwds):
    debug = kwds.get('debug', False)
    if debug:
        print ('debug:', x, args, kwds)
    return sum((x,)+args)
_add(2,0)
_add(2,0,z=4)
_add(2,0,debug=False)
_add(1,2,debug=False)
_add(1,2,debug=True)
_add(4)
_add(x=4)
_add(4,debug=True)
_add(2,0,3)
_add(2,0,4)
_cache = _add.__cache__()
_func = _add.__wrapped__
# do a lookup
assert _add.lookup(2,0) == _func(2,0)
# generate the key, and do a look-up
key = _add.key(2,0)
assert _cache[key] == _func(2,0)
# look-up the key again, doing a little more work...
from klepto import keygen
lookup = keygen('self','**')(_func)
lookup.register(hasher)
key = lookup(2,0)
assert _cache[key] == _func(2,0)
# since we have the 'key lookup', let's play with lookup a bit
# check the inputs passed match the function signature
assert lookup.valid()
# actually call the function
assert lookup.call() == _func(2,0)
print "OK"
In [16]:
print _add.lookup(2,0)
print _add.key(2,0)
function signature discovery and validation
In [17]:
from functools import partial
def foo(x,y,z,a=1,b=2):
    return x+y+z+a+b
p = partial(foo, 0,1)
print p(1,2,3)
print p(1,2,3,4,b=5)
In [18]:
from klepto import validate
assert validate(p, 1,2,3) == None
validate(p, 1,2,3,4,b=5)
In [19]:
from klepto import signature
print signature(p)
# takes args = (z,a,b), kwds = {z,a,b}
# cannot take (x,y) as kwds (fixed by the partial)
# no *args, **kwds used in the function
In [20]:
def bar(x,y,z,a=1,b=2,*args):
    return x+y+z+a+b
p = partial(bar, 0,b=10)
print signature(p)
# a partial with a 'fixed' x, and positionally 'unsettable' b
# takes *args, but not **kwds
rounding
In [21]:
from klepto.rounding import *
@deep_round(tol=1)
def add(x,y):
    return x+y
result = add(2.54, 5.47)
assert result == 8.0
# rounds each float, regardless of depth in an object
result = add([2.54, 5.47],['x','y'])
assert result == [2.5, 5.5, 'x', 'y']
# rounds each float, regardless of depth in an object
result = add([2.54, 5.47],['x',[8.99, 'y']])
assert result == [2.5, 5.5, 'x', [9.0, 'y']]
@simple_round(tol=1)
def add(x,y):
    return x+y
result = add(2.54, 5.47)
assert result == 8.0
# does not round elements of iterables, only rounds at the top-level
result = add([2.54, 5.47],['x','y'])
assert result == [2.54, 5.4699999999999998, 'x', 'y']
# does not round elements of iterables, only rounds at the top-level
result = add([2.54, 5.47],['x',[8.99, 'y']])
assert result == [2.54, 5.4699999999999998, 'x', [8.9900000000000002, 'y']]
print "OK"
exchange strategies
In [24]:
from klepto.archives import file_archive
from random import choice, seed
def _test_hits(algorithm, maxsize=20, keymap=None, rangelimit=5,
               tries=1000, archived=False, purge=False):
    @algorithm(maxsize=maxsize, keymap=keymap, purge=purge)
    def f(x, y):
        return 3*x+y
    if archived:
        f.archive(file_archive('cache.pkl',cached=False))
    domain = list(range(rangelimit))
    domain += [float(i) for i in domain]
    for i in range(tries):
        r = f(choice(domain), choice(domain))
    f.dump()
    #print(f.info())
    return f.info()
import os
import sys
from klepto import *
caches = [rr_cache, mru_cache, lru_cache, lfu_cache, inf_cache, no_cache]
# clean-up
if os.path.exists('cache.pkl'): os.remove('cache.pkl')
print ("WITHOUT ARCHIVE")
for cache in caches:
    msg = cache.__name__ + ":"
    msg += "%s" % str(_test_hits(cache, maxsize=100, rangelimit=20, tries=100))
    print (msg)
print ("\nWITH ARCHIVE (purging)")
for cache in caches:
    msg = cache.__name__ + ":"
    msg += "%s" % str(_test_hits(cache, maxsize=100, purge=True,
                                 rangelimit=20, tries=100, archived=True))
    print (msg)
print ("\nWITH ARCHIVE (non-purging)")
for cache in caches:
    msg = cache.__name__ + ":"
    msg += "%s" % str(_test_hits(cache, maxsize=50, purge=False,
                                 rangelimit=20, tries=100, archived=True))
    print (msg)
# clean-up
if os.path.exists('cache.pkl'): os.remove('cache.pkl')
cryptographic key generation and encoding
In [25]:
from klepto.crypto import *
assert string('1') == '1'
assert string('1', encoding='repr') == "'1'"
x = [1,2,3,'4',"'5'", min]
assert hash(x, 'sha1') == '3bdd73e79be4277dcb874d193b8dd08a46bc6885'
assert string(x) == '[1, 2, 3, \'4\', "\'5\'", <built-in function min>]'
assert string(x, encoding='repr') == '[1, 2, 3, \'4\', "\'5\'", <built-in function min>]'
assert string(x, encoding='utf_8') == '[1, 2, 3, \'4\', "\'5\'", <built-in function min>]'
assert string(x, encoding='unicode') == unicode('[1, 2, 3, \'4\', "\'5\'", <built-in function min>]')
assert string(x, encoding='zlib_codec') == 'x\x9c\x8b6\xd4Q0\xd2Q0\xd6QP7Q\xd7QPR7UW\xd2Q\xb0I*\xcd\xcc)\xd1\xcd\xccSH+\xcdK.\xc9\xcc\xcfS\xc8\xcd\xcc\xb3\x8b\x05\x00\xf6(\x0c\x9c'
assert string(x, encoding='hex_codec') == '5b312c20322c20332c202734272c2022273527222c203c6275696c742d696e2066756e6374696f6e206d696e3e5d'
print "OK"
In [26]:
print algorithms()
print serializers()
print encodings()
In [27]:
from klepto.keymaps import *
from klepto.crypto import *
h = hashmap(algorithm='md5')
p = picklemap(serializer='dill')
hp = p + h
assert p(1) == pickle(1, serializer='dill')
assert h(1) == 'c4ca4238a0b923820dcc509a6f75849b'
assert hp(1) == 'ee7f32117eab97ec5460313282938c93'
assert h(p(1)) == hp(1)
assert hp.inner(1) == p(1)
assert hp.outer(1) == h(1)
print "OK"
The main process reads and writes to a database through a cached function, while N worker processes write to the same database, trying to fill in the "holes". This works in parallel and across network boundaries.
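A minimal sketch of that pattern, assuming a shared sql_archive as the database (the worker loop and the cost function here are illustrative):
from klepto.safe import inf_cache as memoized
from klepto.archives import sql_archive

db = sql_archive(cached=False)  # e.g. 'mysql://user:pass@host/db' for a shared server

@memoized(cache=db)
def expensive(x, y):
    return 3*x + y

def fill_holes(domain):
    # worker: sweep the domain; only uncached points are computed,
    # and each new result lands directly in the shared database
    for x in domain:
        for y in domain:
            expensive(x, y)

# main process: reads through the cache, computing only on a miss
print expensive(1, 2)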
We have research-grade asynchronous cache-to-archive synchronization: functions interact with fast local caches, and asynchronously sync with their archive backends. Work is underway on distributed archive-to-archive synchronization, to seamlessly distribute archived data everywhere.
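A rough sketch of the cache-to-archive pattern, assuming klepto's file_archive ('results.pkl' is just an illustrative name):
from klepto import inf_cache
from klepto.archives import file_archive

# 'cached=True' puts a fast in-memory cache in front of the on-disk archive
store = file_archive('results.pkl', cached=True)

@inf_cache(cache=store)
def work(x):
    return x**2

work(2); work(3)          # served from (and stored in) the local cache
work.__cache__().dump()   # push local results to the archive
work.__cache__().load()   # pull in entries written by other processes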
An h5py backend is in progress, as is "dill-augmented" hickle serialization.
Some preliminary work has been done on kriging interpolation, but API decisions need to be made before it is committed.
A slight refactoring is needed so that a cache exchange strategy is a first-class object, rather than being produced by subclassing a cache.