In [1]:
%matplotlib inline
from xyzpy import *
import numpy as np
In [2]:
def foo(a, b, c):
    return a, b, c
and combos of the form:
In [3]:
combos = [
    ('a', [1, 2, 3]),
    ('b', ['x', 'y', 'z']),
    ('c', [True, False]),
]
The basic function combo_runner takes these and generates a nested (here 3-dimensional) array of all the outputs of foo, one for each of the 3 * 3 * 2 = 18 combinations of input arguments:
In [4]:
combo_runner(foo, combos)
Out[4]:
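Conceptually this is just foo mapped over the cartesian product of the supplied value lists. A minimal plain-Python sketch of the same nesting (not xyzpy's actual implementation):

a_vals = [1, 2, 3]
b_vals = ['x', 'y', 'z']
c_vals = [True, False]

# build the same 3 x 3 x 2 nested structure by hand
nested = [[[foo(a, b, c) for c in c_vals]
           for b in b_vals]
          for a in a_vals]

len(nested), len(nested[0]), len(nested[0][0])  # -> (3, 3, 2)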
In [5]:
cases = [(4, 'z', False), (5, 'y', True)]
case_runner(foo, fn_args=('a', 'b', 'c'), cases=cases)
Out[5]:
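case_runner, by contrast, evaluates only the explicitly listed argument tuples. A rough plain-Python equivalent of the call above:

# run foo once per explicit case, rather than over a full product of values
results = [foo(a, b, c) for (a, b, c) in cases]
results  # -> [(4, 'z', False), (5, 'y', True)]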
In [6]:
runner = Runner(foo, var_names=['a_out', 'b_out', 'c_out'])
runner.run_combos(combos)
Out[6]:
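The Runner labels foo's outputs with var_names. As a hedged usage sketch, assuming run_combos returns the assembled result as a standard xarray.Dataset, individual entries can then be selected with the usual xarray machinery:

# select one labelled output for a particular combination of inputs
ds = runner.run_combos(combos)
ds['a_out'].sel(a=2, b='y', c=True)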
In [7]:
var_names = ['A', 'B', 'C']                # names for bar's three outputs
var_dims = {'B': ['x'], 'C': ['x', 't']}   # dimensions of the non-scalar outputs
var_coords = {'x': [10, 20, 30]}           # coordinate values for the 'x' dimension
constants = {'t': np.linspace(0, 1, 101)}  # arguments held fixed for every run
In [8]:
def bar(i, j, k, t):
    A = np.random.rand()
    B = np.random.rand(3)          # 'B[x]'
    C = np.random.rand(3, len(t))  # 'C[x, t]'
    return A, B, C

# if we are using a runner, combos can be supplied as a dict
combos = {
    'i': [5, 6, 7],
    'j': [0.5, 0.6, 0.7],
    'k': [0.05, 0.06, 0.07],
}
We can then run the combos:
In [9]:
r = Runner(bar, constants=constants,
           var_names=var_names,
           var_coords=var_coords,
           var_dims=var_dims)
r.run_combos(combos)
Out[9]:
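As a rough check, and again assuming the assembled result is an ordinary xarray.Dataset, the declared dims should show up on the output variables:

# A should be indexed by the inputs only, B additionally by 'x', C by 'x' and 't'
ds = r.run_combos(combos)
{name: ds[name].dims for name in ['A', 'B', 'C']}
# roughly: A -> ('i', 'j', 'k'), B -> (..., 'x'), C -> (..., 'x', 't')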
In [10]:
combos = [
    ('a', [1, 2, 3]),
    ('b', ['x', 'y', 'z']),
    ('c', [True, False]),
]

harvester = Harvester(runner, data_name='foo.h5')
harvester.harvest_combos(combos)
Because the file didn't exist yet, this created it on disk at the location given by data_name:
In [11]:
!ls *.h5
Running a further set of combos merges the new results with those already harvested:
In [12]:
combos2 = {
    'a': [4, 5, 6],
    'b': ['w', 'v'],
    'c': [True, False],
}
harvester.harvest_combos(combos2)
Now we can check the total dataset containing all combos and cases run so far:
In [13]:
harvester.full_ds
Out[13]:
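The on-disk copy should mirror full_ds. As a hedged check, assuming the default engine writes a netCDF-compatible HDF5 file readable with h5netcdf, it can be opened directly with xarray:

import xarray as xr

disk_ds = xr.open_dataset('foo.h5', engine='h5netcdf')
disk_ds.identical(harvester.full_ds)  # should be True if the assumption holds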
Finally, for randomly sampling parameter values rather than running every combination exhaustively, there is the Sampler, which aggregates its results into a pandas.DataFrame:
In [14]:
import math
import random

@label(var_names=['out'])
def trig(amp, fn, x, phase):
    return amp * getattr(math, fn)(x - phase)

# these are the default combos/distributions to sample from
default_combos = {
    'amp': [1, 2, 3],
    'fn': ['cos', 'sin'],
    # for distributions we can supply callables
    'x': lambda: 2 * math.pi * random.random(),
    'phase': lambda: random.gauss(0.0, 0.1),
}

sampler = Sampler(trig, 'trig.pkl', default_combos)
sampler
Out[14]:
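Lists in default_combos supply discrete choices and callables draw values on demand. A plain-Python sketch of how one random sample could be assembled (a guess at the behaviour, not xyzpy's actual implementation):

# draw one value per argument: call callables, choose randomly from lists
one_sample = {
    k: (v() if callable(v) else random.choice(v))
    for k, v in default_combos.items()
}
one_sample  # e.g. {'amp': 2, 'fn': 'sin', 'x': 3.1..., 'phase': -0.05...}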
Now we can run the sampler many times (supplying any of the usual arguments, such as parallel=True). This generates a pandas.DataFrame:
In [15]:
sampler.sample_combos(10000);
This has also synced the data with the on-disk file:
In [16]:
!ls *.pkl
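As a hedged check, assuming the default pickle engine writes an ordinary pandas pickle, the on-disk data can be loaded directly:

import pandas as pd

pd.read_pickle('trig.pkl').shape  # expect 10000 rows, one per sample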
You can specify Sampler(..., engine='csv') etc. to use formats other than pickle.
As with the Harvester, the next time we run combinations the data is automatically
aggregated into the full set:
In [17]:
# here we will override some of the default sampling choices
combos = {
    'fn': ['tan'],
    'x': lambda: random.random() * math.pi / 4,
}
sampler.sample_combos(5000, combos);
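Since full_df is a plain pandas.DataFrame, the mix of sampled functions can be checked directly; with the runs above we would expect roughly 5000 each of 'cos' and 'sin', plus 5000 'tan':

sampler.full_df['fn'].value_counts()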
In [18]:
import seaborn as sns
sns.relplot(x='x', y='out', hue='fn', size='amp', data=sampler.full_df)
Out[18]:
In [19]:
# some cleanup
harvester.delete_ds()
sampler.delete_df()