notebook.community

Edit and run



In [1]:

    
%matplotlib inline
from xyzpy import *
import numpy as np



In [2]:

    
def foo(a, b, c):
    """Echo the three inputs back, unchanged, as the tuple ``(a, b, c)``."""
    outputs = (a, b, c)
    return outputs

combo_runner takes a function such as foo above, and combos of the form:



In [3]:

    
# Each pair is (argument name of foo, list of values to try for it).
combos = [('a', [1, 2, 3]),
          ('b', ['x', 'y', 'z']),
          ('c', [True, False])]

and generates a nested (here 3 dimensional) array of all the outputs of foo with the 3 * 3 * 2 = 18 combinations of input arguments:



In [4]:

    
# Evaluate foo on every combination in combos; the result shown below is a
# nested tuple whose nesting follows the order of the combos: a, then b, then c.
combo_runner(foo, combos)









    



100%|##########| 18/18 [00:00<00:00, 18083.23it/s]






    Out[4]:





((((1, 'x', True), (1, 'x', False)),
  ((1, 'y', True), (1, 'y', False)),
  ((1, 'z', True), (1, 'z', False))),
 (((2, 'x', True), (2, 'x', False)),
  ((2, 'y', True), (2, 'y', False)),
  ((2, 'z', True), (2, 'z', False))),
 (((3, 'x', True), (3, 'x', False)),
  ((3, 'y', True), (3, 'y', False)),
  ((3, 'z', True), (3, 'z', False))))



In [5]:

    
# Explicit argument tuples to evaluate, each ordered as (a, b, c) to match
# the fn_args given below.
cases = [
    (4, 'z', False),
    (5, 'y', True),
]
case_runner(
    foo,
    fn_args=('a', 'b', 'c'),
    cases=cases,
)









    



100%|##########| 2/2 [00:00<00:00, 471.93it/s]






    Out[5]:





((4, 'z', False), (5, 'y', True))



In [6]:

    
# Wrap foo in a Runner and name its three outputs, so that running the combos
# returns a labelled dataset rather than a nested tuple.
runner = Runner(
    foo,
    var_names=['a_out', 'b_out', 'c_out'],
)
runner.run_combos(combos)









    



100%|##########| 18/18 [00:00<00:00, 21024.08it/s]






    Out[6]:





<xarray.Dataset>
Dimensions:  (a: 3, b: 3, c: 2)
Coordinates:
  * a        (a) int64 1 2 3
  * b        (b) <U1 'x' 'y' 'z'
  * c        (c) bool True False
Data variables:
    a_out    (a, b, c) int64 1 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3
    b_out    (a, b, c) <U1 'x' 'x' 'y' 'y' 'z' 'z' ... 'x' 'x' 'y' 'y' 'z' 'z'
    c_out    (a, b, c) bool True False True False True ... True False True False



In [7]:

    
# Names given to the three outputs of the function being run.
var_names = ['A', 'B', 'C']

# Extra dimensions carried by each output itself: B is indexed by x, and C by
# x and t. A is not listed, so it has no extra dimensions.
var_dims = {
    'B': ['x'],
    'C': ['x', 't'],
}

# Coordinate values to label the x dimension with.
var_coords = {
    'x': [10, 20, 30],
}

# Arguments held fixed for every run: 101 evenly spaced points on [0, 1].
constants = {
    't': np.linspace(0, 1, 101),
}



In [8]:

    
def bar(i, j, k, t):
    """Return three random outputs: a scalar, a vector and a matrix.

    The arguments ``i``, ``j`` and ``k`` are accepted but not used; only the
    length of ``t`` matters, as it sets the second axis of the last output.

    Returns
    -------
    A : float
        A single random number.
    B : numpy.ndarray, shape (3,)
        Random values, 'B[x]'.
    C : numpy.ndarray, shape (3, len(t))
        Random values, 'C[x, t]'.
    """
    n_x = 3
    # Draw in the order A, B, C so a seeded generator yields the same values.
    scalar = np.random.rand()
    vector = np.random.rand(n_x)
    matrix = np.random.rand(n_x, len(t))
    return scalar, vector, matrix

# With a Runner, the combos may be given as a dict mapping each argument
# name to the values to sweep over.
combos = {'i': [5, 6, 7],
          'j': [0.5, 0.6, 0.7],
          'k': [0.05, 0.06, 0.07]}

We can then run the combos:



In [9]:

    
# Describe bar's outputs to the Runner: their names, the extra dimensions each
# one carries, the coordinates labelling those dimensions, and the arguments
# that stay constant across all runs.
r = Runner(
    bar,
    var_names=var_names,
    var_dims=var_dims,
    var_coords=var_coords,
    constants=constants,
)
r.run_combos(combos)









    



100%|##########| 27/27 [00:00<00:00, 3709.71it/s]






    Out[9]:





<xarray.Dataset>
Dimensions:  (i: 3, j: 3, k: 3, t: 101, x: 3)
Coordinates:
  * i        (i) int64 5 6 7
  * j        (j) float64 0.5 0.6 0.7
  * k        (k) float64 0.05 0.06 0.07
  * x        (x) int64 10 20 30
  * t        (t) float64 0.0 0.01 0.02 0.03 0.04 ... 0.96 0.97 0.98 0.99 1.0
Data variables:
    A        (i, j, k) float64 0.2533 0.4152 0.2226 ... 0.5986 0.7464 0.7801
    B        (i, j, k, x) float64 0.7641 0.2731 0.3556 ... 0.9066 0.2976 0.1442
    C        (i, j, k, x, t) float64 0.7664 0.4964 0.08095 ... 0.281 0.8488



In [10]:

    
# Combos for foo again: (argument name, values to try).
combos = [('a', [1, 2, 3]),
          ('b', ['x', 'y', 'z']),
          ('c', [True, False])]

# Attach the foo runner to the on-disk dataset 'foo.h5', then run the combos
# through it.
harvester = Harvester(
    runner,
    data_name='foo.h5',
)
harvester.harvest_combos(combos)









    



100%|##########| 18/18 [00:00<00:00, 13503.39it/s]

Because that file didn't exist yet, this created the file given by data_name:



In [11]:

    
# List the .h5 file(s) in the working directory. A bare `ls` is not Python; it
# presumably relies on IPython resolving it as a built-in alias.
ls *.h5









    



foo.h5*



In [12]:

    
# A second batch with values of a and b that were not run before.
combos2 = {'a': [4, 5, 6],
           'b': ['w', 'v'],
           'c': [True, False]}
harvester.harvest_combos(combos2)









    



100%|##########| 12/12 [00:00<00:00, 1345.23it/s]

Now we can check the total dataset containing all combos and cases run so far:



In [13]:

    
# Display the dataset accumulated over both harvest_combos calls; entries for
# combinations that were never run show up as nan in the output below.
harvester.full_ds









    Out[13]:





<xarray.Dataset>
Dimensions:  (a: 6, b: 5, c: 2)
Coordinates:
  * a        (a) int64 1 2 3 4 5 6
  * b        (b) object 'v' 'w' 'x' 'y' 'z'
  * c        (c) bool True False
Data variables:
    a_out    (a, b, c) float64 nan nan nan nan 1.0 1.0 ... nan nan nan nan nan
    b_out    (a, b, c) object nan nan nan nan 'x' 'x' ... nan nan nan nan nan
    c_out    (a, b, c) float64 nan nan nan nan 1.0 0.0 ... nan nan nan nan nan



In [14]:

    
import math
import random

@label(var_names=['out'])
def trig(amp, fn, x, phase):
    """Evaluate ``amp * f(x - phase)`` where ``f`` is ``math.<fn>``.

    ``fn`` is the name of a function on the ``math`` module, e.g. ``'cos'``.
    """
    math_fn = getattr(math, fn)
    shifted = x - phase
    return amp * math_fn(shifted)

# These are the default combos/distributions to sample from, keyed by the
# argument names of trig.
default_combos = {
    # discrete choices are given as lists of candidate values
    'amp': [1, 2, 3],
    # names of functions that trig looks up on the math module
    'fn': ['cos', 'sin'],
    # for distributions we can supply callables
    # x: uniform on [0, 2*pi)
    'x': lambda: 2 * math.pi * random.random(),
    # phase: normally distributed with mean 0.0 and standard deviation 0.1
    'phase': lambda: random.gauss(0.0, 0.1),
}

# Build the sampler around trig; 'trig.pkl' is reported as its sync file in
# the repr displayed below.
sampler = Sampler(trig, 'trig.pkl', default_combos)
sampler









    Out[14]:





<xyzpy.Sampler>
Runner: <xyzpy.Runner>
    fn: <function trig at 0x7f08fbd1e6a8>
    fn_args: ('amp', 'fn', 'x', 'phase')
    var_names: ('out',)
    var_dims: {'out': ()}
Sync file -->
    trig.pkl    [pickle]

Now we can run the sampler many times (supplying any of the usual arguments, such as parallel=True). This generates a pandas.DataFrame:



In [15]:

    
# Draw 10000 samples using the default combos; the trailing semicolon stops
# the returned value from being displayed as the cell output.
sampler.sample_combos(10000);









    



100%|##########| 10000/10000 [00:00<00:00, 37195.81it/s]

This has also synced the data with the on-disk file:



In [16]:

    
# Shell out to list the pickle file(s) in the working directory.
!ls *.pkl









    



trig.pkl

You can specify, for example, Sampler(..., engine='csv') to use formats other than pickle.

As with the Harvester, next time we run combinations, the data is automatically aggregated into the full set:



In [17]:

    
# Here we override some of the default sampling choices: 'fn' is fixed to
# 'tan' and 'x' is drawn uniformly from [0, pi/4). 'amp' and 'phase' are not
# listed, so presumably they keep using the sampler's defaults.
combos = {
    'fn': ['tan'],
    'x': lambda: random.random() * math.pi / 4
}

# Draw 5000 more samples with these overrides; the trailing semicolon
# suppresses the cell output.
sampler.sample_combos(5000, combos);









    



100%|##########| 5000/5000 [00:00<00:00, 64517.83it/s]



In [18]:

    
import seaborn as sns

# Plot every sample drawn so far: output against x, coloured by the function
# used and sized by amplitude.
sns.relplot(
    data=sampler.full_df,
    x='x',
    y='out',
    hue='fn',
    size='amp',
)









    Out[18]:





<seaborn.axisgrid.FacetGrid at 0x7f08f92c31d0>



In [19]:

    
# Some cleanup: presumably this removes the on-disk data written earlier
# ('foo.h5' for the harvester, 'trig.pkl' for the sampler) -- TODO confirm.
harvester.delete_ds()
sampler.delete_df()