In [1]:
import tohu
from tohu.v6.primitive_generators import *
from tohu.v6.derived_generators import *
from tohu.v6.generator_dispatch import *
from tohu.v6.custom_generator import *
from tohu.v6.utils import print_generated_sequence, make_dummy_tuples
#tohu.v6.logging.logger.setLevel('DEBUG')
# `pandas.testing` is the public home of these assertion helpers; the old
# `pandas.util.testing` alias was deprecated in pandas 1.0 and removed in 2.0.
from pandas.testing import assert_frame_equal, assert_series_equal
In [2]:
# Show which tohu version this notebook is being run with.
print(f'Tohu version: {tohu.__version__}')
At its most basic, a custom generator simply provides a convenient way of grouping other generators together in a single namespace.
In [3]:
class QuuxGenerator(CustomGenerator):
    """Group four field generators in one namespace and export three of them.

    The class defines the field generators `aa`, `bb`, `cc` and `dd`;
    `__fields__` names the ones that appear in generated items.
    """

    # Only the fields listed here are exported, in this order; `aa` is
    # defined below but is not listed.
    __fields__ = ["dd", "bb", "cc"]

    aa = Integer(1, 7)
    bb = HashDigest(length=8)
    cc = FakerGenerator(method="name")
    dd = Integer(100, 200)
In [4]:
# Instantiate the custom generator class defined in the previous cell.
g = QuuxGenerator()
In [5]:
# Print the value of g.field_names.
print(f"Field names: {g.field_names}")
In [6]:
# NBVAL_IGNORE_OUTPUT
# Print the string form of g.ns_gen_templates; per the marker above, nbval
# does not compare this cell's output.
print(g.ns_gen_templates.to_str())
In [7]:
# NBVAL_IGNORE_OUTPUT
# Print the string form of g.ns_gens; per the marker above, nbval does not
# compare this cell's output.
print(g.ns_gens.to_str())
In [8]:
# Print 5 items generated by g, separated by newlines, using seed 12345.
print_generated_sequence(g, num=5, sep='\n', seed=12345)
In [9]:
# The seven letters 'a' through 'g' as a list of single-character strings.
chars = list('abcdefg')
In [10]:
# Standalone Integer(1, 5) generator, passed as the `num` argument below.
n_vals = Integer(1, 5)
# SelectMultiple over `chars`; `num` is supplied by `n_vals`
# (presumably the number of elements selected per item -- confirm).
g = SelectMultiple(chars, num=n_vals)
In [11]:
# Reset both generators explicitly, each with its own seed: here `n_vals`
# is a standalone generator rather than a field of a custom generator.
n_vals.reset(seed=11111)
g.reset(seed=99999)
# No `seed` argument is passed here, unlike the other
# print_generated_sequence calls in this notebook.
print_generated_sequence(g, num=10, sep='\n')
In [12]:
class QuuxGenerator(CustomGenerator):
    """Custom generator with two fields, where `vals` takes `n_vals` as input.

    `n_vals` is an `Integer(1, 5)` generator; it is passed as the `num`
    argument of the `SelectMultiple` generator built over `chars`.
    """

    n_vals = Integer(1, 5)
    vals = SelectMultiple(chars, num=n_vals)
In [13]:
# Instantiate the QuuxGenerator defined in the previous cell (it replaces the
# earlier class of the same name).
g = QuuxGenerator()
In [14]:
# Print 10 items generated by g, separated by newlines, using seed 12345.
print_generated_sequence(g, num=10, sep='\n', seed=12345)
In [15]:
# Map each upper-case key 'A'..'G' to the list of its lower-case letter
# repeated 1 to 5 times, e.g. 'A' -> ['a', 'aa', 'aaa', 'aaaa', 'aaaaa'].
mapping = {
    letter.upper(): [letter * count for count in range(1, 6)]
    for letter in 'abcdefg'
}
Custom generators can have complex dependencies between their field generators. For example, in `Quux1Generator` below, the field generator `bb` depends on `ll` (and thus indirectly also on `aa`) and on `nn`.
In [16]:
class Quux1Generator(CustomGenerator):
    """Custom generator whose fields feed into one another.

    `ll` is a `Lookup` keyed on `aa`; `bb` is a `SelectMultiple` that takes
    `ll` as its first argument and `nn` as its `num` argument.
    """

    # Upper-case keys; these match the keys of `mapping`.
    aa = SelectOne(['A', 'B', 'C', 'D', 'E', 'F', 'G'])
    # Lookup in `mapping`, keyed on `aa`.
    ll = Lookup(key=aa, mapping=mapping)
    nn = Integer(1, 5)
    bb = SelectMultiple(ll, num=nn)
In [17]:
# Instantiate Quux1Generator.
g1 = Quux1Generator()
In [18]:
# Print 5 items generated by g1, separated by newlines, using seed 99999.
print_generated_sequence(g1, num=5, sep='\n', seed=99999)
We can get the same output for `bb` without explicitly defining the input generators as separate fields; they can simply be nested inline.
In [19]:
class Quux2Generator(CustomGenerator):
    """Custom generator with a single field `bb` built from nested generators.

    The `SelectOne`, `Lookup` and `Integer` generators are created inline as
    arguments instead of being assigned to class attributes of their own.
    """

    bb = SelectMultiple(
        Lookup(SelectOne(['A', 'B', 'C', 'D', 'E', 'F', 'G']), mapping),
        num=Integer(1, 5),
    )
In [20]:
# Instantiate Quux2Generator.
g2 = Quux2Generator()
In [21]:
# Print 5 items generated by g2, separated by newlines, using seed 99999
# (the same seed as used for g1).
print_generated_sequence(g2, num=5, sep='\n', seed=99999)
Let's check that both `g1` and `g2` really produce the same elements in column `bb`.
In [22]:
# Generate 20 items from each generator with the same seed and convert each
# result to a DataFrame (g1 first, then g2).
df1, df2 = [gen.generate(num=20, seed=99999).to_df() for gen in (g1, g2)]
# Raises AssertionError if the two `bb` columns differ.
assert_series_equal(df1["bb"], df2["bb"])
It is possible to define field generators in the `__init__()` method of a custom generator. Note that you can use the `__fields__` attribute to define the order in which fields are output in generated items.
In [23]:
class QuuxGenerator(CustomGenerator):
    """Custom generator whose `bb` field is configured at construction time.

    `aa` and `cc` are class-level field generators; `bb` is created in
    `__init__()` from the `method` argument.
    """

    __fields__ = ['aa', 'bb', 'cc']  # define the order of fields in generated items

    cc = HashDigest(length=8)
    aa = Integer(100, 200)

    def __init__(self, method):
        """Create the `bb` field generator.

        Args:
            method: Passed through as `FakerGenerator(method=...)`.
        """
        # NOTE(review): there is no explicit super().__init__() call here --
        # presumably CustomGenerator takes care of base initialisation; confirm.
        self.bb = FakerGenerator(method=method)
In [24]:
# Instantiate the QuuxGenerator defined in the previous cell; `method` is the
# argument its __init__() forwards to FakerGenerator.
g = QuuxGenerator(method="first_name")
In [25]:
# Print 10 items generated by g, separated by newlines, using seed 12345.
print_generated_sequence(g, num=10, seed=12345, sep='\n')
In [ ]: