Custom generators


In [1]:
import tohu
from tohu.v6.primitive_generators import *
from tohu.v6.derived_generators import *
from tohu.v6.generator_dispatch import *
from tohu.v6.custom_generator import *
from tohu.v6.utils import print_generated_sequence, make_dummy_tuples
#tohu.v6.logging.logger.setLevel('DEBUG')
# `pandas.testing` is the public location of these helpers; the old
# `pandas.util.testing` module is deprecated and no longer exists in pandas 2.x.
from pandas.testing import assert_frame_equal, assert_series_equal

In [2]:
# Show which tohu version this notebook was run with.
print(f'Tohu version: {tohu.__version__}')


Tohu version: v0.6.2+1.gcd0800e

Independent field generators

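At its most basic, a custom generator simply provides a convenient way of grouping other generators together in a single namespace. The optional `__fields__` attribute determines which of these field generators appear in the generated items, and in which order.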


In [3]:
class QuuxGenerator(CustomGenerator):
    # Groups four independent field generators in one namespace.
    #
    # Only the fields listed in `__fields__` appear in generated items, in the
    # order given here; `aa` is still defined as a field generator but is not
    # exported.
    __fields__ = ["dd", "bb", "cc"]

    aa = Integer(1, 7)
    bb = HashDigest(length=8)
    cc = FakerGenerator(method="name")
    dd = Integer(100, 200)

In [4]:
# Instantiate the custom generator defined in the previous cell.
g = QuuxGenerator()

In [5]:
# `field_names` lists the fields that generated items will contain.
print(f"Field names: {g.field_names}")


Field names: ['dd', 'bb', 'cc']

In [6]:
# NBVAL_IGNORE_OUTPUT
# List the field generator templates, which are owned by the QuuxGenerator
# class itself (this includes `aa`, even though it is not in `__fields__`).
print(g.ns_gen_templates.to_str())


aa: <Integer (id=df5b62542093) [owned by: <class '__main__.QuuxGenerator'>] >
bb: <HashDigest (id=c7e96a820570) [owned by: <class '__main__.QuuxGenerator'>] >
cc: <FakerGenerator (id=b19b47f86c57) [owned by: <class '__main__.QuuxGenerator'>] >
dd: <Integer (id=ce6e85bb263c) [owned by: <class '__main__.QuuxGenerator'>] >


In [7]:
# NBVAL_IGNORE_OUTPUT
# List the field generators of this particular instance; each one is owned
# by `g` rather than by the QuuxGenerator class.
print(g.ns_gens.to_str())


aa: <Integer (id=547f9e838434) [owned by: <QuuxGenerator (id=ca09ae98c8fd)>] >
bb: <HashDigest (id=bd130803eeb2) [owned by: <QuuxGenerator (id=ca09ae98c8fd)>] >
cc: <FakerGenerator (id=121f30fcdae2) [owned by: <QuuxGenerator (id=ca09ae98c8fd)>] >
dd: <Integer (id=fa6340a67100) [owned by: <QuuxGenerator (id=ca09ae98c8fd)>] >


In [8]:
# Print five generated items, one per line, using a fixed seed.
print_generated_sequence(g, num=5, sep='\n', seed=12345)


Generated sequence:

Quux(dd=186, bb='C851F707', cc='Michelle Foster DDS')
Quux(dd=133, bb='2553FCD0', cc='Sandra Galvan')
Quux(dd=185, bb='CFF9005D', cc='Kristen Aguilar')
Quux(dd=107, bb='E9D2528C', cc='Sarah Moore')
Quux(dd=107, bb='EAB4D953', cc='Gary Bond')

Simple dependency between field generators


In [9]:
# Pool of single-character values ('a' through 'g') to select from.
chars = list("abcdefg")

In [10]:
# `n_vals` is itself a generator; it is passed as the `num` argument, so the
# number of elements selected from `chars` is not a fixed count.
n_vals = Integer(1, 5)
g = SelectMultiple(chars, num=n_vals)

In [11]:
# Reset both generators explicitly, each with its own seed.
# NOTE(review): presumably `g.reset()` does not reseed the standalone
# `n_vals` generator, hence the separate call — confirm.
n_vals.reset(seed=11111)
g.reset(seed=99999)

# No `seed` is passed here, unlike the other `print_generated_sequence`
# calls in this notebook; the generators were already reset just above.
print_generated_sequence(g, num=10, sep='\n')


Generated sequence:

['a', 'c', 'e', 'b']
['c', 'd', 'b', 'g', 'e']
['f', 'e', 'g']
['e', 'b', 'a']
['f', 'c']
['e', 'd', 'g']
['c']
['g', 'c', 'e', 'b', 'f']
['f', 'b', 'c']
['e', 'b', 'c', 'a']

In [12]:
class QuuxGenerator(CustomGenerator):
    # `vals` takes its `num` argument from the sibling field generator
    # `n_vals`, so one field depends on another within the same class.
    n_vals = Integer(1, 5)
    vals = SelectMultiple(chars, num=n_vals)

In [13]:
# Instantiate the custom generator defined in the previous cell.
g = QuuxGenerator()

In [14]:
# Print ten generated items, one per line, using a fixed seed.
print_generated_sequence(g, num=10, sep='\n', seed=12345)


Generated sequence:

Quux(n_vals=2, vals=['g', 'f'])
Quux(n_vals=3, vals=['e', 'd', 'f'])
Quux(n_vals=1, vals=['b'])
Quux(n_vals=2, vals=['e', 'c'])
Quux(n_vals=5, vals=['g', 'f', 'e', 'b', 'c'])
Quux(n_vals=4, vals=['a', 'd', 'b', 'f'])
Quux(n_vals=5, vals=['g', 'c', 'b', 'f', 'e'])
Quux(n_vals=4, vals=['g', 'f', 'b', 'c'])
Quux(n_vals=5, vals=['a', 'e', 'b', 'd', 'g'])
Quux(n_vals=3, vals=['c', 'd', 'e'])

Complex dependency between field generators


In [15]:
# Each upper-case key maps to five strings made of its lower-case letter
# repeated 1 to 5 times, e.g. 'A' -> ['a', 'aa', 'aaa', 'aaaa', 'aaaaa'].
mapping = {
    letter.upper(): [letter * count for count in range(1, 6)]
    for letter in 'abcdefg'
}

Custom generators can have complex dependencies between their field generators. For example, in Quux1Generator below the field generator bb depends on ll (and thus indirectly also on aa) and nn.


In [16]:
class Quux1Generator(CustomGenerator):
    # Dependency chain between the fields: `aa` selects a key, `ll` looks that
    # key up in `mapping`, and `bb` selects `nn` elements from the list that
    # `ll` produces.
    aa = SelectOne(['A', 'B', 'C', 'D', 'E', 'F', 'G'])
    ll = Lookup(key=aa, mapping=mapping)
    nn = Integer(1, 5)
    bb = SelectMultiple(ll, num=nn)

In [17]:
# Instantiate the generator whose intermediate fields are all defined explicitly.
g1 = Quux1Generator()

In [18]:
# Print five generated items, one per line, using a fixed seed.
print_generated_sequence(g1, num=5, sep='\n', seed=99999)


Generated sequence:

Quux1(aa='B', ll=['b', 'bb', 'bbb', 'bbbb', 'bbbbb'], nn=2, bb=['bbbbb', 'bb'])
Quux1(aa='A', ll=['a', 'aa', 'aaa', 'aaaa', 'aaaaa'], nn=3, bb=['aaaa', 'aaaaa', 'a'])
Quux1(aa='D', ll=['d', 'dd', 'ddd', 'dddd', 'ddddd'], nn=5, bb=['ddddd', 'd', 'dddd', 'ddd', 'dd'])
Quux1(aa='G', ll=['g', 'gg', 'ggg', 'gggg', 'ggggg'], nn=5, bb=['ggg', 'g', 'gg', 'ggggg', 'gggg'])
Quux1(aa='A', ll=['a', 'aa', 'aaa', 'aaaa', 'aaaaa'], nn=3, bb=['a', 'aa', 'aaaa'])

We can get the same output for bb without defining the input generators as separate fields: they can be passed inline as arguments instead.


In [19]:
class Quux2Generator(CustomGenerator):
    # Same generator pipeline as Quux1Generator, but with the input generators
    # nested inline so that `bb` is the only attribute defined on the class.
    bb = SelectMultiple(
        Lookup(
            SelectOne(['A', 'B', 'C', 'D', 'E', 'F', 'G']),
            mapping,
        ),
        num=Integer(1, 5),
    )

In [20]:
# Instantiate the generator that defines only the `bb` field.
g2 = Quux2Generator()

In [21]:
# Same item count and seed as used for `g1`, so the two outputs can be compared.
print_generated_sequence(g2, num=5, sep='\n', seed=99999)


Generated sequence:

Quux2(bb=['bbbbb', 'bb'])
Quux2(bb=['aaaa', 'aaaaa', 'a'])
Quux2(bb=['ddddd', 'd', 'dddd', 'ddd', 'dd'])
Quux2(bb=['ggg', 'g', 'gg', 'ggggg', 'gggg'])
Quux2(bb=['a', 'aa', 'aaaa'])

Let's check that both g1 and g2 really produce the same elements in column bb.


In [22]:
# Draw the same number of items from both generators with an identical seed
# and convert each result with `to_df()`; `g1` is evaluated before `g2`.
df1, df2 = (
    gen.generate(num=20, seed=99999).to_df()
    for gen in (g1, g2)
)

# Raises an AssertionError if the `bb` columns differ.
assert_series_equal(df1["bb"], df2["bb"])

Field generators defined in the __init__() method

It is possible to define field generators in the __init__() method of a custom generator. Note that you can use the __fields__ attribute to easily define the order in which fields should be output in generated items.


In [23]:
class QuuxGenerator(CustomGenerator):
    # Mixes class-level field generators (`cc`, `aa`) with one that is created
    # per instance in `__init__` (`bb`).
    __fields__ = ['aa', 'bb', 'cc']  # define the order of fields in generated items

    cc = HashDigest(length=8)
    aa = Integer(100, 200)
    
    def __init__(self, method):
        # `method` is forwarded to FakerGenerator, so the kind of fake value
        # produced for `bb` is chosen when the generator is instantiated.
        # NOTE(review): there is no explicit super().__init__() call here;
        # presumably CustomGenerator takes care of its own initialisation — confirm.
        self.bb = FakerGenerator(method=method)

In [24]:
# The `method` argument is passed through to the FakerGenerator behind field `bb`.
g = QuuxGenerator(method="first_name")

In [25]:
# Print ten generated items, one per line, using a fixed seed.
print_generated_sequence(g, num=10, seed=12345, sep='\n')


Generated sequence:

Quux(aa=132, bb='Victoria', cc='09A38AEF')
Quux(aa=143, bb='Jasmine', cc='A2E2CCB2')
Quux(aa=147, bb='Randall', cc='A4C7842C')
Quux(aa=194, bb='Taylor', cc='DAC06047')
Quux(aa=174, bb='Abigail', cc='D9307442')
Quux(aa=129, bb='Maria', cc='D048295E')
Quux(aa=167, bb='Sarah', cc='C016EB9F')
Quux(aa=102, bb='Mikayla', cc='03298115')
Quux(aa=172, bb='Angela', cc='AA05D88E')
Quux(aa=107, bb='Brandi', cc='8A6D4201')

In [ ]: