Custom generators


In [1]:
import tohu
from tohu.v4.primitive_generators import *
from tohu.v4.derived_generators import *
from tohu.v4.dispatch_generators import *
from tohu.v4.custom_generator import *
from tohu.v4.utils import print_generated_sequence, make_dummy_tuples

In [2]:
print(f'Tohu version: {tohu.__version__}')


Tohu version: v0.5.0+240.g541b4c5

Custom generator without __init__ method


In [3]:
class QuuxGenerator(CustomGenerator):
    """Custom generator whose fields are declared purely as class attributes.

    No ``__init__`` method is defined. The generated items are displayed as
    ``Quux(aa=..., bb=..., cc=...)``.
    """
    aa = Integer(100, 200)  # integer field (sample values: 102, 164, 100, ...)
    bb = HashDigest(length=6)  # 6-character digest string, e.g. 'C851F7'
    cc = FakerGenerator(method='name')  # person name, e.g. 'Sandra Galvan'

In [4]:
g = QuuxGenerator()

In [5]:
print_generated_sequence(g, num=10, sep='\n', seed=12345)


Generated sequence:

Quux(aa=102, bb='C851F7', cc='Michelle Foster DDS')
Quux(aa=164, bb='2553FC', cc='Sandra Galvan')
Quux(aa=118, bb='CFF900', cc='Kristen Aguilar')
Quux(aa=185, bb='E9D252', cc='Sarah Moore')
Quux(aa=182, bb='EAB4D9', cc='Gary Bond')
Quux(aa=124, bb='5B9B84', cc='Patrick Wood')
Quux(aa=149, bb='8B4519', cc='Kathleen Browning')
Quux(aa=158, bb='2E5251', cc='Joseph Harris')
Quux(aa=100, bb='092E18', cc='Noah Wilson')
Quux(aa=160, bb='91AA24', cc='Darren Dawson')

Explicitly setting the name of generated items

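Let's repeat the previous example, but explicitly set the name of the generated items by defining the `__tohu_items_name__` attribute inside the custom generator. (In the previous example the name was derived from the class name: `QuuxGenerator` produced items called `Quux`.)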


In [6]:
class SomeGeneratorWithExplicitItemsName(CustomGenerator):
    """Custom generator that sets the name of its generated items explicitly.

    The fields are the same as in the first ``QuuxGenerator`` example; only
    the name under which the items are displayed differs.
    """
    # Items are displayed as `Foobar(...)` rather than under a name derived
    # from the (long) class name.
    __tohu_items_name__ = 'Foobar'

    aa = Integer(100, 200)  # integer field (sample values: 102, 164, 100, ...)
    bb = HashDigest(length=6)  # 6-character digest string, e.g. 'C851F7'
    cc = FakerGenerator(method='name')  # person name, e.g. 'Sandra Galvan'

In [7]:
g = SomeGeneratorWithExplicitItemsName()

The generated sequence is the same as above, but the name of the items has changed from Quux to Foobar.


In [8]:
print_generated_sequence(g, num=10, sep='\n', seed=12345)


Generated sequence:

Foobar(aa=102, bb='C851F7', cc='Michelle Foster DDS')
Foobar(aa=164, bb='2553FC', cc='Sandra Galvan')
Foobar(aa=118, bb='CFF900', cc='Kristen Aguilar')
Foobar(aa=185, bb='E9D252', cc='Sarah Moore')
Foobar(aa=182, bb='EAB4D9', cc='Gary Bond')
Foobar(aa=124, bb='5B9B84', cc='Patrick Wood')
Foobar(aa=149, bb='8B4519', cc='Kathleen Browning')
Foobar(aa=158, bb='2E5251', cc='Joseph Harris')
Foobar(aa=100, bb='092E18', cc='Noah Wilson')
Foobar(aa=160, bb='91AA24', cc='Darren Dawson')

Custom generator with __init__ method


In [9]:
class QuuxGenerator(CustomGenerator):
    """Custom generator mixing a class-level field with one set up in ``__init__``.

    ``aa`` is declared on the class, whereas ``bb`` is created per instance
    from the ``faker_method`` argument, so different instances can produce
    different kinds of ``bb`` values.
    """
    aa = Integer(100, 200)
    
    def __init__(self, faker_method):
        """Create the ``bb`` field from the given Faker method name.

        Args:
            faker_method: Value forwarded to ``FakerGenerator(method=...)``,
                for example ``'first_name'`` or ``'city'``.
        """
        self.bb = FakerGenerator(method=faker_method)

        # Note: the call to super().__init__() needs to be at the end,
        # and it needs to be passed the same arguments as the __init__()
        # method from which it is called (here: `faker_method`).
        super().__init__(faker_method)

In [10]:
g1 = QuuxGenerator(faker_method='first_name')
g2 = QuuxGenerator(faker_method='city')

In [11]:
# Show both generators with the same seed; a blank line separates the two listings.
print_generated_sequence(g1, num=10, sep='\n', seed=12345)
print()
print_generated_sequence(g2, num=10, sep='\n', seed=12345)


Generated sequence:

Quux(aa=102, bb='Russell')
Quux(aa=164, bb='Derrick')
Quux(aa=118, bb='Lori')
Quux(aa=185, bb='Dustin')
Quux(aa=182, bb='Shawn')
Quux(aa=124, bb='Elaine')
Quux(aa=149, bb='Cathy')
Quux(aa=158, bb='Daniel')
Quux(aa=100, bb='Olivia')
Quux(aa=160, bb='Christina')

Generated sequence:

Quux(aa=102, bb='North Lori')
Quux(aa=164, bb='South Elainestad')
Quux(aa=118, bb='Port Christinafort')
Quux(aa=185, bb='North Gloriastad')
Quux(aa=182, bb='Port William')
Quux(aa=124, bb='East Mary')
Quux(aa=149, bb='New Matthew')
Quux(aa=158, bb='Tylerton')
Quux(aa=100, bb='Benjaminview')
Quux(aa=160, bb='South Lisa')

Custom generator containing derived generators


In [12]:
# Dummy records used as selection candidates in the examples that follow.
# In the printed output each record shows up as a pair such as ['AA', 'aa'],
# whose parts are accessed through the attributes `x` and `y`.
some_tuples = make_dummy_tuples('abcdefghijklmnopqrstuvwxyz')

Example: extracting attributes


In [13]:
class QuuxGenerator(CustomGenerator):
    """Custom generator whose ``bb`` and ``cc`` fields are derived from ``aa``."""
    aa = SelectOne(some_tuples)  # one element of `some_tuples` per generated item
    bb = GetAttribute(aa, 'x')  # attribute `x` of that same `aa` value, e.g. 'AA'
    cc = GetAttribute(aa, 'y')  # attribute `y` of that same `aa` value, e.g. 'aa'

In [14]:
g = QuuxGenerator()

In [15]:
print_generated_sequence(g, num=10, sep='\n', seed=12345)


Generated sequence:

Quux(aa=['AA', 'aa'], bb='AA', cc='aa')
Quux(aa=['QQ', 'qq'], bb='QQ', cc='qq')
Quux(aa=['EE', 'ee'], bb='EE', cc='ee')
Quux(aa=['VV', 'vv'], bb='VV', cc='vv')
Quux(aa=['UU', 'uu'], bb='UU', cc='uu')
Quux(aa=['GG', 'gg'], bb='GG', cc='gg')
Quux(aa=['MM', 'mm'], bb='MM', cc='mm')
Quux(aa=['OO', 'oo'], bb='OO', cc='oo')
Quux(aa=['AA', 'aa'], bb='AA', cc='aa')
Quux(aa=['PP', 'pp'], bb='PP', cc='pp')

Example: arithmetic


In [16]:
def square(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared

In [17]:
def add(x, y):
    """Return the result of ``x + y``."""
    total = x + y
    return total

In [18]:
class QuuxGenerator(CustomGenerator):
    """Custom generator with a field computed arithmetically from two others."""
    aa = Integer(0, 20)
    bb = Integer(0, 20)
    # cc = aa + bb * bb, e.g. aa=16 and bb=6 give cc=52.
    cc = Apply(add, aa, Apply(square, bb))

In [19]:
g = QuuxGenerator()

In [20]:
print_generated_sequence(g, num=10, sep='\n', seed=12345)


Generated sequence:

Quux(aa=0, bb=13, cc=169)
Quux(aa=16, bb=6, cc=52)
Quux(aa=4, bb=4, cc=20)
Quux(aa=20, bb=17, cc=309)
Quux(aa=6, bb=4, cc=22)
Quux(aa=12, bb=14, cc=208)
Quux(aa=14, bb=14, cc=210)
Quux(aa=0, bb=4, cc=16)
Quux(aa=15, bb=19, cc=376)
Quux(aa=15, bb=20, cc=415)

In [21]:
# Collect 100 generated items in a DataFrame and show the first 20 values of each column.
df = g.generate(num=100, seed=12345).to_df()
for column in ('aa', 'bb', 'cc'):
    print(list(df[column][:20]))


[0, 16, 4, 20, 6, 12, 14, 0, 15, 15, 19, 11, 2, 5, 10, 11, 1, 13, 20, 20]
[13, 6, 4, 17, 4, 14, 14, 4, 19, 20, 3, 18, 19, 16, 14, 5, 1, 17, 7, 18]
[169, 52, 20, 309, 22, 208, 210, 16, 376, 415, 28, 335, 363, 261, 206, 36, 2, 302, 69, 344]

In [22]:
# Sanity check over all generated rows: cc must equal aa plus bb squared.
all(df['aa'] + df['bb']**2 == df['cc'])


Out[22]:
True

Example: multi-stage dependencies


In [23]:
class QuuxGenerator(CustomGenerator):
    """First-stage generator: each item pairs a fake person name with a tag."""
    name = FakerGenerator(method="name")
    tag = SelectOne(['a', 'bb', 'ccc'])  # one of three tags; the same strings are the keys of `tag_lookup`

In [24]:
g = QuuxGenerator()

In [25]:
quux_items = g.generate(num=100, seed=12345)

In [26]:
quux_items.to_df().head(5)


Out[26]:
name tag
0 Jonathan Hernandez bb
1 Cheryl Stevens a
2 Manuel Mclean a
3 Paul Malone ccc
4 Jason Dickson a

In [27]:
# Candidate numbers for each tag: units for 'a', tens for 'bb', hundreds for 'ccc'.
tag_lookup = dict(
    a=[1, 2, 3, 4, 5],
    bb=[10, 20, 30, 40, 50],
    ccc=[100, 200, 300, 400, 500],
)

In [28]:
class FoobarGenerator(CustomGenerator):
    """Second-stage generator built on the previously generated ``quux_items``."""
    some_quux = SelectOne(quux_items)  # one of the pre-generated Quux items
    # Multi-stage dependency: take the `tag` of the selected item, look up the
    # matching list in `tag_lookup`, then pick one number from that list.
    number = SelectOneDerived(Lookup(GetAttribute(some_quux, 'tag'), tag_lookup))

In [29]:
h = FoobarGenerator()

In [30]:
# Generate a large sample so that every tag occurs many times.
h_items = h.generate(num=10000, seed=12345)

In [31]:
# Flatten the items into columns; the dotted paths reach into the nested
# `some_quux` item of each generated element.
df = h_items.to_df(
    fields={
        'name': 'some_quux.name',
        'tag': 'some_quux.tag',
        'number': 'number',
    },
)
df.head()


Out[31]:
name tag number
0 Manuel Mclean a 4
1 Nancy Davis ccc 100
2 Sara Cook a 3
3 Jason Christensen ccc 100
4 Lisa Fernandez a 1

In [32]:
# For every tag, confirm that each generated number comes from that tag's
# list in `tag_lookup`. Iterating over the lookup table itself, instead of
# repeating its values here, keeps this check in sync with the data that the
# generator actually used. One boolean is printed per tag, in the order in
# which the tags appear in `tag_lookup`.
for tag_name, allowed_numbers in tag_lookup.items():
    numbers_for_tag = df.loc[df['tag'] == tag_name, 'number']
    print(numbers_for_tag.isin(allowed_numbers).all())


True
True
True

In [33]:
df.query('tag == "a"').head(5)


Out[33]:
name tag number
0 Manuel Mclean a 4
2 Sara Cook a 3
4 Lisa Fernandez a 1
10 Tina Silva a 5
12 Jennifer Brooks a 3

In [34]:
df.query('tag == "bb"').head(5)


Out[34]:
name tag number
5 Brian Murray bb 20
7 Jennifer Williams bb 40
8 Jonathan Hernandez bb 10
9 Pamela Williamson bb 30
11 Christian Lopez bb 50

In [35]:
df.query('tag == "ccc"').head(5)


Out[35]:
name tag number
1 Nancy Davis ccc 100
3 Jason Christensen ccc 100
6 Devon Zimmerman ccc 400
13 Kristopher Robinson ccc 300
14 Gregory Simon ccc 100

In [ ]: