In [1]:
import mimesis
import pandas as pd
import random
from secrets import randbelow # requires python 3.6.3
from sqlalchemy import create_engine, select
from sqlalchemy import Column, Date, Integer, MetaData, Numeric, Table, Text, Time, String
from sqlalchemy_utils import UUIDType
import uuid

NUM_CUSTOMERS = 10
NUM_PURCHASES = 15 # Should be >= NUM_CUSTOMERS
NUM_BOOKS = 5

In [2]:
add = mimesis.Address('en')
bus = mimesis.Business('en')
cod = mimesis.Code('en')
cry = mimesis.Cryptographic('en')
dat = mimesis.Datetime('en')
num = mimesis.Numbers('en')
per = mimesis.Personal('en')

engine = create_engine("postgresql://clinton:reece@localhost:5432/transactions")
meta = MetaData()

Build Schemas


In [3]:
customers_table = Table('customers',
                        meta,
                        Column('customer_id', UUIDType(binary=False), primary_key=True),
                        Column('name', Text, nullable=False),
                        Column('email', Text, nullable=False),
                        Column('phone', Text, nullable=True),
                        Column('street', Text, nullable=False),
                        Column('city', Text, nullable=False),
                        Column('state', String(length=2), nullable=False))

In [4]:
books_table = Table('books',
                    meta,
                    Column('isbn', Text, nullable=False, primary_key=True),
                    Column('price', Numeric, nullable=False))

In [5]:
purchases_table = Table('purchases',
                        meta,
                        Column('receipt_id', UUIDType(binary=False), index=True),
                        Column('customer_id', UUIDType(binary=False)),
                        Column('date', Date, nullable=True),
                        Column('time', Time, nullable=True),
                        Column('isbn', Text, nullable=False),
                        Column('quantity', Integer, nullable=False))

Generate and Insert Data

Customers


In [6]:
with engine.connect() as conn:
    # create tables if they don't exist
    meta.create_all(engine, checkfirst=True)

    #generate customers table
    for _ in range(0, NUM_CUSTOMERS):
        insert_data = customers_table.insert() \
                                     .values(customer_id = cry.uuid(),
                                             name = per.full_name(),
                                             email = per.email(),
                                             phone = per.telephone(),
                                             street = add.address(),
                                             city = add.city(),
                                             state = add.state(abbr=True))
        conn.execute(insert_data)
    
    # create dataframe
    customers_df = pd.read_sql_table(table_name='customers', con=conn)
    
customers_df


Out[6]:
customer_id name email phone street city state
0 ceb388a9-f124-f73d-613f-a6a92a169a8c Laraine Rios aluta1813@live.com 188.013.8219 292 Margaret Trace Elizabeth City NH
1 0da2d776-273d-c232-b480-64c577e9cdae Zachery Rivera greengage2048@gmail.com (373) 957-4596 339 San Jacinto Bypass Jefferson City TN
2 8e102442-55eb-1d07-68b3-e2ffd5d4af4d Karlyn Anthony unspecific2001@live.com 373-957-4596 931 Stanton Viaduct Southgate MT
3 d0103c8d-7d1a-ce85-16d8-8358d9017e03 Scott Floyd bizen1939@gmail.com +1-(373)-957-4596 997 Wallen Drung Muskego IA
4 fd7759e2-b3bd-61e3-bf64-53e5ca2758af Rigoberto Frazier couplet1870@yahoo.com +1-(373)-957-4596 63 Cleo Rand Alley Elmira MO
5 ed61fb22-b173-49d8-4bfb-9215e3b4f01d Dannielle Espinoza detlev1961@yahoo.com 1-373-957-4596 113 Decatur Garden Charlottesville AR
6 8173095d-d036-4fd4-675f-e85af652bfd5 Jolanda Mccullough pediatric1847@live.com +1-(373)-957-4596 979 Sfgh Access Freeway Sedalia MS
7 6a7f5c71-ec97-ae7f-6cf2-3788b879c8f6 Abel Shields mastoid1842@yandex.com 1-373-957-4596 699 Jessie East High Street Malden MI
8 d6116d06-3862-690a-785b-dd6eb86b88b4 Genaro Beach broddy1899@gmail.com 188-013-8219 84 Clementina High Street Vallejo WV
9 c594ba42-8015-9c12-987a-3cf5f4a2cbc2 Dylan Branch binman1997@gmail.com 188.013.8219 1186 Mary Hill Davis MI

In [7]:
# select customer IDs
with engine.connect() as conn:
    customers_select = conn.execute(select([customers_table]))
    customers_list = []
    for c in customers_select:
        customers_list.append(c[0])
        
customers_list


Out[7]:
[UUID('ceb388a9-f124-f73d-613f-a6a92a169a8c'),
 UUID('0da2d776-273d-c232-b480-64c577e9cdae'),
 UUID('8e102442-55eb-1d07-68b3-e2ffd5d4af4d'),
 UUID('d0103c8d-7d1a-ce85-16d8-8358d9017e03'),
 UUID('fd7759e2-b3bd-61e3-bf64-53e5ca2758af'),
 UUID('ed61fb22-b173-49d8-4bfb-9215e3b4f01d'),
 UUID('8173095d-d036-4fd4-675f-e85af652bfd5'),
 UUID('6a7f5c71-ec97-ae7f-6cf2-3788b879c8f6'),
 UUID('d6116d06-3862-690a-785b-dd6eb86b88b4'),
 UUID('c594ba42-8015-9c12-987a-3cf5f4a2cbc2')]

Books


In [8]:
with engine.connect() as conn:
    # create tables if they don't exist
    meta.create_all(engine, checkfirst=True)

    #generate customers table
    for _ in range(0, NUM_BOOKS):
        insert_data = books_table.insert() \
                                     .values(isbn = cod.isbn(),
                                             price = bus.price(minimum=5.00, maximum=100.00).rstrip(' $'))
        conn.execute(insert_data)
    
    # create dataframe
    books_df = pd.read_sql_table(table_name='books', con=conn)
    
books_df


Out[8]:
isbn price
0 1-23378-527-4 40.79
1 1-09229-682-4 68.95
2 1-32060-596-1 10.00
3 1-38292-122-5 14.62
4 1-21650-822-6 53.50

In [9]:
# select ISBNs
with engine.connect() as conn:
    books_select = conn.execute(select([books_table]))
    books_list = []
    for b in books_select:
        books_list.append(b[0])
        
books_list


Out[9]:
['1-23378-527-4',
 '1-09229-682-4',
 '1-32060-596-1',
 '1-38292-122-5',
 '1-21650-822-6']

Purchases


In [10]:
# generate receipt IDs
receipt_id = []
for _ in range(0, NUM_PURCHASES):
    receipt_id.append(cry.uuid())

receipt_id


Out[10]:
['0cdf1166-5fa6-0246-33b7-d1149d87a50a',
 '592c2e53-e506-c5a9-eb3e-be8cd9f687f2',
 '71187b0b-c4d2-70ee-0852-51dfc64d7065',
 'e77d7f7e-2fb8-b56f-d4a9-ed97b550cf5f',
 '7d493d61-72f6-8042-7e50-f32c7fc04b70',
 'e31e262e-4d88-da66-523f-c0844e504418',
 '08f76f74-cb0e-4e57-6ce2-e2ac7cf34ced',
 '79523cbb-be6f-caba-0fe5-f8d62cbb43d7',
 'f409efc0-3008-dde5-b68d-fc19d92e0362',
 '87720ce2-5bd3-49c4-d597-87525ca87aad',
 'd188efe2-7781-ab5b-b169-f593afbba950',
 '41fac398-0838-94b2-fc05-aa31e575163c',
 'f5d7222a-2ba2-e38c-d699-91d5268f9d98',
 '4b1040d5-f2c5-f462-19a7-a9e038370c75',
 '867873cb-9083-2eab-b88c-85c720fec36a']

In [11]:
with engine.connect() as conn:
    # create tables if they don't exist
    meta.create_all(engine, checkfirst=True)

    # generate purchases table
    for _ in range(0, NUM_PURCHASES):
        # select random IDs with replacement
        insert_data = purchases_table.insert() \
                                     .values(receipt_id = receipt_id[randbelow(NUM_PURCHASES)],
                                             customer_id = customers_list[randbelow(NUM_CUSTOMERS)],
                                             date = dat.date(start=2010, end=2017),
                                             time = dat.time(),
                                             isbn = list(random.sample(books_list, 1))[0],
                                             quantity = num.between(1, 5))
        conn.execute(insert_data)
    
    # create dataframe
    purchases_df = pd.read_sql_table(table_name='purchases', con=conn)
        
purchases_df


Out[11]:
receipt_id customer_id date time isbn quantity
0 0cdf1166-5fa6-0246-33b7-d1149d87a50a fd7759e2-b3bd-61e3-bf64-53e5ca2758af 2013-05-25 18:54:47 1-21650-822-6 4
1 4b1040d5-f2c5-f462-19a7-a9e038370c75 8173095d-d036-4fd4-675f-e85af652bfd5 2011-08-06 10:07:29 1-32060-596-1 1
2 0cdf1166-5fa6-0246-33b7-d1149d87a50a d6116d06-3862-690a-785b-dd6eb86b88b4 2013-01-07 10:17:32 1-32060-596-1 4
3 7d493d61-72f6-8042-7e50-f32c7fc04b70 6a7f5c71-ec97-ae7f-6cf2-3788b879c8f6 2013-08-09 03:49:45 1-38292-122-5 4
4 7d493d61-72f6-8042-7e50-f32c7fc04b70 6a7f5c71-ec97-ae7f-6cf2-3788b879c8f6 2014-11-11 15:40:33 1-32060-596-1 1
5 79523cbb-be6f-caba-0fe5-f8d62cbb43d7 d0103c8d-7d1a-ce85-16d8-8358d9017e03 2016-02-18 14:20:05 1-21650-822-6 1
6 592c2e53-e506-c5a9-eb3e-be8cd9f687f2 8e102442-55eb-1d07-68b3-e2ffd5d4af4d 2014-12-01 08:26:02 1-09229-682-4 2
7 e77d7f7e-2fb8-b56f-d4a9-ed97b550cf5f 8173095d-d036-4fd4-675f-e85af652bfd5 2014-03-29 04:25:19 1-21650-822-6 4
8 867873cb-9083-2eab-b88c-85c720fec36a 8173095d-d036-4fd4-675f-e85af652bfd5 2017-02-09 21:46:09 1-23378-527-4 1
9 f409efc0-3008-dde5-b68d-fc19d92e0362 d0103c8d-7d1a-ce85-16d8-8358d9017e03 2010-08-18 18:00:14 1-09229-682-4 4
10 e77d7f7e-2fb8-b56f-d4a9-ed97b550cf5f 8e102442-55eb-1d07-68b3-e2ffd5d4af4d 2014-11-29 15:41:13 1-21650-822-6 5
11 0cdf1166-5fa6-0246-33b7-d1149d87a50a d6116d06-3862-690a-785b-dd6eb86b88b4 2013-06-18 05:47:29 1-21650-822-6 3
12 71187b0b-c4d2-70ee-0852-51dfc64d7065 ed61fb22-b173-49d8-4bfb-9215e3b4f01d 2011-08-08 09:55:53 1-09229-682-4 2
13 41fac398-0838-94b2-fc05-aa31e575163c d6116d06-3862-690a-785b-dd6eb86b88b4 2017-05-29 20:06:39 1-38292-122-5 4
14 71187b0b-c4d2-70ee-0852-51dfc64d7065 0da2d776-273d-c232-b480-64c577e9cdae 2011-03-12 21:54:08 1-21650-822-6 1