In [1]:
import numpy as np

In [3]:
import explogger
# import experiment

In [4]:
from explogger import ExperimentLog
from explogger import pseudo, extract

In [5]:
import datetime
datetime_format = "%Y-%m-%d_%H:%M:%S"
def datetime_now():
    return datetime.datetime.now().strftime(datetime_format)

The purpose of this notebook is to produce the data and run the analysis that is described in http://ademos.people.uic.edu/Chapter21.html.

experiment

We conduct a within-subject experiment with N factors, each of which have M levels.


In [6]:
factors = [('F0', ['a', 'b']), ('F1', ['x', 'y'])]
levels = [np.array(['a', 'b']), np.array(['x', 'y'])]

In [7]:
e = ExperimentLog(":memory:", ntp_sync=False)

In [8]:
if e.meta.stage == 'init':
    # needed?
    e.create('SESSION', 'data', description='')
    e.create('USER', 'user', description='user session')

    for factor in factors:
        e.create("SESSION", factor[0], stype='factor')
        for level in factor[1]:
            e.create("SESSION", level, stype='level')

    e.meta.stage='setup'

design

We will save design of the experiment in file or in the log, which will be traversed in order to conduct the experiment. This step can be assisted with other packages, in order for example to have a Latin square design.

2 cases: we have a fixed number of users, or we will add some as we go.


In [9]:
import hashlib

def seed_rng_with_username(username):
    m = hashlib.md5(username)
    seed = int(m.hexdigest()[:8], 16)
    np.random.seed(seed)
    
def shuffle_(data):
    t = data[:]
    np.random.shuffle(t)
    np.random.shuffle(t.T)
    return t

def cartesian_product(*arrays):
    la = len(arrays)
    dtype = np.find_common_type([a.dtype for a in arrays], [])
    arr = np.empty([len(a) for a in arrays] + [la], dtype=dtype)
    for i, a in enumerate(np.ix_(*arrays)):
        arr[...,i] = a
    return arr.reshape(-1, la)

In [10]:
n_users = 3

design = []

for user in range(n_users):
    username = pseudo.get_pseudo()

    cp = cartesian_product(*levels)
    shuffle_(cp)
    cdt = cp[:,0] + cp[:,1]
    design.append((username, cdt.tolist()))

In [11]:
# save the users
for r in design:
    username = r[0]
#     meta = extract.meta_dataframe(e.cursor)
#     is_new_user = not meta['USER']['name'].isin([username]).any()
#     if is_new_user: e.create("USER", name=username)
    e.create("USER", name=username)

run


In [12]:
meta = extract.meta_dataframe(e.cursor)

In [13]:
sessions = extract.dump_sessions_dataframe(e.cursor)
meta = extract.meta_dataframe(e.cursor)

In [14]:
sessions


Out[14]:
complete description end_time json last_time log_count name parent path random_seed start_time subcount test_run valid
id
1 None None None None None 0 [ROOT] None / None 1.505923e+09 0 None None

In [15]:
meta['USER']


Out[15]:
bound data description name type
0 [] None user session user
1 [] None PIHEP-EVUCI
2 [] None DOCUF-USOFA
3 [] None PACUS-IDIVU

In [16]:
meta['SESSION']


Out[16]:
bound data description name type
0 [] None data
1 [] None F0 factor
2 [] None a level
3 [] None b level
4 [] None F1 factor
5 [] None x level
6 [] None y level

In [17]:
# run the full experiment
for run in design:
    
    # select the user
    user = run[0]

    # get a unique session
    session_now = user + '_' + datetime_now()
    e.create('SESSION', session_now, data={'user':user}, stype='exprun')
    e.enter("data", session = session_now)
    # bind after the first enter
    e.bind('USER', user)


    # iterate through the treatments
    for t in run[1]:
        f0, f1 = t
        e.enter(f0, session = f0)
        e.enter(f1, session = f1)
        
        for i in range(3):
            e.enter()
            data = np.random.random_sample(1)[0]

            # tweak the data to show effect and interaction
            e.log("data", data={'d':data, 'F0':f0, 'F1':f1, 'level':t, 'user': user})
            e.leave()

        e.leave()
        e.leave()
    e.leave()


09-20 16:54 [WARNI]  No stream data registered; creating a new blank entry

In [22]:
flatdf = extract.dump_flat_dataframe(e.cursor)
flatdf['data'].head(10)


Out[22]:
d F0 F1 level user t valid session_valid path session
0 0.639324 x a xa PIHEP-EVUCI 1.505923e+09 1 1 /data/x/a/0/ 5
1 0.467095 x a xa PIHEP-EVUCI 1.505923e+09 1 1 /data/x/a/1/ 6
2 0.195950 x a xa PIHEP-EVUCI 1.505923e+09 1 1 /data/x/a/2/ 7
3 0.395401 y b yb PIHEP-EVUCI 1.505923e+09 1 1 /data/y/b/0/ 10
4 0.479439 y b yb PIHEP-EVUCI 1.505923e+09 1 1 /data/y/b/1/ 11
5 0.570896 y b yb PIHEP-EVUCI 1.505923e+09 1 1 /data/y/b/2/ 12
6 0.912641 y a ya PIHEP-EVUCI 1.505923e+09 1 1 /data/y/a/0/ 15
7 0.180232 y a ya PIHEP-EVUCI 1.505923e+09 1 1 /data/y/a/1/ 16
8 0.773161 y a ya PIHEP-EVUCI 1.505923e+09 1 1 /data/y/a/2/ 17
9 0.070539 x b xb PIHEP-EVUCI 1.505923e+09 1 1 /data/x/b/0/ 20

In [19]:
sessions = extract.dump_sessions_dataframe(e.cursor)
sessions.head(10)


Out[19]:
complete description end_time json last_time log_count name parent path random_seed start_time subcount test_run valid
id
1 NaN None NaN None NaN 0 [ROOT] NaN / NaN 1.505923e+09 0 NaN NaN
2 1.0 1.505923e+09 None NaN 0 data 1.0 /data/ 1.505923e+12 1.505923e+09 0 0.0 1.0
3 1.0 1.505923e+09 None NaN 0 x 2.0 /data/x/ 3.011846e+12 1.505923e+09 0 0.0 1.0
4 1.0 1.505923e+09 None NaN 0 a 3.0 /data/x/a/ 4.517769e+12 1.505923e+09 3 0.0 1.0
5 1.0 1.505923e+09 None 1.505923e+09 1 0 4.0 /data/x/a/0/ 6.023691e+12 1.505923e+09 0 0.0 1.0
6 1.0 1.505923e+09 None 1.505923e+09 1 1 4.0 /data/x/a/1/ 6.023691e+12 1.505923e+09 0 0.0 1.0
7 1.0 1.505923e+09 None 1.505923e+09 1 2 4.0 /data/x/a/2/ 6.023691e+12 1.505923e+09 0 0.0 1.0
8 1.0 1.505923e+09 None NaN 0 y 2.0 /data/y/ 3.011846e+12 1.505923e+09 0 0.0 1.0
9 1.0 1.505923e+09 None NaN 0 b 8.0 /data/y/b/ 1.204738e+13 1.505923e+09 3 0.0 1.0
10 1.0 1.505923e+09 None 1.505923e+09 1 0 9.0 /data/y/b/0/ 1.355331e+13 1.505923e+09 0 0.0 1.0

In [20]:
meta = extract.meta_dataframe(e.cursor)
meta['SESSION']


Out[20]:
bound data description name type
0 [] None data
1 [] None F0 factor
2 [4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 2... None a level
3 [9, 10, 11, 12, 19, 20, 21, 22, 34, 35, 36, 37... None b level
4 [] None F1 factor
5 [3, 4, 5, 6, 7, 18, 19, 20, 21, 22, 30, 31, 32... None x level
6 [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 25, 26,... None y level
7 [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1... {'user': 'PIHEP-EVUCI'} PIHEP-EVUCI_2017-09-20_16:54:29 exprun
8 [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 3... {'user': 'DOCUF-USOFA'} DOCUF-USOFA_2017-09-20_16:54:29 exprun
9 [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 5... {'user': 'PACUS-IDIVU'} PACUS-IDIVU_2017-09-20_16:54:29 exprun

analysis


In [ ]: