Hierarchical part with JAGS


In [1]:
#you have to make sure that you have all of these installed
import cProfile
import re
import math
import numpy as np
import scipy as sp
from scipy import stats
from scipy import optimize as opt
import pandas as pd
import random as rnd
from matplotlib import pyplot as plt
import time
import numpy.random
import warnings
warnings.filterwarnings('ignore')
import multiprocessing as mp

In [2]:
import chen_utils as ch
import user_simulation_utils as sim

In [3]:
### Generation of people and their characteristics
np.random.seed(123)

# Cohort size and log-scale noise; every feature vector below is sized from
# People so the simulated cohort scales by editing this single constant.
# (People must be divisible by 10 for the public_transport pattern.)
Std = 0.02
People = 50

# beta predictors
# assume beta (rate parameter) is determined by avg. transaction size,
# avg. income, whether public transport was used, and loyalty programme membership
mean_transaction = np.random.normal(np.log(15), Std, People)
mean_transaction = np.exp(mean_transaction)  # log-normal around 15

mean_income = np.random.normal(1800, 100, People)

# alternating runs of 5 non-users / 5 users, tiled to cover all People
public_transport = np.array(([0] * 5 + [1] * 5) * (People // 10))

# p predictors
# assume p (SOW) is driven by loyalty card membership and whether a discounter is present
# membership in loyalty programme (was hard-coded 50 — now derived from People)
loyalty = np.random.binomial(1, 0.6, People)

# every second person has a discounter nearby (was hard-coded 25 repetitions)
discounter = np.array([0, 1] * (People // 2))

preference = np.array(stats.uniform.rvs(0, 1, size=People, random_state=123))

# linear predictor for the rate parameter, plus small Gaussian noise
theta_beta = (1 - 0.2 * mean_transaction + 0.0005 * mean_income
              + 0.5 * public_transport + 0.6 * loyalty
              + np.random.normal(0.01, 0.01, People))
beta = np.exp(theta_beta)

# linear predictor for the observed share, mapped into (0, 1) via expit
theta_p = (-0.2 + 0.4 * loyalty - 0.8 * discounter + preference
           + np.random.normal(0.01, 0.01, People))
p = sim.expit(theta_p)

In [4]:
# Collect the simulated features and the true generating parameters per person.
covariate_columns = {
    'mean_transaction': mean_transaction,
    'mean_income': mean_income,
    'public_transport': public_transport,
    'loyalty': loyalty,
    'discounter': discounter,
    'preference': preference,
    'true_beta': beta,
    'true_p': p,
}
covariates = pd.DataFrame(covariate_columns)
covariates


Out[4]:
discounter loyalty mean_income mean_transaction preference public_transport true_beta true_p
0 0 1 1670.591468 14.677821 0.696469 0 0.620273 0.712686
1 1 1 1696.121179 15.302208 0.286139 0 0.549331 0.424306
2 0 1 1974.371223 15.085134 0.226851 0 0.660819 0.607978
3 1 1 1720.193726 14.554851 0.551315 0 0.642111 0.482259
4 0 1 1802.968323 14.827420 0.719469 0 0.637987 0.716421
5 1 0 1906.931597 15.503704 0.423106 1 0.536961 0.361698
6 0 0 1889.070639 14.289380 0.980764 1 0.666485 0.687262
7 1 1 1975.488618 14.871877 0.684830 1 1.132924 0.523146
8 0 1 1949.564414 15.384629 0.480932 1 1.010449 0.667736
9 1 1 1906.939267 14.742219 0.392118 1 1.125090 0.449210
10 0 0 1722.729129 14.797711 0.343178 0 0.332108 0.543685
11 1 0 1879.486267 14.971614 0.729050 0 0.345298 0.436823
12 0 1 1831.427199 15.454156 0.438572 0 0.562528 0.656674
13 1 1 1667.373454 14.809549 0.059678 0 0.596579 0.369960
14 0 0 1941.729905 14.867395 0.398044 0 0.372665 0.551610
15 1 1 1880.723653 14.870259 0.737995 1 1.073585 0.534655
16 0 0 1804.549008 15.676594 0.182492 1 0.492002 0.497885
17 1 1 1776.690794 15.670593 0.175452 1 0.871791 0.398490
18 0 0 1680.169886 15.304261 0.531551 1 0.491367 0.585974
19 1 0 1819.952407 15.116304 0.531828 1 0.545952 0.388796
20 0 0 1846.843912 15.222850 0.634401 0 0.329622 0.608653
21 1 1 1716.884502 15.453953 0.849432 0 0.540477 0.561012
22 0 1 1916.220405 14.721861 0.724455 0 0.691007 0.716621
23 1 1 1690.279695 15.356929 0.611024 0 0.535138 0.509285
24 0 1 1587.689965 14.628513 0.722443 0 0.602528 0.719383
25 1 1 1903.972709 14.809890 0.322959 1 1.093144 0.434542
26 0 1 1759.663396 15.274615 0.361789 1 0.937725 0.637418
27 1 1 1787.397041 14.577461 0.228263 1 1.089220 0.410548
28 0 0 1716.248328 14.958038 0.293714 1 0.530480 0.522774
29 1 1 1639.403724 14.743689 0.630976 1 0.980407 0.508863
30 0 1 1925.523737 14.923510 0.092105 0 0.657475 0.574358
31 1 0 1731.113102 14.183487 0.433701 0 0.382735 0.363567
32 0 0 1966.095249 14.477845 0.430863 0 0.407184 0.562282
33 1 1 1880.730819 14.791499 0.493685 0 0.674884 0.472403
34 0 0 1768.524185 15.280835 0.425830 0 0.313853 0.556551
35 1 0 1691.409760 14.948000 0.312261 1 0.527289 0.333703
36 0 1 1726.753801 15.000854 0.426351 1 0.969757 0.651123
37 1 0 1678.747687 15.207894 0.893389 1 0.501172 0.476175
38 0 1 2008.711336 14.738446 0.944160 1 1.182465 0.757341
39 1 1 1816.444123 15.085330 0.501837 1 1.015877 0.479855
40 0 0 1915.020554 14.760325 0.623953 0 0.378855 0.606228
41 1 1 1673.264795 14.490551 0.115618 0 0.634405 0.383742
42 0 0 1818.103513 14.883187 0.317285 0 0.345393 0.532582
43 1 0 1917.786194 15.173133 0.414826 0 0.335778 0.359044
44 0 0 1766.498924 15.101921 0.866309 0 0.322187 0.665218
45 1 1 1903.111446 14.996451 0.250455 1 1.053887 0.415448
46 0 1 1691.543209 15.735157 0.483034 1 0.822993 0.667960
47 1 0 1663.652846 15.124387 0.985560 1 0.507055 0.498392
48 0 1 1837.940061 15.296513 0.519485 1 0.971843 0.675330
49 1 1 1762.082357 15.686698 0.612895 1 0.863641 0.506422

In [16]:
# Persist the simulated covariates (with true parameters) for downstream JAGS fitting.
covariates.to_csv('covariates.csv', sep=',', encoding='utf-8',index=False)

In [5]:
# Visual sanity check: distribution of the true rate (beta) and share (p) parameters.
for parameter_sample in (beta, p):
    plt.hist(parameter_sample)
    plt.show()



In [6]:
# Simulate 20 transactions per person from their true (p, beta) parameters.
observations = [
    sim.simulate_data(share_observed=share, rate_input=rate, observations_total=20)
    for share, rate in zip(p, beta)
]

In [7]:
# Peek at the first three simulated users.
observations[:3]


Out[7]:
[[array([  4.36805206,   3.70012025,   7.74081173,   4.11348882,
           2.93417865,   2.67425042,   4.8110449 ,   1.64625645,
           4.23982854,   3.3239371 ,   1.63387758,   0.76906907,
           1.52036594,   6.95458129,   0.83353605,   0.89894822,
           0.60399416,   1.83686863,   0.50555509,  11.5760237 ]), 0],
 [array([  5.3673338 ,   8.82421469,   1.73359213,  11.53221115,
          11.65080895,  10.28599646,   4.20299802,   1.48951677,
          10.02067792,   6.92168086,  39.99050057,  20.1515926 ,
           4.90796792,   2.16179439,  14.17140275,   5.41132932,
           3.5467406 ,   6.54431988,   3.36264   ,   2.04154673]),
  3.9612540327046806],
 [array([  4.40121998,   1.0214549 ,   1.90461193,   5.78793025,
           1.39369296,   2.33420136,   2.12500885,   1.99441669,
           3.03251756,   2.20718302,   3.02953091,  15.94255655,
           2.59299191,  10.26842046,   4.22992941,   2.33406201,
           8.9084555 ,   0.94104006,  13.85426445,   0.48376873]),
  5.5694139652427248]]

In [8]:
npinput = np.asarray(observations)

# delete all last purchase bigger than 30
# check mapping and potentially keep user_id --> check
print("2 parameter model")
# Run the Metropolis sampler for each user in parallel.
# The context manager guarantees the pool is closed and its worker
# processes are joined even if map() raises (the original leaked the pool).
t0 = time.time()
with mp.Pool(processes=None) as pool:  # processes=None -> use all available cores
    results_2par = pool.map(ch.metropolis_solution_trajectory, npinput)
t1 = time.time()
print(t1-t0)


2 parameter model
182.2447919845581

In [12]:
# Inspect the sampled trajectories of the first two users.
results_2par[:2]


Out[12]:
[array([[ 0.4829912 ,  1.52604078],
        [ 0.41026325,  1.53184497],
        [ 0.41026325,  1.53184497],
        ..., 
        [ 0.30234187,  1.55747671],
        [ 0.30234187,  1.55747671],
        [ 0.30234187,  1.55747671]]), array([[ 0.66971353,  0.32633532],
        [ 0.46073976,  0.40642289],
        [ 0.46073976,  0.40642289],
        ..., 
        [ 0.2697629 ,  1.03362594],
        [ 0.2697629 ,  1.03362594],
        [ 0.25167985,  1.23818131]])]

In [13]:
# Flatten the per-user MCMC trajectories into one (total_samples, 2) array,
# with a parallel column of 1-based user ids.
#
# The original loop sized the output with len(results_2par[0]) but advanced
# the write offset with len(results_2par[i]) — silently wrong whenever chains
# have unequal lengths. vstack/repeat handle ragged chains correctly.
chain_lengths = [len(chain) for chain in results_2par]

final_results = np.vstack(results_2par)
final_ids = np.repeat(
    np.arange(1, len(results_2par) + 1, dtype=float), chain_lengths
).reshape(-1, 1)

In [14]:
# Display the stacked (samples x 2) parameter trajectory matrix.
final_results


Out[14]:
array([[ 0.4829912 ,  1.52604078],
       [ 0.41026325,  1.53184497],
       [ 0.41026325,  1.53184497],
       ..., 
       [ 0.32944808,  1.38816398],
       [ 0.32944808,  1.38816398],
       [ 0.32944808,  1.38816398]])

In [15]:
# Assemble the flattened trajectories and user ids into one long-format frame.
trajectory_columns = {
    'user_category_ids': final_ids.ravel(),
    '2par_first': final_results[:,0].ravel(),
    '2par_second': final_results[:,1].ravel(),
}
df = pd.DataFrame(trajectory_columns)
df


Out[15]:
2par_first 2par_second user_category_ids
0 0.482991 1.526041 1.0
1 0.410263 1.531845 1.0
2 0.410263 1.531845 1.0
3 0.437632 1.359841 1.0
4 0.437632 1.359841 1.0
5 0.437632 1.359841 1.0
6 0.416020 1.391801 1.0
7 0.624459 1.240210 1.0
8 0.624459 1.240210 1.0
9 0.624459 1.240210 1.0
10 0.624459 1.240210 1.0
11 0.624459 1.240210 1.0
12 0.624459 1.240210 1.0
13 0.624459 1.240210 1.0
14 0.643476 1.140503 1.0
15 0.541417 0.914670 1.0
16 0.594454 1.061232 1.0
17 0.661763 1.140822 1.0
18 0.620130 0.899562 1.0
19 0.620130 0.899562 1.0
20 0.620130 0.899562 1.0
21 0.620130 0.899562 1.0
22 0.620130 0.899562 1.0
23 0.540646 1.052261 1.0
24 0.695248 1.147009 1.0
25 0.695248 1.147009 1.0
26 0.458088 0.966280 1.0
27 0.458088 0.966280 1.0
28 0.458088 0.966280 1.0
29 0.372139 1.141575 1.0
... ... ... ...
249970 0.324064 1.153852 50.0
249971 0.324064 1.153852 50.0
249972 0.324064 1.153852 50.0
249973 0.324064 1.153852 50.0
249974 0.293474 1.277970 50.0
249975 0.293474 1.277970 50.0
249976 0.278748 1.373184 50.0
249977 0.369274 1.309650 50.0
249978 0.369274 1.309650 50.0
249979 0.369274 1.309650 50.0
249980 0.369274 1.309650 50.0
249981 0.369274 1.309650 50.0
249982 0.387279 1.366329 50.0
249983 0.387279 1.366329 50.0
249984 0.387279 1.366329 50.0
249985 0.232915 1.226898 50.0
249986 0.232915 1.226898 50.0
249987 0.244697 1.262230 50.0
249988 0.244697 1.262230 50.0
249989 0.244697 1.262230 50.0
249990 0.271695 1.290524 50.0
249991 0.330447 1.376261 50.0
249992 0.330066 1.384156 50.0
249993 0.328166 1.480841 50.0
249994 0.328166 1.480841 50.0
249995 0.246766 1.369154 50.0
249996 0.366560 1.547855 50.0
249997 0.329448 1.388164 50.0
249998 0.329448 1.388164 50.0
249999 0.329448 1.388164 50.0

250000 rows × 3 columns


In [24]:
# Persist the full Metropolis trajectory table for downstream analysis/plotting.
df.to_csv('metropolis_trajectory.csv', sep=',', encoding='utf-8',index=False)