Reinforcement Learning Models of Social Group Preferences

Bandit Experiments 1-7


In [3]:
# imports / display plots in cell output
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as ss
import pandas as pd
import seaborn as sns
import statsmodels

Experiment 1: between-subjects design

-participants randomly assigned to receive only 'gems'-type or only 'bomb'-type rewards


In [4]:
# Bayesian Model Selection (bor = .6240)
# Model 1: inverse temperature, stickiness, learning rate
# Model 2: inverse temperature, stickiness, positive learning rate, negative learning rate
models = ('Model 1', 'Model 2')
y_pos = np.arange(len(models))
pxp = [0.6880, 0.3120]
plt.bar(y_pos, pxp, align='center', alpha=0.5)
plt.xticks(y_pos, models)
plt.ylabel('protected exceedance probability')
plt.title('Bayesian model selection')
plt.show()
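For reference, a minimal sketch of the learning and choice rules these labels refer to, assuming a standard Q-learning bandit formulation (the fitted mfit code is the authoritative specification): Model 1 updates door values with a single learning rate, Model 2 uses separate rates for positive and negative prediction errors, and both choose via a softmax with inverse temperature plus a stickiness bonus for the previously chosen door.

def choice_probs(q, it, sticky, prev_choice=None):
    # softmax over door values scaled by inverse temperature ('it'),
    # with a stickiness bonus added to the previously chosen door
    v = it * np.asarray(q, dtype=float)
    if prev_choice is not None:
        v[prev_choice] += sticky
    v -= v.max()                      # numerical stability
    p = np.exp(v)
    return p / p.sum()

def update_q(q, choice, reward, lr, lr_neg=None):
    # prediction-error update; lr_neg=None gives Model 1's single learning
    # rate, otherwise negative prediction errors use their own rate (Model 2)
    pe = reward - q[choice]
    alpha = lr if (lr_neg is None or pe >= 0) else lr_neg
    q[choice] += alpha * pe
    return q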



In [5]:
# import post-mfit b1 (bandit_either) summary data
#b1 = pd.read_csv('/Volumes/crisp/hinl/bandit/gems_vs_bomb/rez/b1_d100_table.csv')
b1 = pd.read_csv('~/Desktop/bandit/gems_vs_bomb/rez/b1_d100_table.csv')
b1 = b1.drop('subID', axis=1)
data = pd.DataFrame(b1)
data.describe()


Out[5]:
payout it lr sticky rt_mean rt_tot
count 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000
mean 105.187500 6.590912 0.579132 0.977115 404.951763 404.951763
std 15.437843 3.392418 0.246023 1.143328 463.279102 463.279102
min 70.000000 0.606764 0.000256 -3.620414 69.797222 69.797222
25% 95.000000 4.151138 0.438162 0.267652 182.443750 182.443750
50% 104.000000 6.099496 0.601137 1.082933 246.231944 246.231944
75% 112.500000 8.295492 0.749011 1.782476 413.850000 413.850000
max 150.000000 18.074366 0.997111 3.789444 3169.880556 3169.880556

In [6]:
# plot differences in payout
with sns.plotting_context('talk', font_scale=1.4):

     sns.set_style("darkgrid")
     ax = sns.pointplot(data=b1, x='condition', y='payout', palette = ['#FFD479','#D783FF'])
     ax.figure.get_axes()[0].set_xticklabels(['bomb','gems'])
     #ax.figure.savefig('b1_pointplot')
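To complement the plot, a hedged sketch of the corresponding group comparison; scipy.stats is already imported as ss, and the 'gems'/'bomb' coding of b1['condition'] is an assumption to be matched to the actual column values.

cond = b1['condition']
t, p = ss.ttest_ind(b1.loc[cond == 'gems', 'payout'],
                    b1.loc[cond == 'bomb', 'payout'])
print('payout, gems vs bomb: t = {:.2f}, p = {:.4f}'.format(t, p))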


Experiment 2: within-subjects design

-participants receive either 'gems' or 'bomb' type reward on each trial
-reward type for each door determined by fixed probability [0.8 0.6 0.4 0.2] (sketched below)
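A minimal sketch of that trial-level reward-type generation (door index and seed are arbitrary):

p_gems = np.array([0.8, 0.6, 0.4, 0.2])   # per-door probability that the outcome is 'gems'
rng = np.random.RandomState(0)
door = 1                                   # chosen door on this trial (0-3)
outcome = 'gems' if rng.rand() < p_gems[door] else 'bomb'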


In [7]:
# Bayesian Model Selection (bor = .778e-21)
# Model 1: inverse temperature, stickiness, learning rate
# Model 2: inverse temperature, stickiness, gems learning rate, bomb learning rate
# Model 3: inverse temperature, stickiness, positive learning rate, negative learning rate
# Model 4: inverse temperature, stickiness, learning rate, gems preference
models = ('Model 1', 'Model 2', 'Model 3', 'Model 4')
y_pos = np.arange(len(models))
pxp = [1, 0, 0, 0]
plt.bar(y_pos, pxp, align='center', alpha=0.5)
plt.xticks(y_pos, models)
plt.ylabel('protected exceedance probability')
plt.title('Bayesian model selection')
plt.show()



In [8]:
# import post-mfit b2 (bandit_either) summary data
#b2 = pd.read_csv('/Volumes/crisp/hinl/bandit/gems_vs_bomb/rez/b2_d100_table.csv')
b2 = pd.read_csv('~/Desktop/bandit/gems_vs_bomb/rez/b2_model_4_table.csv')
b2 = b2.drop('subID', axis=1)
data = pd.DataFrame(b2)
data.describe()


Out[8]:
gems bomb pGems wGems chose80 chose60 chose40 chose20 it lr sticky rt_mean rt_tot
count 162.000000 162.000000 162.000000 162.000000 162.000000 162.000000 162.000000 162.000000 162.000000 162.000000 162.000000 162.000000 162.000000
mean 50.672840 48.574074 0.513471 0.596328 0.272171 0.254132 0.242575 0.231121 11.218147 0.552247 1.114772 507.365724 182651.660494
std 8.826013 9.003661 0.046526 0.199595 0.094960 0.072796 0.071436 0.068968 6.299352 0.253491 1.039428 325.994537 117358.033165
min 29.000000 22.000000 0.396111 0.013926 0.044444 0.036111 0.036111 0.025000 1.486727 0.006983 -2.068427 73.750000 26550.000000
25% 44.000000 43.000000 0.485417 0.481278 0.211806 0.214583 0.203472 0.191667 6.718407 0.432810 0.444237 318.508333 114663.000000
50% 50.500000 48.000000 0.510556 0.551103 0.265278 0.250000 0.247222 0.231944 10.689439 0.593025 1.342327 454.241667 163527.000000
75% 57.000000 54.750000 0.537222 0.654968 0.320833 0.296528 0.277778 0.269444 14.346080 0.718668 1.857326 610.833333 219900.000000
max 71.000000 68.000000 0.706667 0.998942 0.761111 0.541667 0.463889 0.413889 46.280019 0.993452 2.951312 2608.050000 938898.000000

In [9]:
# make heatmap of all b2 correlations
data = b2[['gems','bomb','pGems','wGems','it','lr','rt_mean','rt_tot']]
r = data.corr()
with sns.plotting_context("talk", font_scale=1):
    ax = sns.heatmap(r, annot=True)
    ax.figure.set_size_inches((14, 10))


Experiment 3: narrative w/ low reward probability

-participants receive either 'gems' or 'bomb' type reward on each trial
-reward type for each door determined by fixed probability [0.8 0.6 0.4 0.2]
-intergroup bias computed by subtracting outgroup ID from ingroup ID (worked example below)
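As a hypothetical worked example of that difference score (made-up identification ratings, not data from this study):

ingroup_id, outgroup_id = 5.5, 3.0   # hypothetical identification ratings
igbias = ingroup_id - outgroup_id    # -> 2.5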


In [10]:
# Bayesian Model Selection (bor = .7427)
# Model 1: inverse temperature, stickiness, learning rate
# Model 2: inverse temperature, stickiness, gems learning rate, bomb learning rate
# Model 3: inverse temperature, stickiness, positive learning rate, negative learning rate
# Model 4: inverse temperature, stickiness, learning rate, gems preference
models = ('Model 1', 'Model 2', 'Model 3', 'Model 4')
y_pos = np.arange(len(models))
pxp = [0.3497, 0.1857, 0.1857, 0.2789]
plt.bar(y_pos, pxp, align='center', alpha=0.5)
plt.xticks(y_pos, models)
plt.ylabel('protected exceedance probability')
plt.title('Bayesian model selection')
plt.show()



In [11]:
# import post-mfit b3 (bandit_either) summary data
b3 = pd.read_csv('~/Desktop/bandit/gems_vs_bomb/rez/b3_model_4_table.csv')
b3 = b3.drop('subID', axis=1)
data = pd.DataFrame(b3)
data.describe()


Out[11]:
gems bomb igbias pGems wGems chose80 chose60 chose40 chose20 it lr sticky rt_mean rt_tot
count 160.000000 160.000000 160.000000 160.000000 160.000000 160.000000 160.000000 160.000000 160.000000 160.000000 160.000000 160.000000 160.000000 1.600000e+02
mean 51.756250 48.243750 2.363542 0.522413 0.617347 0.296215 0.242240 0.238941 0.222604 10.918244 0.539298 1.162176 566.760382 2.040337e+05
std 9.367915 9.678315 2.672352 0.056627 0.192162 0.119410 0.074170 0.065959 0.077237 4.925971 0.249155 1.104260 426.390411 1.535005e+05
min 31.000000 18.000000 -4.500000 0.413333 0.154795 0.130556 0.000000 0.052778 0.041667 1.600555 0.003745 -2.720556 96.430556 3.471500e+04
25% 45.000000 43.000000 0.000000 0.488611 0.478149 0.229861 0.202778 0.200000 0.180556 7.098115 0.418109 0.607309 328.757639 1.183528e+05
50% 51.000000 48.000000 2.000000 0.515556 0.570844 0.272222 0.244444 0.238889 0.226389 10.621770 0.563915 1.236374 449.455556 1.618040e+05
75% 58.000000 53.000000 5.000000 0.541250 0.690734 0.319444 0.283333 0.280556 0.269444 14.320732 0.712873 1.905327 628.483333 2.262540e+05
max 77.000000 85.000000 6.000000 0.716667 0.998053 0.830556 0.463889 0.419444 0.458333 25.181646 0.995920 3.268465 3822.966667 1.376268e+06

In [12]:
# make heatmap of all b3 correlations
data = b3[['igbias','gems','bomb','pGems','wGems','it','lr','rt_mean','rt_tot']]
r = data.corr()
with sns.plotting_context("talk", font_scale=1):
    ax = sns.heatmap(r, annot=True)
    ax.figure.set_size_inches((14, 10))



In [13]:
# regression of model-based preference for gems (wGems) on intergroup bias
data = b3[['igbias','wGems']]
with sns.plotting_context('talk', font_scale=1.2):
    ax = (sns.jointplot(x='igbias', y='wGems', data=data, kind='reg', annot_kws=dict(stat='r'))
                       .set_axis_labels('intergroup bias', 'preference for gems parameter'))
    #ax.savefig('b3_lr_pGems')
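The r annotated on the jointplot can be checked directly with the already-imported scipy.stats (ss):

r, p = ss.pearsonr(b3['igbias'], b3['wGems'])
print('igbias vs wGems: r = {:.3f}, p = {:.4f}'.format(r, p))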



In [14]:
# regression of preference for gems (pGems) on intergroup bias
data = b3[['igbias','pGems']]
with sns.plotting_context('talk', font_scale=1.4):
    ax = (sns.jointplot(x='igbias', y='pGems', data=data, kind='reg', annot_kws=dict(stat='r'))
                       .set_axis_labels('intergroup bias', 'preference for gems'))
    #ax.savefig('b3_igbias_pGems')



Experiment 4: narrative w/ multiple reward outcomes

-participants can receive 'gems'-type and/or 'bomb'-type reward on each trial
-reward type for each door determined by independent drifting probabilities for gems and bomb (sketched below)
-intergroup bias computed by subtracting outgroup ID from ingroup ID
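The drift process isn't specified here; one common implementation for designs like this is a bounded Gaussian random walk on each door's gems and bomb probabilities, sketched below (step size and bounds are assumptions, not the experiment's actual values).

def drift(p, sd=0.025, lo=0.2, hi=0.8, rng=np.random):
    # one random-walk step, clipped so probabilities stay inside [lo, hi]
    return np.clip(p + rng.normal(0.0, sd, size=np.shape(p)), lo, hi)

p_gems, p_bomb = np.full(4, 0.5), np.full(4, 0.5)   # independent per-door probabilities
for trial in range(300):
    p_gems, p_bomb = drift(p_gems), drift(p_bomb)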


In [15]:
# Bayesian Model Selection (bor = 9.7058e-11)
# Model 1: inverse temperature, stickiness, learning rate
# Model 2: inverse temperature, stickiness, gems learning rate, bomb learning rate
# Model 3: inverse temperature, stickiness, positive learning rate, negative learning rate
# Model 4: inverse temperature, stickiness, learning rate, gems preference
models = ('Model 1', 'Model 2', 'Model 3', 'Model 4')
y_pos = np.arange(len(models))
pxp = [2.4264e-11, 2.4264e-11, 2.4264e-11, 1.0000]
plt.bar(y_pos, pxp, align='center', alpha=0.5)
plt.xticks(y_pos, models)
plt.ylabel('protected exceedance probability')
plt.title('Bayesian model selection')
plt.show()
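Model 4's 'gems preference' (wGems in the tables) is specified in the mfit model code; purely as an illustration, one way a single weight in [0, 1] could fold the two outcome types into the reward signal fed to a learning rule like the one sketched earlier (this parameterization is an assumption, not the fitted model):

def subjective_reward(gems, bomb, w_gems):
    # hypothetical weighting of a gems outcome against a bomb outcome
    return w_gems * gems - (1.0 - w_gems) * bomb

subjective_reward(gems=1, bomb=0, w_gems=0.67)   # a gems-only trial
subjective_reward(gems=1, bomb=1, w_gems=0.67)   # a trial with both outcomes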



In [16]:
# import post-mfit b4 (bandit_double) summary data
#b4 = pd.read_csv('/Volumes/crisp/hinl/bandit/gems_vs_bomb/rez/b4_d100_table.csv')
b4 = pd.read_csv('~/Desktop/bandit/gems_vs_bomb/rez/b4_model_4_table.csv')
b4 = b4.drop('subID', axis=1)
data = pd.DataFrame(b4)
data.describe()


Out[16]:
gems bomb igbias wGems it lr sticky rt_mean rt_tot
count 159.000000 159.000000 159.000000 159.000000 159.000000 159.000000 159.000000 159.000000 159.000000
mean 98.213836 94.993711 2.128931 0.673819 7.606354 0.631356 1.155607 437.429874 157474.754717
std 10.134141 11.484440 2.436386 0.193425 4.376166 0.223408 0.909335 339.284852 122142.546879
min 69.000000 67.000000 -4.833333 0.042825 0.688342 0.009612 -1.750565 76.075000 27387.000000
25% 92.000000 88.000000 0.000000 0.534406 4.528313 0.523059 0.532596 242.990278 87476.500000
50% 97.000000 96.000000 1.833333 0.628018 6.799249 0.642770 1.298711 352.005556 126722.000000
75% 105.500000 103.000000 3.916667 0.802756 9.953950 0.775285 1.702816 506.975000 182511.000000
max 121.000000 121.000000 6.000000 0.998410 30.720417 0.994118 3.320907 2468.427778 888634.000000

In [17]:
# make heatmap of all b4 correlations
data = b4[['igbias','gems','bomb','wGems','it','lr','rt_mean','rt_tot']]
r = data.corr()
with sns.plotting_context("talk", font_scale=1):
    ax = sns.heatmap(r, annot=True)
    ax.figure.set_size_inches((14, 10))


Experiment 5: narrative w/ high reward probability

-participants receive either 'gems' or 'bomb' type reward on each trial
-reward type for each door determined by fixed probability [0.8 0.6 0.4 0.2]
-intergroup bias computed by subtracting outgroup ID from ingroup ID
-probability of reward twice as high as in Experiment 3


In [18]:
# Bayesian Model Selection (bor = .0052)
# Model 1: inverse temperature, stickiness, learning rate
# Model 2: inverse temperature, stickiness, gems learning rate, bomb learning rate
# Model 3: inverse temperature, stickiness, positive learning rate, negative learning rate
# Model 4: inverse temperature, stickiness, learning rate, gems preference
models = ('Model 1', 'Model 2', 'Model 3', 'Model 4')
y_pos = np.arange(len(models))
pxp = [0.0013, 0.0013, 0.7480, 0.2494]
plt.bar(y_pos, pxp, align='center', alpha=0.5)
plt.xticks(y_pos, models)
plt.ylabel('protected exceedance probability')
plt.title('Bayesian model selection')
plt.show()



In [19]:
# import post-mfit b5 (bandit_either) summary data
b5 = pd.read_csv('~/Desktop/bandit/gems_vs_bomb/rez/b5_model_4_table.csv')
data = pd.DataFrame(b5)
data.describe()


Out[19]:
subID gems bomb igbias pGems wGems chose80 chose60 chose40 chose20 it lr sticky rt_mean rt_tot
count 1.440000e+02 144.000000 144.000000 144.000000 144.000000 144.000000 144.000000 144.000000 144.000000 144.000000 144.000000 144.000000 144.000000 144.000000 1.440000e+02
mean 4.269511e+07 112.861111 101.777778 2.326389 0.529753 0.612368 0.306674 0.250154 0.228434 0.214738 10.512226 0.677112 1.119738 595.322550 2.143161e+05
std 2.580464e+07 18.291582 22.974734 2.390482 0.068553 0.190407 0.141462 0.088360 0.092720 0.105534 4.765983 0.199058 1.056101 712.731425 2.565833e+05
min 2.711490e+05 40.000000 24.000000 -3.166667 0.222778 0.192474 0.030556 0.000000 0.000000 0.000000 1.175973 0.006522 -1.993973 28.808333 1.037100e+04
25% 2.287964e+07 101.000000 85.000000 0.000000 0.488750 0.486492 0.216667 0.188889 0.165972 0.152778 7.125326 0.574496 0.492553 302.267361 1.088162e+05
50% 4.216666e+07 113.000000 102.000000 2.250000 0.515556 0.565774 0.291667 0.247222 0.225000 0.213889 9.957198 0.707677 1.241818 428.487500 1.542555e+05
75% 6.475296e+07 122.250000 117.250000 4.666667 0.566944 0.687863 0.372917 0.300000 0.278472 0.278472 13.938157 0.803626 1.860440 599.926389 2.159735e+05
max 8.889695e+07 162.000000 153.000000 6.000000 0.800000 0.998956 1.000000 0.588889 0.472222 0.955556 27.273717 0.996590 3.339920 6592.616667 2.373342e+06

In [20]:
# make heatmap of all b5 correlations
data = b5[['igbias','gems','bomb','pGems','wGems','it','lr','rt_mean','rt_tot']]
r = data.corr()
with sns.plotting_context("talk", font_scale=1):
    ax = sns.heatmap(r, annot=True)
    ax.figure.set_size_inches((14, 10))


Experiment 6: political parties w/ low reward probability

-participants receive either 'bill' or 'burning bill' type reward on each trial
-reward type for each door determined by fixed probability [0.8 0.6 0.4 0.2]
-intergroup bias computed by subtracting outgroup ID from ingroup ID
-probability of reward same as in Experiment 3 (mean = 0.25)


In [21]:
# Bayesian Model Selection (bor = 4.61e-37)
# Model 1: inverse temperature, stickiness, learning rate
# Model 2: inverse temperature, stickiness, gems learning rate, bomb learning rate
# Model 3: inverse temperature, stickiness, positive learning rate, negative learning rate
# Model 4: inverse temperature, stickiness, learning rate, gems preference
models = ('Model 1', 'Model 2', 'Model 3', 'Model 4')
y_pos = np.arange(len(models))
pxp = [1, 0, 0, 0]
plt.bar(y_pos, pxp, align='center', alpha=0.5)
plt.xticks(y_pos, models)
plt.ylabel('protected exceedance probability')
plt.title('Bayesian model selection')
plt.show()



In [22]:
# import post-mfit b6 (bandit_either) summary data
b6 = pd.read_csv('~/Desktop/bandit/gems_vs_bomb/rez/b6_model_4_table.csv')
data = pd.DataFrame(b6)
data.describe()


Out[22]:
gems bomb igbias pBurn wEarn chose80 chose60 chose40 chose20 it lr sticky rt_mean rt_tot
count 202.000000 202.000000 202.000000 202.000000 202.000000 202.000000 202.000000 202.000000 202.000000 202.000000 202.000000 202.000000 202.000000 2.020000e+02
mean 49.396040 49.351485 2.936469 0.499070 0.505281 0.253328 0.248089 0.248487 0.250096 11.003374 0.542899 1.107918 587.061180 2.113420e+05
std 7.893671 9.683048 1.776546 0.045387 0.191431 0.091086 0.081759 0.074275 0.082109 6.609712 0.277272 1.016523 477.631755 1.719474e+05
min 26.000000 23.000000 -3.500000 0.281111 0.010291 0.030556 0.055556 0.036111 0.002778 0.736757 0.001954 -1.633744 93.105556 3.351800e+04
25% 44.250000 43.000000 2.000000 0.479583 0.410612 0.211111 0.195139 0.209028 0.202778 6.420673 0.423519 0.437029 297.538889 1.071140e+05
50% 50.000000 50.000000 3.000000 0.500833 0.488809 0.244444 0.241667 0.250000 0.247222 10.063598 0.588127 1.177757 453.891667 1.634010e+05
75% 54.750000 54.750000 4.333333 0.521667 0.559371 0.280556 0.291667 0.280556 0.296528 13.695290 0.733960 1.886770 687.011111 2.473240e+05
max 72.000000 79.000000 6.000000 0.657778 0.997116 0.780556 0.666667 0.538889 0.688889 46.375746 0.996961 3.703684 3303.875000 1.189395e+06

In [24]:
# make heatmap of all b6 correlations
data = b6[['igbias','gems','bomb','pBurn','wEarn','it','lr','rt_mean','rt_tot']]
r = data.corr()
with sns.plotting_context("talk", font_scale=1):
    ax = sns.heatmap(r, annot=True)
    ax.figure.set_size_inches((14, 10))



In [25]:
# regression of model-based 'preference for earn' (wEarn) on intergroup bias
data = b6[['wEarn','igbias']]
with sns.plotting_context('talk', font_scale=1.2):
    ax = (sns.jointplot(x='igbias', y='wEarn', data=data, kind='reg', annot_kws=dict(stat='r'))
                       .set_axis_labels('intergroup bias', 'preference for earn'))



Experiment 7: political parties w/ high reward probability

-participants receive either 'bill' or 'burning bill' type reward on each trial
-reward type for each door determined by fixed probability [0.8 0.6 0.4 0.2]
-intergroup bias computed by subtracting outgroup ID from ingroup ID
-probability of reward same as in Experiment 5 (mean = 0.5)


In [26]:
# Bayesian Model Selection (bor = 1.410e-7)
# Model 1: inverse temperature, stickiness, learning rate
# Model 2: inverse temperature, stickiness, gems learning rate, bomb learning rate
# Model 3: inverse temperature, stickiness, positive learning rate, negative learning rate
# Model 4: inverse temperature, stickiness, learning rate, gems preference
models = ('Model 1', 'Model 2', 'Model 3', 'Model 4')
y_pos = np.arange(len(models))
pxp = [0.000001, 0.000001, 0.99999, 0.000001]
plt.bar(y_pos, pxp, align='center', alpha=0.5)
plt.xticks(y_pos, models)
plt.ylabel('protected exceedance probability')
plt.title('Bayesian model selection')
plt.show()



In [29]:
# import post-mfit b7 (bandit_either) summary data
b7 = pd.read_csv('~/Desktop/bandit/gems_vs_bomb/rez/b7_model_4_table.csv')
data = pd.DataFrame(b7)
data.describe()


Out[29]:
gems bomb igbias pBurn wEarn chose80 chose60 chose40 chose20 it lr sticky rt_mean rt_tot
count 196.000000 196.000000 196.000000 196.000000 196.000000 196.000000 196.000000 196.000000 196.000000 196.000000 196.000000 196.000000 196.000000 1.960000e+02
mean 108.602041 104.576531 3.024660 0.490700 0.523731 0.271542 0.247307 0.237259 0.243892 10.045946 0.656264 1.039286 588.818821 2.119748e+05
std 17.400407 20.504640 1.743801 0.071047 0.193316 0.133680 0.091423 0.104542 0.124371 4.437558 0.276301 1.201301 442.964312 1.594672e+05
min 42.000000 34.000000 -1.000000 0.231111 0.026781 0.025000 0.000000 0.008333 0.000000 0.546278 0.002541 -3.939137 0.000000 0.000000e+00
25% 97.000000 93.750000 1.666667 0.461528 0.415269 0.190972 0.190972 0.175000 0.174306 6.842878 0.561687 0.444837 347.098611 1.249555e+05
50% 107.500000 105.000000 3.000000 0.500000 0.498339 0.250000 0.237500 0.236111 0.236111 10.209908 0.709072 1.180087 503.444444 1.812400e+05
75% 122.000000 118.000000 4.333333 0.525833 0.586801 0.322222 0.292361 0.295139 0.297917 12.890488 0.827403 1.894040 674.522222 2.428280e+05
max 152.000000 153.000000 6.000000 0.772778 0.997331 0.925000 0.602778 0.680556 0.933333 22.097579 0.997054 4.035045 3586.850000 1.291266e+06

In [30]:
# make heatmap of all b7 correlations
data = b7[['igbias','gems','bomb','pBurn','wEarn','it','lr','rt_mean','rt_tot']]
r = data.corr()
with sns.plotting_context("talk", font_scale=1):
    ax = sns.heatmap(r, annot=True)
    ax.figure.set_size_inches((14, 10))