In [1]:

    
import os
import urllib, cStringIO

import pymongo as pm

from IPython.display import clear_output

import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('poster')
sns.set_style('white')

import numpy as np
from __future__ import division
import scipy.stats as stats
import pandas as pd
import json
import re

from PIL import Image
import base64
import analysis_helpers as h
reload(h)

## get standardized object list
categories = ['bird','car','chair','dog']
obj_list = []
for cat in categories:
    for i,j in h.objcat.iteritems():
        if j==cat:
            obj_list.append(i)

load in model param posterior (bdaOutput/**splitbyobjectParams.csv)

make loglikelihood plot for human encoder version



In [2]:

    
model_prefixes = ['human']
PP = h.model_comparison_bars(model_prefixes,adaptor_type='human',split_type='balancedavg1')
h.plot_human_bars(PP)



In [3]:

    
PP









    Out[3]:







  
    
      
      model
      logLikelihood
      perception
      pragmatics
      production
    
  
  
    
      0
      human_S0_cost
      -287.138499
      human
      S0
      cost
    
    
      1
      human_S0_nocost
      -316.320149
      human
      S0
      nocost
    
    
      2
      human_combined_cost
      -240.794297
      human
      combined
      cost
    
    
      3
      human_combined_nocost
      -269.563112
      human
      combined
      nocost

make loglikelihood plot for multimodal convnet encoder version



In [4]:

    
adaptor_type = 'multimodal_fc6'
split_type = 'balancedavg1'
model_prefixes = [adaptor_type]
PP = h.model_comparison_bars(model_prefixes,adaptor_type=adaptor_type,split_type=split_type)
# h.plot_multimodal_bars(PP)
PP









    Out[4]:







  
    
      
      model
      logLikelihood
      adaptor
      perception
      pragmatics
      production
    
  
  
    
      0
      multimodal_fc6_S0_cost
      -560.161659
      multimodal
      fc6
      S0
      cost
    
    
      1
      multimodal_fc6_S0_nocost
      -561.270152
      multimodal
      fc6
      S0
      nocost
    
    
      2
      multimodal_fc6_combined_cost
      -452.254866
      multimodal
      fc6
      combined
      cost
    
    
      3
      multimodal_fc6_combined_nocost
      -460.554496
      multimodal
      fc6
      combined
      nocost



In [5]:

    
adaptor_type = 'multimodal_conv42'
split_type = 'balancedavg1'
model_prefixes = ['multimodal_conv42']
PP = h.model_comparison_bars(model_prefixes,adaptor_type=adaptor_type,split_type=split_type)
# h.plot_multimodal_bars(PP)
PP









    Out[5]:







  
    
      
      model
      logLikelihood
      adaptor
      perception
      pragmatics
      production
    
  
  
    
      0
      multimodal_conv42_S0_cost
      -752.464675
      multimodal
      conv42
      S0
      cost
    
    
      1
      multimodal_conv42_S0_nocost
      -756.505671
      multimodal
      conv42
      S0
      nocost
    
    
      2
      multimodal_conv42_combined_cost
      -695.382719
      multimodal
      conv42
      combined
      cost
    
    
      3
      multimodal_conv42_combined_nocost
      -698.864526
      multimodal
      conv42
      combined
      nocost



In [6]:

    
adaptor_type = 'multimodal_pool1'
split_type = 'balancedavg1'
# model_prefixes = ['multimodal_pool1','multimodal_conv42','multimodal_fc6']
model_prefixes = ['multimodal_pool1']
PP = h.model_comparison_bars(model_prefixes,adaptor_type=adaptor_type,split_type=split_type)
# h.plot_multimodal_bars(PP)
PP









    Out[6]:







  
    
      
      model
      logLikelihood
      adaptor
      perception
      pragmatics
      production
    
  
  
    
      0
      multimodal_pool1_S0_cost
      -1110.380258
      multimodal
      pool1
      S0
      cost
    
    
      1
      multimodal_pool1_S0_nocost
      -1110.246551
      multimodal
      pool1
      S0
      nocost
    
    
      2
      multimodal_pool1_combined_cost
      -1097.753596
      multimodal
      pool1
      combined
      cost
    
    
      3
      multimodal_pool1_combined_nocost
      -1098.316452
      multimodal
      pool1
      combined
      nocost

check that distribution of log likelihoods is higher for the cost version of the model vs. nocost version



In [7]:

    
reallyRun = 0
if reallyRun:
    h.check_mean_LL_for_cost_vs_nocost(model_prefixes=['multimodal_fc6'],
                                         adaptor_type = 'multimodal_fc6',
                                         split_type='balancedavg1',
                                         plot=True)

Run evaluate to get scores on the test set



In [8]:

    
### FIRST STEP:
### iterate through Params and write back out in "flattened" form
reload(h)
reallyRun = 0
if reallyRun:
    h.flatten_param_posterior(adaptor_types = ['multimodal_pool1','multimodal_conv42','multimodal_fc6', 'human'],
                              verbosity=1)



In [9]:

    
#### SECOND STEP: Go to RSA.py and run evaluate on all of the models and splits
### e.g.,
### python RSA.py --wppl evaluate --perception human --pragmatics combined S0 --production cost nocost --split_type balancedavg1 balancedavg2 balancedavg3 balancedavg4 balancedavg5
### python RSA.py --wppl evaluate --perception multimodal_fc6 --pragmatics combined S0 --production cost nocost --split_type balancedavg1 balancedavg2 balancedavg3 balancedavg4 balancedavg5
### python RSA.py --wppl evaluate --perception multimodal_conv42 --pragmatics combined S0 --production cost nocost --split_type balancedavg1 balancedavg2 balancedavg3 balancedavg4 balancedavg5
### python RSA.py --wppl evaluate --perception multimodal_pool1 --pragmatics combined S0 --production cost nocost --split_type balancedavg1 balancedavg2 balancedavg3 balancedavg4 balancedavg5



In [10]:

    
### OPTIONAL: prior to running bda-enumerate, wouldn't it help to get a sense for the extreme values
### in the param posterior for each model, for each split to make sure our grid covers 
### everywhere where there is some mass in the posterior?
reload(h)
reallyRun = 0
if reallyRun:
    h.get_sense_for_param_range_across_splits()

check model predictions

Run the script ./get_all_model_predictions.py, which is a wrapper around this script, ./generate_model_predictions.py to generate csv files that summarize key statistics of interest (e.g., target rank, cost) from model predictions, for each model and data split.
Next, assess retrieval performance (i.e., rank of correct sketch category)
Also assess sketch cost across conditions

read in predictions and compute retrieval performance



In [11]:

    
import analysis_helpers as h
reload(h)
print h.load_all_model_preds.__doc__









    



    Load all model predictions from all five splits into a dictionary called P.
    P is a nested dictionary containing all predictions dataframes for all five primary models of interest and five splits.    
    P.keys() = ['multimodal_conv42_combined_cost', 'human_combined_cost', 'multimodal_fc6_combined_cost', 'multimodal_fc6_S0_cost', 'multimodal_fc6_combined_nocost']
    Nested inside each model are dataframes containing model predictions from each split.



In [12]:

    
P = h.load_all_model_preds(verbosity=1)









    



Loading model preds from: human_combined_cost
Loading model preds from: human_S0_cost
Loading model preds from: human_combined_nocost
Loading model preds from: multimodal_fc6_combined_cost
Loading model preds from: multimodal_fc6_S0_cost
Loading model preds from: multimodal_fc6_combined_nocost
Loading model preds from: multimodal_conv42_combined_cost
Loading model preds from: multimodal_pool1_combined_cost



In [13]:

    
# for i in np.arange(1,6):
#     _P = P['human_combined_cost']['balancedavg{}'.format(i)]
#     print _P.groupby('condition')['cost'].mean()

Visualize model predictions separately for each split

split_types = ['balancedavg1','balancedavg2','balancedavg3','balancedavg4','balancedavg5']
model_space = ['human_combined_cost','human_S0_cost','human_combined_nocost','multimodal_fc6_combined_cost', 'multimodal_fc6_S0_cost','multimodal_fc6_combined_nocost','multimodal_conv42_combined_cost', 'multimodal_pool1_combined_cost]
H = human_combined_cost
H0 = human_S0_cost
H1 = human_combined_nocost
M = multimodal_fc6_combined_cost
M0 = multimodal_conv42_combined_cost
- "effect of perception"
M1 = multimodal_fc6_S0_cost
- "effect of pragmatics/context"
M2 = multimodal_fc6_combined_nocost
- "effect of production/cost"
M3 = multimodal_pool1_combined_cost
- "effect of perception, even stronger"



In [14]:

    
import analysis_helpers as h
reload(h)
sns.set_context('talk')

split_type = 'balancedavg1'

What is the rank of the correct sketch category (correct object + correct context) vs. wrong sketch category (correct object + wrong context)?



In [ ]:



In [15]:

    
h.plot_target_vs_foil_rank_by_object(P,split_type=split_type)

Generate bar plot of average rank (out of 64) of correct sketch category, by model, for a particular split.



In [16]:

    
import analysis_helpers as h
reload(h)
h.plot_avg_rank_all_models(P,split_type=split_type)

Generate bar plot of proportion of trials for which context-congruent sketch preferred over incongruent sketch.



In [17]:

    
import analysis_helpers as h
reload(h)
h.plot_prop_congruent_all_models(P,split_type=split_type)

make a "top-k" plot (what percentage of sketches in the top k)



In [18]:

    
reallyRun = 0
if reallyRun:
    h.get_top_k_predictions(P,split_type=split_type,verbosity=1)



In [19]:

    
import analysis_helpers as h
reload(h)
h.plot_topk_all_models()

What is the average sketch cost by condition and model type?



In [20]:

    
### show the average cost (time, strokes, ink) for average model prediction



In [21]:

    
split_type = 'balancedavg1'



In [22]:

    
import analysis_helpers as h
reload(h)
HU,H0U,H1U,MU,M0U,M1U,M2U,M3U = h.get_avg_cost_all_models(P, split_type=split_type)



In [23]:

    
saveout=True
sns.set_context('talk')
fig = plt.figure(figsize=(5,8))
ax = fig.add_subplot(111)
U = pd.concat([HU,H0U,H1U,MU,M0U,M1U,M2U,M3U],axis=0)
sns.barplot(data=U,
            x='adaptor',
            y='cost',
            hue='condition',
            ci='sd',
            order=['human_combined_cost','human_S0_cost','human_combined_nocost','multimodal_fc6_combined_cost',
                   'multimodal_fc6_S0_cost','multimodal_fc6_combined_nocost','multimodal_conv42_combined_cost', \
                   'multimodal_pool1_combined_cost'])
plt.ylabel('normalized sketch duration')
plt.ylim([0,0.4])
xticklabels=['Context Cost Human','NoContext Cost Human','Context NoCost Human','Context Cost HighAdaptor',
             'NoContext Cost HighAdaptor','Context NoCost HighAdaptor', 'Context Cost MidAdaptor', 'Context Cost LowAdaptor']
plt.xlabel('')
l = ax.set_xticklabels(xticklabels, rotation = 90, ha="left")
plt.legend(bbox_to_anchor=(1., 1.))
plt.tight_layout()
if saveout:
    plt.savefig('./plots/prop_congruent_all_models.pdf')



In [ ]:



In [ ]:

Load in bootstrapped model predictions

First, you want to run python get_all_bootstrapped_model_predictions.py to generate a bunch of bootvec output in the directory bootstrap_results.
Next, follow the below code to generate visualizations of estimates of statistics of interest.



In [24]:

    
import analysis_helpers as h
reload(h)









    Out[24]:





<module 'analysis_helpers' from 'analysis_helpers.pyc'>



In [25]:

    
## load in boot filelist
boot_files = h.load_and_check_bootstrapped_model_preds()

## generate dataframe from boot filelist
B = h.generate_bootstrap_model_preds_dataframe(boot_files, out_dir='./bootstrap_results')









    



There are 480 files in the bootstrap_results directory.

Visualize model predictions aggregating across splits



In [26]:

    
split_types = ['balancedavg1','balancedavg2','balancedavg3','balancedavg4','balancedavg5']

model_space = ['human_combined_cost','human_S0_cost','human_combined_nocost','multimodal_fc6_combined_cost',\
               'multimodal_fc6_S0_cost','multimodal_fc6_combined_nocost','multimodal_conv42_combined_cost',\
               'multimodal_pool1_combined_cost']

vois = list(np.unique(B.var_of_interest.values))

Generate bar plot of average rank (out of 64) of correct sketch category, by model, across splits.



In [27]:

    
import analysis_helpers as h
reload(h)
## make dataframe
R = h.generate_aggregated_estimate_dataframe(B,
                                             condition_list = ['all'],
                                             model_space=model_space,
                                             split_types=split_types,
                                             var_of_interest='target_rank')

## make plot 
h.plot_average_target_rank_across_splits(R,
                                         var_of_interest='target_rank',
                                         condition_list = ['all'],
                                         model_space=model_space,
                                         split_types=split_types,
                                         condition='all',
                                         sns_context='talk',
                                         figsize=(6,10),
                                         errbar_multiplier=1.,
                                         ylabel='rank of correct sketch')

generate barplot for prop congruent sketch category aggregated across splits



In [28]:

    
reload(h)
## make dataframe
R = h.generate_aggregated_estimate_dataframe(B,
                                             condition_list = ['all'],
                                             model_space=model_space,
                                             split_types=split_types,
                                             var_of_interest='sign_diff_rank')

## make plot
h.plot_prop_congruent_across_splits(R,
                                    var_of_interest='sign_diff_rank',
                                    condition_list = ['all'],
                                    model_space=model_space,
                                    split_types=split_types,
                                    condition='all',
                                    sns_context='talk',
                                    figsize=(6,10),
                                    errbar_multiplier=1.,
                                    ylabel='proportion congruent context preferred')



In [ ]:

make barplot of cost x condition aggregated across splits



In [29]:

    
reload(h)
## make dataframe
R = h.generate_aggregated_estimate_dataframe(B,
                                             condition_list = ['closer','further'],
                                             model_space=model_space,
                                             split_types=split_types,
                                             var_of_interest='cost')

## make plot
h.plot_cost_by_condition_across_splits(R,
                                      var_of_interest='cost',
                                      condition_list = ['closer','further'],
                                      model_space=model_space,
                                      split_types=split_types,
                                      condition='all',
                                      sns_context='talk',
                                      figsize=(6,10),
                                      errbar_multiplier=1.,
                                      ylabel='predicted sketch cost')



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	model	logLikelihood	perception	pragmatics	production
0	human_S0_cost	-287.138499	human	S0	cost
1	human_S0_nocost	-316.320149	human	S0	nocost
2	human_combined_cost	-240.794297	human	combined	cost
3	human_combined_nocost	-269.563112	human	combined	nocost

	model	logLikelihood	adaptor	perception	pragmatics	production
0	multimodal_fc6_S0_cost	-560.161659	multimodal	fc6	S0	cost
1	multimodal_fc6_S0_nocost	-561.270152	multimodal	fc6	S0	nocost
2	multimodal_fc6_combined_cost	-452.254866	multimodal	fc6	combined	cost
3	multimodal_fc6_combined_nocost	-460.554496	multimodal	fc6	combined	nocost

	model	logLikelihood	adaptor	perception	pragmatics	production
0	multimodal_conv42_S0_cost	-752.464675	multimodal	conv42	S0	cost
1	multimodal_conv42_S0_nocost	-756.505671	multimodal	conv42	S0	nocost
2	multimodal_conv42_combined_cost	-695.382719	multimodal	conv42	combined	cost
3	multimodal_conv42_combined_nocost	-698.864526	multimodal	conv42	combined	nocost

	model	logLikelihood	adaptor	perception	pragmatics	production
0	multimodal_pool1_S0_cost	-1110.380258	multimodal	pool1	S0	cost
1	multimodal_pool1_S0_nocost	-1110.246551	multimodal	pool1	S0	nocost
2	multimodal_pool1_combined_cost	-1097.753596	multimodal	pool1	combined	cost
3	multimodal_pool1_combined_nocost	-1098.316452	multimodal	pool1	combined	nocost