In [28]:
import os
import urllib, cStringIO

import pymongo as pm

import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('poster')
sns.set_style('white')

import numpy as np
from __future__ import division
import scipy.stats as stats
import pandas as pd
import json
import re

from PIL import Image
import base64
import json

unadapted photo RDMs


In [29]:
## get path to fc6 features
path_to_fc6 = '/data/jefan/sketchpad_basic_fixedpose96_fc6/photos'

## get image ordering
ordering = pd.read_csv('/data/jefan/sketchpad_basic_fixedpose96_fc6/human_confusion_object_order.csv')
order = ordering['object_name'].values

## get paths in order that we want to make matrix
feat_files = [o +'.npy' for o in order]
path_to_feats =  [os.path.join(path_to_fc6,f) for f in feat_files]

In [30]:
## load in feats and stack into a single matrix (one row per object, in the order defined above)
X = np.vstack([np.load(path) for path in path_to_feats])
## rename to P for "photo" renders
P = X

In [ ]:


In [6]:
## plot RDM
corrmat = np.corrcoef(X)
corrmat.shape

from matplotlib import cm
fig = plt.figure(figsize=(8,8))
ax = plt.subplot(111)
cax = ax.matshow(corrmat,vmin=0,vmax=1,cmap=cm.viridis)
plt.xticks(range(len(X)), order, fontsize=12,rotation='vertical')
plt.yticks(range(len(X)), order, fontsize=12)
plt.colorbar(cax,shrink=0.8)
plt.tight_layout()


unadapted sketch RDMs


In [7]:
## what do the sketches look like by themselves?
path_to_sketches = '/data/jefan/sketchpad_basic_fixedpose96_fc6/sketch'

In [8]:
import cPickle
db_path = '/data/jefan/sketchpad_basic_fixedpose96_fc6/'
with open(os.path.join(db_path, 'sketchpad_context_dict.pickle')) as fp:
    context_dict = cPickle.load(fp)
with open(os.path.join(db_path, 'sketchpad_label_dict.pickle')) as fp:
    label_dict = cPickle.load(fp)

In [9]:
sketch_list = os.listdir(path_to_sketches)

In [10]:
## sanity check: look up the condition and label for the first sketch
condition = context_dict[sketch_list[0]]
label = label_dict[sketch_list[0]]

In [11]:
## load in all precomputed fc6 features as single numpy array
F = np.zeros([len(sketch_list),4096])
for i,s in enumerate(sketch_list):
    if i%1000==0:
        print '{} of {} sketches'.format(i,len(sketch_list))
    x = np.load(os.path.join(db_path,'sketch',s))
    F[i,:] = x


0 of 3072 sketches
1000 of 3072 sketches
2000 of 3072 sketches
3000 of 3072 sketches

In [12]:
# z-score normalization: de-mean & standardize variance within each feature dimension (cf. within-voxel normalization)
def normalize(X):
    X = X - X.mean(0)
    X = X / np.maximum(X.std(0), 1e-5)
    return X

normalize_on = False ## normalize AFTER stacking with the render features
if normalize_on:
    F = normalize(F)

In [13]:
## get lists of labels and conditions in same order as feature array above
labels = []
conditions = []
for i,s in enumerate(sketch_list):
    if i%1000==0:
        print '{} of {} sketches'.format(i,len(sketch_list))    
    labels.append(label_dict[s])
    conditions.append(context_dict[s])

## define dataframe for this feature matrix    
meta = pd.DataFrame([labels,conditions,sketch_list])
meta = meta.transpose()
meta.columns = ['label','condition','filename']


0 of 3072 sketches
1000 of 3072 sketches
2000 of 3072 sketches
3000 of 3072 sketches

In [14]:
## now make the class-averaged (per-object) feature matrices for each condition
close_feats = np.zeros([32,4096])
far_feats = np.zeros([32,4096])
for i,obj in enumerate(order):    
    inds = (meta['label']==obj) & (meta['condition']=='closer')
    close_feats[i,:] = F[inds,:].mean(0)
    inds = (meta['label']==obj) & (meta['condition']=='further')
    far_feats[i,:] = F[inds,:].mean(0)

In [ ]:


In [15]:
## plot RDM
corrmat = np.corrcoef(close_feats)
corrmat.shape
corrmat_close = corrmat

from matplotlib import cm
fig = plt.figure(figsize=(8,8))
ax = plt.subplot(111)
cax = ax.matshow(corrmat,vmin=0,vmax=1,cmap=cm.viridis)
plt.xticks(range(len(X)), order, fontsize=12,rotation='vertical')
plt.yticks(range(len(X)), order, fontsize=12)
plt.colorbar(cax,shrink=0.8)
plt.tight_layout()



In [16]:
## plot RDM
corrmat = np.corrcoef(far_feats)
corrmat.shape
corrmat_far = corrmat

from matplotlib import cm
fig = plt.figure(figsize=(8,8))
ax = plt.subplot(111)
cax = ax.matshow(corrmat,vmin=0,vmax=1,cmap=cm.viridis)
plt.xticks(range(len(X)), order, fontsize=12,rotation='vertical')
plt.yticks(range(len(X)), order, fontsize=12)
plt.colorbar(cax,shrink=0.8)
plt.tight_layout()



In [ ]:

stack the feature matrices on top of each other to get a full feature matrix containing the renders, close sketches, and far sketches


In [17]:
FEAT = np.vstack((P,close_feats,far_feats))

In [18]:
## normalize to the mean/std of this stacked feature matrix
normalize_on = True
if normalize_on:
    FEAT = normalize(FEAT)

In [19]:
## plot RDM of unadapted render, close, far sketches
corrmat = np.corrcoef(FEAT)
corrmat_full = corrmat
fig = plt.figure(figsize=(12,12))
ax = plt.subplot(111)
cax = ax.matshow(corrmat,cmap=cm.viridis)
plt.xticks(range(len(X)*3), np.tile(order,3), fontsize=8,rotation='vertical')
plt.yticks(range(len(X)*3), np.tile(order,3), fontsize=8)
plt.xlabel('         OBJECT                    CLOSE                         FAR           ')
plt.colorbar(cax,shrink=0.8)    
plt.tight_layout()



In [ ]:


In [20]:
## extract the blocks that allow us to measure sketch-render similarity
close_render = corrmat_full[:32,32:64]
far_render = corrmat_full[:32,64:]

In [101]:
plt.figure(figsize=(4,4))
s = plt.scatter(np.diagonal(close_render),np.diagonal(far_render))
plt.plot([-0.3,0.5],[-0.3,0.5],linestyle='dashed',color='gray')
plt.xlabel('close-to-render correlation similarity')
plt.ylabel('far-to-render correlation similarity')
plt.title('Unadapted fc6 representation')


Out[101]:
<matplotlib.text.Text at 0x7f7304c99290>

In [ ]:

prep similarity JSONs for putting through RSA

Format for the json is a dictionary of dictionaries, where each top-level key refers to one of the renders, e.g. "trial_20_cuckoo". For each render, you can look up its similarity to each sketch, referenced by an abbreviated ID formed from the last 12-character segment of the gameID, an underscore, and the trial number. E.g., 'gameID_9903-d6e6a9ff-a878-4bee-b2d5-26e2e239460a_trial_9.npy' ==> '26e2e239460a_9'
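
As a sketch of that structure (the render keys, sketch IDs, and similarity values below are purely illustrative, not actual data):

In [ ]:
## illustrative only: example render keys, sketch IDs, and similarity values (not actual data)
example_json = {
    'trial_20_cuckoo': {
        '26e2e239460a_9': 0.61,  ## similarity between this render and sketch 26e2e239460a_9
        '08711b390cfc_9': 0.47
    },
    'trial_12_bluejay': {
        '26e2e239460a_9': 0.33
    }
}
print example_json['trial_20_cuckoo']['26e2e239460a_9']  ## look up one render-sketch similarity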

first, regenerate sketch and render feature matrices and normalize w/r/t mean between domains

In [188]:
## load in all precomputed fc6 features as single numpy array
F = np.zeros([len(sketch_list),4096])
for i,s in enumerate(sketch_list):
    if i%1000==0:
        print '{} of {} sketches'.format(i,len(sketch_list))
    x = np.load(os.path.join(db_path,'sketch',s))
    F[i,:] = x
    
#### sketch feature matrix    
SF = F

## get lists of labels and conditions in same order as feature array above
labels = []
conditions = []
for i,s in enumerate(sketch_list):
    if i%1000==0:
        print '{} of {} sketches'.format(i,len(sketch_list))    
    labels.append(label_dict[s])
    conditions.append(context_dict[s])

## define dataframe for this feature matrix    
meta = pd.DataFrame([labels,conditions,sketch_list])
meta = meta.transpose()
meta.columns = ['label','condition','filename']

#### sketch metadata
SM = meta


0 of 3072 sketches
1000 of 3072 sketches
2000 of 3072 sketches
3000 of 3072 sketches
0 of 3072 sketches
1000 of 3072 sketches
2000 of 3072 sketches
3000 of 3072 sketches

In [189]:
## photo feature matrix & metadata
PF = P
PM = order

In [190]:
## normalize within feature: compute the mean/std from the class- and condition-averaged version of the feature matrix.
## In other words, compute the mean and SD across DOMAINS (photos vs. sketch centroids), so that each domain
## contributes a balanced number of entries.
S = np.dstack((close_feats,far_feats)).mean(2)
FEAT = np.vstack((P,S))
uFEAT = FEAT.mean(0)
sdFEAT = np.maximum(FEAT.std(0), 1e-5)

normalize_on = True
if normalize_on:
    PF = (PF - uFEAT) / sdFEAT
    SF = (SF - uFEAT) / sdFEAT

now, compute the similarity between each sketch and every render
option 1: compute sketch-photo similarities at the level of individual sketches

In [191]:
## get the correlation similarity between each sketch and every render
sp_similarity = []
for i,d in SM.iterrows():
    sketch_feat = SF[i,:]
    sp_similarity.append([np.corrcoef(sketch_feat,pf)[0,1] for pf in PF]) ## correlation between this sketch and each render

## generate similarity dataframe    
sim = pd.DataFrame(sp_similarity)
sim.columns = order

In [192]:
## now concatenate with the main SM dataframe
SM2 = pd.concat([SM,sim],axis=1)

In [193]:
## save out to csv
SM2.to_csv('sketch_meta_item_level_similarity.csv')

option 2: compute sketch-photo similarities at the sketch-type (condition-object) level

In [200]:
normalize_on = True
if normalize_on:
    close_feats_norm = (close_feats - uFEAT) / sdFEAT
    far_feats_norm = (far_feats - uFEAT) / sdFEAT
    ## PF, the photo feature matrix, is already normalized

In [217]:
sp_centroid_similarity = []
for i,d in SM.iterrows():
    this_label = d['label']
    this_cond = d['condition']
    inds = (order==this_label)
    if this_cond=='closer':
        this_feat = close_feats_norm[inds,:]
    elif this_cond=='further':
        this_feat = far_feats_norm[inds,:]
    sp_centroid_similarity.append([np.corrcoef(this_feat,pf)[0,1] for pf in PF])

## generate similarity dataframe    
sim = pd.DataFrame(sp_centroid_similarity)
sim.columns = order

In [218]:
## now concatenate with the main SM dataframe
SM3 = pd.concat([SM,sim],axis=1)

In [219]:
## save out to csv
SM3.to_csv('sketch_meta_type_level_similarity.csv')

In [ ]:


In [ ]:

now get to the business of prepping jsons for real

In [220]:
## choose which similarity dataframe to use (item-level SM2 or type-level SM3)
this_SM = SM3

In [221]:
import json

## sample json paths
json_path_prefix = '../models/refModule/json/'
json_file = 'strict-similarity-pragmatics-fixedpose-augmented-splitbycontext_conv4_2.json'
json_path = os.path.join(json_path_prefix,json_file)

def load_json(json_path):
    with open(json_path) as fp:
        data = json.load(fp)  
    return data

## load a sample json to get the list of render IDs
data = load_json(json_path)

## list of 3d rendered objects
render_list = data.keys()

## dictionaries mapping between object names and render IDs (e.g. 'cuckoo' <-> 'trial_20_cuckoo')
obj_to_render = dict(zip([i.split('_')[-1] for i in data.keys()], data.keys()))
render_to_obj = dict(zip(data.keys(),[i.split('_')[-1] for i in data.keys()]))

In [222]:
def simplify_sketch(path): ## example path: 'gameID_9903-d6e6a9ff-a878-4bee-b2d5-26e2e239460a_trial_9.npy' ==> '26e2e239460a_9'
    path = '_'.join(os.path.splitext(os.path.basename(path))[0].split('_')[1:])
    path = path.split('-')[-1]
    path = path.replace('_trial', '')
    return path

def add_simplified_ids(X):
    ## add renderID and sketchID columns to the dataframe
    renderID = []
    sketchID = []
    for i,d in X.iterrows():
        renderID.append(obj_to_render[d['label']])
        sketchID.append(simplify_sketch(d['filename']))
    X['renderID'] = renderID
    X['sketchID'] = sketchID    
    return X

In [223]:
this_SM = add_simplified_ids(this_SM)

In [ ]:


In [232]:
## generate big json dictionary of dictionaries
from __future__ import division
out_json = {}
for i,this_render in enumerate(render_list):
    print i, this_render
    out_json[this_render] = {}
    _render = str(this_render.split('_')[-1]) ## object name for this render, e.g. 'cuckoo'
    for _,d in this_SM.iterrows():
        this_sketch = d['sketchID']
        this_similarity = (d[_render]+1.00000001)/2 ## map correlation from [-1,1] to (0,1] so it can serve as a similarity
        out_json[this_render][this_sketch] = this_similarity


0 trial_12_bluejay
1 trial_7_basset
2 trial_2_bullmastiff
3 trial_30_doberman
4 trial_10_beetle
5 trial_11_waiting
6 trial_1_bluesport
7 trial_31_bluesedan
8 trial_4_straight
9 trial_8_knob
10 trial_15_nightingale
11 trial_22_leather
12 trial_24_white
13 trial_28_chihuahua
14 trial_17_redsport
15 trial_29_sparrow
16 trial_21_woven
17 trial_25_goldenretriever
18 trial_32_squat
19 trial_3_redantique
20 trial_9_brown
21 trial_27_sling
22 trial_16_bloodhound
23 trial_23_weimaraner
24 trial_26_crow
25 trial_13_tomtit
26 trial_6_hatchback
27 trial_20_cuckoo
28 trial_18_pug
29 trial_5_inlay
30 trial_19_pigeon
31 trial_14_robin

In [233]:
## output json in the same format as the other similarity jsons
output_path = '../models/refModule/json/similarity-fc6-centroid.json'
with open(output_path, 'wb') as fp:
    json.dump(out_json, fp)

In [ ]:


In [ ]:

now evaluate model predictions


In [274]:
## define set of models to compare
model_zoo = ['fc6_combined_cost','fc6_combined_nocost','fc6_S0_cost','fc6_S0_nocost']
this_model = model_zoo[0]

## define paths to model predictions
path_to_evaluate = '../models/evaluateOutput'
pred_path = os.path.join(path_to_evaluate,this_model)

In [275]:
## get file with params from this model
this_params = os.path.join('../models/bdaOutput',this_model+'_alldataParams.csv')
params = pd.read_csv(this_params)
assert np.isclose(np.sum(np.exp(params.posteriorProb.values)), 1.0) ## posterior probabilities should sum to 1

In [276]:
## get list of all predictives (accepted MCMC samples)
pred_files = [i for i in os.listdir(pred_path) if i[-15:] =='Predictives.csv']

In [ ]:


In [376]:
X = [] ## initialize giant dataframe that contains predictions from all MCMC samples

## loop through MCMC samples
for i,this_sample in enumerate(pred_files):
    
    print '{} | sample ID: {}'.format(i,int(this_sample.split('Predictives.csv')[0]))

    ## read in predictions from this sample
    sample_path = os.path.join(pred_path,this_sample)
    sample_preds = pd.read_csv(sample_path)
    sample_ind = int(this_sample.split('Predictives.csv')[0]) ## index of MCMC sample

    #### get params that generated these predictions
    #alpha = params.iloc[sample_ind]['alpha']
    #simScaling = params.iloc[sample_ind]['simScaling']
    #pragWeight = params.iloc[sample_ind]['pragWeight']
    #costWeight = params.iloc[sample_ind]['costWeight']
    posteriorProb = params.iloc[sample_ind]['posteriorProb']
    #logLikelihood = params.iloc[sample_ind]['logLikelihood']
    
    ## get congruent/incongruent context log odds for each sketch
    sketches = np.unique(sample_preds['trueSketch'].values)
    log_odds = []
    label = []
    condition = []
    for this_sketch in sketches:
        sketch_inds = sample_preds['trueSketch']==this_sketch
        these_rows = sample_preds[sketch_inds]
        cond = np.unique(these_rows['condition'].values)[0]
        other_cond = [i for i in ['closer','further'] if i != cond][0]
        obj = these_rows.iloc[0]['Target'].split('_')[-1]
        congruent_prob = these_rows[these_rows['coarseGrainedPossibleSketch']=='{}_{}'.format(cond,obj)]['modelProb'].values[0]
        other_prob = these_rows[these_rows['coarseGrainedPossibleSketch']=='{}_{}'.format(other_cond,obj)]['modelProb'].values[0]
        log_odds.append(congruent_prob - other_prob)
        label.append(obj)
        condition.append(cond)  
        
    ## make dataframe out of this sample
    sampleProb = [posteriorProb]*len(condition)
    sampleInd = [sample_ind]*len(condition)
    x = pd.DataFrame([sampleInd,sampleProb,condition,label,list(sketches),log_odds])
    x = x.transpose()
    x.columns = ['sample_ind','sample_prob','condition','label','sketch','odds']
    x = x.sort_values(by=['condition','label','sketch'])   
    
    ## concatenate dataframes containing log-odds from all MCMC samples
    if len(X)==0:
        X = x
    else:
        X = pd.concat([X,x])

In [390]:
X.odds = X.odds.astype('float')
X.sample_prob = X.sample_prob.astype('float')
X.to_csv('{}_model_predictions_log_odds.csv'.format(this_model))

In [382]:
X.head()


Out[382]:
sample_ind sample_prob condition label sketch odds
29 91 -5.80914 closer basset 08711b390cfc_9 1.23535
36 91 -5.80914 closer basset 0934a9585321_16 1.25268
102 91 -5.80914 closer basset 0f3dadf71345_20 1.09101
146 91 -5.80914 closer basset 129c7cc55176_32 1.20838
153 91 -5.80914 closer basset 16d8e4ed049d_1 1.2457

In [ ]:


In [407]:
pp = X.groupby(['sample_ind'])['sample_prob'].mean().apply(lambda x: np.exp(x)) ## posterior probability of each MCMC sample
lo = X.groupby(['sample_ind'])['odds'].mean() ## mean log odds within each sample
odds_overall = np.sum(pp*lo) ## posterior-weighted average of the log odds
print '{} log odds in favor of picking the sketch from the congruent context'.format(odds_overall)


0.0481207532728 log odds overall

In [441]:
## compute per-sketch log odds, weighted by the posterior probability of each MCMC sample
## (sketches is the list of unique sketch IDs, which is identical across samples)
weighted_odds = []
unweighted_odds = []
for i,sketch in enumerate(sketches):
    print '{} {}'.format(i,sketch)
    Y = X[X['sketch']==sketch]
    product = Y.apply(lambda x: x['odds'] * np.exp(x['sample_prob']), axis=1) ## posterior-weight each sample's log odds
    weighted_odds.append(np.sum(product))
    unweighted_odds.append(Y['odds'].mean())

In [ ]:


In [452]:
weighted_odds = np.array(weighted_odds)
unweighted_odds = np.array(unweighted_odds)

In [454]:
h = plt.hist(weighted_odds)



In [455]:
np.mean(weighted_odds)


Out[455]:
0.04812075327276482

In [456]:
np.median(weighted_odds)


Out[456]:
0.032125857429063605

In [ ]:


In [ ]:


In [ ]:


In [ ]: