In [28]:
from __future__ import division
import os
import urllib, cStringIO
import pymongo as pm
import matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import seaborn as sns
sns.set_context('poster')
sns.set_style('white')
import numpy as np
import scipy.stats as stats
import pandas as pd
import json
import re
from PIL import Image
import base64
In [29]:
## get path to fc6 features
path_to_fc6 = '/data/jefan/sketchpad_basic_fixedpose96_fc6/photos'
## get image ordering
ordering = pd.read_csv('/data/jefan/sketchpad_basic_fixedpose96_fc6/human_confusion_object_order.csv')
order = ordering['object_name'].values
## build feature paths in the order we want for the matrix
feat_files = [o +'.npy' for o in order]
path_to_feats = [os.path.join(path_to_fc6,f) for f in feat_files]
In [30]:
## load features and stack into a single matrix (one row per object)
X = np.vstack([np.load(p) for p in path_to_feats])
## rename P for "photo"
P = X
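A quick sanity check on the stacked photo features (my addition; assumes one 4096-dim fc6 vector per object, in the csv ordering, which the later cells imply):
In [ ]:
## sanity check (assumption: one fc6 vector per object, rows in csv order)
assert P.shape == (len(order), 4096)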
In [ ]:
In [6]:
## plot RDM
corrmat = np.corrcoef(X)
corrmat.shape
from matplotlib import cm
fig = plt.figure(figsize=(8,8))
ax = plt.subplot(111)
cax = ax.matshow(corrmat,vmin=0,vmax=1,cmap=cm.viridis)
plt.xticks(range(len(order)), order, fontsize=12, rotation='vertical')
plt.yticks(range(len(order)), order, fontsize=12)
plt.colorbar(cax,shrink=0.8)
plt.tight_layout()
In [7]:
## what do the sketches look like by themselves?
path_to_sketches = '/data/jefan/sketchpad_basic_fixedpose96_fc6/sketch'
In [8]:
import cPickle
db_path = '/data/jefan/sketchpad_basic_fixedpose96_fc6/'
with open(os.path.join(db_path, 'sketchpad_context_dict.pickle')) as fp:
    context_dict = cPickle.load(fp)
with open(os.path.join(db_path, 'sketchpad_label_dict.pickle')) as fp:
    label_dict = cPickle.load(fp)
In [9]:
sketch_list = os.listdir(path_to_sketches)
In [10]:
condition = context_dict[sketch_list[0]]
label = label_dict[sketch_list[0]]
In [11]:
## load in all precomputed fc6 features as single numpy array
F = np.zeros([len(sketch_list),4096])
for i,s in enumerate(sketch_list):
    if i%1000==0:
        print '{} of {} sketches'.format(i,len(sketch_list))
    x = np.load(os.path.join(db_path,'sketch',s))
    F[i,:] = x
In [12]:
# z-score normalization: de-mean and standardize variance within each feature dimension
def normalize(X):
    X = X - X.mean(0)
    X = X / np.maximum(X.std(0), 1e-5) ## floor the sd to avoid division by zero
    return X
normalize_on = False ## normalize AFTER stacking with the render features
if normalize_on:
    F = normalize(F)
In [13]:
## get lists of labels and conditions in same order as feature array above
labels = []
conditions = []
for i,s in enumerate(sketch_list):
    if i%1000==0:
        print '{} of {} sketches'.format(i,len(sketch_list))
    labels.append(label_dict[s])
    conditions.append(context_dict[s])
## define dataframe for this feature matrix
meta = pd.DataFrame([labels,conditions,sketch_list])
meta = meta.transpose()
meta.columns = ['label','condition','filename']
In [14]:
## now build the class-averaged feature matrices for each condition
close_feats = np.zeros([32,4096])
far_feats = np.zeros([32,4096])
for i,obj in enumerate(order):
    inds = (meta['label']==obj) & (meta['condition']=='closer')
    close_feats[i,:] = F[inds,:].mean(0)
    inds = (meta['label']==obj) & (meta['condition']=='further')
    far_feats[i,:] = F[inds,:].mean(0)
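A cheap guard (my addition, not in the original run): the mean of an empty selection would silently produce NaN rows, so assert that every object actually has sketches in both conditions.
In [ ]:
## guard against NaN rows from empty label/condition selections
assert not np.isnan(close_feats).any()
assert not np.isnan(far_feats).any()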
In [ ]:
In [15]:
## plot RDM
corrmat = np.corrcoef(close_feats)
corrmat.shape
corrmat_close = corrmat
from matplotlib import cm
fig = plt.figure(figsize=(8,8))
ax = plt.subplot(111)
cax = ax.matshow(corrmat,vmin=0,vmax=1,cmap=cm.viridis)
plt.xticks(range(len(order)), order, fontsize=12, rotation='vertical')
plt.yticks(range(len(order)), order, fontsize=12)
plt.colorbar(cax,shrink=0.8)
plt.tight_layout()
In [16]:
## plot RDM
corrmat = np.corrcoef(far_feats)
corrmat.shape
corrmat_far = corrmat
from matplotlib import cm
fig = plt.figure(figsize=(8,8))
ax = plt.subplot(111)
cax = ax.matshow(corrmat,vmin=0,vmax=1,cmap=cm.viridis)
plt.xticks(range(len(order)), order, fontsize=12, rotation='vertical')
plt.yticks(range(len(order)), order, fontsize=12)
plt.colorbar(cax,shrink=0.8)
plt.tight_layout()
In [ ]:
In [17]:
FEAT = np.vstack((P,close_feats,far_feats))
In [18]:
## normalize to mean of this dataset
normalize_on = True
if normalize_on:
FEAT = normalize(FEAT)
In [19]:
## plot RDM of unadapted render, close, far sketches
corrmat = np.corrcoef(FEAT)
corrmat_full = corrmat
fig = plt.figure(figsize=(12,12))
ax = plt.subplot(111)
cax = ax.matshow(corrmat,cmap=cm.viridis)
plt.xticks(range(len(order)*3), np.tile(order,3), fontsize=8, rotation='vertical')
plt.yticks(range(len(order)*3), np.tile(order,3), fontsize=8)
plt.xlabel(' OBJECT CLOSE FAR ') ## crude labels for the three row/column blocks
plt.colorbar(cax,shrink=0.8)
plt.tight_layout()
In [ ]:
In [20]:
## extract the blocks that allow us to measure sketch-render similarity
close_render = corrmat_full[:32,32:64]
far_render = corrmat_full[:32,64:]
In [101]:
plt.figure(figsize=(4,4))
s = plt.scatter(np.diagonal(close_render),np.diagonal(far_render))
plt.plot([-0.3,0.5],[-0.3,0.5],linestyle='dashed',color='gray')
plt.xlabel('close-to-render correlation similarity')
plt.ylabel('far-to-render correlation similarity')
plt.title('Unadapted fc6 representation')
Out[101]:
In [ ]:
The json format is a dictionary of dictionaries, where each top-level key refers to one of the renders, e.g. "trial_20_cuckoo". For each render, you can look up its similarity to each sketch, referenced by an abbreviated ID formed from the final 12-character segment of the gameID, an underscore, and the trial number. E.g., 'gameID_9903-d6e6a9ff-a878-4bee-b2d5-26e2e239460a_trial_9.npy' ==> '26e2e239460a_9'
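A minimal illustration of that abbreviation rule, using the example filename above (the notebook implements the same logic below as simplify_sketch):
In [ ]:
## walk through the ID abbreviation rule described above
fname = 'gameID_9903-d6e6a9ff-a878-4bee-b2d5-26e2e239460a_trial_9.npy'
stem = os.path.splitext(fname)[0]   # drop the extension
short = stem.split('-')[-1]         # '26e2e239460a_trial_9'
short = short.replace('_trial','')  # '26e2e239460a_9'
print short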
In [188]:
## load in all precomputed fc6 features as single numpy array
F = np.zeros([len(sketch_list),4096])
for i,s in enumerate(sketch_list):
    if i%1000==0:
        print '{} of {} sketches'.format(i,len(sketch_list))
    x = np.load(os.path.join(db_path,'sketch',s))
    F[i,:] = x
#### sketch feature matrix
SF = F
## get lists of labels and conditions in same order as feature array above
labels = []
conditions = []
for i,s in enumerate(sketch_list):
    if i%1000==0:
        print '{} of {} sketches'.format(i,len(sketch_list))
    labels.append(label_dict[s])
    conditions.append(context_dict[s])
## define dataframe for this feature matrix
meta = pd.DataFrame([labels,conditions,sketch_list])
meta = meta.transpose()
meta.columns = ['label','condition','filename']
#### sketch metadata
SM = meta
In [189]:
## photo feature matrix & metadata
PF = P
PM = order
In [190]:
## normalize within feature dimension: get the mean/std from the class- and
## condition-averaged version of the feature matrix, i.e. compute mean and sd
## across DOMAINS so that each domain contributes a balanced number of entries
S = np.dstack((close_feats,far_feats)).mean(2)
FEAT = np.vstack((P,S))
uFEAT = FEAT.mean(0)
sdFEAT = np.maximum(FEAT.std(0), 1e-5)
normalize_on = True
if normalize_on:
    PF = (PF - uFEAT) / sdFEAT
    SF = (SF - uFEAT) / sdFEAT
In [191]:
## get distances for each sketch to every render
sp_similarity = []
for i,d in SM.iterrows():
    ## correlation between this sketch and every render
    sp_similarity.append([np.corrcoef(SF[i,:],pf)[0,1] for pf in PF])
## generate similarity dataframe
sim = pd.DataFrame(sp_similarity)
sim.columns = order
In [192]:
## now concatenate with the main SM dataframe
SM2 = pd.concat([SM,sim],axis=1)
In [193]:
## save out to csv
SM2.to_csv('sketch_meta_item_level_similarity.csv')
In [200]:
normalize_on = True
if normalize_on:
    close_feats_norm = (close_feats - uFEAT) / sdFEAT
    far_feats_norm = (far_feats - uFEAT) / sdFEAT
## PF, the photo feature matrix, is already normalized
In [217]:
sp_centroid_similarity = []
for i,d in SM.iterrows():
    this_label = d['label']
    this_cond = d['condition']
    inds = (order==this_label)
    if this_cond=='closer':
        this_feat = close_feats_norm[inds,:]
    elif this_cond=='further':
        this_feat = far_feats_norm[inds,:]
    sp_centroid_similarity.append([np.corrcoef(this_feat,pf)[0,1] for pf in PF])
## generate similarity dataframe
sim = pd.DataFrame(sp_centroid_similarity)
sim.columns = order
In [218]:
## now concatenate with the main SM dataframe
SM3 = pd.concat([SM,sim],axis=1)
In [219]:
## save out to csv
SM3.to_csv('sketch_meta_type_level_similarity.csv')
In [ ]:
In [220]:
## which similarity dataframe to use (SM2 = item-level, SM3 = type-level)
this_SM = SM3
In [221]:
import json
## sample json paths
json_path_prefix = '../models/refModule/json/'
json_file = 'strict-similarity-pragmatics-fixedpose-augmented-splitbycontext_conv4_2.json'
json_path = os.path.join(json_path_prefix,json_file)
def load_json(json_path):
    with open(json_path) as fp:
        data = json.load(fp)
    return data
## build dictionary to look up the appropriate render ID to use to associate with each sketch
data = load_json(json_path)
## list of 3d rendered objects
render_list = data.keys()
obj_to_render = dict(zip([i.split('_')[-1] for i in data.keys()], data.keys()))
render_to_obj = dict(zip(data.keys(),[i.split('_')[-1] for i in data.keys()]))
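For example (assuming the render key 'trial_20_cuckoo' described earlier is present in this json):
In [ ]:
## example lookups in the two directions (key assumed from the json description above)
print obj_to_render['cuckoo']           # => 'trial_20_cuckoo'
print render_to_obj['trial_20_cuckoo']  # => 'cuckoo'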
In [222]:
def simplify_sketch(path):
    ## example: 'gameID_9903-d6e6a9ff-a878-4bee-b2d5-26e2e239460a_trial_9.npy' ==> '26e2e239460a_9'
    path = '_'.join(os.path.splitext(os.path.basename(path))[0].split('_')[1:])
    path = path.split('-')[-1]
    path = path.replace('_trial', '')
    return path
def add_simplified_ids(X):
    ## add renderID and sketchID columns to the dataframe
    renderID = []
    sketchID = []
    for i,d in X.iterrows():
        renderID.append(obj_to_render[d['label']])
        sketchID.append(simplify_sketch(d['filename']))
    X['renderID'] = renderID
    X['sketchID'] = sketchID
    return X
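A cheap check of the abbreviation on the example filename from above (my addition):
In [ ]:
## verify the abbreviation rule on the documented example
assert simplify_sketch('gameID_9903-d6e6a9ff-a878-4bee-b2d5-26e2e239460a_trial_9.npy') == '26e2e239460a_9'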
In [223]:
this_SM = add_simplified_ids(this_SM)
In [ ]:
In [232]:
## generate big json dictionary of dictionaries
out_json = {}
for i,this_render in enumerate(render_list):
    print i, this_render
    out_json[this_render] = {}
    _render = str(this_render.split('_')[-1]) ## object-name column for this render
    for _,d in this_SM.iterrows():
        this_sketch = d['sketchID']
        this_similarity = (d[_render]+1.00000001)/2 #### map correlation from (-1,1) onto a (0,1) scale
        out_json[this_render][this_sketch] = this_similarity
In [233]:
## output json in the same format as the other similarity jsons
output_path = '../models/refModule/json/similarity-fc6-centroid.json'
with open(output_path, 'wb') as fp:
    json.dump(out_json, fp)
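A quick read-back spot check (my addition; uses the load_json helper defined above):
In [ ]:
## reload the saved json and inspect one render/sketch pair
check = load_json(output_path)
some_render = check.keys()[0]
some_sketch = check[some_render].keys()[0]
print some_render, some_sketch, check[some_render][some_sketch]
## all rescaled similarities should be positive
assert all(v > 0 for v in check[some_render].values())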
In [ ]:
In [274]:
## define set of models to compare
model_zoo = ['fc6_combined_cost','fc6_combined_nocost','fc6_S0_cost','fc6_S0_nocost']
this_model = model_zoo[0]
## define paths to model predictions
path_to_evaluate = '../models/evaluateOutput'
pred_path = os.path.join(path_to_evaluate,this_model)
In [275]:
## get file with params from this model
this_params = os.path.join('../models/bdaOutput',this_model+'_alldataParams.csv')
params = pd.read_csv(this_params)
assert np.isclose(np.sum(np.exp(params.posteriorProb.values)), 1.0) ## posterior probabilities should sum to 1
In [276]:
## get list of all predictives (accepted MCMC samples)
pred_files = [i for i in os.listdir(pred_path) if i.endswith('Predictives.csv')]
In [ ]:
In [376]:
X = [] ## initialize giant dataframe that contains predictions from all MCMC samples
## loop through MCMC samples
for i,this_sample in enumerate(pred_files):
    print '{} | sample ID: {}'.format(i,int(this_sample.split('Predictives.csv')[0]))
    ## read in predictions from this sample
    sample_path = os.path.join(pred_path,this_sample)
    sample_preds = pd.read_csv(sample_path)
    sample_ind = int(this_sample.split('Predictives.csv')[0]) ## index of MCMC sample
    #### get params that generated these predictions
    #alpha = params.iloc[sample_ind]['alpha']
    #simScaling = params.iloc[sample_ind]['simScaling']
    #pragWeight = params.iloc[sample_ind]['pragWeight']
    #costWeight = params.iloc[sample_ind]['costWeight']
    posteriorProb = params.iloc[sample_ind]['posteriorProb']
    #logLikelihood = params.iloc[sample_ind]['logLikelihood']
    ## get congruent/incongruent context log odds for each sketch
    sketches = np.unique(sample_preds['trueSketch'].values)
    log_odds = []
    label = []
    condition = []
    for this_sketch in sketches:
        sketch_inds = sample_preds['trueSketch']==this_sketch
        these_rows = sample_preds[sketch_inds]
        cond = np.unique(these_rows['condition'].values)[0]
        other_cond = [c for c in ['closer','further'] if c != cond][0]
        obj = these_rows.iloc[0]['Target'].split('_')[-1]
        congruent_prob = these_rows[these_rows['coarseGrainedPossibleSketch']=='{}_{}'.format(cond,obj)]['modelProb'].values[0]
        other_prob = these_rows[these_rows['coarseGrainedPossibleSketch']=='{}_{}'.format(other_cond,obj)]['modelProb'].values[0]
        log_odds.append(congruent_prob - other_prob) ## difference of log probabilities = log odds
        label.append(obj)
        condition.append(cond)
    ## make dataframe out of this sample
    sampleProb = [posteriorProb]*len(condition)
    sampleInd = [sample_ind]*len(condition)
    x = pd.DataFrame([sampleInd,sampleProb,condition,label,list(sketches),log_odds])
    x = x.transpose()
    x.columns = ['sample_ind','sample_prob','condition','label','sketch','odds']
    x = x.sort_values(by=['condition','label','sketch'])
    ## concatenate dataframes containing log-odds from all MCMC samples
    if len(X)==0:
        X = x
    else:
        X = pd.concat([X,x])
In [390]:
X.odds = X.odds.astype('float')
X.sample_prob = X.sample_prob.astype('float')
X.to_csv('{}_model_predictions_log_odds.csv'.format(this_model))
In [382]:
X.head()
Out[382]:
In [ ]:
In [407]:
pp = X.groupby(['sample_ind'])['sample_prob'].mean().apply(lambda x: np.exp(x)) ## posterior probs
lo = X.groupby(['sample_ind'])['odds'].mean() ## log odds
odds_overall = np.sum(pp*lo)
print '{} log odds in favor of picking a sketch from the congruent context'.format(odds_overall)
In [441]:
weighted_odds = []
unweighted_odds = []
for i,sketch in enumerate(sketches):
    print '{} {}'.format(i,sketch)
    Y = X[X['sketch']==sketch]
    ## weight each sample's log odds by that sample's posterior probability
    product = Y.apply(lambda x: x['odds'] * np.exp(x['sample_prob']), axis=1)
    weighted_odds.append(np.sum(product))
    unweighted_odds.append(Y['odds'].mean())
In [ ]:
In [452]:
weighted_odds = np.array(weighted_odds)
unweighted_odds = np.array(unweighted_odds)
In [454]:
h = plt.hist(weighted_odds)
In [455]:
np.mean(weighted_odds)
Out[455]:
In [456]:
np.median(weighted_odds)
Out[456]: