In [ ]:
# imports
import os
import yaml
import numpy as np
import pandas as pd
from scipy.stats import bayes_mvs, entropy, linregress, spearmanr
import seaborn as sns
import matplotlib.pyplot as plt
import theano
import lasagne
import loading
from autoload_data import *
from training import *
from network import *
import architectures as arches
import results
pd.options.display.max_columns = 50
# aliases
L = lasagne.layers
T = theano.tensor
# styles
sns.set_style('white')
sns.set_context('talk')
plt.rc('text', usetex=True)
%matplotlib inline
In [ ]:
df['np'] = df.apply(results.count_pieces, axis=1)
In [ ]:
PTR, TR, PTP, TP, param_counts = results.rehydrate()
In [ ]:
F = results.aggregate_results(PTR, TR, param_counts)
F.loc[F['net name']=='h4']
In [ ]:
import matplotlib.patches as mpatches
fig, axes = plt.subplots(1, 1, figsize=(25, 7))
legdict = {'Columnar': 'blue', '2Conv': 'green', '1Conv': 'red', '1Conv No Pool': 'purple', 'Columnar Tanh Vals': 'orange'}
for net_name in F['net name']:
    # color-code each architecture family to match legdict
    if 'deep' in net_name:
        color = 'green'    # 2Conv
    elif 'regular' in net_name:
        color = 'red'      # 1Conv
    elif 'nopool' in net_name:
        color = 'purple'   # 1Conv No Pool
    elif 'tanh' in net_name:
        color = 'orange'   # Columnar Tanh Vals
    else:
        color = 'blue'     # Columnar
    idx = F['net name'] == net_name
    x = F.loc[idx, 'num params'].values[0]
    y = F.loc[idx, 'tuned subject'].values[0]
    axes.plot(x, y, linestyle='none', marker='o', color=color)
legpatches = [mpatches.Patch(color=c, label=l) for l, c in legdict.items()]
axes.legend(handles=legpatches)
plt.setp(axes, xlabel=r'\# parameters', ylabel='NLL', xlim=[0, 75000])
sns.despine()
In [ ]:
for archname in ['h4', 'f4', 'd4', 'c4', 'b4']:
    preds, pp = error_per_piece(archname, TP, df)
    plt.plot(pp.mean(axis=1))
plt.plot((0, 36), (0, 0), color='grey', linestyle='--')  # zero reference line
sns.despine()
In [ ]:
F.to_csv(os.path.join(resultsdir, 'num params with nlls.csv'))
In [ ]:
FD = fake_data[0]
HH = hvhdata[0]
AD = data[0]
FD['position'] = FD['bp'] + FD['wp']
HH['position'] = HH['bp'] + HH['wp']
AD['position'] = AD['bp'] + AD['wp']
FDpiv = FD.pivot_table(index='position', values='zet', aggfunc=entropy_zets)
HHpiv = HH.pivot_table(index='position', values='zet', aggfunc=entropy_zets)
ADpiv = AD.pivot_table(index='position', values='zet', aggfunc=entropy_zets)
for piv in [FDpiv, ADpiv, HHpiv]:
    print(piv.loc[piv.values[:, 0] > 0].mean())
    print(len(piv), piv.mean())
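entropy_zets comes from one of the starred imports above; the sketch below is an assumption about what it computes (the entropy of the empirical distribution of chosen moves for a given position), not the project's own definition.
In [ ]:
# hypothetical sketch of entropy_zets (assumes zet values are integer square indices in 0-35)
def entropy_zets_sketch(zets):
    counts = np.bincount(np.asarray(zets, dtype=int), minlength=36)
    return entropy(counts)  # scipy.stats.entropy normalizes the counts before computing -sum(p log p)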
In [ ]:
archname = 'h4'
Xt, yt, _, _, _ = loading.unpack_data(fake_data[0])
fake_results = []
fake_outputs = []
specs = arch_dict[archname]
af = getattr(arches, arch_dict[archname]['type'])
arch_func = lambda input_var: af(input_var, **specs['kwargs'])
net = Network(arch_func)
for idx in range(5):
    # load the parameters fit on each train/test split and score the generated positions
    fname = '{} {} split agg fit exp 1-4.npz'.format(archname, idx)
    paramsdir = os.path.join(paramsdir_, archname[:-1])
    net.load_params(os.path.join(paramsdir, fname))
    nlls = net.itemized_test_fn(Xt, yt)
    predictions = net.output_fn(Xt)
    fake_results.append(nlls)
    fake_outputs.append(predictions)
fake_results_df = pd.DataFrame(fake_results).T
# average per-trial NLLs within each subject, then across subjects and splits
fake_results_df.pivot_table(index=fake_data[0]['subject']).mean().mean()
In [ ]:
df['np'] = df.apply(results.count_pieces, axis=1)
In [ ]:
# chance NLL: a uniform guess over the 36 - np empty squares has NLL -log(1/(36 - np)) = log(36 - np)
chancenll = lambda x: -np.log(1/(36-x))
df['chancenll'] = chancenll(df['np'].values)
df['m'] = -(train_results.mean(axis=1).values - df['chancenll'])  # NLL improvement relative to chance
np_v_m = df.pivot_table(index='np', values='m')
np_v_m.to_csv(os.path.join(resultsdir, 'num_pieces_vs_nll.csv'), header=False)
plt.plot(np_v_m)
plt.setp(plt.gca(), xlabel='N Pieces', ylabel='NLL relative to chance', ylim=[-.5, 2])
sns.despine()
plt.tight_layout()
In [ ]:
plt.plot(df.pivot_table(index='np', values='chancenll'))
In [ ]:
scatterkws = {
'linestyle': 'none',
'marker': 'o', 'markerfacecolor': (.2, .2, .2), 'markeredgecolor': 'black',
'alpha': .3
}
histkws = {
'edgecolor': 'white'
}
In [ ]:
def hicks_entropy(pred):
    # Hick-Hyman entropy: H = sum_i p_i * log2(1/p_i + 1)
    H = pred * np.log2(1 / pred + 1)
    return H.sum(axis=1)
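As a quick sanity check (illustrative only): for a uniform distribution over n options this quantity reduces to log2(n + 1), the familiar Hick's-law value.
In [ ]:
# illustrative check: a uniform prediction over n moves gives log2(n + 1)
n = 8
uniform = np.full((1, n), 1 / n)
print(hicks_entropy(uniform), np.log2(n + 1))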
In [ ]:
X, y, S, G, Np = loading.unpack_data(df)
df['mean corrected rt'] = 0
for subject in df['subject'].unique():
    fil = df['subject'] == subject
    df.loc[fil, 'mean corrected rt'] = df.loc[fil, 'rt'] - df.loc[fil, 'rt'].mean()
rt = df['mean corrected rt']
In [ ]:
# compute mean entropy for each test group
E = []
for split_idx in range(25):
    # 5 of the 25 nets share each held-out fold; collect their predicted-move entropies
    N = train_nets[split_idx]
    locs = np.where(G == (split_idx // 5))[0]
    probs = N.output_fn(X[locs, :, :, :])  # predicted move distributions for this fold
    E.append(hicks_entropy(probs))
for g in range(5):
    df.loc[df['group'] == (g + 1), 'entropy'] = np.array(E[g*5:(g+1)*5]).T.mean(axis=1)
In [ ]:
fig, axes = plt.subplots(1, 1, figsize=(10, 6))
x = df['entropy']
y = np.log(df['rt']/1000)
axes.plot(x, y, **scatterkws)
lr = linregress(x, y)
ptext = r"p $<$ .001" if lr.pvalue < .001 else r"p = {:.3f}".format(lr.pvalue)
axes.text(.05, .05, r"r = {:.2f}, {}".format(lr.rvalue, ptext), transform=axes.transAxes, fontsize=14)
plt.setp(axes, xlabel=r"$\textrm{Entropy}$", ylabel=r'$\log{\textrm{Response time (s)}}$', ylim=[-5, 5])
sns.despine()
Hick's law holds, approximately: response times grow with the entropy of the network's predicted move distribution.
In [ ]:
gendata = pd.read_csv(
os.path.join(headdir, 'Data/1_gen/Clean/_summaries/all_evals_model_input.csv'),
names=['subject', 'color', 'bp', 'wp', 'zet', 'rt', 'val']
)
gendata['group'] = -1
X, y, S, G, Np = loading.unpack_data(gendata)
In [ ]:
N = train_nets[0]
logistic = lambda x: 1 / (1 + np.exp(-x))
# note: this divides by the standard error of the mean (std / sqrt(n)), not the standard deviation,
# so values come out sqrt(n) times larger than an ordinary z-score
zscore = lambda x: (x - x.mean()) / (x.std() / np.sqrt(x.shape[0]))
Vr = N.value_fn(X)
V = Vr.sum(axis=1)
Vl = 7*logistic(zscore(V))
V2 = np.zeros_like(V)
yz = np.zeros_like(y)
for subject in np.unique(S):  # iterate over the subject ids actually present
    V2[S == subject] = zscore(V[S == subject])
    yz[S == subject] = zscore(y[S == subject])
V2l = 7*logistic(V2)
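The logistic squashing is just a monotone map of the standardized network values into the range of the 7-point evaluation scale; a small illustration (not part of the analysis):
In [ ]:
# illustration: standardized values map monotonically into the rating range
z = np.linspace(-3, 3, 7)
print(7 * logistic(z))      # spans (0, 7)
print(6 * logistic(z) + 1)  # spans (1, 7), matching the 1-7 scale used for valhat below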
In [ ]:
# value of each position minus the value with the channel axis reversed (presumably swapping the two players' piece planes)
Vr = N.value_fn(X) - N.value_fn(X[:, ::-1, :, :])
In [ ]:
plt.hist(V, **histkws)
sns.despine()
In [ ]:
plt.hist(V2, **histkws)  # alternatively: bins=np.arange(0, 8, .5)
sns.despine()
In [ ]:
plt.plot(V, gendata['val'], **scatterkws)
print(linregress(V2, gendata['val']))  # regression uses the within-subject z-scored values; the scatter shows the raw summed values
sns.despine()
In [ ]:
plt.plot(gendata['val'], gendata['zet'], **scatterkws)
print(linregress(gendata['zet'], gendata['val']))
sns.despine()
In [ ]:
plt.plot(zscore(V), yz, **scatterkws)
print(linregress(zscore(V), yz))
sns.despine()
In [ ]:
gendata['valhat'] = 6*logistic(V2) + 1
gendata['valhat'] = gendata['valhat'].map(int)
gendata['position'] = gendata['bp'] + gendata['wp']
gp = gendata.pivot_table(index='position', columns='zet', values='group', aggfunc=len, fill_value=0)
gvp = gendata.pivot_table(index='position', values='valhat')
gp['valhat'] = gvp.values
gp['valsum'] = gp[list(np.arange(1, 8, 1))].values.argmax(axis=1) + 1  # most frequent response per position (argmax over the 1-7 columns)
gp.head()
In [ ]:
linregress(gp['valhat'], gp['valsum'])