In [1]:
# Switch the kernel's working directory to the project checkout.
# NOTE(review): presumably required so the `notebooks` and `gui` packages
# imported below resolve -- confirm. The path is machine-specific.
%cd ~/NetBeansProjects/ExpLosion/
# Loads the autoreload extension. No `%autoreload` mode is selected anywhere
# in this cell.
# NOTE(review): without a mode, automatic reloading is presumably inactive --
# confirm whether `%autoreload 1`/`2` was intended.
%load_ext autoreload
# Star imports: names used further down that are not defined in this notebook
# (e.g. sns, Experiment, my_bootstrap, dataframe_from_exp_ids) have to come
# from one of these two modules; which one is not visible from here.
from notebooks.common_imports import *
from gui.output_utils import *
# Rebind the `bootstrap` attribute of these two seaborn modules to
# `my_bootstrap`, so seaborn code that looks the name up there calls the
# replacement instead of its own routine.
sns.timeseries.algo.bootstrap = my_bootstrap
sns.categorical.bootstrap = my_bootstrap
In [2]:
# Filter describing the experiments of interest. Every key is passed to
# `Experiment.objects.filter` as a keyword lookup.
s = dict(
    document_features_ev='AN+NN',
    document_features_tr='J+N+AN+NN',
    expansions__decode_handler='SignifiedOnlyFeatureHandler',
    expansions__entries_of_id=None,
    expansions__k=3,
    expansions__neighbour_strategy='linear',
    expansions__noise=0.0,
    expansions__use_random_neighbours=0,
    expansions__use_similarity=0,
    expansions__vectors__algorithm='word2vec',
    # expansions__vectors__composer='Add',  # left unconstrained: composer is compared in the plot
    expansions__vectors__dimensionality=100,
    expansions__vectors__rep=0,
    expansions__vectors__unlabelled='wiki',
    expansions__vectors__unlabelled_percentage__in=[100],
    labelled='amazon_grouped-tagged',
)
matching_experiments = Experiment.objects.filter(**s)
ids = matching_experiments.values_list('id', flat=True)

# Data-frame column name -> experiment attribute path to fetch for it.
fields = dict(
    unlab='expansions__vectors__unlabelled',
    percent='expansions__vectors__unlabelled_percentage',
    Composer='expansions__vectors__composer',
    Overlap='expansions__allow_overlap',
)

print('ids are', ids, 'total', len(ids))

df = dataframe_from_exp_ids(ids, fields)

# Label each row as "<unlabelled corpus>-<percentage>".
df['corpus'] = ['%s-%s' % pair for pair in zip(df['unlab'], df['percent'])]

# Replace the raw '1'/'0' overlap flag with a readable label for the plot axis.
df['Overlap'] = df['Overlap'].map({'1': 'Allowed', '0': 'Not allowed'})
In [3]:
# Re-apply the bootstrap override so this cell also works when run on its own
# (the same two assignments appear in the first cell).
sns.timeseries.algo.bootstrap = my_bootstrap
sns.categorical.bootstrap = my_bootstrap

# Grouped bar chart of accuracy: one group per overlap setting, one bar per
# composer, drawn with a 4-colour cubehelix palette. `ci=68` sets the size of
# the confidence interval drawn on each bar.
with sns.color_palette("cubehelix", 4):
    g = sns.factorplot(y='Accuracy', hue='Composer', x='Overlap',
                       x_order=['Not allowed', 'Allowed'],
                       data=df, kind='bar', ci=68, aspect=1.5)

# Both baselines are the same for every axes, so look them up once instead of
# once per axes inside the loop.
random_baseline_acc = random_vect_baseline()
nondist_baseline_acc = nondistributional_baseline(**settings_of(ids[0]))

# Draw the two baselines as black horizontal reference lines on each axes.
for ax in g.axes.flat:
    ax.axhline(random_baseline_acc, c='k')
    ax.axhline(nondist_baseline_acc, c='k')

g.savefig('plot-nps-with-lexical-overlap.pdf', format='pdf', dpi=300,
          bbox_inches='tight', pad_inches=0.1)
In [4]:
# Per-(Overlap, Composer) means of the data frame built above; the last
# expression is what the cell displays.
rows_by_overlap_and_composer = df.groupby(['Overlap', 'Composer'])
rows_by_overlap_and_composer.mean()
Out[4]:
In [8]:
from gui.user_code import pairwise_significance_exp_ids

# Settings that identify the single reference experiment to compare against.
corpus = 'amazon_grouped-tagged'
document_features_tr = 'J+N+AN+NN'
document_features_ev = 'AN+NN'

reference_lookup = dict(
    labelled=corpus,
    document_features_tr=document_features_tr,
    document_features_ev=document_features_ev,
    expansions__decode_handler='BaseFeatureHandler',
)
# `.get` expects exactly one matching experiment.
res = Experiment.objects.get(**reference_lookup)

# Pair every experiment id selected earlier with the reference experiment's id
# and hand the pairs to the significance helper; its return value is displayed.
reference_ids = [res.id] * len(ids)
pairwise_significance_exp_ids(zip(ids, reference_ids))
Out[8]:
In [ ]: