In [1]:
%cd ~/NetBeansProjects/ExpLosion/
from itertools import chain
from notebooks.common_imports import *
from gui.output_utils import *
from gui.user_code import pretty_names
# Replace the `bootstrap` attribute on both seaborn modules below with
# my_bootstrap (presumably supplied by one of the star imports above).
for seaborn_module in (sns.timeseries.algo, sns.categorical):
    seaborn_module.bootstrap = my_bootstrap
In [2]:
# Field lookups identifying the unconstrained runs: word2vec vectors composed
# with 'Add', 100 dimensions, trained on either 'wiki' or 'cwiki', evaluated on
# the 'amazon_grouped-tagged' labelled set, with no `entries_of` restriction.
d = dict(
    expansions__vectors__rep=0,
    expansions__k=3,
    labelled='amazon_grouped-tagged',
    expansions__use_similarity=0,
    expansions__neighbour_strategy='linear',
    expansions__vectors__dimensionality=100,
    document_features_ev='AN+NN',
    document_features_tr='J+N+AN+NN',
    expansions__allow_overlap=False,
    expansions__entries_of=None,
    expansions__vectors__algorithm='word2vec',
    expansions__vectors__composer='Add',
    expansions__vectors__unlabelled__in=['wiki', 'cwiki'],
    expansions__decode_handler='SignifiedOnlyFeatureHandler',
    expansions__noise=0,
)
# Only the ids are kept; flat=True asks for bare values instead of 1-tuples.
unconstrained_ids = (
    Experiment.objects
    .filter(**d)
    .values_list('id', flat=True)
)
# Ids of the runs that DO have an `entries_of` restriction, limited to those
# whose restricting vectors use 1 or 10 (unlabelled_percentage) and the 'Add'
# composer.
constrained_ids = (
    Experiment.objects
    .filter(**{
        'expansions__entries_of__isnull': False,
        'expansions__entries_of__unlabelled_percentage__in': [1, 10],
        'expansions__vectors__composer': 'Add',
    })
    .values_list('id', flat=True)
)
In [3]:
# Column name -> experiment field lookup handed to dataframe_from_exp_ids
# (presumably each key becomes a DataFrame column; confirm in gui.output_utils).
names = dict(
    percent='expansions__vectors__unlabelled_percentage',
    # reduced_to='expansions__entries_of__unlabelled_percentage',
    # composer='expansions__vectors__composer',
    Corpus='expansions__vectors__unlabelled',
)

# Variant that also loads the constrained runs:
# df = dataframe_from_exp_ids(list(constrained_ids) + list(unconstrained_ids), names).convert_objects(convert_numeric=True)

# Load the unconstrained runs and coerce numeric-looking columns to numbers.
df = (
    dataframe_from_exp_ids(unconstrained_ids, names)
    .convert_objects(convert_numeric=True)
)
In [4]:
# Peek at the first rows of the freshly loaded frame.
df.head(n=5)
Out[4]:
In [5]:
def legible_name(row):
    """Put together a legible name for the method described by one row.

    Parameters
    ----------
    row : object with attributes ``unlab``, ``reduced_to`` and ``composer``
        Typically a row handed over by ``DataFrame.apply(..., axis=1)``.
        NOTE(review): the ``names`` mapping used to build ``df`` in this
        notebook exposes the corpus as ``Corpus`` and has ``reduced_to`` and
        ``composer`` commented out, so those columns must be re-enabled
        (and the corpus column named ``unlab``) before applying this to ``df``.

    Returns
    -------
    str
        ``'clean wiki (Add)'`` for the ``cwiki`` corpus; otherwise
        ``'wiki (<composer>)'`` when ``reduced_to`` is missing, or
        ``'wiki 100 --> wiki <reduced_to> (<composer>)'`` when it is set.
    """
    if row.unlab == 'cwiki':
        return 'clean wiki (Add)'

    reduced_to = row.reduced_to
    # A missing value may arrive as None as well as NaN; np.isnan(None)
    # raises TypeError, so check for None first.
    if reduced_to is None or np.isnan(reduced_to):
        return 'wiki (%s)' % row.composer

    return 'wiki 100 --> wiki %d (%s)' % (reduced_to, row.composer)
# Derive one 'Tokens' value per row via compute_token_count, then discard the
# 'percent' column.
token_counts = df.apply(compute_token_count, axis=1)
df['Tokens'] = token_counts
# df['name'] = df.apply(legible_name, axis=1)
df = df.drop(['percent'], axis=1)
In [6]:
# Check the frame after adding 'Tokens' and dropping 'percent'.
df.head(n=5)
Out[6]:
In [7]:
# Learning curve: Accuracy against Tokens, one line per Corpus, with 'folds'
# as the repeated-measurement unit. The husl palette applies only inside the
# `with` block.
with sns.color_palette("husl", n_colors=6):
    ax = sns.tsplot(
        data=df,
        time='Tokens',
        unit='folds',
        condition='Corpus',
        value='Accuracy',
    )
    # Start the x axis at zero and end it at the largest token count present.
    ax.set_xlim(0, df['Tokens'].max())

plt.savefig(
    'plot-w2v-learning-curve-amazon-cwiki-vs-wiki.pdf',
    format='pdf',
    dpi=300,
    bbox_inches='tight',
    pad_inches=0.1,
)
In [ ]: