In [1]:
    
%cd ~/NetBeansProjects/ExpLosion/
from notebooks.common_imports import *
from gui.user_code import get_demsar_diagram, get_demsar_params
from gui.output_utils import *
    
    
In [4]:
    
# Query shared by both sides of the comparison; only the vector
# dimensionality differs (0 for the `nosvd` list, 100 for the `svd` list).
d = {'labelled': 'amazon_grouped-tagged',
     'expansions__vectors__use_ppmi': False,
     'expansions__vectors__algorithm__in': ['count_windows', 'count_dependencies'],
     'expansions__vectors__dimensionality': 0}
# ignore baroni because it's with svd only
# Two algorithms are selected above, so ordering by composer alone leaves ties
# whose relative order the database is free to choose. The algorithm is added
# as a tie-breaker so both lists come back in the same, repeatable order.
# NOTE(review): assumes diff_plot compares the two lists element-wise - confirm.
nosvd = [x.id for x in (Experiment.objects.filter(**d)
                        .exclude(expansions__vectors__composer='Baroni')
                        .order_by('expansions__vectors__composer',
                                  'expansions__vectors__algorithm'))]
d['expansions__vectors__dimensionality'] = 100
svd = [x.id for x in (Experiment.objects.filter(**d)
                      .exclude(expansions__vectors__composer='Baroni')
                      .order_by('expansions__vectors__composer',
                                'expansions__vectors__algorithm'))]
def _window_deps_composers_naming(mylist):
    """
    Builds one label per experiment id, e.g. D-Add or W-Mult.

    A label is the upper-cased character at index 6 of the experiment's
    vector algorithm name (the 'w' / 'd' that follows 'count_' in
    'count_windows' / 'count_dependencies'), a dash, and the composer name.
    Because of that fixed index the labels only make sense for
    **count vectors**.

    :param mylist: iterable of Experiment ids
    :return: list of label strings, in the same order as `mylist`
    """
    def _label(exp_id):
        # one lookup per experiment; [0] raises IndexError if the id is unknown
        algorithm, composer = Experiment.objects.filter(id=exp_id).values_list(
            'expansions__vectors__algorithm',
            'expansions__vectors__composer')[0]
        return '{}-{}'.format(algorithm[6].upper(), composer)

    return [_label(exp_id) for exp_id in mylist]
# Show which experiment ids ended up on each side of the comparison.
print(nosvd, svd)
# Labels are derived from the `svd` ids and passed along with both id lists.
diff_plot(nosvd, svd, _window_deps_composers_naming(svd))
# g.axes.flat[0].set_title('NOSVD - SVD (***=significant)');
plt.savefig(
    'plot-effect-of-svd-deltas.pdf',
    format='pdf',
    dpi=300,
    bbox_inches='tight',
    pad_inches=0.1,
)
    
    
    
In [ ]:
    
# Query shared by both sides of the comparison; only `vectors__use_ppmi`
# differs between the two id lists built below.
# (The original literal listed the 'labelled' key twice with the same value;
# the redundant entry has been dropped.)
# NOTE(review): this cell filters on 'vectors__...' while
# _window_deps_composers_naming reads 'expansions__vectors__...' - confirm
# that both lookup paths are valid on the current Experiment model.
d = {'noise': 0.0,
     'decode_handler': 'SignifiedOnlyFeatureHandler',
     'k': 3,
     'vectors__unlabelled_percentage': 100.0,
     'vectors__dimensionality': 0,
     'labelled': 'amazon_grouped-tagged',
     'vectors__algorithm__in': ['count_windows', 'count_dependencies'],
     'vectors__composer__in': ['Add', 'Mult', 'Right', 'Left'],
     'vectors__use_ppmi': True}
# Both lists are sorted by (algorithm, composer) so they come back in the
# same order.
with_ppmi = [x.id for x in Experiment.objects.filter(**d).order_by('vectors__algorithm', 'vectors__composer')]
d['vectors__use_ppmi'] = False
wo_ppmi = [x.id for x in Experiment.objects.filter(**d).order_by('vectors__algorithm', 'vectors__composer')]
print(with_ppmi, wo_ppmi)
diff_plot(with_ppmi, wo_ppmi, _window_deps_composers_naming(with_ppmi))
plt.savefig('plot-effect-of-ppmi-deltas.pdf', format='pdf', dpi=300, bbox_inches='tight', pad_inches=0.1)
    
In [ ]:
    
# Select the word2vec experiments on R8 that differ in neighbour strategy
# ('linear' vs 'skipping'); every other setting is pinned to a single value.
d = {'vectors__dimensionality__in': ['100'],
     'neighbour_strategy__in': ['linear', 'skipping'],
     'labelled__in': ['reuters21578/r8-tagged-grouped'],
     'vectors__algorithm__in': ['word2vec'],
     'vectors__unlabelled_percentage__in': ['100.0'],
     'vectors__unlabelled__in': ['gigaw'],
     'document_features__in': ['AN_NN'],
     'vectors__rep__in': ['0'],
     'decode_handler__in': ['SignifiedOnlyFeatureHandler'],
     'k__in': ['3'], 'use_similarity__in': ['0']}
exp_ids = Experiment.objects.values_list('id', flat=True).filter(**d)
scores, folds = get_cv_scores_many_experiment(exp_ids)
# `ids` was used below without ever being defined in this notebook, so the
# cell only worked with leftover kernel state. Derive it from the query.
# NOTE(review): assumes get_cv_scores_many_experiment returns scores in the
# order of `exp_ids` - confirm, since `strats` below relies on that alignment.
ids = list(exp_ids)
assert ids  # we succeeded in getting results
# Repeat each experiment's strategy so it lines up with the per-fold scores.
strats = np.repeat([Experiment.objects.get(id=exp_id).neighbour_strategy for exp_id in ids],
                   get_cv_fold_count(ids))
df = pd.DataFrame({'F1': scores,
                   'strategy': strats,
                   'cvfold': folds,
                   'Composer': get_vectors_field(ids, 'composer')})
# One box per (composer, strategy) pair.
grid = sns.factorplot(hue='strategy', y='F1', data=df, x='Composer',
                      kind='box', sharey=True);
plt.savefig('plot-w2v_linear-vs-skipping-neigh.pdf', format='pdf', dpi=300)