In [1]:
%cd ~/NetBeansProjects/ExpLosion/
from notebooks.common_imports import *
from gui.user_code import get_demsar_diagram, get_demsar_params
from gui.output_utils import *
# Rebind the `bootstrap` attribute on `sns.timeseries.algo` and on
# `sns.categorical` to `my_bootstrap`.
# NOTE(review): `my_bootstrap` is presumably supplied by one of the star
# imports above -- confirm where it is defined.
for _seaborn_module in (sns.timeseries.algo, sns.categorical):
    _seaborn_module.bootstrap = my_bootstrap
In [2]:
# Base experiment filter; later cells pass it to `go` and tweak individual
# entries in place.
query_dict = dict(
    clusters__noise=0,
    clusters__vectors__algorithm__in=['turian'],
    clusters__vectors__composer__in=['Add', 'Socher'],
    clusters__vectors__unlabelled__in=['turian'],
    clusters__vectors__dimensionality=100,
    clusters__vectors__rep=0,
    clusters__vectors__unlabelled_percentage=100,
    document_features_ev='AN+NN',
    document_features_tr='J+N+AN+NN',
    labelled='amazon_grouped-tagged',
)
def go(query_dict):
    """Plot 'Accuracy' against 'Clusters' for the experiments matching a filter.

    The entries of ``query_dict`` are expanded as keyword lookups into
    ``Experiment.objects.filter``. The ids of the matching experiments are
    printed, turned into a data frame via ``dataframe_from_exp_ids`` and drawn
    with ``sns.tsplot``, one line per 'Composer' value.

    :param query_dict: mapping of lookup name -> value used to select experiments.
    :return: None; the plot is drawn on the axes returned by ``sns.tsplot``.
    """
    exp_ids = Experiment.objects.filter(**query_dict).values_list('id', flat=True)
    print(exp_ids)

    # NOTE(review): presumably maps data-frame column names to experiment
    # field lookups -- confirm against `dataframe_from_exp_ids`.
    column_fields = {
        'Clusters': 'clusters__num_clusters',
        'Composer': 'clusters__vectors__composer',
        'Unlabelled': 'clusters__vectors__unlabelled',
    }
    results = dataframe_from_exp_ids(exp_ids, column_fields)
    results = results.convert_objects(convert_numeric=True)

    axes = sns.tsplot(
        data=results,
        time='Clusters',
        condition='Composer',
        value='Accuracy',
        unit='folds',
        marker='s',
        linewidth=4,
        ci=68,
    )
    axes.legend(loc='lower right')
    # Fixed axis limits so that plots from different calls share one scale.
    axes.set(ylim=(0.45, .8), xlim=(0, 2000))
    # Disabled tweaks kept for reference:
    # axes.set(xscale='log')
    # sns.despine(left=True)
In [3]:
# Draw the cluster plot for `query_dict` as it currently stands, using a
# 4-colour cubehelix palette for the duration of the call.
cubehelix_palette = sns.color_palette("cubehelix", 4)
with cubehelix_palette:
    go(query_dict)
# Saving is disabled for this figure:
# plt.savefig('plot-clusters1.pdf', format='pdf', dpi=300, bbox_inches='tight', pad_inches=0.1)
In [4]:
# Switch the shared filter (in place) to word2vec vectors trained on 'gigaw',
# comparing the 'Add' and 'Mult' composers, then redraw the cluster plot.
query_dict.update({
    'clusters__vectors__algorithm__in': ['word2vec'],
    'clusters__vectors__composer__in': ['Add', 'Mult'],
    'clusters__vectors__unlabelled__in': ['gigaw'],
})
with sns.color_palette("cubehelix", 4):
    go(query_dict)
# Saving is disabled for this figure:
# plt.savefig('plot-clusters2.pdf', format='pdf', dpi=300, bbox_inches='tight', pad_inches=0.1)
In [5]:
# Only the unlabelled corpus changes here; the algorithm and composer filters
# are whatever `query_dict` already holds from the preceding cell.
query_dict.update({'clusters__vectors__unlabelled__in': ['wiki']})
with sns.color_palette("cubehelix", 4):
    go(query_dict)
plt.savefig(
    'plot-clusters3.pdf',
    format='pdf',
    dpi=300,
    bbox_inches='tight',
    pad_inches=0.1,
)
In [6]:
# Plot 'Accuracy' against 'Percent' (the unlabelled-percentage field), one
# line per 'Labelled' value. The percentage and labelled-corpus lookups are
# left out of the filter (kept below as comments) because they are the
# variables being plotted.
query_dict = {
    'clusters__num_clusters': 100,
    'clusters__vectors__algorithm': 'word2vec',
    'clusters__vectors__composer': 'Add',
    'clusters__vectors__dimensionality': 100,
    'clusters__vectors__rep': 0,
    'clusters__vectors__unlabelled': 'wiki',
    # 'clusters__vectors__unlabelled_percentage': 70,
    'document_features_ev': 'AN+NN',
    'document_features_tr': 'J+N+AN+NN',
    # 'labelled': 'amazon_grouped-tagged'
}
ids = Experiment.objects.filter(**query_dict).values_list('id', flat=True)
print(ids)
df = dataframe_from_exp_ids(ids, {'Labelled': 'labelled',
                                  'Percent': 'clusters__vectors__unlabelled_percentage'})
df = df.convert_objects(convert_numeric=True)
# Relabel 'AM' as 'Amazon' with a single .loc assignment. The previous
# chained form (`df.Labelled[mask] = ...`) triggers SettingWithCopyWarning
# and is not guaranteed to write through to `df`.
df.loc[df.Labelled == 'AM', 'Labelled'] = 'Amazon'
with sns.color_palette("cubehelix", 4):
    g = sns.tsplot(data=df, time='Percent', condition='Labelled',
                   value='Accuracy', unit='folds',
                   marker='s', linewidth=4, ci=68)
g.legend(loc='lower right')
plt.savefig('plot-clusters4.pdf', format='pdf', dpi=300, bbox_inches='tight', pad_inches=0.1)
In [7]:
# Same kind of cluster plot as `go`, but covering all algorithm / composer /
# corpus combinations at once, with one facet column per 'Unlabelled' value.
query_dict = dict(
    clusters__noise=0,
    clusters__vectors__algorithm__in=['turian', 'word2vec'],
    clusters__vectors__composer__in=['Add', 'Socher', 'Mult'],
    clusters__vectors__unlabelled__in=['turian', 'wiki', 'gigaw'],
    clusters__vectors__dimensionality=100,
    clusters__vectors__rep=0,
    clusters__vectors__unlabelled_percentage=100,
    document_features_ev='AN+NN',
    document_features_tr='J+N+AN+NN',
    labelled='amazon_grouped-tagged',
)
ids = Experiment.objects.filter(**query_dict).values_list('id', flat=True)
print(ids)

facet_columns = {
    'Clusters': 'clusters__num_clusters',
    'Composer': 'clusters__vectors__composer',
    'Unlabelled': 'clusters__vectors__unlabelled',
}
df = dataframe_from_exp_ids(ids, facet_columns).convert_objects(convert_numeric=True)

g = sns.FacetGrid(df, col='Unlabelled')
# Trailing semicolon keeps the notebook from echoing the return value.
g.map_dataframe(
    tsplot_for_facetgrid,
    time='Clusters',
    condition='Composer',
    value='Accuracy',
    unit='folds',
    marker='s',
    linewidth=4,
    ci=68,
);
In [8]:
# Plot 'Accuracy' against 'noise' for one fixed configuration. The noise
# lookup is left out of the filter (kept below as a comment) because it is
# the variable on the x axis.
d = dict(
    # clusters__noise=2.0,
    clusters__num_clusters=100,
    clusters__vectors__algorithm='word2vec',
    clusters__vectors__avg=0,
    clusters__vectors__composer='Add',
    clusters__vectors__dimensionality=100,
    clusters__vectors__reorder=0,
    clusters__vectors__rep=0,
    clusters__vectors__unlabelled='gigaw',
    clusters__vectors__unlabelled_percentage=100,
    document_features_ev='AN+NN',
    document_features_tr='J+N+AN+NN',
    labelled='amazon_grouped-tagged',
)
ids = Experiment.objects.filter(**d).values_list('id', flat=True)

noise_columns = {'noise': 'clusters__noise', 'id': 'id'}
df = dataframe_from_exp_ids(ids, noise_columns)
df = df.convert_objects(convert_numeric=True)
print(ids)

sns.tsplot(data=df, time='noise', value='Accuracy', unit='folds')
plt.savefig(
    'plot-clustered-noise-validation.pdf',
    format='pdf',
    dpi=300,
    bbox_inches='tight',
    pad_inches=0.1,
);
In [9]:
from gui.user_code import pairwise_significance_exp_ids, get_data_for_signif_test
# Ids of the experiments matching the lookups below; the bare `ids` on the
# last line displays them as the cell output.
selection = dict(
    clusters__noise=0.0,
    labelled='amazon_grouped-tagged',
    clusters__vectors__composer='Add',
    clusters__num_clusters=100,
    clusters__vectors__unlabelled_percentage=100,
)
ids = Experiment.objects.filter(**selection).values_list('id', flat=True)
ids
Out[9]:
In [10]:
# Build a frame for `ids` with 'algo', 'corpus' and 'id' columns, then show
# the column dtypes after numeric conversion.
# NOTE(review): the dict presumably maps column names to experiment field
# lookups -- confirm against `dataframe_from_exp_ids`.
signif_columns = {
    'algo': 'clusters__vectors__algorithm',
    'corpus': 'clusters__vectors__unlabelled',
    'id': 'id',
}
df = dataframe_from_exp_ids(ids, signif_columns).convert_objects(convert_numeric=True)
df.head().dtypes
Out[10]:
In [11]:
# Column means for each ('algo', 'corpus') combination.
group_keys = ['algo', 'corpus']
df.groupby(group_keys).mean()
Out[11]:
In [12]:
# For each experiment id, print half the distance between the first and
# third values returned by `get_ci` (its last element is discarded).
# NOTE(review): these are presumably the lower / middle / upper bounds of a
# confidence interval -- confirm against `get_ci`.
for exp_id in ids:
    lower, _middle, upper = get_ci(exp_id)[:-1]
    half_width = (upper - lower) / 2
    print(exp_id, half_width)
In [ ]: