In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%load_ext autotime

from ngram import tune, roc_scorer, spearman_scorer
from baselines import load_comments_and_labels, assemble_data, one_hot
from deep_learning import make_mlp, DenseTransformer
from deep_learning import make_lstm, make_conv_lstm, SequenceTransformer


from sklearn.pipeline import Pipeline
from sklearn.grid_search import RandomizedSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.decomposition import TruncatedSVD
from keras.wrappers.scikit_learn import KerasClassifier
from serialization import save_pipeline, load_pipeline
import joblib
import copy
import pandas as pd


Using TensorFlow backend.

In [2]:
import keras
keras.__version__


Out[2]:
'1.1.0'
time: 4.28 ms

Helpers


In [3]:
def get_best_estimator(cv):
    """Return cv's estimator configured with the best parameters found (not yet refit)."""
    params = cv.best_params_
    model = cv.estimator
    model = model.set_params(**params)
    return model

def save_best_estimator(cv, directory, name):
    """Configure the best estimator from a search and serialize it to disk."""
    model = get_best_estimator(cv)
    save_pipeline(model, directory, name)


time: 3.25 ms

Load Annotated Data


In [4]:
task = 'attack'
data = load_comments_and_labels(task)


time: 2min 8s

Params


In [5]:
path = '../../models/cv/'  # where tuned pipelines are saved
n_max = 10000000           # cap on examples per split (effectively unlimited here)
n_iter = 15                # parameter settings sampled per randomized search


time: 1.13 ms

Prep Data


In [6]:
X_train, y_train_ohv = assemble_data(data, 'comments', 'plurality', splits=['train'])
X_dev, y_dev_ohv = assemble_data(data, 'comments', 'plurality', splits=['dev'])

_, y_train_ed = assemble_data(data, 'comments', 'empirical_dist', splits=['train'])
_, y_dev_ed = assemble_data(data, 'comments', 'empirical_dist', splits=['dev'])

y_train_ohm = one_hot(y_train_ed)
y_dev_ohm = one_hot(y_dev_ed)

X_train = X_train[:n_max]
X_dev = X_dev[:n_max]

y_train_ohv = y_train_ohv[:n_max]
y_dev_ohv = y_dev_ohv[:n_max]

y_train_ed = y_train_ed[:n_max]
y_dev_ed = y_dev_ed[:n_max]

y_train_ohm = y_train_ohm[:n_max]
y_dev_ohm = y_dev_ohm[:n_max]


time: 69.6 ms
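
Three label variants come out of this cell: y_*_ohv are one-hot labels from the plurality vote, y_*_ed are empirical distributions over annotator judgments, and y_*_ohm collapse the empirical distribution back into a one-hot matrix for Keras. The real one_hot lives in baselines.py; as a rough sketch of what it presumably does (an assumption, not the actual implementation):

In [ ]:
# Hypothetical sketch of baselines.one_hot (an assumption): collapse each
# empirical label distribution to an indicator row for its majority class.
import numpy as np

def one_hot_sketch(y_dist):
    y_dist = np.asarray(y_dist)
    out = np.zeros_like(y_dist)
    out[np.arange(len(y_dist)), y_dist.argmax(axis=1)] = 1
    return out

one_hot_sketch([[0.2, 0.8], [0.9, 0.1]])  # -> [[0., 1.], [1., 0.]]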

In [7]:
results_list = []


time: 1.02 ms

Sklearn Experiments

Let's run some quick experiments in scikit-learn so that we have baselines for the Keras models that follow. We will only build logistic regressions with one-hot labels. These runs will also tell us whether tf-idf weighting and normalization are worth using.
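
tune itself comes from ngram.py and is not shown here. Judging by the imports above and the single-fold "(+/-0.00000)" spreads in the grid scores below, it presumably wraps RandomizedSearchCV around a fixed train/dev split, roughly as sketched here (an assumption about its internals; the dependencies handling used later is omitted):

In [ ]:
# Hypothetical sketch of ngram.tune (an assumption): randomized search over
# param_grid, scored on the fixed dev split instead of k-fold cross-validation.
import numpy as np
from sklearn.cross_validation import PredefinedSplit
from sklearn.grid_search import RandomizedSearchCV

def tune_sketch(X_train, y_train, X_dev, y_dev, alg, param_grid,
                n_iter, scorer, n_jobs=1, verbose=False):
    X = np.concatenate([np.asarray(X_train), np.asarray(X_dev)])
    y = np.concatenate([np.asarray(y_train), np.asarray(y_dev)])
    # -1 marks rows that always stay in the training fold; 0 marks the dev fold
    split = PredefinedSplit([-1] * len(X_train) + [0] * len(X_dev))
    cv = RandomizedSearchCV(alg, param_grid, n_iter=n_iter, scoring=scorer,
                            cv=split, n_jobs=n_jobs, refit=False)
    cv.fit(X, y)
    if verbose:
        print('Best parameters set found:')
        print(cv.best_params_, cv.best_score_)
    return cv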


In [8]:
max_features = (5000, 10000, 50000, 100000)
C = (0.0001, 0.001, 0.01, 0.1, 1, 10)


time: 1.17 ms

No tfidf


In [9]:
alg = Pipeline([
    ('vect', CountVectorizer()),
    ('clf', LogisticRegression()),
])


time: 2.5 ms

In [10]:
# linear char-gram, no tfidf

param_grid = {
    'vect__max_features': max_features,
    'vect__ngram_range': ((1, 5),),
    'vect__analyzer': ('char',),
    'clf__C': C,
}

m = tune(X_train, y_train_ohv, X_dev, y_dev_ohv, alg, param_grid, n_iter, roc_scorer, n_jobs=6, verbose=True)


Best parameters set found:
{'clf__C': 10, 'vect__analyzer': 'char', 'vect__max_features': 100000, 'vect__ngram_range': (1, 5)} 0.916608436706


Grid scores:
0.90034 (+/-0.00000) for {'clf__C': 0.1, 'vect__analyzer': 'char', 'vect__max_features': 5000, 'vect__ngram_range': (1, 5)}
0.91030 (+/-0.00000) for {'clf__C': 0.1, 'vect__analyzer': 'char', 'vect__max_features': 100000, 'vect__ngram_range': (1, 5)}
0.89931 (+/-0.00000) for {'clf__C': 10, 'vect__analyzer': 'char', 'vect__max_features': 10000, 'vect__ngram_range': (1, 5)}
0.87261 (+/-0.00000) for {'clf__C': 0.0001, 'vect__analyzer': 'char', 'vect__max_features': 5000, 'vect__ngram_range': (1, 5)}
0.88324 (+/-0.00000) for {'clf__C': 0.0001, 'vect__analyzer': 'char', 'vect__max_features': 100000, 'vect__ngram_range': (1, 5)}
0.91456 (+/-0.00000) for {'clf__C': 0.001, 'vect__analyzer': 'char', 'vect__max_features': 100000, 'vect__ngram_range': (1, 5)}
0.90246 (+/-0.00000) for {'clf__C': 0.01, 'vect__analyzer': 'char', 'vect__max_features': 10000, 'vect__ngram_range': (1, 5)}
0.90847 (+/-0.00000) for {'clf__C': 1, 'vect__analyzer': 'char', 'vect__max_features': 100000, 'vect__ngram_range': (1, 5)}
0.90211 (+/-0.00000) for {'clf__C': 1, 'vect__analyzer': 'char', 'vect__max_features': 50000, 'vect__ngram_range': (1, 5)}
0.89497 (+/-0.00000) for {'clf__C': 0.001, 'vect__analyzer': 'char', 'vect__max_features': 5000, 'vect__ngram_range': (1, 5)}
0.91661 (+/-0.00000) for {'clf__C': 10, 'vect__analyzer': 'char', 'vect__max_features': 100000, 'vect__ngram_range': (1, 5)}
0.90302 (+/-0.00000) for {'clf__C': 0.001, 'vect__analyzer': 'char', 'vect__max_features': 50000, 'vect__ngram_range': (1, 5)}
0.87837 (+/-0.00000) for {'clf__C': 0.0001, 'vect__analyzer': 'char', 'vect__max_features': 10000, 'vect__ngram_range': (1, 5)}
0.90950 (+/-0.00000) for {'clf__C': 0.1, 'vect__analyzer': 'char', 'vect__max_features': 50000, 'vect__ngram_range': (1, 5)}
0.90478 (+/-0.00000) for {'clf__C': 1, 'vect__analyzer': 'char', 'vect__max_features': 10000, 'vect__ngram_range': (1, 5)}
time: 42min 52s

In [11]:
# linear word-gram, no tfidf

param_grid = {
    'vect__max_features': max_features,
    'vect__ngram_range': ((1, 2),),
    'vect__analyzer': ('word',),
    'clf__C': C,
}

m = tune(X_train, y_train_ohv, X_dev, y_dev_ohv, alg, param_grid, n_iter, roc_scorer, n_jobs=6, verbose=True)


Best parameters set found:
{'clf__C': 1, 'vect__analyzer': 'word', 'vect__max_features': 50000, 'vect__ngram_range': (1, 2)} 0.94125639939


Grid scores:
0.93139 (+/-0.00000) for {'clf__C': 0.1, 'vect__analyzer': 'word', 'vect__max_features': 5000, 'vect__ngram_range': (1, 2)}
0.93967 (+/-0.00000) for {'clf__C': 0.1, 'vect__analyzer': 'word', 'vect__max_features': 100000, 'vect__ngram_range': (1, 2)}
0.93643 (+/-0.00000) for {'clf__C': 10, 'vect__analyzer': 'word', 'vect__max_features': 10000, 'vect__ngram_range': (1, 2)}
0.80624 (+/-0.00000) for {'clf__C': 0.0001, 'vect__analyzer': 'word', 'vect__max_features': 5000, 'vect__ngram_range': (1, 2)}
0.80694 (+/-0.00000) for {'clf__C': 0.0001, 'vect__analyzer': 'word', 'vect__max_features': 100000, 'vect__ngram_range': (1, 2)}
0.89202 (+/-0.00000) for {'clf__C': 0.001, 'vect__analyzer': 'word', 'vect__max_features': 100000, 'vect__ngram_range': (1, 2)}
0.92744 (+/-0.00000) for {'clf__C': 0.01, 'vect__analyzer': 'word', 'vect__max_features': 10000, 'vect__ngram_range': (1, 2)}
0.93908 (+/-0.00000) for {'clf__C': 1, 'vect__analyzer': 'word', 'vect__max_features': 100000, 'vect__ngram_range': (1, 2)}
0.94126 (+/-0.00000) for {'clf__C': 1, 'vect__analyzer': 'word', 'vect__max_features': 50000, 'vect__ngram_range': (1, 2)}
0.88930 (+/-0.00000) for {'clf__C': 0.001, 'vect__analyzer': 'word', 'vect__max_features': 5000, 'vect__ngram_range': (1, 2)}
0.94067 (+/-0.00000) for {'clf__C': 10, 'vect__analyzer': 'word', 'vect__max_features': 100000, 'vect__ngram_range': (1, 2)}
0.89186 (+/-0.00000) for {'clf__C': 0.001, 'vect__analyzer': 'word', 'vect__max_features': 50000, 'vect__ngram_range': (1, 2)}
0.80648 (+/-0.00000) for {'clf__C': 0.0001, 'vect__analyzer': 'word', 'vect__max_features': 10000, 'vect__ngram_range': (1, 2)}
0.93988 (+/-0.00000) for {'clf__C': 0.1, 'vect__analyzer': 'word', 'vect__max_features': 50000, 'vect__ngram_range': (1, 2)}
0.93672 (+/-0.00000) for {'clf__C': 1, 'vect__analyzer': 'word', 'vect__max_features': 10000, 'vect__ngram_range': (1, 2)}
time: 6min 29s

With tfidf


In [12]:
alg = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', LogisticRegression()),
])


time: 5.76 ms

In [13]:
# linear char-gram, tfidf

param_grid = {
    'vect__max_features': max_features,
    'vect__ngram_range': ((1, 5),),
    'vect__analyzer': ('char',),
    'tfidf__sublinear_tf': (True, False),
    'tfidf__norm': (None, 'l2'),
    'clf__C': C,
}

m = tune(X_train, y_train_ohv, X_dev, y_dev_ohv, alg, param_grid, n_iter, roc_scorer, n_jobs=6, verbose=True)


Best parameters set found:
{'vect__analyzer': 'char', 'vect__max_features': 5000, 'tfidf__sublinear_tf': True, 'tfidf__norm': 'l2', 'clf__C': 10, 'vect__ngram_range': (1, 5)} 0.956730432607


Grid scores:
0.88271 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 5000, 'tfidf__sublinear_tf': False, 'tfidf__norm': 'l2', 'clf__C': 0.01, 'vect__ngram_range': (1, 5)}
0.86882 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 100000, 'tfidf__sublinear_tf': False, 'tfidf__norm': 'l2', 'clf__C': 0.01, 'vect__ngram_range': (1, 5)}
0.93314 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 5000, 'tfidf__sublinear_tf': True, 'tfidf__norm': None, 'clf__C': 1, 'vect__ngram_range': (1, 5)}
0.94676 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 100000, 'tfidf__sublinear_tf': True, 'tfidf__norm': None, 'clf__C': 1, 'vect__ngram_range': (1, 5)}
0.73052 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 10000, 'tfidf__sublinear_tf': True, 'tfidf__norm': 'l2', 'clf__C': 0.0001, 'vect__ngram_range': (1, 5)}
0.94619 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 100000, 'tfidf__sublinear_tf': True, 'tfidf__norm': None, 'clf__C': 10, 'vect__ngram_range': (1, 5)}
0.88859 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 10000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 0.001, 'vect__ngram_range': (1, 5)}
0.87204 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 5000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 0.01, 'vect__ngram_range': (1, 5)}
0.90099 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 100000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 10, 'vect__ngram_range': (1, 5)}
0.87905 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 50000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 1, 'vect__ngram_range': (1, 5)}
0.95673 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 5000, 'tfidf__sublinear_tf': True, 'tfidf__norm': 'l2', 'clf__C': 10, 'vect__ngram_range': (1, 5)}
0.69424 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 5000, 'tfidf__sublinear_tf': False, 'tfidf__norm': 'l2', 'clf__C': 0.0001, 'vect__ngram_range': (1, 5)}
0.94925 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 50000, 'tfidf__sublinear_tf': True, 'tfidf__norm': 'l2', 'clf__C': 0.1, 'vect__ngram_range': (1, 5)}
0.93904 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 10000, 'tfidf__sublinear_tf': True, 'tfidf__norm': None, 'clf__C': 1, 'vect__ngram_range': (1, 5)}
0.89593 (+/-0.00000) for {'vect__analyzer': 'char', 'vect__max_features': 100000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 0.01, 'vect__ngram_range': (1, 5)}
time: 41min 50s

In [14]:
# linear word-gram, tfidf

param_grid = {
    'vect__max_features': max_features,
    'vect__ngram_range': ((1, 2),),
    'vect__analyzer': ('word',),
    'tfidf__sublinear_tf': (True, False),
    'tfidf__norm': (None, 'l2'),
    'clf__C': C,
}

m = tune(X_train, y_train_ohv, X_dev, y_dev_ohv, alg, param_grid, n_iter, roc_scorer, n_jobs=6, verbose=True)


Best parameters set found:
{'vect__analyzer': 'word', 'vect__max_features': 5000, 'tfidf__sublinear_tf': True, 'tfidf__norm': 'l2', 'clf__C': 10, 'vect__ngram_range': (1, 2)} 0.944995533538


Grid scores:
0.90258 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 5000, 'tfidf__sublinear_tf': False, 'tfidf__norm': 'l2', 'clf__C': 0.01, 'vect__ngram_range': (1, 2)}
0.89977 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 100000, 'tfidf__sublinear_tf': False, 'tfidf__norm': 'l2', 'clf__C': 0.01, 'vect__ngram_range': (1, 2)}
0.90995 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 5000, 'tfidf__sublinear_tf': True, 'tfidf__norm': None, 'clf__C': 1, 'vect__ngram_range': (1, 2)}
0.93980 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 100000, 'tfidf__sublinear_tf': True, 'tfidf__norm': None, 'clf__C': 1, 'vect__ngram_range': (1, 2)}
0.75391 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 10000, 'tfidf__sublinear_tf': True, 'tfidf__norm': 'l2', 'clf__C': 0.0001, 'vect__ngram_range': (1, 2)}
0.93997 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 100000, 'tfidf__sublinear_tf': True, 'tfidf__norm': None, 'clf__C': 10, 'vect__ngram_range': (1, 2)}
0.93204 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 10000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 0.001, 'vect__ngram_range': (1, 2)}
0.92917 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 5000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 0.01, 'vect__ngram_range': (1, 2)}
0.94239 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 100000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 10, 'vect__ngram_range': (1, 2)}
0.93923 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 50000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 1, 'vect__ngram_range': (1, 2)}
0.94500 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 5000, 'tfidf__sublinear_tf': True, 'tfidf__norm': 'l2', 'clf__C': 10, 'vect__ngram_range': (1, 2)}
0.74456 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 5000, 'tfidf__sublinear_tf': False, 'tfidf__norm': 'l2', 'clf__C': 0.0001, 'vect__ngram_range': (1, 2)}
0.94012 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 50000, 'tfidf__sublinear_tf': True, 'tfidf__norm': 'l2', 'clf__C': 0.1, 'vect__ngram_range': (1, 2)}
0.92714 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 10000, 'tfidf__sublinear_tf': True, 'tfidf__norm': None, 'clf__C': 1, 'vect__ngram_range': (1, 2)}
0.94411 (+/-0.00000) for {'vect__analyzer': 'word', 'vect__max_features': 100000, 'tfidf__sublinear_tf': False, 'tfidf__norm': None, 'clf__C': 0.01, 'vect__ngram_range': (1, 2)}
time: 3min 9s

Tf-idf improves the dev ROC AUC for both ngram types, but the boost is much larger for the char-ngram model (from 0.917 to 0.957) than for the word-ngram model (from 0.941 to 0.945).

Tensorflow/Keras

Now we will cross-validate over model architectures (linear, MLP, LSTM), ngram types (word, char), and label types (one-hot vs. empirical distribution).

Linear and MLP

The MLP model class subsumes linear models: setting clf__hidden_layer_sizes to the empty tuple yields a single softmax layer, i.e. a logistic regression.
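
make_mlp itself lives in deep_learning.py and is not shown here. Given the parameters searched below and the mean_squared_error metric in the later training logs, it presumably builds something like this Keras 1.x model (a sketch under assumed names and defaults, not the actual implementation):

In [ ]:
# Hypothetical sketch of deep_learning.make_mlp (an assumption): a stack of
# ReLU layers followed by a softmax, with l2 weight decay of strength `alpha`.
from keras.models import Sequential
from keras.layers import Dense
from keras.regularizers import l2

def make_mlp_sketch(input_dim=10000, output_dim=2, hidden_layer_sizes=(), alpha=1e-5):
    model = Sequential()
    sizes = list(hidden_layer_sizes) + [output_dim]
    for i, size in enumerate(sizes):
        kwargs = {'input_dim': input_dim} if i == 0 else {}
        activation = 'softmax' if i == len(sizes) - 1 else 'relu'
        model.add(Dense(size, activation=activation, W_regularizer=l2(alpha), **kwargs))
    # with hidden_layer_sizes=() this reduces to a single softmax layer,
    # i.e. multinomial logistic regression
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['mean_squared_error'])
    return model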


In [9]:
alg = Pipeline([
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('to_dense', DenseTransformer()),  # Keras needs dense arrays, not scipy sparse matrices
    ('clf', KerasClassifier(build_fn=make_mlp, output_dim=2, verbose=False)),
])

dependencies = [('vect__max_features', 'clf__input_dim')]


time: 2.57 ms
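
The dependencies list ties sampled parameters together: whatever vocabulary size the search draws for vect__max_features must also be used as the classifier's input_dim (the two are indeed always equal in the best-parameter dicts below). Inside tune this is presumably applied to each sampled setting, roughly as follows (an assumption about ngram.py):

In [ ]:
# Hypothetical: how tune() presumably enforces `dependencies` on each
# sampled parameter setting before fitting.
def apply_dependencies(sampled_params, dependencies):
    for source, target in dependencies:
        sampled_params[target] = sampled_params[source]
    return sampled_params

apply_dependencies({'vect__max_features': 10000}, dependencies)
# -> {'vect__max_features': 10000, 'clf__input_dim': 10000}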

In [10]:
char_vec_params = {
    'vect__max_features': (5000, 10000, 30000),
    'vect__ngram_range': ((1, 5),),
    'vect__analyzer': ('char',),
}

word_vect_params = {
    'vect__max_features': (5000, 10000, 30000),
    'vect__ngram_range': ((1, 2),),
    'vect__analyzer': ('word',),
}

tfidf_params = {
    'tfidf__sublinear_tf': (True, False),
    'tfidf__norm': ('l2',),
}

linear_clf_params = {
    'clf__alpha': (1e-9, 1e-7, 1e-5, 1e-3, 1e-2),
    'clf__hidden_layer_sizes': ((),),  # no hidden layers: a linear model
    'clf__nb_epoch': (2, 4, 8, 16),
    'clf__batch_size': (200,),
}

mlp_clf_params = {
    'clf__alpha': (1e-9, 1e-7, 1e-5, 1e-3, 1e-2),
    'clf__hidden_layer_sizes': ((50,), (50, 50), (50, 50, 50)),
    'clf__nb_epoch': (2, 4, 8, 16),
    'clf__batch_size': (200,),
}


time: 6.66 ms

In [11]:
for model in ['linear', 'mlp']:
    for gram in ['word', 'char']:
        for label in ['oh', 'ed']:
            params = {}
            
            if model == 'linear':
                params.update(linear_clf_params)
            else:
                params.update(mlp_clf_params)
                
            params.update(tfidf_params)
                
            if gram == 'char':
                params.update(char_vec_params)
            else:
                params.update(word_vect_params)
                
            if label == 'oh':
                y_train = y_train_ohm
                y_dev = y_dev_ohm
            else:
                y_train = y_train_ed
                y_dev = y_dev_ed
            
            print('\n\n\n %s %s %s' % (model, gram, label))
            cv = tune(X_train, y_train, X_dev, y_dev,
                      alg, params,
                      n_iter,
                      roc_scorer,
                      n_jobs=1,
                      verbose=True,
                      dependencies=dependencies)

            # persist the best configuration, then refit it on the full training split
            save_best_estimator(cv, path, '%s_%s_%s' % (model, gram, label))
            est = get_best_estimator(cv)
            est.fit(X_train, y_train)

            # Spearman is always scored against the empirical-distribution labels
            best_spearman = spearman_scorer(est, X_dev, y_dev_ed) * 100
            print("\n best spearman: ", best_spearman)
            best_roc = max(cv.grid_scores_, key=lambda x: x[1])[1] * 100
            print("\n best roc: ", best_roc)
            
            results_list.append({'model_type': model,
                                 'ngram_type': gram,
                                 'label_type' : label,
                                 'cv': cv.grid_scores_,
                                 'best_roc': round(best_roc, 3),
                                 'best_spearman': round(best_spearman, 3)
                                })




 linear word oh

Best parameters set found:
{'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16} 0.949772916442


Grid scores:
0.94906 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.94941 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.88773 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.93372 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.94977 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.84950 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.01, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.90339 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.87127 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.94447 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.90389 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.01, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.93003 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.90433 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.94901 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.90490 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.92965 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}

 best spearman:  55.5507647987

 best roc:  94.9772916442



 linear word ed

Best parameters set found:
{'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16} 0.957567473524


Grid scores:
0.95757 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.95700 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.88955 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.94717 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.95446 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.89384 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.01, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.92867 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.90537 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.94883 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.90742 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.01, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.94291 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.91432 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.95414 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.91373 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.94316 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}

 best spearman:  65.3323951301

 best roc:  95.7567473524



 linear char oh

Best parameters set found:
{'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16} 0.963551611824


Grid scores:
0.96355 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.96151 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.84921 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.94230 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.95804 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.85020 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.01, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.91893 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.86915 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.94787 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.88766 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.01, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.94499 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.88992 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.95420 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.89227 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.94489 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}

 best spearman:  60.0834022181

 best roc:  96.3551611824



 linear char ed

Best parameters set found:
{'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16} 0.964292927646


Grid scores:
0.96429 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.96371 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.86595 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.95135 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.95840 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.87355 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.01, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.93123 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.89752 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 4}
0.95124 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.89797 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.01, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.94826 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.90485 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.95652 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 16}
0.90776 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}
0.94815 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (), 'clf__nb_epoch': 8}

 best spearman:  67.5765309648

 best roc:  96.4292927646



 mlp word oh

Best parameters set found:
{'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 8} 0.953107110233


Grid scores:
0.94675 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2}
0.92688 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 16}
0.92134 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 16}
0.94391 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 2}
0.88535 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 4}
0.95130 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 4}
0.94359 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 2}
0.50000 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}
0.94446 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 4}
0.93153 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}
0.95311 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 8}
0.95075 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2}
0.88224 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 2}
0.93853 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 8}
0.92049 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}

 best spearman:  57.0471278323

 best roc:  95.3107110233



 mlp word ed

Best parameters set found:
{'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 8} 0.953535689532


Grid scores:
0.95325 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2}
0.94452 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 16}
0.92940 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 16}
0.94888 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 2}
0.88775 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 4}
0.95112 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 4}
0.94792 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 2}
0.50000 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}
0.95071 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 4}
0.93566 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}
0.95354 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 8}
0.95058 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2}
0.88806 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 2}
0.94923 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 8}
0.93740 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 2), 'vect__analyzer': 'word', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}

 best spearman:  65.2198677035

 best roc:  95.3535689532



 mlp char oh

Best parameters set found:
{'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 8} 0.95946687607


Grid scores:
0.95880 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2}
0.94531 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 16}
0.93016 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 16}
0.95433 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 2}
0.85381 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 4}
0.95221 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 4}
0.95372 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 2}
0.50000 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}
0.95358 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 4}
0.93902 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}
0.95947 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 8}
0.95322 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2}
0.83712 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 2}
0.95291 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 8}
0.92803 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}

 best spearman:  60.6151311614

 best roc:  95.946687607



 mlp char ed

Best parameters set found:
{'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2} 0.959822788986


Grid scores:
0.95982 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2}
0.95249 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 16}
0.94099 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 16}
0.95550 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 2}
0.84392 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 4}
0.95328 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 4}
0.95445 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-05, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 2}
0.50000 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}
0.95571 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 4}
0.94547 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 1e-05, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}
0.95692 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 8}
0.95184 (+/-0.00000) for {'tfidf__sublinear_tf': True, 'clf__alpha': 0.001, 'vect__max_features': 30000, 'clf__input_dim': 30000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50), 'clf__nb_epoch': 2}
0.86378 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 0.01, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 2}
0.95827 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-07, 'vect__max_features': 5000, 'clf__input_dim': 5000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50,), 'clf__nb_epoch': 8}
0.94134 (+/-0.00000) for {'tfidf__sublinear_tf': False, 'clf__alpha': 1e-09, 'vect__max_features': 10000, 'clf__input_dim': 10000, 'clf__batch_size': 200, 'vect__ngram_range': (1, 5), 'vect__analyzer': 'char', 'tfidf__norm': 'l2', 'clf__hidden_layer_sizes': (50, 50, 50), 'clf__nb_epoch': 16}

 best spearman:  67.1037518194

 best roc:  95.9822788986
time: 8h 27min 22s

In [20]:
results_df = pd.DataFrame(results_list)


time: 2.6 ms

In [21]:
results_df


Out[21]:
best_roc best_spearman cv label_type model_type ngram_type
0 94.977 55.551 [({'tfidf__sublinear_tf': True, 'clf__alpha': ... oh linear word
1 95.757 65.332 [({'tfidf__sublinear_tf': True, 'clf__alpha': ... ed linear word
2 96.355 60.083 [({'tfidf__sublinear_tf': True, 'clf__alpha': ... oh linear char
3 96.429 67.577 [({'tfidf__sublinear_tf': True, 'clf__alpha': ... ed linear char
4 95.311 57.047 [({'tfidf__sublinear_tf': False, 'clf__alpha':... oh mlp word
5 95.354 65.220 [({'tfidf__sublinear_tf': False, 'clf__alpha':... ed mlp word
6 95.947 60.615 [({'tfidf__sublinear_tf': False, 'clf__alpha':... oh mlp char
7 95.982 67.104 [({'tfidf__sublinear_tf': False, 'clf__alpha':... ed mlp char
time: 98.5 ms

In [30]:
grid_scores = results_df['cv'][0]
grid_scores[0].mean_validation_score


Out[30]:
0.94905679423161959
time: 2.29 ms

In [32]:
# each grid score is a (parameters, mean_validation_score, cv_validation_scores) tuple
max(grid_scores, key=lambda x: x.mean_validation_score).parameters


Out[32]:
{'clf__alpha': 1e-05,
 'clf__batch_size': 200,
 'clf__hidden_layer_sizes': (),
 'clf__input_dim': 10000,
 'clf__nb_epoch': 16,
 'tfidf__norm': 'l2',
 'tfidf__sublinear_tf': True,
 'vect__analyzer': 'word',
 'vect__max_features': 10000,
 'vect__ngram_range': (1, 2)}
time: 3.34 ms

In [33]:
import json

def get_best_params(grid_scores):
    return json.dumps(max(grid_scores, key=lambda x: x.mean_validation_score).parameters)

results_df['best_params'] = results_df['cv'].apply(get_best_params)


time: 6.74 ms

In [34]:
results_df.to_csv('cv_results.csv')


time: 6.51 ms

LSTM


In [86]:
alg = Pipeline([
    ('seq', SequenceTransformer()),
    ('clf', KerasClassifier(build_fn=make_lstm, output_dim=2, verbose=True)),
])

# tie the transformer's vocabulary size and sequence length to the model's parameters
dependencies = [('seq__max_features', 'clf__max_features'),
                ('seq__max_len', 'clf__max_len')]


time: 138 ms
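
SequenceTransformer is also defined in deep_learning.py. It presumably maps each comment to a fixed-length sequence of token ids for the Embedding layer, along these lines (a sketch under assumed behavior, not the actual implementation):

In [ ]:
# Hypothetical sketch of deep_learning.SequenceTransformer (an assumption):
# index the max_features most frequent tokens and left-pad ids to max_len.
from collections import Counter
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

class SequenceTransformerSketch(BaseEstimator, TransformerMixin):
    def __init__(self, max_features=10000, max_len=200, analyzer='word'):
        self.max_features = max_features
        self.max_len = max_len
        self.analyzer = analyzer

    def _tokens(self, text):
        return text.split() if self.analyzer == 'word' else list(text)

    def fit(self, X, y=None):
        counts = Counter(t for doc in X for t in self._tokens(doc))
        # id 0 is reserved for padding and out-of-vocabulary tokens
        self.vocab_ = {t: i + 1 for i, (t, _) in
                       enumerate(counts.most_common(self.max_features - 1))}
        return self

    def transform(self, X):
        out = np.zeros((len(X), self.max_len), dtype='int32')
        for row, doc in enumerate(X):
            ids = [self.vocab_.get(t, 0) for t in self._tokens(doc)][:self.max_len]
            out[row, self.max_len - len(ids):] = ids  # left-pad with zeros
        return out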

In [87]:
word_seq_params = {
    'seq__max_features': (5000, 10000, 30000),
    'seq__max_len': (100, 200, 500),
    'seq__analyzer': ('word',),
}

char_seq_params = {
    'seq__max_features': (100,),  # character vocabularies are tiny
    'seq__max_len': (200, 500, 1000),
    'seq__analyzer': ('char',),
}

clf_params = {
    'clf__dropout': (0.1, 0.2, 0.4),
    'clf__embedding_size': (64, 128),
    'clf__lstm_output_size': (64, 128),
    'clf__nb_epoch': (2, 3, 4),
    'clf__batch_size': (200,),
}


time: 104 ms
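
The randomized search samples only the seq__* values, so tune presumably copies each sampled value onto its dependent clf__* key before fitting; the best-parameter dicts in the output below contain both keys with equal values. A sketch of that propagation step, under that assumption:

def apply_dependencies(sampled_params, dependencies):
    # copy each source parameter's sampled value onto its dependent target,
    # e.g. seq__max_len -> clf__max_len
    params = dict(sampled_params)
    for source, target in dependencies:
        params[target] = params[source]
    return params

apply_dependencies({'seq__max_features': 30000, 'seq__max_len': 100},
                   [('seq__max_features', 'clf__max_features'),
                    ('seq__max_len', 'clf__max_len')])
# -> {'seq__max_features': 30000, 'seq__max_len': 100,
#     'clf__max_features': 30000, 'clf__max_len': 100}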

In [88]:
from pprint import pprint


time: 27.7 ms

In [89]:
model = 'lstm'
for gram in ['word', 'char']:
    for label in ['oh', 'ed']:
        # assemble the parameter grid for this (ngram type, label type) combination
        params = {}
        params.update(clf_params)

        if gram == 'char':
            params.update(char_seq_params)
        else:
            params.update(word_seq_params)

        # 'oh': one-hot labels; 'ed': empirical label distributions
        if label == 'oh':
            y_train = y_train_ohm
            y_dev = y_dev_ohm
        else:
            y_train = y_train_ed
            y_dev = y_dev_ed

        pprint(params)

        print('\n\n\n %s %s %s' % (model, gram, label))
        cv = tune(X_train, y_train, X_dev, y_dev,
                  alg, params,
                  n_iter,
                  roc_scorer,
                  n_jobs=1,
                  verbose=True,
                  dependencies=dependencies)

        # persist the best estimator, then refit it to score Spearman on the dev set
        save_best_estimator(cv, path, '%s_%s_%s' % (model, gram, label))
        est = get_best_estimator(cv)
        est.fit(X_train, y_train)

        best_spearman = spearman_scorer(est, X_dev, y_dev_ed) * 100
        print('\n best spearman: ', best_spearman)
        best_roc = max(cv.grid_scores_, key=lambda x: x[1])[1] * 100
        print('\n best roc: ', best_roc)

        results_list.append({'model_type': model,
                             'ngram_type': gram,
                             'label_type': label,
                             'cv': cv.grid_scores_,
                             'best_roc': round(best_roc, 3),
                             'best_spearman': round(best_spearman, 3)
                            })


{'clf__batch_size': (200,),
 'clf__dropout': (0.1, 0.2, 0.4),
 'clf__embedding_size': (64, 128),
 'clf__lstm_output_size': (64, 128),
 'clf__nb_epoch': (2, 3, 4),
 'seq__analyzer': ('word',),
 'seq__max_features': (5000, 10000, 30000),
 'seq__max_len': (100, 200, 500)}



 lstm word oh
Epoch 1/2
69490/69490 [==============================] - 260s - loss: 0.2296 - mean_squared_error: 0.0650   
Epoch 2/2
69490/69490 [==============================] - 283s - loss: 0.1254 - mean_squared_error: 0.0349   
23170/23170 [==============================] - 42s    
Epoch 1/2
69490/69490 [==============================] - 1269s - loss: 0.2400 - mean_squared_error: 0.0680  
Epoch 2/2
69490/69490 [==============================] - 1281s - loss: 0.1510 - mean_squared_error: 0.0417  
23170/23170 [==============================] - 156s   
Epoch 1/2
69490/69490 [==============================] - 276s - loss: 0.3232 - mean_squared_error: 0.0938   
Epoch 2/2
69490/69490 [==============================] - 312s - loss: 0.1747 - mean_squared_error: 0.0479   
23170/23170 [==============================] - 57s    
Epoch 1/2
69490/69490 [==============================] - 689s - loss: 0.2768 - mean_squared_error: 0.0797   
Epoch 2/2
69490/69490 [==============================] - 677s - loss: 0.1569 - mean_squared_error: 0.0429   
23170/23170 [==============================] - 94s    
Epoch 1/3
69490/69490 [==============================] - 939s - loss: 0.3656 - mean_squared_error: 0.1090   
Epoch 2/3
69490/69490 [==============================] - 958s - loss: 0.2330 - mean_squared_error: 0.0646   
Epoch 3/3
69490/69490 [==============================] - 926s - loss: 0.2011 - mean_squared_error: 0.0554   
23170/23170 [==============================] - 143s   
Epoch 1/4
69490/69490 [==============================] - 547s - loss: 0.2377 - mean_squared_error: 0.0673   
Epoch 2/4
69490/69490 [==============================] - 521s - loss: 0.1353 - mean_squared_error: 0.0377   
Epoch 3/4
69490/69490 [==============================] - 580s - loss: 0.1069 - mean_squared_error: 0.0301   
Epoch 4/4
69490/69490 [==============================] - 583s - loss: 0.0984 - mean_squared_error: 0.0276   
23170/23170 [==============================] - 108s   
Epoch 1/2
69490/69490 [==============================] - 1480s - loss: 0.2406 - mean_squared_error: 0.0683  
Epoch 2/2
69490/69490 [==============================] - 1462s - loss: 0.1438 - mean_squared_error: 0.0396  
23170/23170 [==============================] - 247s   
Epoch 1/4
69490/69490 [==============================] - 332s - loss: 0.2591 - mean_squared_error: 0.0735   
Epoch 2/4
69490/69490 [==============================] - 322s - loss: 0.1521 - mean_squared_error: 0.0421   
Epoch 3/4
69490/69490 [==============================] - 284s - loss: 0.1375 - mean_squared_error: 0.0381   
Epoch 4/4
69490/69490 [==============================] - 281s - loss: 0.1379 - mean_squared_error: 0.0383   
23170/23170 [==============================] - 89s    
Epoch 1/2
69490/69490 [==============================] - 402s - loss: 0.2273 - mean_squared_error: 0.0643   
Epoch 2/2
69490/69490 [==============================] - 396s - loss: 0.1297 - mean_squared_error: 0.0361   
23170/23170 [==============================] - 100s   
Epoch 1/4
69490/69490 [==============================] - 168s - loss: 0.3410 - mean_squared_error: 0.0995   
Epoch 2/4
69490/69490 [==============================] - 163s - loss: 0.1804 - mean_squared_error: 0.0491   
Epoch 3/4
69490/69490 [==============================] - 178s - loss: 0.1427 - mean_squared_error: 0.0387   
Epoch 4/4
69490/69490 [==============================] - 187s - loss: 0.1247 - mean_squared_error: 0.0345   
23170/23170 [==============================] - 72s    
Epoch 1/4
69490/69490 [==============================] - 490s - loss: 0.2478 - mean_squared_error: 0.0703   
Epoch 2/4
69490/69490 [==============================] - 444s - loss: 0.1485 - mean_squared_error: 0.0413   
Epoch 3/4
69490/69490 [==============================] - 435s - loss: 0.1376 - mean_squared_error: 0.0381   
Epoch 4/4
69490/69490 [==============================] - 431s - loss: 0.1302 - mean_squared_error: 0.0366   
23170/23170 [==============================] - 104s   
Epoch 1/4
69490/69490 [==============================] - 485s - loss: 0.2291 - mean_squared_error: 0.0648   
Epoch 2/4
69490/69490 [==============================] - 436s - loss: 0.1344 - mean_squared_error: 0.0376   
Epoch 3/4
69490/69490 [==============================] - 434s - loss: 0.1165 - mean_squared_error: 0.0327   
Epoch 4/4
69490/69490 [==============================] - 432s - loss: 0.1114 - mean_squared_error: 0.0317   
23170/23170 [==============================] - 113s   
Epoch 1/2
69490/69490 [==============================] - 1172s - loss: 0.4676 - mean_squared_error: 0.1481  
Epoch 2/2
69490/69490 [==============================] - 1097s - loss: 0.3693 - mean_squared_error: 0.1072  
23170/23170 [==============================] - 200s   
Epoch 1/4
69490/69490 [==============================] - 1180s - loss: 0.4272 - mean_squared_error: 0.1312  
Epoch 2/4
69490/69490 [==============================] - 1094s - loss: 0.3459 - mean_squared_error: 0.0996  
Epoch 3/4
69490/69490 [==============================] - 1088s - loss: 0.3266 - mean_squared_error: 0.0940  
Epoch 4/4
69490/69490 [==============================] - 1085s - loss: 0.3106 - mean_squared_error: 0.0887  
23170/23170 [==============================] - 227s   
Epoch 1/3
69490/69490 [==============================] - 327s - loss: 0.2177 - mean_squared_error: 0.0614   
Epoch 2/3
69490/69490 [==============================] - 251s - loss: 0.1318 - mean_squared_error: 0.0369   
Epoch 3/3
69490/69490 [==============================] - 251s - loss: 0.1192 - mean_squared_error: 0.0334   
23170/23170 [==============================] - 112s   

Best parameters set found:
{'clf__lstm_output_size': 128, 'seq__max_len': 100, 'seq__max_features': 30000, 'clf__embedding_size': 64, 'clf__dropout': 0.1, 'clf__nb_epoch': 2, 'clf__batch_size': 200, 'clf__max_len': 100, 'seq__analyzer': 'word', 'clf__max_features': 30000} 0.956421659086


Grid scores:
0.95642 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 100, 'seq__max_features': 30000, 'clf__embedding_size': 64, 'clf__dropout': 0.1, 'clf__nb_epoch': 2, 'clf__batch_size': 200, 'clf__max_len': 100, 'seq__analyzer': 'word', 'clf__max_features': 30000}
0.94997 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 500, 'seq__max_features': 5000, 'clf__embedding_size': 64, 'clf__dropout': 0.1, 'clf__nb_epoch': 2, 'clf__batch_size': 200, 'clf__max_len': 500, 'seq__analyzer': 'word', 'clf__max_features': 5000}
0.93990 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 100, 'seq__max_features': 10000, 'clf__embedding_size': 64, 'clf__dropout': 0.4, 'clf__nb_epoch': 2, 'clf__batch_size': 200, 'clf__max_len': 100, 'seq__analyzer': 'word', 'clf__max_features': 10000}
0.95136 (+/-0.00000) for {'clf__lstm_output_size': 64, 'seq__max_len': 500, 'seq__max_features': 5000, 'clf__embedding_size': 64, 'clf__dropout': 0.2, 'clf__nb_epoch': 2, 'clf__batch_size': 200, 'clf__max_len': 500, 'seq__analyzer': 'word', 'clf__max_features': 5000}
0.91441 (+/-0.00000) for {'clf__lstm_output_size': 64, 'seq__max_len': 500, 'seq__max_features': 5000, 'clf__embedding_size': 128, 'clf__dropout': 0.4, 'clf__nb_epoch': 3, 'clf__batch_size': 200, 'clf__max_len': 500, 'seq__analyzer': 'word', 'clf__max_features': 5000}
0.95298 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 200, 'seq__max_features': 30000, 'clf__embedding_size': 128, 'clf__dropout': 0.2, 'clf__nb_epoch': 4, 'clf__batch_size': 200, 'clf__max_len': 200, 'seq__analyzer': 'word', 'clf__max_features': 30000}
0.95329 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 500, 'seq__max_features': 5000, 'clf__embedding_size': 128, 'clf__dropout': 0.2, 'clf__nb_epoch': 2, 'clf__batch_size': 200, 'clf__max_len': 500, 'seq__analyzer': 'word', 'clf__max_features': 5000}
0.94681 (+/-0.00000) for {'clf__lstm_output_size': 64, 'seq__max_len': 200, 'seq__max_features': 5000, 'clf__embedding_size': 64, 'clf__dropout': 0.2, 'clf__nb_epoch': 4, 'clf__batch_size': 200, 'clf__max_len': 200, 'seq__analyzer': 'word', 'clf__max_features': 5000}
0.95531 (+/-0.00000) for {'clf__lstm_output_size': 64, 'seq__max_len': 200, 'seq__max_features': 30000, 'clf__embedding_size': 128, 'clf__dropout': 0.1, 'clf__nb_epoch': 2, 'clf__batch_size': 200, 'clf__max_len': 200, 'seq__analyzer': 'word', 'clf__max_features': 30000}
0.94428 (+/-0.00000) for {'clf__lstm_output_size': 64, 'seq__max_len': 100, 'seq__max_features': 30000, 'clf__embedding_size': 64, 'clf__dropout': 0.4, 'clf__nb_epoch': 4, 'clf__batch_size': 200, 'clf__max_len': 100, 'seq__analyzer': 'word', 'clf__max_features': 30000}
0.94213 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 200, 'seq__max_features': 5000, 'clf__embedding_size': 64, 'clf__dropout': 0.2, 'clf__nb_epoch': 4, 'clf__batch_size': 200, 'clf__max_len': 200, 'seq__analyzer': 'word', 'clf__max_features': 5000}
0.94981 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 200, 'seq__max_features': 10000, 'clf__embedding_size': 64, 'clf__dropout': 0.1, 'clf__nb_epoch': 4, 'clf__batch_size': 200, 'clf__max_len': 200, 'seq__analyzer': 'word', 'clf__max_features': 10000}
0.83035 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 500, 'seq__max_features': 30000, 'clf__embedding_size': 64, 'clf__dropout': 0.4, 'clf__nb_epoch': 2, 'clf__batch_size': 200, 'clf__max_len': 500, 'seq__analyzer': 'word', 'clf__max_features': 30000}
0.82588 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 500, 'seq__max_features': 10000, 'clf__embedding_size': 64, 'clf__dropout': 0.4, 'clf__nb_epoch': 4, 'clf__batch_size': 200, 'clf__max_len': 500, 'seq__analyzer': 'word', 'clf__max_features': 10000}
0.95061 (+/-0.00000) for {'clf__lstm_output_size': 128, 'seq__max_len': 100, 'seq__max_features': 10000, 'clf__embedding_size': 128, 'clf__dropout': 0.1, 'clf__nb_epoch': 3, 'clf__batch_size': 200, 'clf__max_len': 100, 'seq__analyzer': 'word', 'clf__max_features': 10000}
Epoch 1/2
69490/69490 [==============================] - 299s - loss: 0.2313 - mean_squared_error: 0.0653   
Epoch 2/2
69490/69490 [==============================] - 218s - loss: 0.1268 - mean_squared_error: 0.0353   
23170/23170 [==============================] - 111s   

spearman:  0.535233011975
{'clf__batch_size': (200,),
 'clf__dropout': (0.1, 0.2, 0.4),
 'clf__embedding_size': (64, 128),
 'clf__lstm_output_size': (64, 128),
 'clf__nb_epoch': (2, 3, 4),
 'seq__analyzer': ('word',),
 'seq__max_features': (5000, 10000, 30000),
 'seq__max_len': (100, 200, 500)}



 lstm word ed
Epoch 1/2
69490/69490 [==============================] - 396s - loss: 0.2366 - mean_squared_error: 0.0730   
Epoch 2/2
69490/69490 [==============================] - 345s - loss: 0.1355 - mean_squared_error: 0.0352   
23170/23170 [==============================] - 165s   
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-89-83a6487fbafc> in <module>()
     26                   n_jobs = 1,
     27                   verbose = True,
---> 28                   dependencies = dependencies)
     29 
     30         save_best_estimator(cv, path, '%s_%s_%s' % (model, gram, label))

/Users/ellerywulczyn/detox/src/modeling/ngram.py in tune(X_train, y_train, X_dev, y_dev, alg, param_grid, n_iter, scoring, n_jobs, verbose, dependencies)
     78                                 refit=False,
     79                                 dependencies=dependencies)
---> 80     model.fit(X, y)
     81     if verbose:
     82         print("\nBest parameters set found:")

/Users/ellerywulczyn/detox/src/modeling/ngram.py in fit(self, X, y)
     44 
     45 
---> 46         return self._fit(X, y, sampled_params)
     47 
     48 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/grid_search.py in _fit(self, X, y, parameter_iterable)
    551                                     self.fit_params, return_parameters=True,
    552                                     error_score=self.error_score)
--> 553                 for parameters in parameter_iterable
    554                 for train, test in cv)
    555 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    798             # was dispatched. In particular this covers the edge
    799             # case of Parallel used with an exhausted iterator.
--> 800             while self.dispatch_one_batch(iterator):
    801                 self._iterating = True
    802             else:

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    656                 return False
    657             else:
--> 658                 self._dispatch(tasks)
    659                 return True
    660 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    564 
    565         if self._pool is None:
--> 566             job = ImmediateComputeBatch(batch)
    567             self._jobs.append(job)
    568             self.n_dispatched_batches += 1

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __init__(self, batch)
    178         # Don't delay the application, to avoid keeping the input
    179         # arguments in memory
--> 180         self.results = batch()
    181 
    182     def get(self):

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
     70 
     71     def __call__(self):
---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
     73 
     74     def __len__(self):

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
     70 
     71     def __call__(self):
---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
     73 
     74     def __len__(self):

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/cross_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, error_score)
   1529             estimator.fit(X_train, **fit_params)
   1530         else:
-> 1531             estimator.fit(X_train, y_train, **fit_params)
   1532 
   1533     except Exception as e:

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
    163         """
    164         Xt, fit_params = self._pre_transform(X, y, **fit_params)
--> 165         self.steps[-1][-1].fit(Xt, y, **fit_params)
    166         return self
    167 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/wrappers/scikit_learn.py in fit(self, X, y, **kwargs)
    135                 **self.filter_sk_params(self.build_fn.__call__))
    136         else:
--> 137             self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
    138 
    139         loss_name = self.model.loss

/Users/ellerywulczyn/detox/src/modeling/deep_learning.py in make_lstm(max_features, output_dim, max_len, embedding_size, lstm_output_size, dropout)
     74     model.compile(loss='kullback_leibler_divergence',
     75                   optimizer='adam',
---> 76                   metrics=['mse'])
     77     return model
     78 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/models.py in compile(self, optimizer, loss, metrics, sample_weight_mode, **kwargs)
    522                 "None" defaults to sample-wise weights (1D).
    523             kwargs: for Theano backend, these are passed into K.function.
--> 524                 Ignored for Tensorflow backend.
    525 
    526         # Example

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/engine/training.py in compile(self, optimizer, loss, metrics, loss_weights, sample_weight_mode, **kwargs)
    485         self.sample_weight_mode = sample_weight_mode
    486         self.loss = loss
--> 487         self.loss_weights = loss_weights
    488 
    489         # prepare loss weights

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/optimizers.py in get(identifier, kwargs)
    531         ms = [K.zeros(shape) for shape in shapes]
    532         vs = [K.zeros(shape) for shape in shapes]
--> 533 
    534         self.weights = [self.iterations] + ms + vs
    535 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/utils/generic_utils.py in get_from_module(identifier, module_params, module_name, instantiate, kwargs)
     14         if not res:
     15             raise Exception('Invalid ' + str(module_name) + ': ' +
---> 16                             str(identifier))
     17         if instantiate and not kwargs:
     18             return res()

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/optimizers.py in __init__(self, lr, beta_1, beta_2, epsilon, **kwargs)
    335             # apply constraints
    336             if p in constraints:
--> 337                 c = constraints[p]
    338                 new_p = c(new_p)
    339             self.updates.append(K.update(p, new_p))

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in variable(value, dtype, name)
    130 
    131 
--> 132 def variable(value, dtype=_FLOATX, name=None):
    133     '''Instantiates a tensor.
    134 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    380     try:
    381       result = self._run(None, fetches, feed_dict, options_ptr,
--> 382                          run_metadata_ptr)
    383       if run_metadata:
    384         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    653     movers = self._update_with_movers(feed_dict_string, feed_map)
    654     results = self._do_run(handle, target_list, unique_fetches,
--> 655                            feed_dict_string, options, run_metadata)
    656 
    657     # User may have fetched the same tensor multiple times, but we

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
    721     if handle is None:
    722       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
--> 723                            target_list, options, run_metadata)
    724     else:
    725       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
    728   def _do_call(self, fn, *args):
    729     try:
--> 730       return fn(*args)
    731     except errors.OpError as e:
    732       message = compat.as_text(e.message)

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
    710         return tf_session.TF_Run(session, options,
    711                                  feed_dict, fetch_list, target_list,
--> 712                                  status, run_metadata)
    713 
    714     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 
time: 19h 51min 17s

Conv LSTM


In [81]:
alg = Pipeline([
    ('seq', SequenceTransformer()),
    ('clf', KerasClassifier(build_fn=make_conv_lstm, output_dim=2, verbose=True)),
])

dependencies = [('seq__max_features', 'clf__max_features'),
                ('seq__max_len', 'clf__max_len')]


time: 2.3 ms
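
make_conv_lstm is also defined in deep_learning.py; given the extra hyperparameters swept in the next cell (nb_filter, filter_length, pool_length), it presumably inserts a 1-D convolution and max-pooling stage between the embedding and the LSTM. A hedged sketch in the same Keras 1.x style (every layer detail is an assumption; the training log below only confirms an accuracy metric):

from keras.models import Sequential
from keras.layers import (Embedding, Convolution1D, MaxPooling1D,
                          LSTM, Dense, Activation)

def make_conv_lstm(max_features=10000, output_dim=2, max_len=100,
                   embedding_size=128, nb_filter=64, filter_length=5,
                   pool_length=4, lstm_output_size=128, dropout=0.2):
    model = Sequential()
    model.add(Embedding(max_features, embedding_size,
                        input_length=max_len, dropout=dropout))
    # local n-gram features via a 1-D convolution, downsampled by max-pooling
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode='valid',
                            activation='relu'))
    model.add(MaxPooling1D(pool_length=pool_length))
    model.add(LSTM(lstm_output_size))
    model.add(Dense(output_dim))
    model.add(Activation('softmax'))
    # loss assumed to match make_lstm; 'acc' does appear in the log below
    model.compile(loss='kullback_leibler_divergence',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model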

In [84]:
word_seq_params = {
    'seq__max_features': (5000, 10000, 30000),
    'seq__max_len': (100, 200, 500),
    'seq__analyzer': ('word',),
    'clf__filter_length': (2, 4, 6),
    'clf__pool_length': (2, 4, 6)
}

char_seq_params = {
    'seq__max_features': (100,),
    'seq__max_len': (200, 500, 1000),
    'seq__analyzer': ('char',),
    'clf__filter_length': (5, 10, 15),
    'clf__pool_length': (5, 10, 15)
}

clf_params = {
    'clf__dropout': (0.1, 0.2, 0.4),
    'clf__embedding_size': (64, 128),
    'clf__lstm_output_size': (64, 128),
    'clf__nb_epoch': (2, 3, 4),
    'clf__batch_size': (200,),
    'clf__nb_filter': (64, 128)
}


time: 6.62 ms
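
One constraint to keep in mind when pairing filter_length, pool_length and seq__max_len: with a stride-1 'valid' convolution, the sequence the LSTM finally sees has length floor((max_len - filter_length + 1) / pool_length), so aggressive pooling on short inputs leaves the LSTM very few steps. A small helper to sanity-check a combination (assuming that layer configuration):

def lstm_steps(max_len, filter_length, pool_length):
    # length after a stride-1 'valid' Convolution1D followed by MaxPooling1D
    return (max_len - filter_length + 1) // pool_length

lstm_steps(500, 10, 10)  # -> 49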

In [85]:
model = 'conv_lstm'
# same sweep as for the plain LSTM above, now over the conv-LSTM hyperparameters
for gram in ['word', 'char']:
    for label in ['oh', 'ed']:
        params = {}
        params.update(clf_params)

        if gram == 'char':
            params.update(char_seq_params)
        else:
            params.update(word_seq_params)

        if label == 'oh':
            y_train = y_train_ohm
            y_dev = y_dev_ohm
        else:
            y_train = y_train_ed
            y_dev = y_dev_ed

        pprint(params)

        print('\n\n\n %s %s %s' % (model, gram, label))
        cv = tune(X_train, y_train, X_dev, y_dev,
                  alg, params,
                  n_iter,
                  roc_scorer,
                  n_jobs=1,
                  verbose=True,
                  dependencies=dependencies)

        save_best_estimator(cv, path, '%s_%s_%s' % (model, gram, label))
        est = get_best_estimator(cv)
        est.fit(X_train, y_train)

        best_spearman = spearman_scorer(est, X_dev, y_dev_ed) * 100
        print('\n best spearman: ', best_spearman)
        best_roc = max(cv.grid_scores_, key=lambda x: x[1])[1] * 100
        print('\n best roc: ', best_roc)

        results_list.append({'model_type': model,
                             'ngram_type': gram,
                             'label_type': label,
                             'cv': cv.grid_scores_,
                             'best_roc': round(best_roc, 3),
                             'best_spearman': round(best_spearman, 3)
                            })


{'clf__batch_size': (200,),
 'clf__dropout': (0.1, 0.2, 0.4),
 'clf__embedding_size': (64, 128),
 'clf__filter_length': (5, 10, 15),
 'clf__lstm_output_size': (64, 128),
 'clf__nb_epoch': (2, 3, 4),
 'clf__nb_filter': (64, 128),
 'clf__pool_length': (5, 10, 15),
 'seq__analyzer': ('char',),
 'seq__max_features': (100,),
 'seq__max_len': (500,)}



 conv_lstm char oh
Epoch 1/2
 7800/69490 [==>...........................] - ETA: 535s - loss: 0.3815 - acc: 0.8628
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-85-63c913bdd03c> in <module>()
     28                   n_jobs = 1,
     29                   verbose = True,
---> 30                   dependencies = dependencies)
     31 
     32         save_best_estimator(cv, path, '%s_%s_%s' % (model, gram, label))

/Users/ellerywulczyn/detox/src/modeling/ngram.py in tune(X_train, y_train, X_dev, y_dev, alg, param_grid, n_iter, scoring, n_jobs, verbose, dependencies)
     78                                 refit=False,
     79                                 dependencies=dependencies)
---> 80     model.fit(X, y)
     81     if verbose:
     82         print("\nBest parameters set found:")

/Users/ellerywulczyn/detox/src/modeling/ngram.py in fit(self, X, y)
     44 
     45 
---> 46         return self._fit(X, y, sampled_params)
     47 
     48 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/grid_search.py in _fit(self, X, y, parameter_iterable)
    551                                     self.fit_params, return_parameters=True,
    552                                     error_score=self.error_score)
--> 553                 for parameters in parameter_iterable
    554                 for train, test in cv)
    555 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    798             # was dispatched. In particular this covers the edge
    799             # case of Parallel used with an exhausted iterator.
--> 800             while self.dispatch_one_batch(iterator):
    801                 self._iterating = True
    802             else:

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    656                 return False
    657             else:
--> 658                 self._dispatch(tasks)
    659                 return True
    660 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    564 
    565         if self._pool is None:
--> 566             job = ImmediateComputeBatch(batch)
    567             self._jobs.append(job)
    568             self.n_dispatched_batches += 1

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __init__(self, batch)
    178         # Don't delay the application, to avoid keeping the input
    179         # arguments in memory
--> 180         self.results = batch()
    181 
    182     def get(self):

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
     70 
     71     def __call__(self):
---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
     73 
     74     def __len__(self):

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
     70 
     71     def __call__(self):
---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
     73 
     74     def __len__(self):

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/cross_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, error_score)
   1529             estimator.fit(X_train, **fit_params)
   1530         else:
-> 1531             estimator.fit(X_train, y_train, **fit_params)
   1532 
   1533     except Exception as e:

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
    163         """
    164         Xt, fit_params = self._pre_transform(X, y, **fit_params)
--> 165         self.steps[-1][-1].fit(Xt, y, **fit_params)
    166         return self
    167 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/wrappers/scikit_learn.py in fit(self, X, y, **kwargs)
    146         fit_args.update(kwargs)
    147 
--> 148         history = self.model.fit(X, y, **fit_args)
    149 
    150         return history

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/models.py in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, **kwargs)
    595                               shuffle=shuffle,
    596                               class_weight=class_weight,
--> 597                               sample_weight=sample_weight)
    598 
    599     def evaluate(self, x, y, batch_size=32, verbose=1,

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight)
   1105                               verbose=verbose, callbacks=callbacks,
   1106                               val_f=val_f, val_ins=val_ins, shuffle=shuffle,
-> 1107                               callback_metrics=callback_metrics)
   1108 
   1109     def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, nb_epoch, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics)
    823                 batch_logs['size'] = len(batch_ids)
    824                 callbacks.on_batch_begin(batch_index, batch_logs)
--> 825                 outs = f(ins_batch)
    826                 if type(outs) != list:
    827                     outs = [outs]

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
    932         feed_dict = dict(zip(names, inputs))
    933         session = get_session()
--> 934         updated = session.run(self.outputs + [self.updates_op], feed_dict=feed_dict)
    935         return updated[:len(self.outputs)]
    936 

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    380     try:
    381       result = self._run(None, fetches, feed_dict, options_ptr,
--> 382                          run_metadata_ptr)
    383       if run_metadata:
    384         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    653     movers = self._update_with_movers(feed_dict_string, feed_map)
    654     results = self._do_run(handle, target_list, unique_fetches,
--> 655                            feed_dict_string, options, run_metadata)
    656 
    657     # User may have fetched the same tensor multiple times, but we

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
    721     if handle is None:
    722       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
--> 723                            target_list, options, run_metadata)
    724     else:
    725       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
    728   def _do_call(self, fn, *args):
    729     try:
--> 730       return fn(*args)
    731     except errors.OpError as e:
    732       message = compat.as_text(e.message)

/Users/ellerywulczyn/miniconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
    710         return tf_session.TF_Run(session, options,
    711                                  feed_dict, fetch_list, target_list,
--> 712                                  status, run_metadata)
    713 
    714     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 
time: 13min 37s
