In [1]:
import pandas as pd
import tensorflow as tf
from bayes_opt import BayesianOptimization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import re



In [2]:
def clearstring(string):
    # keep only alphanumerics and spaces, drop empty tokens, lowercase
    string = re.sub('[^A-Za-z0-9 ]+', '', string)
    tokens = [y.strip() for y in string.split(' ') if y.strip()]
    return ' '.join(tokens).lower()
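
For example, the cleaner strips punctuation and lowercases (an illustrative check using the function above):

print(clearstring("It's a so-so movie!!"))  # -> 'its a soso movie'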

In [3]:
# one review per line in each file
with open('rt-polarity.neg', 'r') as fopen:
    negatives = fopen.read().split('\n')
with open('rt-polarity.pos', 'r') as fopen:
    positives = fopen.read().split('\n')

In [4]:
# keep only the first 50 reviews per class so every search step trains quickly
negatives = negatives[:50]
positives = positives[:50]

for i in range(len(negatives)):
    negatives[i] = clearstring(negatives[i])
    positives[i] = clearstring(positives[i])

In [5]:
from collections import Counter

# gather every token, then build a frequency-sorted, de-duplicated vocabulary
words = []
for i in range(len(negatives)):
    words += negatives[i].split()
    words += positives[i].split()

# 'PAD' takes id 0, matching the zero-padding used in the batches below
vocab = ['PAD', 'EOS'] + [w for w, _ in Counter(words).most_common()]
print('vocab size:', len(vocab))
dictionary = dict(zip(vocab, range(len(vocab))))


vocab size: 926
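
Because 'PAD' and 'EOS' head the list, they take ids 0 and 1, and every other id reflects frequency rank. A quick check (the exact id for any corpus word depends on the data):

print(dictionary['PAD'], dictionary['EOS'])  # 0 1
print(dictionary.get('movie', -1))           # frequency-rank id, or -1 if absent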

In [6]:
x_data = negatives + positives
y_data = [0] * len(negatives) + [1] * len(positives)
# one-hot encode the binary labels
onehot = np.zeros((len(negatives) + len(positives), 2))
for i in range(onehot.shape[0]):
    onehot[i, y_data[i]] = 1.0

x_train, x_test, y_train, y_test, y_train_label, y_test_label = train_test_split(
    x_data, onehot, y_data, test_size = 0.20)
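
Incidentally, the one-hot loop above collapses to a single fancy-indexing expression:

onehot = np.eye(2)[y_data]  # row i is the one-hot vector for label y_data[i]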
The activation function is encoded as an integer:

0 - sigmoid
1 - tanh
2 - relu

The constants held fixed throughout the search are:

1. batch size: 20
2. epochs: 50
3. optimizer: gradient descent
4. loss: softmax cross-entropy

Everything else is a hyperparameter the Bayesian optimizer is free to tune.


In [7]:
def neural_network(num_hidden, size_layer, learning_rate, dropout_rate, beta, activation, seq_len,
                   batch_size = 20):
    tf.reset_default_graph()
    # map the integer activation code (0/1/2, see above) to a TF activation
    def lstm_cell(size_layer, activation):
        if activation == 0:
            activation = tf.nn.sigmoid
        elif activation == 1:
            activation = tf.nn.tanh
        else:
            activation = tf.nn.relu
        return tf.nn.rnn_cell.LSTMCell(size_layer, activation = activation)
    rnn_cells = tf.nn.rnn_cell.MultiRNNCell([lstm_cell(size_layer, activation) for _ in range(num_hidden)])
    num_classes = y_train.shape[1]  # width of the one-hot labels
    X = tf.placeholder(tf.float32, [None, None, 1])
    Y = tf.placeholder(tf.float32, [None, num_classes])
    drop = tf.contrib.rnn.DropoutWrapper(rnn_cells, output_keep_prob = dropout_rate)
    outputs, _ = tf.nn.dynamic_rnn(drop, X, dtype = tf.float32)
    rnn_W = tf.Variable(tf.random_normal((size_layer, num_classes)))
    rnn_B = tf.Variable(tf.random_normal([num_classes]))
    # classify from the output at the last timestep
    logits = tf.matmul(outputs[:, -1], rnn_W) + rnn_B
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = Y))
    # L2 regularization over all trainable variables, weighted by beta
    cost += sum(beta * tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
    correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    COST, TEST_COST, ACC, TEST_ACC = [], [], [], []
    
    for i in range(50):
        train_acc, train_loss = 0, 0
        for n in range(0, (len(x_train) // batch_size) * batch_size, batch_size):
            # encode each sentence as integer token ids, left-padded with zeros (PAD)
            batch_x = np.zeros((batch_size, seq_len, 1))
            for k in range(batch_size):
                tokens = x_train[n + k].split()[:seq_len]
                for no, text in enumerate(tokens[::-1]):
                    try:
                        batch_x[k, -1 - no, 0] = dictionary[text]
                    except KeyError:
                        batch_x[k, -1 - no, 0] = -1  # out-of-vocabulary marker
            _, loss = sess.run([optimizer, cost], feed_dict = {X: batch_x, Y: y_train[n: n + batch_size, :]})
            train_acc += sess.run(accuracy, feed_dict = {X: batch_x, Y: y_train[n: n + batch_size, :]})
            train_loss += loss
            
        # evaluate on the full test set once per epoch
        batch_x = np.zeros((len(x_test), seq_len, 1))
        for k in range(len(x_test)):
            tokens = x_test[k].split()[:seq_len]
            for no, text in enumerate(tokens[::-1]):
                try:
                    batch_x[k, -1 - no, 0] = dictionary[text]
                except KeyError:
                    batch_x[k, -1 - no, 0] = -1  # out-of-vocabulary marker
        results = sess.run([cost, accuracy], feed_dict = {X: batch_x, Y: y_test})
        TEST_COST.append(results[0])
        TEST_ACC.append(results[1])
        train_loss /= (len(x_train) // batch_size)
        train_acc /= (len(x_train) // batch_size)
        ACC.append(train_acc)
        COST.append(train_loss)
    # report the mean over all 50 epochs rather than the final epoch
    COST = np.array(COST).mean()
    TEST_COST = np.array(TEST_COST).mean()
    ACC = np.array(ACC).mean()
    TEST_ACC = np.array(TEST_ACC).mean()
    sess.close()
    return COST, TEST_COST, ACC, TEST_ACC
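
To make the input encoding concrete, here is the same left-padding logic as a standalone helper (a sketch reusing dictionary from above; encode is not defined in the original):

def encode(sentence, seq_len):
    vec = np.zeros((seq_len, 1))
    tokens = sentence.split()[:seq_len]
    for no, text in enumerate(tokens[::-1]):
        vec[-1 - no, 0] = dictionary.get(text, -1)  # -1 marks out-of-vocabulary
    return vec

# a 4-token sentence in a length-5 window: one leading PAD (0), then 4 token ids
print(encode('the movie was good', 5).ravel())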

In [8]:
def generate_nn(num_hidden, size_layer, learning_rate, dropout_rate, beta, activation, seq_len):
    global accbest
    # round and clip the optimizer's continuous suggestions into valid hyperparameters
    param = {
        'num_hidden' : int(np.around(num_hidden)),
        'size_layer' : int(np.around(size_layer)),
        'learning_rate' : max(min(learning_rate, 1), 0.0001),
        'dropout_rate' : max(min(dropout_rate, 0.99), 0),
        'beta' : max(min(beta, 0.5), 0.000001),
        'activation': int(np.around(activation)),
        'seq_len' : int(np.around(seq_len))
    }
    print("\nSearch parameters %s" % (param), file = log_file)
    log_file.flush()
    learning_cost, valid_cost, learning_acc, valid_acc = neural_network(**param)
    print("stop after 50 iteration with train cost %f, valid cost %f, train acc %f, valid acc %f" % (learning_cost, valid_cost, learning_acc, valid_acc))
    if (valid_acc > accbest):
        accbest = valid_acc
    return valid_acc

In [9]:
log_file = open('nn-bayesian.log', 'a')
accbest = 0.0
NN_BAYESIAN = BayesianOptimization(generate_nn, 
                              {'num_hidden': (2, 10),
                               'size_layer': (32, 512),
                               'learning_rate': (0.0001, 1),
                               'dropout_rate': (0.1, 0.99),
                               'beta': (0.000001, 0.49),
                               'activation': (0, 2),
                               'seq_len': (5, 20)
                              })
# 10 random initial probes, then 20 steps guided by expected improvement (EI)
NN_BAYESIAN.maximize(init_points = 10, n_iter = 20, acq = 'ei', xi = 0.0)


Initialization
-------------------------------------------------------------------------------------------------------------------------------------
 Step |   Time |      Value |   activation |      beta |   dropout_rate |   learning_rate |   num_hidden |   seq_len |   size_layer | 
stop after 50 iteration with train cost 42.381874, valid cost 36.617153, train acc 0.546000, valid acc 0.500000
    1 | 00m10s |    0.50000 |       0.6067 |    0.3674 |         0.5718 |          0.6362 |       2.9054 |    6.3420 |     435.3363 | 
stop after 50 iteration with train cost 3.349305, valid cost 2.959107, train acc 0.527750, valid acc 0.500000
    2 | 00m04s |    0.50000 |       0.6907 |    0.2037 |         0.8793 |          0.1485 |       2.9604 |   14.1557 |      34.4793 | 
stop after 50 iteration with train cost 15.869358, valid cost 14.480788, train acc 0.546250, valid acc 0.500000
    3 | 00m15s |    0.50000 |       1.1585 |    0.1007 |         0.6277 |          0.7885 |       7.2214 |   16.4103 |     235.6704 | 
stop after 50 iteration with train cost 4.302577, valid cost 3.017692, train acc 0.475000, valid acc 0.500000
    4 | 00m10s |    0.50000 |       0.2487 |    0.0791 |         0.7831 |          0.9348 |       9.0891 |   13.8299 |      90.4191 | 
stop after 50 iteration with train cost nan, valid cost nan, train acc 0.500000, valid acc 0.500000
    5 | 00m07s |    0.50000 |       1.9927 |    0.0077 |         0.6365 |          0.2179 |       5.1895 |    9.3360 |     264.7745 | 
stop after 50 iteration with train cost 29.283845, valid cost 25.389284, train acc 0.527500, valid acc 0.510000
    6 | 00m07s |    0.51000 |       0.9636 |    0.2779 |         0.1064 |          0.1390 |       2.6789 |   16.7739 |     242.9170 | 
stop after 50 iteration with train cost nan, valid cost nan, train acc 0.500000, valid acc 0.500000
    7 | 00m09s |    0.50000 |       1.5511 |    0.4615 |         0.1376 |          0.6660 |       3.1947 |   14.7922 |     401.3807 | 
stop after 50 iteration with train cost 387.484119, valid cost 358.366577, train acc 0.492250, valid acc 0.516000
    8 | 00m26s |    0.51600 |       1.5552 |    0.3468 |         0.3169 |          0.0444 |       9.8702 |    9.2395 |     495.6505 | 
stop after 50 iteration with train cost 73.797551, valid cost 61.530197, train acc 0.531250, valid acc 0.495000
    9 | 00m16s |    0.49500 |       0.7076 |    0.2841 |         0.6419 |          0.1419 |       5.0061 |   12.3230 |     491.8022 | 
stop after 50 iteration with train cost 15.092491, valid cost 14.541831, train acc 0.543500, valid acc 0.497000
   10 | 00m07s |    0.49700 |       0.8510 |    0.0589 |         0.4873 |          0.7115 |       4.5997 |   11.0504 |     185.4424 | 
Bayesian Optimization
-------------------------------------------------------------------------------------------------------------------------------------
 Step |   Time |      Value |   activation |      beta |   dropout_rate |   learning_rate |   num_hidden |   seq_len |   size_layer | 
stop after 50 iteration with train cost 20.418870, valid cost 10.943871, train acc 0.548000, valid acc 0.504000
/usr/local/lib/python3.5/dist-packages/sklearn/gaussian_process/gpr.py:457: UserWarning: fmin_l_bfgs_b terminated abnormally with the  state: {'grad': array([ -2.19648828e-05]), 'funcalls': 60, 'warnflag': 2, 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'nit': 6}
  " state: %s" % convergence_dict)
   11 | 00m36s |    0.50400 |       1.9722 |    0.1400 |         0.3632 |          0.9942 |       9.3480 |    5.1808 |     511.7820 | 
stop after 50 iteration with train cost 44.214597, valid cost 39.447975, train acc 0.547000, valid acc 0.511000
/usr/local/lib/python3.5/dist-packages/sklearn/gaussian_process/gpr.py:457: UserWarning: fmin_l_bfgs_b terminated abnormally with the  state: {'grad': array([ -1.09022457e-05]), 'funcalls': 49, 'warnflag': 2, 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'nit': 4}
  " state: %s" % convergence_dict)
   12 | 00m26s |    0.51100 |       1.7500 |    0.0607 |         0.4002 |          0.3759 |       9.9711 |    5.0667 |     470.4412 | 
stop after 50 iteration with train cost 99.537190, valid cost 94.412079, train acc 0.523000, valid acc 0.493000
/usr/local/lib/python3.5/dist-packages/sklearn/gaussian_process/gpr.py:457: UserWarning: fmin_l_bfgs_b terminated abnormally with the  state: {'grad': array([ -1.78025875e-05]), 'funcalls': 59, 'warnflag': 2, 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'nit': 7}
  " state: %s" % convergence_dict)
   13 | 00m43s |    0.49300 |       0.3265 |    0.0916 |         0.4331 |          0.1130 |       9.9007 |   19.3773 |     326.0415 | 
stop after 50 iteration with train cost 274.715683, valid cost 274.448212, train acc 0.509500, valid acc 0.512000
/usr/local/lib/python3.5/dist-packages/sklearn/gaussian_process/gpr.py:457: UserWarning: fmin_l_bfgs_b terminated abnormally with the  state: {'grad': array([ -3.68241077e-05]), 'funcalls': 51, 'warnflag': 2, 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'nit': 5}
  " state: %s" % convergence_dict)
   14 | 00m55s |    0.51200 |       1.4632 |    0.0478 |         0.1868 |          0.0046 |       9.9877 |   19.9527 |     436.0364 | 
stop after 50 iteration with train cost nan, valid cost nan, train acc 0.500000, valid acc 0.500000
   15 | 00m12s |    0.50000 |       1.7405 |    0.4607 |         0.4442 |          0.3457 |       2.0989 |   19.9088 |     135.9072 | 
stop after 50 iteration with train cost 219.328286, valid cost 214.091675, train acc 0.494000, valid acc 0.493000
   16 | 00m23s |    0.49300 |       1.7248 |    0.0845 |         0.1401 |          0.0570 |       9.8453 |    5.1397 |     422.3794 | 
stop after 50 iteration with train cost nan, valid cost nan, train acc 0.500250, valid acc 0.500000
   17 | 00m10s |    0.50000 |       1.5464 |    0.0535 |         0.6583 |          0.2869 |       2.0762 |    5.1217 |      71.4014 | 
stop after 50 iteration with train cost 1.684258, valid cost 1.324773, train acc 0.547250, valid acc 0.502000
/usr/local/lib/python3.5/dist-packages/sklearn/gaussian_process/gpr.py:457: UserWarning: fmin_l_bfgs_b terminated abnormally with the  state: {'grad': array([  1.05064401e-05]), 'funcalls': 48, 'warnflag': 2, 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'nit': 5}
  " state: %s" % convergence_dict)
   18 | 00m12s |    0.50200 |       0.5147 |    0.0981 |         0.2503 |          0.8700 |       2.1998 |   19.8795 |      64.4708 | 
stop after 50 iteration with train cost 20.364790, valid cost 16.596560, train acc 0.543500, valid acc 0.504000
   19 | 00m14s |    0.50400 |       0.6509 |    0.1735 |         0.9538 |          0.3490 |       2.0384 |   19.8586 |     280.5687 | 
stop after 50 iteration with train cost 35.997788, valid cost 7.070683, train acc 0.550000, valid acc 0.500000
/usr/local/lib/python3.5/dist-packages/sklearn/gaussian_process/gpr.py:457: UserWarning: fmin_l_bfgs_b terminated abnormally with the  state: {'grad': array([ -2.40310661e-05]), 'funcalls': 51, 'warnflag': 2, 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'nit': 5}
  " state: %s" % convergence_dict)
   20 | 00m26s |    0.50000 |       1.8937 |    0.4665 |         0.7281 |          0.5549 |       9.9243 |    5.0304 |     496.3627 | 
stop after 50 iteration with train cost 33.316251, valid cost 31.917385, train acc 0.532750, valid acc 0.498000
   21 | 01m05s |    0.49800 |       1.3592 |    0.0136 |         0.4119 |          0.9045 |       9.9148 |   19.6690 |     511.7459 | 
stop after 50 iteration with train cost 335.914213, valid cost 330.742371, train acc 0.516500, valid acc 0.501000
   22 | 00m59s |    0.50100 |       1.3139 |    0.0896 |         0.3180 |          0.0344 |       9.8874 |   19.1406 |     473.2550 | 
stop after 50 iteration with train cost 22.918857, valid cost 12.320593, train acc 0.549000, valid acc 0.498000
/usr/local/lib/python3.5/dist-packages/sklearn/gaussian_process/gpr.py:457: UserWarning: fmin_l_bfgs_b terminated abnormally with the  state: {'grad': array([ -1.76338156e-05]), 'funcalls': 46, 'warnflag': 2, 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'nit': 3}
  " state: %s" % convergence_dict)
   23 | 00m18s |    0.49800 |       1.1204 |    0.4581 |         0.2750 |          0.3324 |       2.1985 |   19.9052 |     366.9115 | 
stop after 50 iteration with train cost 197.230115, valid cost 196.958145, train acc 0.495500, valid acc 0.485000
   24 | 00m11s |    0.48500 |       0.0213 |    0.2907 |         0.2731 |          0.0010 |       2.0656 |    5.0754 |     219.8795 | 
stop after 50 iteration with train cost 518.450370, valid cost 490.195129, train acc 0.506000, valid acc 0.496000
   25 | 00m34s |    0.49600 |       0.5991 |    0.3734 |         0.1730 |          0.0297 |       9.8292 |    9.3370 |     448.6081 | 
stop after 50 iteration with train cost 25.127167, valid cost 17.650429, train acc 0.530250, valid acc 0.490000
   26 | 00m18s |    0.49000 |       0.1876 |    0.3141 |         0.1996 |          0.2209 |       9.9328 |    5.5145 |     149.1066 | 
stop after 50 iteration with train cost 65.672021, valid cost 63.703278, train acc 0.522500, valid acc 0.554000
/usr/local/lib/python3.5/dist-packages/sklearn/gaussian_process/gpr.py:457: UserWarning: fmin_l_bfgs_b terminated abnormally with the  state: {'grad': array([ -1.13953033e-05]), 'funcalls': 56, 'warnflag': 2, 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'nit': 5}
  " state: %s" % convergence_dict)
   27 | 00m11s |    0.55400 |       1.1016 |    0.0992 |         0.1672 |          0.0496 |       2.3178 |   19.8444 |     420.9660 | 
stop after 50 iteration with train cost nan, valid cost nan, train acc 0.500000, valid acc 0.500000
   28 | 00m34s |    0.50000 |       1.8591 |    0.3056 |         0.1494 |          0.1084 |       2.3779 |   19.4666 |     423.9416 | 
stop after 50 iteration with train cost 2.057963, valid cost 1.968167, train acc 0.488750, valid acc 0.498000
   29 | 01m10s |    0.49800 |       0.0000 |    0.0000 |         0.9900 |          0.0001 |      10.0000 |   20.0000 |     413.4489 | 
stop after 50 iteration with train cost 14.022513, valid cost 13.733238, train acc 0.496250, valid acc 0.517000
   30 | 00m33s |    0.51700 |       0.0000 |    0.0000 |         0.1000 |          0.0001 |       2.0000 |   20.0000 |     415.8341 | 

In [10]:
print('Maximum NN accuracy value: %f' % NN_BAYESIAN.res['max']['max_val'])
print('Best NN parameters: ', NN_BAYESIAN.res['max']['max_params'])


Maximum NN accuracy value: 0.554000
Best NN parameters:  {'num_hidden': 2.3178396780664174, 'dropout_rate': 0.16724952060867815, 'beta': 0.099189911765081795, 'learning_rate': 0.049638440024850142, 'size_layer': 420.96604492562358, 'activation': 1.1015571286131713, 'seq_len': 19.844388160934063}

That is, after rounding the raw values the same way generate_nn does, the best parameters are (a retraining sketch follows the list):

num hidden: 2
dropout rate: 0.16724952060867815
beta: 0.099189911765081795
learning rate: 0.049638440024850142
size layer: 421
activation: tanh
sequence length: 20
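
As a minimal sketch (not a cell from the original run), the winning configuration can be pulled out of the optimizer and used for one final training pass, reusing neural_network and the same rounding/clipping as generate_nn:

best = NN_BAYESIAN.res['max']['max_params']
param = {
    'num_hidden' : int(np.around(best['num_hidden'])),
    'size_layer' : int(np.around(best['size_layer'])),
    'learning_rate' : max(min(best['learning_rate'], 1), 0.0001),
    'dropout_rate' : max(min(best['dropout_rate'], 0.99), 0),
    'beta' : max(min(best['beta'], 0.5), 0.000001),
    'activation' : int(np.around(best['activation'])),
    'seq_len' : int(np.around(best['seq_len']))
}
train_cost, valid_cost, train_acc, valid_acc = neural_network(**param)
print('final valid acc: %f' % valid_acc)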

In [ ]: