Imports


In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# tensorflow
import tensorflow as tf
print('Tested with TensorFlow 1.2.0')
print ('Your TensorFlow version:', tf.__version__) # tested with v1.2
from tensorflow.contrib.learn.python.learn.estimators import constants
from tensorflow.contrib.learn.python.learn.estimators.dynamic_rnn_estimator import PredictionType

# Estimators
from tensorflow.contrib import learn

# Model builder
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib


# Input function
from tensorflow.python.estimator.inputs import numpy_io

# data and data preprocessing
from tensorflow.contrib.keras.python.keras.datasets import imdb 
from tensorflow.contrib.keras.python.keras.preprocessing import sequence

# Run an experiment
from tensorflow.contrib.learn.python.learn import learn_runner

# numpy
import numpy as np

# Enable TensorFlow logs
tf.logging.set_verbosity(tf.logging.INFO)


Tested with TensorFlow 1.2.0
Your TensorFlow version: 1.2.0

Helpers


In [4]:
# map word to index
word_to_index = imdb.get_word_index()
# map index to word
index_to_word = {}
num_words = 0
for k in word_to_index: 
    index_to_word[word_to_index[k]] = k
    num_words += 1

# turn a sequence into a sentence
def get_sentence(seq):
    sentence = ''
    for v in seq:
        if v != 0: # 0 means it was just added to the sentence so it could have maxlen words
            sentence += index_to_word[int(v)] + ' '
    return sentence

# turn a sentence into a sequence
def gen_sequence(sentence):
    seq = []
    for word in sentence:
        seq.append(word_to_index[word])
    return np.asarray(seq, dtype=np.float32)

print('there are', num_words, 'words in the files')
#for i in range(1, 100):
#    print(i, index_to_word[i])
#print(index_to_word[49], index_to_word[212], index_to_word[242], index_to_word[136])


there are 88584 words in the files

Visualizing data


In [5]:
'''
# ------------------- negative
print('-' * 30)
print('Example of a negative review')
print('-' * 30)

x = open('data/train/neg/0_3.txt')
r = x.readline()
print(r)

# ------------------ positive
print()
print('-' * 30)
print('Example of a positive review')
print('-' * 30)

x = open('data/train/pos/0_9.txt')
r = x.readline()
print(r)
'''


Out[5]:
"\n# ------------------- negative\nprint('-' * 30)\nprint('Example of a negative review')\nprint('-' * 30)\n\nx = open('data/train/neg/0_3.txt')\nr = x.readline()\nprint(r)\n\n# ------------------ positive\nprint()\nprint('-' * 30)\nprint('Example of a positive review')\nprint('-' * 30)\n\nx = open('data/train/pos/0_9.txt')\nr = x.readline()\nprint(r)\n"

In [6]:
print('Loading data')
num_words = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

# lets make things faster
limit = 3200
maxlen = 80

x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

x_train = x_train[:limit].astype('float32')
y_train = y_train[:limit].astype('int32')

x_test = x_test[:limit].astype('float32')
y_test = y_test[:limit].astype('int32')

'''
# y to onehot
y_train_one_hot = np.zeros((limit, 2), dtype=np.float32)
for i in range(limit):
    y_train_one_hot[i][y_train[i]] = 1

y_test_one_hot = np.zeros((limit, 2), dtype=np.float32)
for i in range(limit):
    y_test_one_hot[i][y_test[i]] = 1

print(x_train.shape)
#print(y_train)
#print(y_train_one_hot)
'''


Loading data
Out[6]:
'\n# y to onehot\ny_train_one_hot = np.zeros((limit, 2), dtype=np.float32)\nfor i in range(limit):\n    y_train_one_hot[i][y_train[i]] = 1\n\ny_test_one_hot = np.zeros((limit, 2), dtype=np.float32)\nfor i in range(limit):\n    y_test_one_hot[i][y_test[i]] = 1\n\nprint(x_train.shape)\n#print(y_train)\n#print(y_train_one_hot)\n'

In [9]:
# parameters
LEARNING_RATE = 0.01
BATCH_SIZE = 64
STEPS = 10000

# Input functions
'''
x_train_dict = {'x': x_train }

train_input_fn = numpy_io.numpy_input_fn(
          x_train_dict, y_train, batch_size=BATCH_SIZE, 
           shuffle=True, num_epochs=None, 
            queue_capacity=1000, num_threads=4)

x_test_dict = {'x': x_test }
	
test_input_fn = numpy_io.numpy_input_fn(
          x_test_dict, y_test, batch_size=BATCH_SIZE, shuffle=False, num_epochs=1)
'''

# Input Function                                                 
def get_input_fn(labels, targets, batch_size, epochs=None):
    def input_fn():
        batched = tf.train.shuffle_batch({'x': labels,
                                          'y': targets},
                                     batch_size,
                                     min_after_dequeue=100,
                                     num_threads=4,
                                     capacity=1000,
                                     enqueue_many=True,
                                     allow_smaller_final_batch=True)
        label = batched.pop('y')
        return batched, label
    return input_fn

train_input_fn = get_input_fn(x_train, y_train, BATCH_SIZE)
test_input_fn = get_input_fn(x_test, y_test, BATCH_SIZE)

# In[ ]:

# create estimator
xc = tf.contrib.layers.real_valued_column("x")
estimator = tf.contrib.learn.DynamicRnnEstimator(problem_type = constants.ProblemType.CLASSIFICATION,
                                                 prediction_type = PredictionType.SINGLE_VALUE,
                                                 sequence_feature_columns = [xc],
                                                 context_feature_columns = None,
                                                 num_units = [256, 128],
                                                 cell_type = 'lstm', 
                                                 optimizer = 'Adam',
                                                 learning_rate = LEARNING_RATE,
                                                 num_classes = 2)

# create experiment
def generate_experiment_fn():
  
  """
  Create an experiment function given hyperparameters.
  Returns:
    A function (output_dir) -> Experiment where output_dir is a string
    representing the location of summaries, checkpoints, and exports.
    this function is used by learn_runner to create an Experiment which
    executes model code provided in the form of an Estimator and
    input functions.
    All listed arguments in the outer function are used to create an
    Estimator, and input functions (training, evaluation, serving).
    Unlisted args are passed through to Experiment.
  """

  def _experiment_fn(output_dir):

    train_input = train_input_fn
    test_input = test_input_fn
    
    return tf.contrib.learn.Experiment(
        estimator,
        train_input_fn=train_input,
        eval_input_fn=test_input,
        train_steps=STEPS
    )
  return _experiment_fn

# run experiment 
learn_runner.run(generate_experiment_fn(), output_dir='/tmp/sentiment_analysis')


WARNING:tensorflow:From /usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py:662: multi_class_target (from tensorflow.contrib.layers.python.layers.target_column) is deprecated and will be removed after 2016-11-12.
Instructions for updating:
This file will be removed after the deprecation date.Please switch to third_party/tensorflow/contrib/learn/python/learn/estimators/head.py
INFO:tensorflow:Using default config.
WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmphtco3_pa
INFO:tensorflow:Using config: {'_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fb3c462f7b8>, '_task_type': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_model_dir': '/tmp/tmphtco3_pa', '_tf_random_seed': None, '_master': '', '_evaluation_master': '', '_keep_checkpoint_max': 5, '_save_checkpoints_secs': 600, '_keep_checkpoint_every_n_hours': 10000, '_num_worker_replicas': 0, '_task_id': 0, '_is_chief': True, '_num_ps_replicas': 0, '_session_config': None, '_save_checkpoints_steps': None, '_save_summary_steps': 100, '_environment': 'local'}
WARNING:tensorflow:From /usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/monitors.py:268: BaseMonitor.__init__ (from tensorflow.contrib.learn.python.learn.monitors) is deprecated and will be removed after 2016-12-05.
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
WARNING:tensorflow:sequence_input_from_feature_columns (from tensorflow.contrib.layers.python.layers.feature_column_ops) is experimental and may change or be removed at any time, and without warning.
/usr/local/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py:93: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmphtco3_pa/model.ckpt.
INFO:tensorflow:loss = 0.707535, step = 1
WARNING:tensorflow:sequence_input_from_feature_columns (from tensorflow.contrib.layers.python.layers.feature_column_ops) is experimental and may change or be removed at any time, and without warning.
INFO:tensorflow:Starting evaluation at 2017-06-20-23:08:44
INFO:tensorflow:Restoring parameters from /tmp/tmphtco3_pa/model.ckpt-1
INFO:tensorflow:Evaluation [1/100]
INFO:tensorflow:Evaluation [2/100]
INFO:tensorflow:Evaluation [3/100]
INFO:tensorflow:Evaluation [4/100]
INFO:tensorflow:Evaluation [5/100]
INFO:tensorflow:Evaluation [6/100]
INFO:tensorflow:Evaluation [7/100]
INFO:tensorflow:Evaluation [8/100]
INFO:tensorflow:Evaluation [9/100]
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [11/100]
INFO:tensorflow:Evaluation [12/100]
INFO:tensorflow:Evaluation [13/100]
INFO:tensorflow:Evaluation [14/100]
INFO:tensorflow:Evaluation [15/100]
INFO:tensorflow:Evaluation [16/100]
INFO:tensorflow:Evaluation [17/100]
INFO:tensorflow:Evaluation [18/100]
INFO:tensorflow:Evaluation [19/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [21/100]
INFO:tensorflow:Evaluation [22/100]
INFO:tensorflow:Evaluation [23/100]
INFO:tensorflow:Evaluation [24/100]
INFO:tensorflow:Evaluation [25/100]
INFO:tensorflow:Evaluation [26/100]
INFO:tensorflow:Evaluation [27/100]
INFO:tensorflow:Evaluation [28/100]
INFO:tensorflow:Evaluation [29/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [31/100]
INFO:tensorflow:Evaluation [32/100]
INFO:tensorflow:Evaluation [33/100]
INFO:tensorflow:Evaluation [34/100]
INFO:tensorflow:Evaluation [35/100]
INFO:tensorflow:Evaluation [36/100]
INFO:tensorflow:Evaluation [37/100]
INFO:tensorflow:Evaluation [38/100]
INFO:tensorflow:Evaluation [39/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [41/100]
INFO:tensorflow:Evaluation [42/100]
INFO:tensorflow:Evaluation [43/100]
INFO:tensorflow:Evaluation [44/100]
INFO:tensorflow:Evaluation [45/100]
INFO:tensorflow:Evaluation [46/100]
INFO:tensorflow:Evaluation [47/100]
INFO:tensorflow:Evaluation [48/100]
INFO:tensorflow:Evaluation [49/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [51/100]
INFO:tensorflow:Evaluation [52/100]
INFO:tensorflow:Evaluation [53/100]
INFO:tensorflow:Evaluation [54/100]
INFO:tensorflow:Evaluation [55/100]
INFO:tensorflow:Evaluation [56/100]
INFO:tensorflow:Evaluation [57/100]
INFO:tensorflow:Evaluation [58/100]
INFO:tensorflow:Evaluation [59/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [61/100]
INFO:tensorflow:Evaluation [62/100]
INFO:tensorflow:Evaluation [63/100]
INFO:tensorflow:Evaluation [64/100]
INFO:tensorflow:Evaluation [65/100]
INFO:tensorflow:Evaluation [66/100]
INFO:tensorflow:Evaluation [67/100]
INFO:tensorflow:Evaluation [68/100]
INFO:tensorflow:Evaluation [69/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [71/100]
INFO:tensorflow:Evaluation [72/100]
INFO:tensorflow:Evaluation [73/100]
INFO:tensorflow:Evaluation [74/100]
INFO:tensorflow:Evaluation [75/100]
INFO:tensorflow:Evaluation [76/100]
INFO:tensorflow:Evaluation [77/100]
INFO:tensorflow:Evaluation [78/100]
INFO:tensorflow:Evaluation [79/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [81/100]
INFO:tensorflow:Evaluation [82/100]
INFO:tensorflow:Evaluation [83/100]
INFO:tensorflow:Evaluation [84/100]
INFO:tensorflow:Evaluation [85/100]
INFO:tensorflow:Evaluation [86/100]
INFO:tensorflow:Evaluation [87/100]
INFO:tensorflow:Evaluation [88/100]
INFO:tensorflow:Evaluation [89/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [91/100]
INFO:tensorflow:Evaluation [92/100]
INFO:tensorflow:Evaluation [93/100]
INFO:tensorflow:Evaluation [94/100]
INFO:tensorflow:Evaluation [95/100]
INFO:tensorflow:Evaluation [96/100]
INFO:tensorflow:Evaluation [97/100]
INFO:tensorflow:Evaluation [98/100]
INFO:tensorflow:Evaluation [99/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2017-06-20-23:08:49
INFO:tensorflow:Saving dict for global step 1: accuracy = 0.500938, global_step = 1, loss = 0.826691
INFO:tensorflow:Validation (step 1): loss = 0.826691, accuracy = 0.500938, global_step = 1
INFO:tensorflow:global_step/sec: 4.94952
INFO:tensorflow:loss = 0.697217, step = 101 (20.205 sec)
INFO:tensorflow:global_step/sec: 6.8423
INFO:tensorflow:loss = 0.679994, step = 201 (14.615 sec)
INFO:tensorflow:global_step/sec: 7.01853
INFO:tensorflow:loss = 0.687514, step = 301 (14.248 sec)
INFO:tensorflow:global_step/sec: 6.96258
INFO:tensorflow:loss = 0.725777, step = 401 (14.363 sec)
INFO:tensorflow:global_step/sec: 7.00502
INFO:tensorflow:loss = 0.685064, step = 501 (14.275 sec)
INFO:tensorflow:global_step/sec: 7.06336
INFO:tensorflow:loss = 0.719429, step = 601 (14.158 sec)
INFO:tensorflow:global_step/sec: 6.73853
INFO:tensorflow:loss = 0.704809, step = 701 (14.840 sec)
INFO:tensorflow:global_step/sec: 7.11131
INFO:tensorflow:loss = 0.701828, step = 801 (14.062 sec)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-9-908e837efeb7> in <module>()
     81 
     82 # run experiment
---> 83 learn_runner.run(generate_experiment_fn(), output_dir='/tmp/sentiment_analysis')

/usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py in run(experiment_fn, output_dir, schedule, run_config, hparams)
    208   schedule = schedule or _get_default_schedule(run_config)
    209 
--> 210   return _execute_schedule(experiment, schedule)
    211 
    212 

/usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/learn_runner.py in _execute_schedule(experiment, schedule)
     45     logging.error('Allowed values for this experiment are: %s', valid_tasks)
     46     raise TypeError('Schedule references non-callable member %s' % schedule)
---> 47   return task()
     48 
     49 

/usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/experiment.py in train_and_evaluate(self)
    493             name=eval_dir_suffix, hooks=self._eval_hooks
    494         )]
--> 495       self.train(delay_secs=0)
    496 
    497     eval_result = self._call_evaluate(input_fn=self._eval_input_fn,

/usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/experiment.py in train(self, delay_secs)
    273     return self._call_train(input_fn=self._train_input_fn,
    274                             max_steps=self._train_steps,
--> 275                             hooks=self._train_monitors + extra_hooks)
    276 
    277   def evaluate(self, delay_secs=None):

/usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/experiment.py in _call_train(self, _sentinel, input_fn, steps, hooks, max_steps)
    663                                  steps=steps,
    664                                  max_steps=max_steps,
--> 665                                  monitors=hooks)
    666 
    667   def _call_evaluate(self, _sentinel=None,  # pylint: disable=invalid-name,

/usr/local/lib/python3.5/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
    287             'in a future version' if date is None else ('after %s' % date),
    288             instructions)
--> 289       return func(*args, **kwargs)
    290     return tf_decorator.make_decorator(func, new_func, 'deprecated',
    291                                        _add_deprecated_arg_notice_to_docstring(

/usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in fit(self, x, y, input_fn, steps, batch_size, monitors, max_steps)
    453       hooks.append(basic_session_run_hooks.StopAtStepHook(steps, max_steps))
    454 
--> 455     loss = self._train_model(input_fn=input_fn, hooks=hooks)
    456     logging.info('Loss for final step: %s.', loss)
    457     return self

/usr/local/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py in _train_model(self, input_fn, hooks)
   1005         loss = None
   1006         while not mon_sess.should_stop():
-> 1007           _, loss = mon_sess.run([model_fn_ops.train_op, model_fn_ops.loss])
   1008       summary_io.SummaryWriterCache.clear()
   1009       return loss

/usr/local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in run(self, fetches, feed_dict, options, run_metadata)
    503                           feed_dict=feed_dict,
    504                           options=options,
--> 505                           run_metadata=run_metadata)
    506 
    507   def should_stop(self):

/usr/local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in run(self, fetches, feed_dict, options, run_metadata)
    840                               feed_dict=feed_dict,
    841                               options=options,
--> 842                               run_metadata=run_metadata)
    843       except _PREEMPTION_ERRORS as e:
    844         logging.info('An error was raised. This may be due to a preemption in '

/usr/local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in run(self, *args, **kwargs)
    796 
    797   def run(self, *args, **kwargs):
--> 798     return self._sess.run(*args, **kwargs)
    799 
    800 

/usr/local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in run(self, fetches, feed_dict, options, run_metadata)
    950                                   feed_dict=feed_dict,
    951                                   options=options,
--> 952                                   run_metadata=run_metadata)
    953 
    954     for hook in self._hooks:

/usr/local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in run(self, *args, **kwargs)
    796 
    797   def run(self, *args, **kwargs):
--> 798     return self._sess.run(*args, **kwargs)
    799 
    800 

/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    787     try:
    788       result = self._run(None, fetches, feed_dict, options_ptr,
--> 789                          run_metadata_ptr)
    790       if run_metadata:
    791         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    995     if final_fetches or final_targets:
    996       results = self._do_run(handle, final_targets, final_fetches,
--> 997                              feed_dict_string, options, run_metadata)
    998     else:
    999       results = []

/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1130     if handle is None:
   1131       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1132                            target_list, options, run_metadata)
   1133     else:
   1134       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1137   def _do_call(self, fn, *args):
   1138     try:
-> 1139       return fn(*args)
   1140     except errors.OpError as e:
   1141       message = compat.as_text(e.message)

/usr/local/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1119         return tf_session.TF_Run(session, options,
   1120                                  feed_dict, fetch_list, target_list,
-> 1121                                  status, run_metadata)
   1122 
   1123     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

In [ ]:
# generate predictions
preds = list(estimator.predict(input_fn=test_input_fn))

# number of outputs we want to see the prediction
NUM_EVAL = 10
def check_prediction(x, y, p, index):
    print('prediction:', p[index]['classes'])
    print('target:', y[index])
    print('sentence:', get_sentence(x[index]))

for i in range(NUM_EVAL):
    index = np.random.randint(limit)
    print('test:', index)s
    print('-' * 30)
    print(np.asarray(x_test[index], dtype=np.int32))
    check_prediction(x_test, y_test, preds, index)
    print()