Import necessary modules


In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
# fix random seed for reproducibility
np.random.seed(123)
# magic fix for displaying plots
%matplotlib inline


Using TensorFlow backend.

In [2]:
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=top_words)


/anaconda/lib/python2.7/site-packages/keras/datasets/imdb.py:45: UserWarning: The `nb_words` argument in `load_data` has been renamed `num_words`.
  warnings.warn('The `nb_words` argument in `load_data` '
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
17448960/17464789 [============================>.] - ETA: 0s 

In [22]:
print 'Shape of X_train: ' + str(X_train.shape)
print 'Shape of y_train: ' + str(y_train.shape)
print 'Shape of X_test: ' + str(X_test.shape)
print 'Shape of y_test: ' + str(y_test.shape)
print str(type(X_train))


Shape of X_train: (25000, 500)
Shape of y_train: (25000,)
Shape of X_test: (25000, 500)
Shape of y_test: (25000,)
<type 'numpy.ndarray'>

In [23]:
# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

In [24]:
print 'Shape of X_train: ' + str(X_train.shape)
print 'Shape of X_test: ' + str(X_test.shape)
print str(type(X_train))


Shape of X_train: (25000, 500)
Shape of X_test: (25000, 500)
<type 'numpy.ndarray'>

In [27]:
# create the model
embedding_vecor_length = 16
model = Sequential()
model.add(Embedding(top_words, embedding_vecor_length, input_length=max_review_length))
model.add(LSTM(20))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=3, batch_size=64)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_3 (Embedding)      (None, 500, 16)           80000     
_________________________________________________________________
lstm_3 (LSTM)                (None, 20)                2960      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 21        
=================================================================
Total params: 82,981
Trainable params: 82,981
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
 5120/25000 [=====>........................] - ETA: 286s - loss: 0.6781 - acc: 0.5861
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-27-84f74ba09002> in <module>()
      7 model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
      8 print(model.summary())
----> 9 model.fit(X_train, y_train, epochs=3, batch_size=64)

/anaconda/lib/python2.7/site-packages/keras/models.pyc in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
    865                               class_weight=class_weight,
    866                               sample_weight=sample_weight,
--> 867                               initial_epoch=initial_epoch)
    868 
    869     def evaluate(self, x, y, batch_size=32, verbose=1,

/anaconda/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1596                               initial_epoch=initial_epoch,
   1597                               steps_per_epoch=steps_per_epoch,
-> 1598                               validation_steps=validation_steps)
   1599 
   1600     def evaluate(self, x, y,

/anaconda/lib/python2.7/site-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
   1181                     batch_logs['size'] = len(batch_ids)
   1182                     callbacks.on_batch_begin(batch_index, batch_logs)
-> 1183                     outs = f(ins_batch)
   1184                     if not isinstance(outs, list):
   1185                         outs = [outs]

/anaconda/lib/python2.7/site-packages/keras/backend/tensorflow_backend.pyc in __call__(self, inputs)
   2271         updated = session.run(self.outputs + [self.updates_op],
   2272                               feed_dict=feed_dict,
-> 2273                               **self.session_kwargs)
   2274         return updated[:len(self.outputs)]
   2275 

/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    893     try:
    894       result = self._run(None, fetches, feed_dict, options_ptr,
--> 895                          run_metadata_ptr)
    896       if run_metadata:
    897         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1122     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1123       results = self._do_run(handle, final_targets, final_fetches,
-> 1124                              feed_dict_tensor, options, run_metadata)
   1125     else:
   1126       results = []

/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1319     if handle is None:
   1320       return self._do_call(_run_fn, self._session, feeds, fetches, targets,
-> 1321                            options, run_metadata)
   1322     else:
   1323       return self._do_call(_prun_fn, self._session, handle, feeds, fetches)

/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
   1325   def _do_call(self, fn, *args):
   1326     try:
-> 1327       return fn(*args)
   1328     except errors.OpError as e:
   1329       message = compat.as_text(e.message)

/anaconda/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1304           return tf_session.TF_Run(session, options,
   1305                                    feed_dict, fetch_list, target_list,
-> 1306                                    status, run_metadata)
   1307 
   1308     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

Filepath management


In [ ]:
project_dir = r'/Users/hudson/Code/marketModel/'

Load tagged data


In [ ]:
stock_data = pd.read_hdf(project_dir + 'data/stock_data/tagged_stock_data.hdf', 'table')

In [ ]:
s