In [1]:
# Optional backend override, currently disabled. Uncommenting the magic below
# sets KERAS_BACKEND=theano in the kernel's environment.
# NOTE(review): Keras chooses its backend at import time, so this presumably
# has to run before the import cell to take effect — confirm.
#%env KERAS_BACKEND=theano

In [1]:
import keras
from Protein_Dataset import Protein_Dataset
import numpy as np
from keras import layers
from keras.layers import wrappers
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

%matplotlib inline


Using TensorFlow backend.

In [2]:
# Dataset windowing parameters, both handed to Protein_Dataset below.
# maxlen is also the time-step dimension of the model's Input shape.
# NOTE(review): stride presumably is the step between consecutive windows —
# confirm against Protein_Dataset.
maxlen, stride = 400, 100

In [3]:
# Build the dataset helper from the "legacy_data" source using the window
# settings defined above (positional order: source, maxlen, stride).
prots = Protein_Dataset(
    "legacy_data",
    maxlen,
    stride,
)

In [4]:
# Size of the model's output axis: one slot per entry in the dataset's class
# table plus one extra.
# NOTE(review): the extra slot is presumably reserved for padding/unknown
# positions — confirm against Protein_Dataset.
output_classes = 1 + len(prots.class_table)

# Size of the model's input feature axis: one slot per entry in the acid table.
input_classes = len(prots.acid_table)

In [6]:
# Network input: each sample is a (maxlen, input_classes) float32 matrix.
inputs = layers.Input(shape=(maxlen, input_classes), dtype="float32")

# Skip time steps whose feature vector is entirely zero (presumably the padded
# positions — confirm against Protein_Dataset). A scalar sentinel is enough:
# Masking compares it against every feature and masks a step only when all of
# them equal it, and a plain number keeps the layer config trivially
# serialisable.
masking = layers.Masking(mask_value=0.0)(inputs)

# Bidirectional LSTM (128 units in the wrapped layer) that returns an output
# for every time step rather than only the last one.
bidir = wrappers.Bidirectional(layers.LSTM(128, return_sequences=True))(masking)

# Per-time-step softmax over output_classes; the layer's activations carry the
# default l1_l2 activity penalty.
output = wrappers.TimeDistributed(
    layers.Dense(
        output_classes,
        activation=keras.activations.softmax,
        activity_regularizer=keras.regularizers.l1_l2(),
    )
)(bidir)

In [7]:
# Wrap the layer graph into a trainable model, naming the endpoints explicitly.
model = keras.models.Model(inputs=inputs, outputs=output)

In [8]:
# Configure training: Adam optimiser, categorical cross-entropy loss, and
# categorical accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_accuracy'],
)

In [5]:
# Pull both prepared splits from the dataset helper: train=True yields the
# arrays used for fitting, train=False the arrays later passed to fit() as
# validation_data.
(x_train, y_train) = prots.get_prepared_data(train=True)
(x_test, y_test) = prots.get_prepared_data(train=False)

In [9]:
# Restore previously saved parameters so that the fit() call below continues
# from them instead of from a fresh initialisation. The file must have been
# written by a model with this same architecture.
model.load_weights(filepath="weights/weights.h5")

In [ ]:
# Train for 10 epochs in mini-batches of 50 samples, evaluating on the test
# split at the end of every epoch. batch_size and epochs are passed by keyword
# so the two integers cannot be misread or silently swapped.
model.fit(
    x_train,
    y_train,
    batch_size=50,
    epochs=10,
    validation_data=(x_test, y_test),
)