In [1]:
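# Optional: uncomment to select the Theano backend. Keras reads KERAS_BACKEND
# when it is first imported, so this has to run before the `import keras` cell.
#%env KERAS_BACKEND=theano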
In [1]:
import keras
from Protein_Dataset import Protein_Dataset
import numpy as np
from keras import layers
from keras.layers import wrappers
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# maxlen fixes the sequence (time) dimension of the model input and is also
# handed to Protein_Dataset together with stride.
maxlen = 400
# Presumably the step between consecutive windows cut from a sequence --
# TODO confirm against Protein_Dataset.
stride = 100
In [3]:
# Build the dataset wrapper from the "legacy_data" location, passing maxlen and
# stride (presumably the window length and step used to split sequences --
# confirm in Protein_Dataset).
prots = Protein_Dataset("legacy_data", maxlen, stride)
In [4]:
# Width of the model's per-position input vector: one slot per entry in the
# dataset's acid_table.
input_classes = len(prots.acid_table)
# Width of the per-position softmax: one slot per entry in class_table plus one
# extra (presumably a padding / "no class" label -- TODO confirm in Protein_Dataset).
output_classes = len(prots.class_table) + 1
In [6]:
# Network graph: masked input -> bidirectional LSTM -> per-timestep softmax.
# Each sample is a (maxlen, input_classes) float32 array.
inputs = layers.Input(shape=[maxlen, input_classes], dtype="float32")
# Mask every timestep whose feature vector is entirely zero (presumably the
# padding rows -- confirm against Protein_Dataset). mask_value is a scalar that
# Keras compares against each feature; a scalar also keeps the layer config
# JSON-serializable, which an ndarray mask_value would not.
masking = layers.Masking(mask_value=0.0)(inputs)
# return_sequences=True keeps one output per timestep so that every position
# receives its own class prediction.
bidir = wrappers.Bidirectional(layers.LSTM(128, return_sequences=True))(masking)
# Apply the same Dense softmax classifier at every timestep. l1_l2() is called
# with the library's default penalty factors on the layer's activations.
output = wrappers.TimeDistributed(
    layers.Dense(
        output_classes,
        activation=keras.activations.softmax,
        activity_regularizer=keras.regularizers.l1_l2(),
    )
)(bidir)
In [7]:
# Assemble the Keras Model that maps the `inputs` tensor to the `output` tensor.
model = keras.models.Model(inputs, output)
In [8]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# categorical accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)
In [5]:
# Fetch the prepared (inputs, targets) arrays for the training split ...
x_train, y_train = prots.get_prepared_data(train=True)
# ... and for the train=False split, which is used as validation data in fit().
x_test, y_test = prots.get_prepared_data(train=False)
In [9]:
# Restore previously saved weights so that any subsequent training continues
# from them rather than from a fresh initialisation. The file
# weights/weights.h5 must already exist.
model.load_weights("weights/weights.h5")
In [ ]:
# Train for 10 epochs with mini-batches of 50 samples; the train=False split is
# passed as validation data. batch_size and epochs are given by keyword so the
# call does not depend on the positional order of fit()'s parameters.
model.fit(
    x_train,
    y_train,
    batch_size=50,
    epochs=10,
    validation_data=(x_test, y_test),
)