In [2]:
from src.python.deepseq0 import *
In [3]:
from keras.models import model_from_json
from src.python import baselines
# Choose the GO aspect BEFORE anything consumes it. Previously ASPECT was
# assigned after baselines.init_GO(asp=ASPECT), so on a fresh kernel the call
# either raised NameError or silently used whatever the star-import provided.
ASPECT = 'F'  # Molecular Function
baselines.init_GO(asp=ASPECT)

# Local MongoDB instance; the notebook reads from the 'prot2vec' database.
client = MongoClient("mongodb://localhost:27017/")
db = client['prot2vec']

print("Loading Ontology...")
# Same aspect as passed to init_GO above, so both views of GO agree.
onto = get_ontology(ASPECT)
# Checkpoint basenames; each is substituted into '../../checkpoints/%s.json'
# below to locate the serialized architecture.
model_str_deeperseq = "deeperseq-19-0.00224-0.57"
model_str_inception = "inception-14-0.00269-0.51"

# classes = [c for c in np.load('../../checkpoints/%s.npy' % model_str)]
# Target classes are every ontology class except the root.
# NOTE(review): if onto.classes is a plain attribute, `classes` aliases it and
# the removal below also changes the ontology object's own collection.
# The membership guard keeps this cell re-runnable: an unconditional remove()
# raises once the root has already been dropped by a previous execution.
classes = onto.classes
if onto.root in classes:
    classes.remove(onto.root)
assert onto.root not in classes

# Rebuild each network from its JSON description.
with open('../../checkpoints/%s.json' % model_str_deeperseq, 'r') as f:
    deeperseq = model_from_json(f.read())
with open('../../checkpoints/%s.json' % model_str_inception, 'r') as f:
    inception = model_from_json(f.read())
In [3]:
# Restore weights for both networks, inception first and then deeperseq,
# printing one confirmation line per model.
for net, checkpoint in (
    (inception, model_str_inception),
    (deeperseq, model_str_deeperseq),
):
    net.load_weights("../../checkpoints/%s.hdf5" % checkpoint)
    print("Loaded model from disk")
In [4]:
# `lim` is forwarded as the `limit` argument of the stream factory.
# NOTE(review): None presumably means "no cap" -- confirm against
# get_training_and_validation_streams.
lim = None
trn_stream, tst_stream = get_training_and_validation_streams(
    db,
    onto,
    classes,
    limit=lim,
)
In [5]:
# Run the deeperseq network over the validation stream; predict() hands back
# the ground truth alongside the model's predictions.
y_true, y_deeperseq = predict(
    deeperseq,
    batch_generator(tst_stream),
    len(tst_stream),
    classes,
)
In [6]:
# Score the deeperseq predictions against the ground truth, then draw the
# precision/recall plot for this single model.
perf_deeperseq = baselines.performance(
    y_deeperseq,
    y_true,
    classes,
)
baselines.plot_precision_recall(dict(deeperseq=perf_deeperseq))
In [1]:
import pandas as pd
# perf_deeperseq unpacks into three parallel sequences; show the first 20 rows
# side by side as a table.
prs, rcs, f1s = perf_deeperseq
pd.DataFrame(dict(precision=prs, recall=rcs, F_max=f1s)).head(20)
In [8]:
# Build the train/validation streams again before evaluating the second model.
# NOTE(review): presumably needed because the earlier stream was consumed by
# the previous prediction pass -- confirm.
lim = None
trn_stream, tst_stream = get_training_and_validation_streams(
    db,
    onto,
    classes,
    limit=lim,
)
In [9]:
# Run the inception network over the validation stream; predict() hands back
# the ground truth alongside the model's predictions.
y_true, y_inception = predict(
    inception,
    batch_generator(tst_stream),
    len(tst_stream),
    classes,
)
In [10]:
# Score the inception predictions against the ground truth, then draw the
# precision/recall plot for this single model.
perf_inception = baselines.performance(
    y_inception,
    y_true,
    classes,
)
baselines.plot_precision_recall(dict(inception=perf_inception))
In [11]:
import pandas as pd
# perf_inception unpacks into three parallel sequences; show the first 10 rows
# side by side as a table.
prs, rcs, f1s = perf_inception
pd.DataFrame(dict(precision=prs, recall=rcs, F_max=f1s)).head(10)
Out[11]:
In [9]:
def unpad_seq(padded_seq):
    """Turn a padded index sequence back into a plain string.

    Every index equal to PAD is skipped; each remaining index is looked up in
    AA.index2aa and the results are concatenated in their original order.
    """
    residues = (AA.index2aa[ix] for ix in padded_seq if ix != PAD)
    return ''.join(residues)
# Build the plain-dict inputs consumed by the baseline predictors:
# key -> unpadded sequence string, plus each stream's _seq2go mapping.
# NOTE(review): the validation split uses limit=lim while the training split
# below hard-codes limit=None; these agree only while lim is None -- confirm
# the asymmetry is intended.
# NOTE(review): the two splits come from two separate calls; if the split is
# not deterministic they may not be complementary -- verify.
_, tst_stream = get_training_and_validation_streams(db, onto, classes, limit=lim)
# Each stream item unpacks as (key, padded sequence, <ignored>).
seqs_valid = {k: unpad_seq(seq) for k, seq, _ in tst_stream}
# Reads a private attribute of the stream object.
annots_valid = tst_stream._seq2go
trn_stream, _ = get_training_and_validation_streams(db, onto, classes, limit=None)
seqs_train = {k: unpad_seq(seq) for k, seq, _ in trn_stream}
annots_train = trn_stream._seq2go
In [13]:
# "naive" baseline: predict for the validation sequences, score the result
# against the validation annotations, and plot precision/recall.
y_naive = baselines.predict(
    seqs_train,
    annots_train,
    seqs_valid,
    "naive",
    load_file=0,
)
perf_naive = baselines.performance(y_naive, annots_valid)
baselines.plot_precision_recall(dict(naive=perf_naive))
In [18]:
import pandas as pd
# perf_naive unpacks into three parallel sequences; show the first 20 rows
# side by side as a table.
prs, rcs, f1s = perf_naive
pd.DataFrame(dict(precision=prs, recall=rcs, F_max=f1s)).head(20)
Out[18]:
In [11]:
# "blast" baseline: predict for the validation sequences, score the result
# against the validation annotations, and plot precision/recall.
# NOTE(review): load_file=1 here versus 0 for the naive baseline; presumably
# this reuses a previously saved result -- confirm in baselines.predict.
y_blast = baselines.predict(
    seqs_train,
    annots_train,
    seqs_valid,
    "blast",
    load_file=1,
)
perf_blast = baselines.performance(y_blast, annots_valid)
baselines.plot_precision_recall(dict(blast=perf_blast))
In [16]:
%matplotlib inline
import pylab
# Enlarge the default figure, then overlay all four methods on one
# precision/recall plot.
pylab.rcParams['figure.figsize'] = (10.0, 8.0)
perf = dict(
    naive=perf_naive,
    blast=perf_blast,
    deeperseq=perf_deeperseq,
    inception=perf_inception,
)
baselines.plot_precision_recall(perf)
In [ ]:
# Union of all annotation values held in each stream's _seq2go mapping.
# set().union(*iterables) replaces the previous reduce(...) fold: `reduce` is
# not a builtin on Python 3, and the fold rebuilt a fresh set at every step.
# With no values at all, the result is still an empty set.
s1 = set().union(*trn_stream._seq2go.values())
s2 = set().union(*tst_stream._seq2go.values())
# Sizes of: train, validation, union, intersection, train-only, validation-only.
len(s1), len(s2), len(s1 | s2), len(s1 & s2), len(s1 - s2), len(s2 - s1)
In [ ]: