Critical Assessment of Baselines


In [5]:
# Notebook setup: inline plotting, default figure size, imports and temp dir.
# The magic below must stay on its own line (no trailing comment), because
# IPython passes the rest of the line to the magic as its argument.
%matplotlib inline  
import pylab
# Default size applied to every figure created in this notebook
# (matplotlib interprets figsize as width, height in inches).
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

# NOTE(review): `os` and `np` are not referenced by any cell visible in this
# notebook; they may be leftovers -- confirm before removing.
import os
import numpy as np

# NOTE(review): wildcard import. `MongoClient` and `evaluate_performance` are
# used in later cells without any other visible import, so they presumably
# come from here -- confirm, and prefer explicit imports so the origin of each
# name is clear.
from src.python.baselines import *

from tempfile import gettempdir
# Path of the system temporary directory; not referenced by any cell visible
# in this notebook.
tmp_dir = gettempdir()

In [6]:
# Create a client for the MongoDB URI on localhost (default port 27017) and
# look up the 'prot2vec' entry on it; `db` is the handle passed to every
# evaluate_performance call below.
# NOTE(review): `MongoClient` is not imported explicitly in this notebook --
# presumably re-exported by the wildcard import from src.python.baselines.
# The URI is hard-coded, so the notebook only runs against a local server.
client = MongoClient("mongodb://127.0.0.1:27017")
db = client['prot2vec']

Molecular Function


In [6]:
# Evaluate the "naive", "blast" and "seq2go-proba" methods with aspect code
# 'F'. This cell sits under the "Molecular Function" heading, so 'F'
# presumably selects that ontology -- confirm against evaluate_performance.
evaluate_performance(db, ["naive", "blast", "seq2go-proba"], 'F')


100%
Finished loading 81630 mappings!
100%
Finished loading 44839 sequences!
100%
Finished loading 7909 mappings!
95%
targets processed: 100%|██████████| 2941/2941 [00:00<00:00, 184706.64it/s]
100%
Finished loading 5528 sequences!

Cellular Component


In [8]:
# Evaluate the "naive" and "seq2go" methods with aspect code 'C'. This cell
# sits under the "Cellular Component" heading, so 'C' presumably selects that
# ontology -- confirm against evaluate_performance.
# NOTE(review): the method list differs from the other sections (no "blast",
# and "seq2go" rather than "seq2go-proba") -- confirm this is intentional.
evaluate_performance(db, ["naive", "seq2go"], 'C')


100%
Finished loading 117586 mappings!
100%
Finished loading 59489 sequences!
100%
Finished loading 10993 mappings!
97%
targets processed:   0%|          | 0/4134 [00:00<?, ?it/s]
targets processed: 100%|██████████| 4134/4134 [00:00<00:00, 232598.03it/s]
100%
Finished loading 7869 sequences!

Biological Process


In [5]:
# Evaluate the "naive" and "blast" methods with aspect code 'P'. This cell
# sits under the "Biological Process" heading, so 'P' presumably selects that
# ontology -- confirm against evaluate_performance.
# NOTE(review): no seq2go variant is evaluated here, unlike the other two
# sections -- confirm this is intentional.
evaluate_performance(db, ["naive", "blast"], 'P')


100%
Finished loading 219338 mappings!
100%
Finished loading 65144 sequences!
100%
Finished loading 22750 mappings!
100%
Finished loading 10151 sequences!
targets processed: 100%|██████████| 4655/4655 [00:00<00:00, 335270.63it/s]

In [ ]: