In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension and use mode 2, so edited modules are
# reloaded before each cell runs without restarting the kernel.
%load_ext autoreload
%autoreload 2
In [2]:
from eden.converter.molecule import obabel
import networkx as nx
import pybel
import requests
import os.path
from itertools import tee
from numpy.random import randint
from numpy.random import uniform
from eden.graph import Vectorizer
from sklearn.linear_model import SGDClassifier
import datetime, time
from eden.util import random_bipartition_iter
from eden.model import ActiveLearningBinaryClassificationModel
from eden.util import configure_logging
import logging
# Configure the root logger through EDeN's logging helper.
# NOTE(review): the meaning of verbosity=2 is defined in eden.util, not here -- confirm the intended level.
configure_logging(logging.getLogger(),verbosity=2)
In [3]:
def make_iterable(filename, file_format):
    """Lazily yield molecule records from a chemical structure file.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    file_format : str
        'sdf' -- yield one string per record, where a record is every line
        up to and including the line whose stripped content is '$$$$'.
        'smi' -- yield every line of the file unchanged.

    Yields
    ------
    str
        The raw text of one record, with line endings preserved.

    Raises
    ------
    ValueError
        If ``file_format`` is neither 'sdf' nor 'smi'. This is a generator,
        so the error surfaces when iteration starts, not at call time.

    Notes
    -----
    For 'sdf' input, any text that follows the last '$$$$' terminator is
    not yielded.
    """
    if file_format == 'sdf':
        with open(filename) as f:
            # Collect the lines of the current record and join them once per
            # record instead of growing a string line by line.
            record_lines = []
            for line in f:
                record_lines.append(line)
                if line.strip() == '$$$$':
                    yield ''.join(record_lines)
                    record_lines = []
    elif file_format == 'smi':
        with open(filename) as f:
            for line in f:
                yield line
    else:
        # Fail loudly: a typo in the format name used to produce an empty
        # iterator that was indistinguishable from an empty file.
        raise ValueError(
            "unsupported file_format %r (expected 'sdf' or 'smi')" % (file_format,))
The data sets are defined here: `AID` selects which `AID<id>_active.sdf` and `AID<id>_inactive.sdf` files under `DATA_DIR` are used.
In [20]:
# Dataset identifier; it is substituted into the SDF file names below.
AID = 602325
#AID=2401
# Directory that holds the AID<id>_active.sdf / AID<id>_inactive.sdf files.
# Set the EDEN_DATA_DIR environment variable to point somewhere else without
# editing the notebook; the default is the original author's local checkout.
DATA_DIR = os.environ.get(
    'EDEN_DATA_DIR',
    '/home/liconj/proj/thesis/EDeN/examples/model_comparison/data')
# Build the paths with os.path.join rather than manual '/' concatenation.
active_fname = os.path.join(DATA_DIR, 'AID%s_active.sdf' % AID)
inactive_fname = os.path.join(DATA_DIR, 'AID%s_inactive.sdf' % AID)
In [21]:
########
# Create iterables from files
########
# Stream the active (positive) molecules as raw SDF records. The path comes
# from the configuration cell (active_fname), so changing AID / DATA_DIR there
# changes the data loaded here; the previous hard-coded 'AID720577_active.sdf'
# did not match the configured AID.
iterable_pos = make_iterable(active_fname, 'sdf')
# Keep one independent copy of the stream. A single tee() is sufficient; the
# earlier second tee() only rebound both names and discarded the first copy.
# NOTE(review): iterable_pos_ is not consumed in the visible cells; tee()
# buffers items for it while iterable_pos advances -- drop it if unused.
iterable_pos, iterable_pos_ = tee(iterable_pos)
# Split train/test
iterable_pos_train, iterable_pos_test = random_bipartition_iter(iterable_pos, relative_size=0.8)
In [22]:
# Pass the training SDF records to EDeN's Open Babel 3D converter.
# NOTE(review): the result is advanced with next() in the following cell, so it is presumably a lazy iterator -- confirm.
graphs = obabel.obabel_to_eden3d(iterable_pos_train)
In [23]:
# Pull the first converted item. The builtin next() is used instead of the
# Python-2-only .next() method so the cell also runs on Python 3.
graph = next(graphs)
In [24]:
# Show the item fetched from `graphs` as this cell's output.
graph
Out[24]:
In [17]:
# Parse every held-out SDF record with pybel and print it.
# NOTE(review): the original looped over `test`, which no visible cell defines
# (NameError on a fresh kernel). iterable_pos_test from the train/test split is
# the stream of raw SDF strings that pybel.readstring("sdf", ...) expects --
# confirm this is the intended input.
# `mol` deliberately stays bound to the last parsed molecule; later cells use it.
for x in iterable_pos_test:
    mol = pybel.readstring("sdf", x)
    print(mol)
In [27]:
# Print the last molecule parsed by the loop above. The call form of print
# gives the same output on Python 2 for a single argument and is valid on Python 3.
print(mol)
In [29]:
# Serialise `mol` back to SDF text and hand it to EDeN's conformer generator.
# NOTE(review): the meaning of the second argument (0) is not visible here -- confirm against obabel.generate_conformers.
mols = obabel.generate_conformers(mol.write("sdf"), 0)
In [30]:
# Show the value returned by generate_conformers as this cell's output.
mols
Out[30]:
In [68]: