In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable the autoreload extension in mode 2 so edited modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from eden.converter.molecule import obabel
import networkx as nx
import pybel
import requests
import os.path
from itertools import tee
from numpy.random import randint
from numpy.random import uniform
from eden.graph import Vectorizer
from sklearn.linear_model import SGDClassifier
import datetime, time
from eden.util import random_bipartition_iter
from eden.model import ActiveLearningBinaryClassificationModel

from eden.util import configure_logging
import logging
# Configure the root logger through EDeN's helper. What verbosity=2 maps to is
# defined by eden.util.configure_logging -- TODO confirm the resulting level.
configure_logging(logging.getLogger(),verbosity=2)

In [3]:
def make_iterable(filename, file_format):
    """Lazily yield one molecule record at a time from a molecule file.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    file_format : str
        'sdf' -- records are delimited by a line containing only '$$$$'
        (surrounding whitespace ignored); each yielded string includes that
        delimiter line.
        'smi' -- every line of the file is yielded unchanged as one record.

    Yields
    ------
    str
        Raw text of one record, with line endings preserved.

    Raises
    ------
    ValueError
        If ``file_format`` is neither 'sdf' nor 'smi'. Because this is a
        generator, the error surfaces when the result is first iterated.
    """
    # Fail loudly instead of silently producing an empty iterator.
    if file_format not in ('sdf', 'smi'):
        raise ValueError(
            "unsupported file_format %r (expected 'sdf' or 'smi')" % (file_format,))

    with open(filename) as f:
        if file_format == 'smi':
            for line in f:
                yield line
            return

        # 'sdf': collect lines until the record delimiter is reached.
        record_lines = []
        for line in f:
            record_lines.append(line)
            if line.strip() == '$$$$':
                yield ''.join(record_lines)
                record_lines = []

        # A final record lacking its '$$$$' terminator is still emitted so no
        # data is lost; trailing blank lines alone do not form a record.
        remainder = ''.join(record_lines)
        if remainder.strip():
            yield remainder

This is where the data sets are defined:


In [20]:
# Identifier interpolated into the data-set file names below.
AID = 602325
# AID = 2401  (alternative identifier, currently disabled)

# Absolute, machine-specific directory that holds the .sdf files.
DATA_DIR = '/home/liconj/proj/thesis/EDeN/examples/model_comparison/data'

# Paths of the active / inactive molecule files for the selected AID.
active_fname = '{0}/AID{1}_active.sdf'.format(DATA_DIR, AID)
inactive_fname = '{0}/AID{1}_inactive.sdf'.format(DATA_DIR, AID)

In [21]:
########
# Create iterables from files
########

# NOTE(review): this file name is hard-coded and does not use `active_fname` /
# `AID` from the data-set definition cell (AID 602325 there, 720577 here) --
# confirm which data set is intended.
iterable_pos = make_iterable('AID720577_active.sdf', 'sdf')

# One tee() call is enough to obtain an independent second iterator
# (`iterable_pos_`) over the same records.
iterable_pos, iterable_pos_ = tee(iterable_pos)

# Split train/test
# NOTE(review): presumably relative_size=0.8 sends ~80% of the records to the
# first (train) iterator -- confirm against eden.util.random_bipartition_iter.
iterable_pos_train, iterable_pos_test = random_bipartition_iter(iterable_pos, relative_size=0.8)

In [22]:
# Feed the training-split records to obabel.obabel_to_eden3d; the result is
# consumed as an iterator of networkx graphs in the following cells.
graphs = obabel.obabel_to_eden3d(iterable_pos_train)

In [23]:
# Pull the next graph off the iterator. The built-in next() works on both
# Python 2.6+ and Python 3; it raises StopIteration once `graphs` is exhausted,
# so re-running this cell eventually fails unless `graphs` is rebuilt.
graph = next(graphs)

In [24]:
# Display the graph's repr as the cell output.
graph


Out[24]:
<networkx.classes.graph.Graph at 0x47bf050>

In [17]:
# NOTE(review): `test` is not defined in any cell visible in this notebook
# (this cell's execution count, 17, predates the cells above) -- presumably an
# iterable of SDF record strings; define it explicitly before this loop.
for x in test:
    # Parse one SDF record into a pybel molecule. After the loop `mol` holds
    # the last molecule read, which later cells reuse.
    mol = pybel.readstring("sdf", x)
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(mol)


[Br-].[n+]1(CCCCCCCCCCCCCCCC)ccccc1	8816

[Br-].[n+]1(c(c2c(c3c1cc(N)cc3)ccc(N)c2)c1ccccc1)CC	14710

[Cl-].O.[n+]1(CCCCCCCCCCCCCCCC)ccccc1	22324

O(c1c2c(nccc2)c(NCCCCCCN)cc1OC)C	224618

ClC1=C(N2CCN(CC2)C)C(=O)c2c(C1=O)cccc2	237102

ClC1=C(N)C(=O)c2c(ccc(S(=O)(=O)N)c2)C1=O	240739

S(C1=C(C(=O)c2c(C1=O)cccc2)C)Cc1ccccc1	248894

BrC1=C(N)C(=O)c2c(nccc2)C1=O	266923

Brc1cc2[nH]c(=O)c3c(c2cc1N)cccc3	271216

O=C1N(Cc2ccccc2)C(=O)c2c(C1)cccc2	272514

O=C1c2c(nccc2)C(=CC1=O)N	279596

O(C1=C(N)C(=O)c2nc(ccc2C1=O)C)C	280616

O(c1cc2c(cc1OC)ccnc2C(=O)c1c([N+](=O)[O-])cc(OC)c(OC)c1)C	286873

O1C(OCC1)c1ncc(c2c1cccc2N)C	323009

O=C1NC(=O)c2c(C1)cccc2	349435

Clc1c(N2CCN(CC2)CCO)c(O)c2c(c1O)C(=O)C=CC2=O.Cl	377971

Brc1c(N2CCN(CC2)CCOCCO)c(O)c2c(c1O)C(=O)C=CC2=O.Cl	378196

OC(C(NC1=CC(=O)c2c(nccc2)C1=O)C)c1ccccc1	386231

S(=O)(=O)([O-])C(F)(F)F.Oc1cc2c(n3[n+](c(cc3c2=Cc2ccc(N(C)C)cc2)c2ccccc2)C)cc1	392758

S(=O)(=O)([O-])C(F)(F)F.[O-][N+](=O)c1ccc(C=c2c3n([n+](c(c3)c3ccccc3)C)c3c2cccc3)cc1	392760

S(=O)(=O)([O-])C(F)(F)F.n12[n+](c(cc1c(=Cc1ccccc1)c1c2cccc1)c1ccccc1)C	392762

S(=O)(=O)([O-])C(F)(F)F.O(c1cc(C=c2c3n([n+](c(c3)c3ccccc3)C)c3c2cccc3)cc(OC)c1OC)C	395171

S(=O)(=O)([O-])C(F)(F)F.O(C(=O)c1[n+](n2c(c(=Cc3ccc(N(C)C)cc3)c3c2cccc3)c1)C)CC	395179

O(C(=O)c1c2c(n(c1C)Cc1ccccc1)C(=O)C(=O)c1c2cccc1)CC	406008

O=C1N(CCc2ccccc2)C(=O)c2c(C1)cccc2	713096

s1nc2cc(NC(=S)Nc3ccc(OC)cc3)ccc2n1	753169

o1nc2c3c(C(=O)c4c(c13)cccc4)c(OC)cc2	774975

BrC1=C(N)C(=O)c2c(C1=O)cccc2	934233

O1CCN(C2=C(n3nnc4c3cccc4)C(=O)c3c(C2=O)cccc3)CC1	1405137

O=C1N(Cc2ccc(OC)cc2)C(=O)c2c(C1)cccc2	1475552

O=C(Nc1n(CCCC)c2nc3c(nc2c1C#N)cccc3)c1ccccc1	1910302

Clc1ccc(S(=O)(=O)Nc2n(c3nc4c(nc3c2)cccc4)Cc2ccccc2)cc1	2044030

S(=O)(=O)(Nc1c2c(c(O)c(Sc3[nH]ncn3)c1)cccc2)c1ccccc1	2133777

O=C1N(Cc2ccccc2)C(=O)/C(=C/OCC)/c2c1cccc2	2339359

Clc1ccc(NC(=S)Nc2c3c(ccc2)cncc3)cc1	2407446

S=C(Nc1c2c(ccc1)cncc2)Nc1ccc(cc1)C(=O)C	2545001

O=C1c2c(nnc(c2)c2c(cccc2)C)c2c1cccc2	2769183

[Br-].n1(CCCCCCCCCCCCCCCC)c[n+](cc1)C	2846928

Brc1c(C(=O)c2nccc3c2cc(OC)c(OC)c3)cc(OC)c(OC)c1	2974206

S1(=O)(=O)CC(N(C2=C(NC(=O)c3ccccc3)C(=O)c3c(C2=O)cccc3)C)CC1	3342467

O(C1=CC(=O)c2c3c(ncnc3N)ccc2C1=O)C	3711067

S(c1c(O)c2c(c(O)c1)cccc2)/C(=C/C(=O)OCC)/C	5358773

O(C(=O)c1[nH]c(c(N)/c(=C/2\C(=O)C(=C(OC)C=C2)OC)/c1C)c1nc2c(cc1)C(=O)C(=C(N)C2=O)OC)C	5359866

O=c1n(c(=O)nc2n(nc(nc12)/C=C/c1ccccc1)CC)C	5756371

S(=O)(=O)(/N=C\1/c2c(C(=O)C(=C1)Nc1cc(ccc1)C(=O)O)cccc2)c1ccc(cc1)C	5824722

S(=O)(=O)(/N=C\1/c2c(C(=O)C(=C1)Nc1cc(ccc1)C(=O)O)cccc2)c1c(cc(cc1)C)C	5824726

S(=O)(=O)(/N=C/1\c2c(C(=O)C(=C1)Nc1ccc(cc1)C(=O)O)cccc2)c1ccccc1	5998602

S(=O)(=O)(/N=C\1/c2c(C(=O)C(=C1)Nc1c(cccc1)C)cccc2)c1ccc(cc1)C(=O)O	6023693

S(=O)(=O)(/N=C\1/c2c(C(=O)C(=C1)Nc1c(F)cccc1)cccc2)c1ccc(cc1)C(=O)O	6023694

S(=O)(=O)(/N=C/1\c2c(C(=O)C(=C1)Nc1ccc(cc1)C)cccc2)c1sccc1	6032979

Cl.O(c1cc(NCCCC(N)C)c2ncccc2c1)C	6603104

O(/C=C/1\c2c(C(=O)N(C1=O)C)cc(OC)cc2)CC	8143087

O=C(N/N=C/1\C/C(=N\NC(=O)c2ccncc2)/c2c1cccc2)c1ccncc1	9701594

S(=O)(=O)(/N=C\1/c2c(C(=O)C(=C1)Nc1c(cccc1)C)cccc2)c1sccc1	11834470

Cl.[Cl-].[n+]1(c2c(cc3c1cc(N)cc3)ccc(N)c2)C	15558347

[I-].[n+]1(CCCCCCCCCCCCCCCC)c2c(ccc1)cccc2	15945189

[Br-].O.[n+]1(CCCCCCCCCCCCCCCC)ccccc1	16211806

O([C@@H]1O[C@@H]([C@@H](O)[C@H](O)[C@H]1O)CO)c1c2c([C@@H]([C@@H]3c4c(C(=O)c5c3cc(cc5O)C(=O)O)c(OC3O[C@@H]([C@@H](O)[C@H](O)[C@H]3O)CO)ccc4)c3c(C2=O)c(O)cc(c3)C(=O)O)ccc1	16218404

Brc1cc2c(c(=O)[nH]c(Nc3ccccc3)c2)cc1	16727376

Brc1cc2c(c(=O)[nH]c(Nc3c(OC)cccc3)c2)cc1	16727377

S(=O)(=O)(/N=C\1/c2c(C(=O)C(=C1)Nc1ccc(O)cc1)cccc2)c1ccc(OC)cc1	22829045

Clc1ccc(OCc2n(c3c([n+]2C)cccc3)CC(=O)OCCCCCCCCCC)cc1.[Cl-]	24747653

ClC1=C(N(c2ccc(cc2)C(=O)O)C(=O)C)C(=O)c2c(C1=O)cccc2	24761366

Clc1cc2nc3c(n(C(CCCN(CC)CC)C)c2cc1)nc(=O)[nH]c3=O.S(=O)(=O)(O)O	44602155


In [27]:
# Show the molecule left in `mol` by the parsing loop (the last record read).
# print(...) with a single argument behaves the same on Python 2 and 3.
print(mol)


Clc1cc2nc3c(n(C(CCCN(CC)CC)C)c2cc1)nc(=O)[nH]c3=O.S(=O)(=O)(O)O	44602155


In [29]:
# Serialise `mol` back to SDF text and pass it to obabel.generate_conformers.
# NOTE(review): the second positional argument (0) presumably controls how many
# conformers are generated -- confirm against eden.converter.molecule.obabel.
mols = obabel.generate_conformers(mol.write("sdf"), 0)

In [30]:
# Display the list returned by generate_conformers as the cell output.
mols


Out[30]:
[<pybel.Molecule at 0x47bf210>]

In [68]:



---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
<ipython-input-68-d18cfdbd7ec9> in <module>()
----> 1 graph = graphs.next()

StopIteration: