In [ ]:
from ontobio.ontol_factory import OntologyFactory

# Build the GO ontology object that the rest of the notebook works against.
# `ont` is passed as `ontology=` when the association sets are created below
# and is also used directly for label lookups (`ont.label(...)`).
# Per the original note, the 'go' handle is resolved transparently through a
# remote SPARQL service, so the first run may take a few seconds. Jupyter shows
# '*' while the cell is busy: BE PATIENT and do not re-execute the cell.
ofactory = OntologyFactory()
ont = ofactory.create('go')

In [ ]:
from ontobio.assoc_factory import AssociationSetFactory

# Taxon CURIE used to restrict the query to mouse annotations.
MOUSE = 'NCBITaxon:10090'

# Build the mouse gene-to-function association set against the GO ontology
# loaded above.
# The associations come from a remote service (the original note names Monarch),
# so the first run may take a few seconds. Jupyter shows '*' while the cell is
# busy: be patient and do not re-execute the cell.
afactory = AssociationSetFactory()
aset = afactory.create(
    ontology=ont,
    subject_category='gene',
    object_category='function',
    taxon=MOUSE,
)

In [ ]:
# Sanity check: show the first five entries of the association set's subjects.
aset.subjects[0:5]

In [ ]:
# Jaccard similarity between two subjects (MGI gene ids) of the association set.
# NOTE(review): presumably computed over the GO classes each gene is annotated
# to - confirm against ontobio's AssociationSet documentation.
aset.jaccard_similarity('MGI:1929214', 'MGI:1917826')

In [ ]:
# Taxon CURIE used to restrict the query to human annotations.
HUMAN = 'NCBITaxon:9606'

# Build the human gene-to-function association set, using the same GO ontology
# object (`ont`) as the mouse set.
# The associations come from a remote service (the original note names Monarch),
# so the first run may take a few seconds. Jupyter shows '*' while the cell is
# busy: be patient and do not re-execute the cell.
afactory = AssociationSetFactory()
aset_human = afactory.create(
    ontology=ont,
    subject_category='gene',
    object_category='function',
    taxon=HUMAN,
)

In [10]:
# Taxon CURIE used to restrict the query to zebrafish annotations.
ZFISH = 'NCBITaxon:7955'

# Build the zebrafish gene-to-function association set, using the same GO
# ontology object (`ont`) as the other species.
# The associations are fetched from a remote service, so the first run may take
# a few seconds. Jupyter shows '*' while the cell is busy: be patient and do not
# re-execute the cell.
# NOTE: the output saved with this notebook shows this call failing with a
# SolrError: the request to golr.berkeleybop.org hit a read timeout (4 s), so
# `aset_zfish` was not assigned in that run.
afactory = AssociationSetFactory()
aset_zfish = afactory.create(
    ontology=ont,
    subject_category='gene',
    object_category='function',
    taxon=ZFISH,
)


ERROR:pysolr:Connection to server 'http://golr.berkeleybop.org/select/?q=%2A%3A%2A&fq=document_category%3A%22annotation%22&fq=taxon_closure%3A%22NCBITaxon%3A7955%22&facet=off&facet.limit=0&facet.mincount=1&fl=bioentity%2Cbioentity_label%2Cqualifier%2Cannotation_class&rows=100000&wt=json' timed out: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)
Traceback (most recent call last):
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
    six.raise_from(e, None)
  File "<string>", line 2, in raise_from
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
    httplib_response = conn.getresponse()
  File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1331, in getresponse
    response.begin()
  File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 297, in begin
    version, status, reason = self._read_status()
  File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 258, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py", line 586, in readinto
    return self._sock.recv_into(b)
socket.timeout: timed out

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/adapters.py", line 440, in send
    timeout=timeout
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 639, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/util/retry.py", line 357, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/packages/six.py", line 686, in reraise
    raise value
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 601, in urlopen
    chunked=chunked)
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 389, in _make_request
    self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 309, in _raise_timeout
    raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
urllib3.exceptions.ReadTimeoutError: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py", line 366, in _send_request
    timeout=self.timeout)
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py", line 515, in get
    return self.request('GET', url, **kwargs)
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py", line 502, in request
    resp = self.send(prep, **send_kwargs)
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py", line 612, in send
    r = adapter.send(request, **kwargs)
  File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/adapters.py", line 516, in send
    raise ReadTimeout(e, request=request)
requests.exceptions.ReadTimeout: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)
---------------------------------------------------------------------------
timeout                                   Traceback (most recent call last)
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    386                     # otherwise it looks like a programming error was the cause.
--> 387                     six.raise_from(e, None)
    388         except (SocketTimeout, BaseSSLError, SocketError) as e:

~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)

~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    382                 try:
--> 383                     httplib_response = conn.getresponse()
    384                 except Exception as e:

/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in getresponse(self)
   1330             try:
-> 1331                 response.begin()
   1332             except ConnectionError:

/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in begin(self)
    296         while True:
--> 297             version, status, reason = self._read_status()
    298             if status != CONTINUE:

/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in _read_status(self)
    257     def _read_status(self):
--> 258         line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    259         if len(line) > _MAXLINE:

/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py in readinto(self, b)
    585             try:
--> 586                 return self._sock.recv_into(b)
    587             except timeout:

timeout: timed out

During handling of the above exception, another exception occurred:

ReadTimeoutError                          Traceback (most recent call last)
~/repos/ontobio/venv/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    439                     retries=self.max_retries,
--> 440                     timeout=timeout
    441                 )

~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    638             retries = retries.increment(method, url, error=e, _pool=self,
--> 639                                         _stacktrace=sys.exc_info()[2])
    640             retries.sleep()

~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    356             if read is False or not self._is_method_retryable(method):
--> 357                 raise six.reraise(type(error), error, _stacktrace)
    358             elif read is not None:

~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
    685             raise value.with_traceback(tb)
--> 686         raise value
    687 

~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    600                                                   body=body, headers=headers,
--> 601                                                   chunked=chunked)
    602 

~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    388         except (SocketTimeout, BaseSSLError, SocketError) as e:
--> 389             self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
    390             raise

~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in _raise_timeout(self, err, url, timeout_value)
    308         if isinstance(err, SocketTimeout):
--> 309             raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
    310 

ReadTimeoutError: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)

During handling of the above exception, another exception occurred:

ReadTimeout                               Traceback (most recent call last)
~/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py in _send_request(self, method, path, body, headers, files)
    365             resp = requests_method(url, data=bytes_body, headers=headers, files=files,
--> 366                                    timeout=self.timeout)
    367         except requests.exceptions.Timeout as err:

~/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py in get(self, url, **kwargs)
    514         kwargs.setdefault('allow_redirects', True)
--> 515         return self.request('GET', url, **kwargs)
    516 

~/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    501         send_kwargs.update(settings)
--> 502         resp = self.send(prep, **send_kwargs)
    503 

~/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
    611         # Send the request
--> 612         r = adapter.send(request, **kwargs)
    613 

~/repos/ontobio/venv/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    515             elif isinstance(e, ReadTimeoutError):
--> 516                 raise ReadTimeout(e, request=request)
    517             else:

ReadTimeout: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)

During handling of the above exception, another exception occurred:

SolrError                                 Traceback (most recent call last)
<ipython-input-10-79ea128c10b8> in <module>()
      9                        subject_category='gene',
     10                        object_category='function',
---> 11                        taxon=ZFISH)

~/repos/ontobio/ontobio/assoc_factory.py in create(self, ontology, subject_category, object_category, evidence, taxon, relation, file, fmt)
     62                                    object_category=object_category,
     63                                    evidence=evidence,
---> 64                                    taxon=taxon)
     65 
     66         logging.info("Creating map for {} subjects".format(len(assocs)))

~/repos/ontobio/venv/lib/python3.6/site-packages/cachier/core.py in func_wrapper(*args, **kwds)
    174                     return core.wait_on_entry_calc(key)
    175             _print('No entry found. No current calc. Calling like a boss.')
--> 176             return _calc_entry(core, key, func, args, kwds)
    177 
    178         def clear_cache():

~/repos/ontobio/venv/lib/python3.6/site-packages/cachier/core.py in _calc_entry(core, key, func, args, kwds)
     74         core.mark_entry_being_calculated(key)
     75         # _get_executor().submit(core.mark_entry_being_calculated, key)
---> 76         func_res = func(*args, **kwds)
     77         core.set_entry(key, func_res)
     78         # _get_executor().submit(core.set_entry, key, func_res)

~/repos/ontobio/ontobio/assoc_factory.py in bulk_fetch_cached(**args)
    189 def bulk_fetch_cached(**args):
    190         logging.info("Fetching assocs from store (will be cached)")
--> 191         return bulk_fetch(**args)

~/repos/ontobio/ontobio/golr/golr_associations.py in bulk_fetch(subject_category, object_category, taxon, rows, **kwargs)
    144                                          rows=rows,
    145                                          iterate=True,
--> 146                                          **kwargs)
    147     logging.info("Rows retrieved: {}".format(len(assocs)))
    148     if len(assocs) == 0:

~/repos/ontobio/ontobio/golr/golr_associations.py in search_associations_compact(**kwargs)
     75     searchresult = search_associations(use_compact_associations=True,
     76                                        facet_fields=[],
---> 77                                        **kwargs
     78     )
     79     return searchresult['compact_associations']

~/repos/ontobio/ontobio/golr/golr_associations.py in search_associations(**kwargs)
     31     logging.info("CREATING_GOLR_QUERY {}".format(kwargs))
     32     q = GolrAssociationQuery(**kwargs)
---> 33     return q.exec()
     34 
     35 def get_objects_for_subject(subject=None,

~/repos/ontobio/ontobio/golr/golr_query.py in exec(self, **kwargs)
    886         params = self.solr_params()
    887         logging.info("PARAMS="+str(params))
--> 888         results = self.solr.search(**params)
    889         n_docs = len(results.docs)
    890         logging.info("Docs found: {}".format(results.hits))

~/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py in search(self, q, search_handler, **kwargs)
    718         params = {'q': q}
    719         params.update(kwargs)
--> 720         response = self._select(params, handler=search_handler)
    721         decoded = self.decoder.decode(response)
    722 

~/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py in _select(self, params, handler)
    416             # Typical case.
    417             path = '%s/?%s' % (handler, params_encoded)
--> 418             return self._send_request('get', path)
    419         else:
    420             # Handles very long queries by submitting as a POST.

~/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py in _send_request(self, method, path, body, headers, files)
    368             error_message = "Connection to server '%s' timed out: %s"
    369             self.log.error(error_message, url, err, exc_info=True)
--> 370             raise SolrError(error_message % (url, err))
    371         except requests.exceptions.ConnectionError as err:
    372             error_message = "Failed to connect to server at '%s', are you sure that URL is correct? Checking it in a browser might help: %s"

SolrError: Connection to server 'http://golr.berkeleybop.org/select/?q=%2A%3A%2A&fq=document_category%3A%22annotation%22&fq=taxon_closure%3A%22NCBITaxon%3A7955%22&facet=off&facet.limit=0&facet.mincount=1&fl=bioentity%2Cbioentity_label%2Cqualifier%2Cannotation_class&rows=100000&wt=json' timed out: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)

In [17]:
# TODO - do not re-read
# Build an association set from a GAF source (judging by the method name) and
# bind it to `aset`, replacing the mouse association set created earlier.
# NOTE(review): neither `afa` nor `url` is defined in any cell visible in this
# notebook; unless they are defined elsewhere, this cell raises NameError on a
# fresh kernel. Confirm where they come from (possibly a deleted cell).
aset = afa.create_from_gaf(url, ontology=ont)

In [ ]:


In [20]:
from collections import defaultdict

# Tag every (gene id, GO term id) pair with how the gene reaches that term:
#   'I' - the term is the direct object of one of the gene's associations
#   'R' - the term was returned by process_regulates() for a directly
#         annotated term (presumably the processes it regulates - confirm)
# A pair that ends up with both tags is annotated to a term directly and also
# reaches it through process_regulates().
# NOTE(review): `assocs` and `process_regulates` are not defined in the cells
# visible here; they must already exist in the kernel for this cell to run.
rmap = defaultdict(set)
for a in assocs:
    # The evidence type used to be read here but was never used; the lookup was
    # dropped so associations without an 'evidence' entry no longer raise.
    gene = a['subject']['id']
    p = a['object']['id']
    up = process_regulates(p)
    for rp in up:
        rmap[(gene, rp)].add('R')
    rmap[(gene, p)].add('I')

# Number of distinct (gene, term) pairs collected.
len(rmap)


Out[20]:
334682

In [22]:
# Keep only the (gene, term) pairs that carry more than one tag in rmap. The
# cell that builds rmap only ever adds 'I' and 'R', so these are exactly the
# pairs tagged with both.
mixed = [(pair, tags) for pair, tags in rmap.items() if len(tags) >= 2]
len(mixed)


Out[22]:
4921

In [25]:
# Show the first 20 entries of `mixed` rather than dumping the whole list.
mixed[0:20]


Out[25]:
[(('MGI:MGI:102890', 'GO:0007049'), {'I', 'R'}),
 (('MGI:MGI:1915332', 'GO:0007254'), {'I', 'R'}),
 (('MGI:MGI:1338799', 'GO:0006351'), {'I', 'R'}),
 (('MGI:MGI:2135607', 'GO:0006476'), {'I', 'R'}),
 (('MGI:MGI:96960', 'GO:0006914'), {'I', 'R'}),
 (('MGI:MGI:2183426', 'GO:0060070'), {'I', 'R'}),
 (('MGI:MGI:1860488', 'GO:0007596'), {'I', 'R'}),
 (('MGI:MGI:1346348', 'GO:0006366'), {'I', 'R'}),
 (('MGI:MGI:107179', 'GO:0003700'), {'I', 'R'}),
 (('MGI:MGI:1927468', 'GO:0045087'), {'I', 'R'}),
 (('MGI:MGI:2443584', 'GO:0006351'), {'I', 'R'}),
 (('MGI:MGI:109452', 'GO:0051216'), {'I', 'R'}),
 (('MGI:MGI:1913464', 'GO:0007049'), {'I', 'R'}),
 (('MGI:MGI:1913744', 'GO:0006915'), {'I', 'R'}),
 (('MGI:MGI:2684845', 'GO:0007623'), {'I', 'R'}),
 (('MGI:MGI:1202890', 'GO:0001525'), {'I', 'R'}),
 (('MGI:MGI:97844', 'GO:0006887'), {'I', 'R'}),
 (('MGI:MGI:1891227', 'GO:0003677'), {'I', 'R'}),
 (('MGI:MGI:99474', 'GO:0016567'), {'I', 'R'}),
 (('MGI:MGI:108409', 'GO:0007186'), {'I', 'R'})]

In [24]:
# Look up the label of GO:0007049, the term in the first `mixed` entry shown
# in this notebook (recorded output: 'cell cycle').
ont.label('GO:0007049')


Out[24]:
'cell cycle'

In [ ]: