In [ ]:
from ontobio.ontol_factory import OntologyFactory
# Build the Gene Ontology (GO) object; later cells pass it to the association
# factories and use it for label look-ups.
#
# The factory transparently talks to a remote SPARQL service, so the first run
# may take a few seconds. Jupyter shows '*' while the cell is busy -- be
# patient and do not re-execute the cell.
ofactory = OntologyFactory()
ont = ofactory.create(
    'go'
)
In [ ]:
from ontobio.assoc_factory import AssociationSetFactory
# NCBI Taxonomy CURIE used to restrict the associations to mouse.
MOUSE = 'NCBITaxon:10090'

# Gene -> function association set for mouse, built against the GO ontology
# object `ont`.
#
# The factory transparently uses the remote Monarch service, so the first run
# may take a few seconds. Jupyter shows '*' while the cell is busy -- be
# patient and do not re-execute the cell.
afactory = AssociationSetFactory()
aset = afactory.create(
    ontology=ont,
    subject_category='gene',
    object_category='function',
    taxon=MOUSE,
)
In [ ]:
# Peek at the first five subject IDs held by the association set.
aset.subjects[:5]
In [ ]:
# Jaccard similarity between two subjects of `aset`, given by their MGI gene IDs.
# NOTE(review): presumably computed over the GO terms annotated to each gene --
# confirm against ontobio's AssociationSet.jaccard_similarity.
aset.jaccard_similarity('MGI:1929214', 'MGI:1917826')
In [ ]:
# NCBI Taxonomy CURIE used to restrict the associations to human.
HUMAN = 'NCBITaxon:9606'

# Gene -> function association set for human, built against the same GO
# ontology object `ont`.
#
# The factory transparently uses the remote Monarch service, so the first run
# may take a few seconds. Jupyter shows '*' while the cell is busy -- be
# patient and do not re-execute the cell.
afactory = AssociationSetFactory()
aset_human = afactory.create(
    ontology=ont,
    subject_category='gene',
    object_category='function',
    taxon=HUMAN,
)
In [10]:
# NCBI Taxonomy CURIE used to restrict the associations to zebrafish.
ZFISH = 'NCBITaxon:7955'

# Gene -> function association set for zebrafish, built against the same GO
# ontology object `ont`.
#
# The factory transparently uses a remote service, so the first run may take a
# few seconds. Jupyter shows '*' while the cell is busy -- be patient and do
# not re-execute the cell.
#
# NOTE: in the saved run this cell did not complete. The bulk fetch from
# golr.berkeleybop.org hit a 4-second read timeout and pysolr raised SolrError,
# so `aset_zfish` was never assigned in that session.
afactory = AssociationSetFactory()
aset_zfish = afactory.create(
    ontology=ont,
    subject_category='gene',
    object_category='function',
    taxon=ZFISH,
)
ERROR:pysolr:Connection to server 'http://golr.berkeleybop.org/select/?q=%2A%3A%2A&fq=document_category%3A%22annotation%22&fq=taxon_closure%3A%22NCBITaxon%3A7955%22&facet=off&facet.limit=0&facet.mincount=1&fl=bioentity%2Cbioentity_label%2Cqualifier%2Cannotation_class&rows=100000&wt=json' timed out: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)
Traceback (most recent call last):
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 387, in _make_request
six.raise_from(e, None)
File "<string>", line 2, in raise_from
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 383, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1331, in getresponse
response.begin()
File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 297, in begin
version, status, reason = self._read_status()
File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 258, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py", line 586, in readinto
return self._sock.recv_into(b)
socket.timeout: timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/adapters.py", line 440, in send
timeout=timeout
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 639, in urlopen
_stacktrace=sys.exc_info()[2])
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/util/retry.py", line 357, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/packages/six.py", line 686, in reraise
raise value
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 601, in urlopen
chunked=chunked)
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 389, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py", line 309, in _raise_timeout
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
urllib3.exceptions.ReadTimeoutError: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py", line 366, in _send_request
timeout=self.timeout)
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py", line 515, in get
return self.request('GET', url, **kwargs)
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py", line 502, in request
resp = self.send(prep, **send_kwargs)
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py", line 612, in send
r = adapter.send(request, **kwargs)
File "/Users/cjm/repos/ontobio/venv/lib/python3.6/site-packages/requests/adapters.py", line 516, in send
raise ReadTimeout(e, request=request)
requests.exceptions.ReadTimeout: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)
---------------------------------------------------------------------------
timeout Traceback (most recent call last)
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
386 # otherwise it looks like a programming error was the cause.
--> 387 six.raise_from(e, None)
388 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
382 try:
--> 383 httplib_response = conn.getresponse()
384 except Exception as e:
/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
/usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
timeout: timed out
During handling of the above exception, another exception occurred:
ReadTimeoutError Traceback (most recent call last)
~/repos/ontobio/venv/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
439 retries=self.max_retries,
--> 440 timeout=timeout
441 )
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
638 retries = retries.increment(method, url, error=e, _pool=self,
--> 639 _stacktrace=sys.exc_info()[2])
640 retries.sleep()
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
356 if read is False or not self._is_method_retryable(method):
--> 357 raise six.reraise(type(error), error, _stacktrace)
358 elif read is not None:
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
685 raise value.with_traceback(tb)
--> 686 raise value
687
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
388 except (SocketTimeout, BaseSSLError, SocketError) as e:
--> 389 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
390 raise
~/repos/ontobio/venv/lib/python3.6/site-packages/urllib3/connectionpool.py in _raise_timeout(self, err, url, timeout_value)
308 if isinstance(err, SocketTimeout):
--> 309 raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
310
ReadTimeoutError: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)
During handling of the above exception, another exception occurred:
ReadTimeout Traceback (most recent call last)
~/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py in _send_request(self, method, path, body, headers, files)
365 resp = requests_method(url, data=bytes_body, headers=headers, files=files,
--> 366 timeout=self.timeout)
367 except requests.exceptions.Timeout as err:
~/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py in get(self, url, **kwargs)
514 kwargs.setdefault('allow_redirects', True)
--> 515 return self.request('GET', url, **kwargs)
516
~/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
501 send_kwargs.update(settings)
--> 502 resp = self.send(prep, **send_kwargs)
503
~/repos/ontobio/venv/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
611 # Send the request
--> 612 r = adapter.send(request, **kwargs)
613
~/repos/ontobio/venv/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
515 elif isinstance(e, ReadTimeoutError):
--> 516 raise ReadTimeout(e, request=request)
517 else:
ReadTimeout: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)
During handling of the above exception, another exception occurred:
SolrError Traceback (most recent call last)
<ipython-input-10-79ea128c10b8> in <module>()
9 subject_category='gene',
10 object_category='function',
---> 11 taxon=ZFISH)
~/repos/ontobio/ontobio/assoc_factory.py in create(self, ontology, subject_category, object_category, evidence, taxon, relation, file, fmt)
62 object_category=object_category,
63 evidence=evidence,
---> 64 taxon=taxon)
65
66 logging.info("Creating map for {} subjects".format(len(assocs)))
~/repos/ontobio/venv/lib/python3.6/site-packages/cachier/core.py in func_wrapper(*args, **kwds)
174 return core.wait_on_entry_calc(key)
175 _print('No entry found. No current calc. Calling like a boss.')
--> 176 return _calc_entry(core, key, func, args, kwds)
177
178 def clear_cache():
~/repos/ontobio/venv/lib/python3.6/site-packages/cachier/core.py in _calc_entry(core, key, func, args, kwds)
74 core.mark_entry_being_calculated(key)
75 # _get_executor().submit(core.mark_entry_being_calculated, key)
---> 76 func_res = func(*args, **kwds)
77 core.set_entry(key, func_res)
78 # _get_executor().submit(core.set_entry, key, func_res)
~/repos/ontobio/ontobio/assoc_factory.py in bulk_fetch_cached(**args)
189 def bulk_fetch_cached(**args):
190 logging.info("Fetching assocs from store (will be cached)")
--> 191 return bulk_fetch(**args)
~/repos/ontobio/ontobio/golr/golr_associations.py in bulk_fetch(subject_category, object_category, taxon, rows, **kwargs)
144 rows=rows,
145 iterate=True,
--> 146 **kwargs)
147 logging.info("Rows retrieved: {}".format(len(assocs)))
148 if len(assocs) == 0:
~/repos/ontobio/ontobio/golr/golr_associations.py in search_associations_compact(**kwargs)
75 searchresult = search_associations(use_compact_associations=True,
76 facet_fields=[],
---> 77 **kwargs
78 )
79 return searchresult['compact_associations']
~/repos/ontobio/ontobio/golr/golr_associations.py in search_associations(**kwargs)
31 logging.info("CREATING_GOLR_QUERY {}".format(kwargs))
32 q = GolrAssociationQuery(**kwargs)
---> 33 return q.exec()
34
35 def get_objects_for_subject(subject=None,
~/repos/ontobio/ontobio/golr/golr_query.py in exec(self, **kwargs)
886 params = self.solr_params()
887 logging.info("PARAMS="+str(params))
--> 888 results = self.solr.search(**params)
889 n_docs = len(results.docs)
890 logging.info("Docs found: {}".format(results.hits))
~/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py in search(self, q, search_handler, **kwargs)
718 params = {'q': q}
719 params.update(kwargs)
--> 720 response = self._select(params, handler=search_handler)
721 decoded = self.decoder.decode(response)
722
~/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py in _select(self, params, handler)
416 # Typical case.
417 path = '%s/?%s' % (handler, params_encoded)
--> 418 return self._send_request('get', path)
419 else:
420 # Handles very long queries by submitting as a POST.
~/repos/ontobio/venv/lib/python3.6/site-packages/pysolr.py in _send_request(self, method, path, body, headers, files)
368 error_message = "Connection to server '%s' timed out: %s"
369 self.log.error(error_message, url, err, exc_info=True)
--> 370 raise SolrError(error_message % (url, err))
371 except requests.exceptions.ConnectionError as err:
372 error_message = "Failed to connect to server at '%s', are you sure that URL is correct? Checking it in a browser might help: %s"
SolrError: Connection to server 'http://golr.berkeleybop.org/select/?q=%2A%3A%2A&fq=document_category%3A%22annotation%22&fq=taxon_closure%3A%22NCBITaxon%3A7955%22&facet=off&facet.limit=0&facet.mincount=1&fl=bioentity%2Cbioentity_label%2Cqualifier%2Cannotation_class&rows=100000&wt=json' timed out: HTTPConnectionPool(host='golr.berkeleybop.org', port=80): Read timed out. (read timeout=4)
In [17]:
# TODO - do not re-read (presumably: avoid re-fetching the GAF on every run).
# NOTE(review): `afa` and `url` are not defined in any cell visible in this
# notebook (the factory is called `afactory` elsewhere) -- confirm the cell
# that defines them still exists, otherwise this fails on a fresh kernel.
# This assignment also rebinds `aset`, replacing the mouse association set
# created earlier in the notebook.
aset = afa.create_from_gaf(
    url,
    ontology=ont,
)
In [ ]:
In [20]:
from collections import defaultdict
# Map each (gene, GO term) pair to the set of ways the gene is linked to it:
#   'I' - the association's own object term (the gene is annotated to it)
#   'R' - a term returned by process_regulates() for the annotated term
#         (presumably the processes that the annotated term regulates --
#         confirm against the definition of process_regulates)
# NOTE(review): `assocs` and `process_regulates` must already be defined by
# earlier cells; they are not created in this cell.
rmap = defaultdict(set)
for a in assocs:
    gene = a['subject']['id']
    p = a['object']['id']
    up = process_regulates(p)
    for rp in up:
        rmap[(gene, rp)].add('R')
    # Recorded once per association, whether or not `up` is empty.
    rmap[(gene, p)].add('I')

# Number of distinct (gene, term) pairs seen.
len(rmap)
Out[20]:
334682
In [22]:
# Keep only the (key, tags) entries of `rmap` that carry more than one tag,
# i.e. pairs tagged with both 'I' and 'R'.
mixed = [entry for entry in rmap.items() if len(entry[1]) > 1]

# How many such pairs there are.
len(mixed)
Out[22]:
4921
In [25]:
# Show the first twenty mixed (key, tags) entries.
mixed[:20]
Out[25]:
[(('MGI:MGI:102890', 'GO:0007049'), {'I', 'R'}),
(('MGI:MGI:1915332', 'GO:0007254'), {'I', 'R'}),
(('MGI:MGI:1338799', 'GO:0006351'), {'I', 'R'}),
(('MGI:MGI:2135607', 'GO:0006476'), {'I', 'R'}),
(('MGI:MGI:96960', 'GO:0006914'), {'I', 'R'}),
(('MGI:MGI:2183426', 'GO:0060070'), {'I', 'R'}),
(('MGI:MGI:1860488', 'GO:0007596'), {'I', 'R'}),
(('MGI:MGI:1346348', 'GO:0006366'), {'I', 'R'}),
(('MGI:MGI:107179', 'GO:0003700'), {'I', 'R'}),
(('MGI:MGI:1927468', 'GO:0045087'), {'I', 'R'}),
(('MGI:MGI:2443584', 'GO:0006351'), {'I', 'R'}),
(('MGI:MGI:109452', 'GO:0051216'), {'I', 'R'}),
(('MGI:MGI:1913464', 'GO:0007049'), {'I', 'R'}),
(('MGI:MGI:1913744', 'GO:0006915'), {'I', 'R'}),
(('MGI:MGI:2684845', 'GO:0007623'), {'I', 'R'}),
(('MGI:MGI:1202890', 'GO:0001525'), {'I', 'R'}),
(('MGI:MGI:97844', 'GO:0006887'), {'I', 'R'}),
(('MGI:MGI:1891227', 'GO:0003677'), {'I', 'R'}),
(('MGI:MGI:99474', 'GO:0016567'), {'I', 'R'}),
(('MGI:MGI:108409', 'GO:0007186'), {'I', 'R'})]
In [24]:
# Look up the label of GO:0007049, one of the terms that appears in the
# `mixed` pairs listed in this notebook.
ont.label('GO:0007049')
Out[24]:
'cell cycle'
In [ ]:
Content source: biolink/ontobio
Similar notebooks: