In [1]:
from __future__ import print_function
import csv
import sys
import xml.etree.ElementTree as ET
import pandas as pd
import requests
if sys.version_info.major == 2:
from StringIO import StringIO
else:
from io import StringIO
In [2]:
def get_psiquic(service, query, full_url=False, **kwargs):
    """Issue a GET request to a PSICQUIC REST endpoint and return the raw body.

    Parameters
    ----------
    service : str
        When ``full_url`` is false, the service path placed between the EBI
        PSICQUIC base URL and ``query`` (joined with ``/``). When ``full_url``
        is true, the complete URL prefix; ``query`` is appended to it directly
        with no separator.
    query : str
        Final component of the request URL.
    full_url : bool, optional
        Selects how ``service`` is interpreted (see above). Defaults to False.
    **kwargs
        Sent as URL query parameters. ``format`` defaults to ``'tab27'`` when
        the caller does not supply it.

    Returns
    -------
    The response body exactly as exposed by ``Response.content`` (undecoded
    bytes; callers that need text must decode it themselves).

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within the timeout below.
    """
    kwargs.setdefault('format', 'tab27')
    if full_url:
        url = '%s%s' % (service, query)
    else:
        server = 'http://www.ebi.ac.uk/Tools/webservices/psicquic'
        url = '%s/%s/%s' % (server, service, query)
    # requests never times out by default; without this an unresponsive
    # service would block the notebook indefinitely.
    req = requests.get(url, params=kwargs, timeout=60)
    # No-op for successful responses, raises HTTPError for 4xx/5xx.
    req.raise_for_status()
    return req.content
In [3]:
def get_databases(db_xml):
    """Yield one record per service element of a parsed registry document.

    Parameters
    ----------
    db_xml : iterable of xml.etree.ElementTree.Element
        Typically the root element of the registry XML; each child is treated
        as one service, and each grandchild as one property of that service.

    Yields
    ------
    dict
        Keys ``name``, ``active``, ``org_url``, ``example`` and ``rest_url``.
        ``active`` is False only when the element text is exactly ``'false'``
        and True otherwise. Any property absent from a service is ``None``;
        values are never carried over from a previously yielded service.
    """
    # Namespace-stripped XML tag -> key in the yielded record.
    tag_to_key = {
        'name': 'name',
        'active': 'active',
        'restUrl': 'rest_url',
        'restExample': 'example',
        'organizationUrl': 'org_url',
    }
    for service in db_xml:
        # Fresh record for every service so that a missing element shows up
        # as None instead of leaking the previous service's value.
        record = {'name': None, 'active': None, 'org_url': None,
                  'example': None, 'rest_url': None}
        for elem in service:
            # Tags look like '{namespace}local'; keep only the local part.
            ns_clean_tag = elem.tag.rpartition('}')[2]
            key = tag_to_key.get(ns_clean_tag)
            if key is None:
                continue  # there are a few more properties we do not need
            if key == 'active':
                record[key] = elem.text != 'false'
            else:
                record[key] = elem.text
        yield record
# Download the registry status document as XML and parse it.
dbs_xml = get_psiquic('registry', 'registry', action='STATUS', format='xml')
dbs_xml_parsed = ET.fromstring(dbs_xml)
# One row per service yielded by get_databases.
dbs = pd.DataFrame.from_records(get_databases(dbs_xml_parsed))
# Widen the column display limit so long cell values are not cut at the default width.
pd.options.display.max_colwidth = 100
active_dbs = dbs[dbs.active == True]  # noqa: E712 - element-wise comparison
# Pass axis by keyword: the positional form was deprecated in pandas 1.3 and
# removed in pandas 2.0. Left as the last expression so the cell displays it.
active_dbs.drop(['active', 'example', 'rest_url'], axis=1)
Out[3]:
In [4]:
# Count query for "tp53": first against IntAct through the EBI base URL,
# then against every active service through its own REST URL.
count_params = {'format': 'count'}

req = get_psiquic('intact/webservices/current/search/query', 'tp53',
                  **count_params)
print(req)

for index, db in active_dbs.iterrows():
    service_url = db['rest_url']
    req = get_psiquic(service_url, 'query/tp53', full_url=True, **count_params)
    # The body is parsed as an integer before being reported.
    count = int(req)
    print('DB: %s, count: %d' % (db['name'], count))
In [5]:
# Fetch up to 1000 "tp53" records from IntAct in the default tab-separated format.
req = get_psiquic('intact/webservices/current/search/query', 'tp53',
                  firstResult=0, maxResults=1000)
# get_psiquic returns the undecoded response body. On Python 3 that is bytes,
# which io.StringIO rejects, so decode it first. On Python 2 the body is
# already a native str and is passed through untouched.
if not isinstance(req, str):
    req = req.decode('utf-8')
answer = csv.reader(StringIO(req), delimiter='\t')
# Collect the prefix before the first ':' in each of the first two columns.
db_types = set()
for record in answer:
    if len(record) < 2:
        continue  # blank or truncated line: nothing to extract
    db_types.add(record[0].split(':')[0])
    db_types.add(record[1].split(':')[0])
print(db_types)
In [ ]: