In [103]:
import requests
import json
from tabulate import tabulate
Our list of targets
In [104]:
# Target identifiers (ENSG... IDs) to look up
targets = [
    'ENSG00000069696',
    'ENSG00000144285',
]
# Wrap every ID in double quotes, then join with ", " so the result can be
# spliced straight into a hand-written JSON array.
quoted_targets = ['"{0}"'.format(target_id) for target_id in targets]
targets_string = ', '.join(quoted_targets)
Make the API call with our list of targets to find the associations. Set facets to true.
In [105]:
# Association filter endpoint; the query is sent as a JSON document in the POST body.
url = 'https://www.targetvalidation.org/api/latest/public/association/filter'
# Declare the body type as well as the accepted response type.
headers = {"Accept": "application/json", "Content-Type": "application/json"}
# Serialise the payload with json.dumps instead of concatenating strings, so the
# target IDs are always quoted and escaped as valid JSON.
data = json.dumps({"target": targets, "facets": True})
response = requests.post(url, headers=headers, data=data)
# Stop here on an HTTP error status rather than failing further down while
# parsing or indexing an error body.
response.raise_for_status()
output = response.json()
Print out all the JSON returned, just for reference.
In [106]:
# Optional debugging aid: uncomment the next line to dump the whole response.
# print(json.dumps(output, indent=2))
The therapeutic area facets look interesting - let's iterate through these and display them.
In [107]:
# One summary record per bucket of the therapeutic-area facet
therapeuticareas = [
    {
        'target_count': area_bucket['unique_target_count']['value'],
        'disease_count': area_bucket['unique_disease_count']['value'],
        'therapeutic_area': area_bucket['label'],
        'key': area_bucket['key'],
    }
    for area_bucket in output['facets']['therapeutic_area']['buckets']
]
Sort by target count and then disease count
In [108]:
def _area_rank(area):
    """Sort key for a therapeutic-area record: target count, then disease count."""
    return (area['target_count'], area['disease_count'])


# Largest counts first
therapeuticareas = sorted(therapeuticareas, key=_area_rank, reverse=True)
Use the Python `tabulate` library to render a pretty table of our extracted therapeutic areas.
Note: you may need to run `pip install tabulate` in your Python environment.
In [109]:
# Call print as a function: the single-argument parenthesised form behaves the
# same on Python 2 and is required on Python 3.
print(tabulate(therapeuticareas, headers="keys", tablefmt="grid"))
Let's just consider the top 5 therapeutic areas.
In [110]:
# The list was sorted largest-first in an earlier cell, so the first five
# entries are the top five.
therapeuticareas = therapeuticareas[:5]
# print as a function so the cell runs on Python 3 as well as Python 2.
print(tabulate(therapeuticareas, headers="keys", tablefmt="grid"))
Now for each of those, identify the top 5 diseases. Unfortunately we don't get the disease names in the facets, just the codes. If this is the right approach, does it call for an API change?
In [111]:
# For each selected therapeutic area, re-run the association query restricted to
# that area and print its five highest-ranked diseases.
for therapeuticarea in therapeuticareas:
    print("Therapeutic area: " + therapeuticarea['therapeutic_area'])
    # json.dumps quotes and escapes the target IDs and the area key, which the
    # hand-concatenated string did not.
    area_query = json.dumps({
        "target": targets,
        "facets": True,
        "therapeutic_area": [therapeuticarea['key']],
    })
    # Loop-local names keep the notebook-level `data`, `response` and `output`
    # from the first request intact, so earlier cells can be re-run safely.
    area_response = requests.post(url, headers=headers, data=area_query)
    # Surface HTTP errors here instead of failing while indexing an error body.
    area_response.raise_for_status()
    area_output = area_response.json()
    # The disease facet only carries codes (`key`), not names.
    diseases = [
        {
            'target_count': bucket['unique_target_count']['value'],
            'doc_count': bucket['doc_count'],
            'key': bucket['key'],
        }
        for bucket in area_output['facets']['disease']['buckets']
    ]
    # Sort by target count, then document count (largest first), and take the top 5
    diseases = sorted(
        diseases,
        key=lambda k: (k['target_count'], k['doc_count']),
        reverse=True,
    )[:5]
    print(tabulate(diseases, headers="keys", tablefmt="grid"))
    print("")
In [ ]: