In [1]:
import requests
import json
# requests_toolbelt module is used to handle the multipart responses.
# Need to `pip install requests-toolbelt` from a terminal to install. This might need doing each time the Notebook pod starts
from requests_toolbelt.multipart import decoder
In [2]:
# Endpoint configuration for the Squonk job executor and Keycloak SSO.
base_url = 'https://jobexecutor.prod.openrisknet.org/jobexecutor/rest'
services_url = base_url + '/v1/services'        # list / describe available services
jobexecutor_url = base_url + '/v1/jobs'         # submit and manage jobs
keycloak_url = 'https://sso.prod.openrisknet.org/auth/realms/openrisknet/protocol/openid-connect/token'
# Set to False if self-signed certificates are being used.
tls_verify = True
In [3]:
# Sanity-check the service with the PING endpoint.
# A working deployment gives a 200 response with body 'OK';
# if this fails, nothing else in the notebook will work.
ping_url = base_url + '/ping'
print("Requesting GET " + ping_url)
resp = requests.get(ping_url, verify=tls_verify)
print('Response Code: ' + str(resp.status_code))
print(resp.text)
In [5]:
# Prompt for your Keycloak SSO credentials so a token can be requested.
# getpass hides the password as it is typed.
import getpass

username = input('Username')
password = getpass.getpass('Password')
In [87]:
# Obtain an access token from Keycloak using the password grant.
# The token has a finite lifetime: if later requests return a 401,
# re-run this cell to get a fresh token.
data = {'grant_type': 'password', 'client_id': 'squonk-jobexecutor', 'username': username, 'password': password}
# Pass verify=tls_verify for consistency with every other request in this
# notebook (the original omitted it, which breaks self-signed-cert setups).
kresp = requests.post(keycloak_url, data=data, verify=tls_verify)
j = kresp.json()
token = j['access_token']
token
Out[87]:
In [88]:
# Get a list of all the Squonk services that can be executed.
print("Requesting GET " + services_url)
jobs_resp = requests.get(services_url, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
# Renamed from `json` — that name shadowed the imported json module.
services = jobs_resp.json()
print(str(len(services)) + " services found")
print(services)
In [89]:
# Fetch the definition of a single service.
# Find the service ID from the list produced by the list-services cell.
#service_id = 'core.dataset.filter.slice.v1'
#service_id = 'pipelines.rdkit.conformer.basic'
service_id = 'pipelines.rdkit.o3da.basic'
url = services_url + '/' + service_id
print("Requesting GET " + url)
jobs_resp = requests.get(url, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
# Renamed from `json` — that name shadowed the imported json module.
service_def = jobs_resp.json()
print(service_def)
In [90]:
# Result of the request is an array of JobStatus objects.
# The job ID and status of each are listed.
print("Requesting GET " + jobexecutor_url)
jobs_resp = requests.get(jobexecutor_url, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
# Renamed from `json` — that name shadowed the imported json module.
job_statuses = jobs_resp.json()
print(str(len(job_statuses)) + " jobs found")
for status in job_statuses:
    print(status['jobId'] + ' ' + status['status'])
In [91]:
# The 'Dataset slice' takes a slice through a dataset specified by the number
# of records to skip and then the number to include.
# This is one of Squonk's 'internal' services.
# The job ID is stored in the job_id variable.
url = jobexecutor_url + '/core.dataset.filter.slice.v1'
# Fixed: the original printed jobexecutor_url here but POSTed to url.
print("Requesting POST " + url)
# Use `with` so the input files are closed after the request.
with open('nci10.data', 'rb') as data_file, open('nci10.metadata', 'rb') as metadata_file:
    files = {
        'options': '{"skip":2,"count":3}',
        'input_data': ('input_data', data_file, 'application/x-squonk-molecule-object+json'),
        'input_metadata': ('input_metadata', metadata_file, 'application/x-squonk-dataset-metadata+json')
    }
    # Do NOT set Content-Type manually: requests must generate the
    # multipart/form-data header itself so the boundary parameter is correct.
    jobs_resp = requests.post(url, files=files, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)
In [92]:
# Check the status of the job identified by job_id
# (probably the last job executed).
url = jobexecutor_url + '/' + job_id + '/status'
print("Requesting GET " + url)
jobs_resp = requests.get(url, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
# Renamed from `json` — that name shadowed the imported json module.
status_info = jobs_resp.json()
status_info
Out[92]:
In [93]:
# Fetch the results of the job identified by job_id (probably the last job run).
# The job's status must be 'RESULTS_READY' before results are available.
# The response is multipart, typically containing the job status, the results
# metadata and the results data. It can be fetched any number of times until
# the job is deleted.
results_url = '/'.join([jobexecutor_url, job_id, 'results'])
print("Requesting GET " + results_url)
auth_header = {'Authorization': 'bearer ' + token}
jobs_resp = requests.get(results_url, headers=auth_header, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
# requests_toolbelt splits the multipart body into its individual parts.
for part in decoder.MultipartDecoder.from_response(jobs_resp).parts:
    print(part.content)
    print(part.headers)
In [94]:
# Once you have fetched the results you MUST delete the job.
# The job is defined by the job_id variable and is probably the last job executed.
url = jobexecutor_url + '/' + job_id
print("Requesting DELETE " + url)
jobs_resp = requests.delete(url, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
# Renamed from `json` — that name shadowed the imported json module.
delete_status = jobs_resp.json()
# .get() returns None when 'status' is absent, so this is equivalent to the
# original 'status' in ... and ... == 'COMPLETED' check.
if delete_status.get('status') == 'COMPLETED':
    print('Job deleted')
else:
    print('Problem deleting job')
In [95]:
# Delete all jobs.
# First get the list of current jobs.
jobs_resp = requests.get(jobexecutor_url, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
# Renamed from `json` — that name shadowed the imported json module.
jobs = jobs_resp.json()
print('Found ' + str(len(jobs)) + ' jobs')
# Now go through them and delete each one.
# If successful the status of the job will then be COMPLETED.
for job in jobs:
    # Renamed from `id` — that name shadowed the builtin id().
    jid = job['jobId']
    url = jobexecutor_url + '/' + jid
    print("Deleting " + url)
    jobs_resp = requests.delete(url, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
    j = jobs_resp.json()
    print("Status: " + j['status'])
In addition to the simple 'dataset slice' service, many more meaningful ones are available.
Here are some examples illustrating the different categories of Squonk services:
Execute one of these instead of the dataset slice one above.
In [69]:
# The 'Lipinski filter' calculates the classical rule-of-five properties and
# allows filtering based on them.
# Implementations exist for ChemAxon and RDKit; here we use the RDKit one.
# The default filter is the classical drug-likeness one defined by Lipinski,
# but you can specify your own criteria instead.
# This is one of Squonk's 'HTTP' services.
# The job ID is stored in the job_id variable.
url = jobexecutor_url + '/rdkit.calculators.lipinski'
print("Requesting POST " + url)
# Use `with` so the input files are closed after the request.
with open('nci10.data', 'rb') as data_file, open('nci10.metadata', 'rb') as metadata_file:
    files = {
        'options': '{"filterMode":"INCLUDE_PASS"}',
        'input_data': ('input_data', data_file, 'application/x-squonk-molecule-object+json'),
        'input_metadata': ('input_metadata', metadata_file, 'application/x-squonk-dataset-metadata+json')
    }
    # Do NOT set Content-Type manually: requests must generate the
    # multipart/form-data header itself so the boundary parameter is correct.
    jobs_resp = requests.post(url, files=files, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)
In [23]:
# Run the Lipinski filter, passing the input data as SDF instead of a
# Squonk dataset.
url = jobexecutor_url + '/rdkit.calculators.lipinski'
print("Requesting POST " + url)
# Use `with` so the input file is closed after the request.
with open('Kinase_inhibs.sdf', 'rb') as sdf_file:
    files = {
        'options': '{"filterMode":"INCLUDE_PASS"}',
        'input': ('input', sdf_file, 'chemical/x-mdl-sdfile')
    }
    # Do NOT set Content-Type manually: requests must generate the
    # multipart/form-data header itself so the boundary parameter is correct.
    jobs_resp = requests.post(url, files=files, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)
In [26]:
# SuCOS scoring, passing both inputs as SDF.
url = jobexecutor_url + '/pipelines.rdkit.sucos.basic'
print("Requesting POST " + url)
# Use `with` so the input files are closed after the request.
with open('mols.sdf', 'rb') as mols_file, open('benzene.sdf', 'rb') as target_file:
    files = {
        'options': '{}',
        'input': ('input', mols_file, 'chemical/x-mdl-sdfile'),
        'target': ('target', target_file, 'chemical/x-mdl-sdfile')
    }
    # Do NOT set Content-Type manually: requests must generate the
    # multipart/form-data header itself so the boundary parameter is correct.
    jobs_resp = requests.post(url, files=files, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)
In [74]:
# open3dAlign scoring, passing both inputs as SDF.
# Passing the queryMol as pyrimethamine.mol does not work — it needs to be
# converted to SDF first.
url = jobexecutor_url + '/pipelines.rdkit.o3da.basic'
print("Requesting POST " + url)
# Use `with` so the input files are closed after the request.
with open('dhfr_3d.sdf', 'rb') as input_file, open('pyrimethamine.sdf', 'rb') as query_file:
    files = {
        'options': '{"arg.crippen":"false"}',
        'input': ('input', input_file, 'chemical/x-mdl-sdfile'),
        'queryMol': ('queryMol', query_file, 'chemical/x-mdl-sdfile')
    }
    # Do NOT set Content-Type manually: requests must generate the
    # multipart/form-data header itself so the boundary parameter is correct.
    jobs_resp = requests.post(url, files=files, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)
In [64]:
# open3dAlign scoring, passing the inputs as a (gzipped) Squonk dataset and
# the query as SDF.
url = jobexecutor_url + '/pipelines.rdkit.o3da.basic'
print("Requesting POST " + url)
# Use `with` so the input files are closed after the request.
with open('dhfr_3d.data.gz', 'rb') as data_file, \
     open('dhfr_3d.metadata', 'rb') as metadata_file, \
     open('pyrimethamine.sdf', 'rb') as query_file:
    files = {
        'options': '{"arg.crippen":"false"}',
        'input_data': ('input_data', data_file, 'application/x-squonk-molecule-object+json'),
        'input_metadata': ('input_metadata', metadata_file, 'application/x-squonk-dataset-metadata+json'),
        'queryMol': ('queryMol', query_file, 'chemical/x-mdl-sdfile')
    }
    # Do NOT set Content-Type manually: requests must generate the
    # multipart/form-data header itself so the boundary parameter is correct.
    jobs_resp = requests.post(url, files=files, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)
In [19]:
# The 'Conformer generator' uses the RDKit ETKDG conformer generation tool to
# generate a number of conformers for each input structure.
# This is one of Squonk's 'Docker' services.
# The job ID is stored in the job_id variable.
service_id = 'pipelines.rdkit.conformer.basic'
url = jobexecutor_url + '/' + service_id
# Log the target URL, consistent with the other job-submission cells.
print("Requesting POST " + url)
# Use `with` so the input files are closed after the request.
with open('nci10.data', 'rb') as data_file, open('nci10.metadata', 'rb') as metadata_file:
    files = {
        'options': '{"arg.num":10,"arg.method":"RMSD"}',
        'input_data': ('input_data', data_file, 'application/x-squonk-molecule-object+json'),
        'input_metadata': ('input_metadata', metadata_file, 'application/x-squonk-dataset-metadata+json')
    }
    # Do NOT set Content-Type manually: requests must generate the
    # multipart/form-data header itself so the boundary parameter is correct.
    jobs_resp = requests.post(url, files=files, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)
In [171]:
# Similarity screening using RDKit.
# This is one of Squonk's 'Nextflow' services.
# The job ID is stored in the job_id variable.
# NOTE: THIS IS NOT WORKING AS THE QUERY STRUCTURE IS NOT BEING PASSED CORRECTLY
service_id = 'pipelines.rdkit.screen.basic'
url = jobexecutor_url + '/' + service_id
print("Requesting POST " + url)
# Use `with` so the input files are closed after the request.
with open('nci10_data.json', 'rb') as data_file, open('nci10_meta.json', 'rb') as metadata_file:
    files = {
        'options': '{"arg.query":{"source":"CC1=CC(=O)C=CC1=O","format":"smiles"},"arg.sim":{"minValue":0.5,"maxValue":1.0}}',
        'input_data': ('input_data', data_file, 'application/x-squonk-molecule-object+json'),
        'input_metadata': ('input_metadata', metadata_file, 'application/x-squonk-dataset-metadata+json')
    }
    # Do NOT set Content-Type manually: requests must generate the
    # multipart/form-data header itself so the boundary parameter is correct.
    jobs_resp = requests.post(url, files=files, headers={'Authorization': 'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)
In [ ]: