Executing Squonk services

This notebook is an example of executing Squonk services using Python's requests module.

It assumes you are executing against the JobExector service running in an OpenShift environment.


In [1]:
import requests
import json

# requests_toolbelt module is used to handle the multipart responses.
# Need to `pip install requests-toolbelt` from a terminal to install. This might need doing each time the Notebook pod starts
from requests_toolbelt.multipart import decoder

In [2]:
# Define some URLs and params
base_url = 'https://jobexecutor.prod.openrisknet.org/jobexecutor/rest'
services_url = base_url + '/v1/services'
jobexecutor_url = base_url + '/v1/jobs'
keycloak_url = 'https://sso.prod.openrisknet.org/auth/realms/openrisknet/protocol/openid-connect/token'

# set to False if self signed certificates are being used
tls_verify=True

Check basic operation


In [3]:
# Test the PING service. Should give a 200 response and return 'OK'.
# If not then nothing else is going to work.

url = base_url + '/ping'

print("Requesting GET " + url)
resp = requests.get(url, verify=tls_verify)
print('Response Code: ' + str(resp.status_code))
print(resp.text)


Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/ping
Response Code: 200
OK

Authentication


In [5]:
# Need to specify your Keycloak SSO username and password so that we can get a token

import getpass
username = input('Username')
password = getpass.getpass('Password')



In [87]:
# Get token from Keycloak. This will have a finite lifetime.
# If your requests are getting a 401 error your token has probably expired.

data = {'grant_type': 'password', 'client_id': 'squonk-jobexecutor', 'username': username, 'password': password}
kresp = requests.post(keycloak_url, data = data)
j = kresp.json()
token = j['access_token']
token


Out[87]:
'eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJoX2p2Z3I3bWZ4VGJ3OHJLNW9Fb3dWWUVHUms2Z0hsLW9sSjdPUnQ3V2QwIn0.eyJqdGkiOiI5ZDFmYjQzYS02ZjMzLTQ2NzEtYjI4Zi04NGU3NDU1NjdlMWUiLCJleHAiOjE1NzIzNjYwNDYsIm5iZiI6MCwiaWF0IjoxNTcyMzU4ODQ2LCJpc3MiOiJodHRwczovL3Nzby5wcm9kLm9wZW5yaXNrbmV0Lm9yZy9hdXRoL3JlYWxtcy9vcGVucmlza25ldCIsImF1ZCI6ImFjY291bnQiLCJzdWIiOiI1Yzk2Y2IzMC05YmNiLTQ2NmUtOTg5NS02NmQwOGY5NmE3MzUiLCJ0eXAiOiJCZWFyZXIiLCJhenAiOiJzcXVvbmstam9iZXhlY3V0b3IiLCJhdXRoX3RpbWUiOjAsInNlc3Npb25fc3RhdGUiOiIyNDI5YmMxZS0zZmRlLTQ1MWEtYmRkNS03OWYyM2Y0OWFhNTYiLCJhY3IiOiIxIiwiYWxsb3dlZC1vcmlnaW5zIjpbImh0dHA6Ly9qb2JleGVjdXRvci5wcm9kLm9wZW5yaXNrbmV0Lm9yZyIsImh0dHBzOi8vam9iZXhlY3V0b3IucHJvZC5vcGVucmlza25ldC5vcmciXSwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbInN0YW5kYXJkLXVzZXIiLCJvZmZsaW5lX2FjY2VzcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJwcm9maWxlIGVtYWlsIiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJuYW1lIjoiVXNlciBPbmUiLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJ1c2VyMSIsImdpdmVuX25hbWUiOiJVc2VyIiwiZmFtaWx5X25hbWUiOiJPbmUifQ.IF0QF9qclSLP7gTwnFW-1wFPVTIj6VOLYNud8EvTx11786EsDXb-jr2V3q58JYpn6xYIjHq9w3lyPsbhoTCBOebeQJc89rubf-s0AitNn8Z_MWNInzAKUPvuH_IiVZ6UzBOwrNUgQcmvY5h32r8zhKm-jOl35WhEn4DJJpAKfv2Bf1usQ-pjXg2BY8uN2MCQ2nPBM5gPsfcdLWvdxymA-laWO1urAh40zj0tgoFgbF_bhbv5MlE7_Nx0BsOeHwqSMQvmGVKTKi26hTZJXOYRhCm3WZfjgrls_t1hcQl9ZNLo4D5WhpQOp4NggaekUi1oNGs4Wx5cx02ts39SSSQK5Q'

List all services


In [88]:
# Get a list of all the Squonk services that can be executed.
# 

print("Requesting GET " + services_url)
jobs_resp = requests.get(services_url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
print(str(len(json)) + " services found")
print(json)


Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/services
Response Code: 200
75 services found
[{'id': 'cdk.dataset.convert.molecule.format', 'name': 'Convert molecule format', 'description': 'Convert molecule format'}, {'id': 'pipelines.pli.v1', 'name': 'PLI docking scoring', 'description': 'PLI docking scoring'}, {'id': 'chemaxon.calculators.verify', 'name': 'Verify structure (ChemAxon)', 'description': 'Verify structure (ChemAxon)'}, {'id': 'cdk.donors_acceptors', 'name': 'HBA & HBD (CDK)', 'description': 'HBA & HBD (CDK)'}, {'id': 'pipelines.rdkit.maxminpicker.simple.1', 'name': 'RDKitMaxMinPickerSimple', 'description': 'RDKitMaxMinPickerSimple'}, {'id': 'chemaxon.calculators.logd', 'name': 'LogD (CXN)', 'description': 'LogD (CXN)'}, {'id': 'chemaxon.calculators.bpka', 'name': 'Basic pKa (CXN)', 'description': 'Basic pKa (CXN)'}, {'id': 'chemaxon.calculators.druglikefilter', 'name': 'Drug-like Filter (CXN)', 'description': 'Drug-like Filter (CXN)'}, {'id': 'cdk.export.sdf', 'name': 'SDF Export (CDK)', 'description': 'SDF Export (CDK)'}, {'id': 'chemaxon.calculators.logp', 'name': 'LogP (CXN)', 'description': 'LogP (CXN)'}, {'id': 'chemaxon.calculators.logs', 'name': 'LogS (CXN)', 'description': 'LogS (CXN)'}, {'id': 'chemaxon.calculators.ghosefilter', 'name': 'Ghose Filter (CXN)', 'description': 'Ghose Filter (CXN)'}, {'id': 'cdk.wiener_numbers', 'name': 'Wiener Numbers (CDK)', 'description': 'Wiener Numbers (CDK)'}, {'id': 'pipelines.xchem.smarts_filter.reaction', 'name': 'XChemReactionSmartsFilter', 'description': 'XChemReactionSmartsFilter'}, {'id': 'pipelines.docking.rdock.basic.v2', 'name': 'rDock docking', 'description': 'rDock docking'}, {'id': 'chemaxon.clustering.sperex', 'name': 'SpereEx Clustering (CXN)', 'description': 'SpereEx Clustering (CXN)'}, {'id': 'rdkit.calculators.rotatable_bonds', 'name': 'Rotatable bonds (RDKit)', 'description': 'Rotatable bonds (RDKit)'}, {'id': 'pipelines.xchem.sanify.standardise.flatkinson.v1', 'name': 'FlatkinsonStandardiser', 'description': 'FlatkinsonStandardiser'}, {'id': 'rdkit.chemcentral.search.similarity', 'name': 'ChemCentral similarity search', 'description': 'ChemCentral similarity search'}, {'id': 'ocl.psa', 'name': 'PSA (OpenChemLib)', 'description': 'PSA (OpenChemLib)'}, {'id': 'rdkit.calculators.formal_charge', 'name': 'Formal Charge (RDKit)', 'description': 'Formal Charge (RDKit)'}, {'id': 'pipelines.rdkit.o3da.basic', 'name': 'RDKitOpen3DAlign', 'description': 'RDKitOpen3DAlign'}, {'id': 'chemaxon.calculators.lipinski', 'name': 'Lipinski (CXN)', 'description': 'Lipinski (CXN)'}, {'id': 'cdk.logp', 'name': 'LogP (CDK)', 'description': 'LogP (CDK)'}, {'id': 'chemaxon.calculators.veberfilter', 'name': 'Veber Filter (CXN)', 'description': 'Veber Filter (CXN)'}, {'id': 'pipelines.rdkit.cluster.butina', 'name': 'RDKitButinaClustering', 'description': 'RDKitButinaClustering'}, {'id': 'rdkit.calculators.frac_c_sp3', 'name': 'Frac sp3 C (RDKit)', 'description': 'Frac sp3 C (RDKit)'}, {'id': 'core.dataset.uuidfilter.v1', 'name': 'DatasetUUIDFilter', 'description': 'DatasetUUIDFilter'}, {'id': 'rdkit.calculators.ghose', 'name': 'Ghose filter (RDKit)', 'description': 'Ghose filter (RDKit)'}, {'id': 'pipelines.dmpk.sygnature.tmax_cmax_sim.1', 'name': 'TmaxCmaxSimulation', 'description': 'TmaxCmaxSimulation'}, {'id': 'chemaxon.calculators.cnsMpo', 'name': 'CNS MPO (CXN)', 'description': 'CNS MPO (CXN)'}, {'id': 'rdkit.calculators.canonical_smiles', 'name': 'Canonical Smiles (RDKit)', 'description': 'Canonical Smiles (RDKit)'}, {'id': 'rdkit.calculators.donors_acceptors', 'name': 'HBA & HBD (RDKit)', 'description': 'HBA & HBD (RDKit)'}, {'id': 'core.dataset.filter.slice.v1', 'name': 'Dataset slice selector', 'description': 'Dataset slice selector'}, {'id': 'chemaxon.calculators.atomcount', 'name': 'Atom Count (CXN)', 'description': 'Atom Count (CXN)'}, {'id': 'rdkit.calculators.molar_refractivity', 'name': 'Molar Refractivity (RDKit)', 'description': 'Molar Refractivity (RDKit)'}, {'id': 'pipelines.rdkit.maxminpicker.enrich.1', 'name': 'RDKitMaxMinPickerEnrich', 'description': 'RDKitMaxMinPickerEnrich'}, {'id': 'core.dataset.merger.v1', 'name': 'DatasetMerger', 'description': 'DatasetMerger'}, {'id': 'chemaxon.calculators.reosfilter', 'name': 'REOS (CXN)', 'description': 'REOS (CXN)'}, {'id': 'pipelines.rdkit.screen.multi', 'name': 'RDKitMultiSimilarityScreening', 'description': 'RDKitMultiSimilarityScreening'}, {'id': 'rdkit.calculators.logp', 'name': 'LogP (RDKit)', 'description': 'LogP (RDKit)'}, {'id': 'rdkit.calculators.rings', 'name': 'Ring Count (RDKit)', 'description': 'Ring Count (RDKit)'}, {'id': 'pipelines.xchem.sanify.enumerate.v1', 'name': 'MoleculeEnumerator', 'description': 'MoleculeEnumerator'}, {'id': 'pipelines.rdkit.sucos.basic', 'name': 'RDKitSuCOS', 'description': 'RDKitSuCOS'}, {'id': 'rdkit.calculators.tpsa', 'name': 'TPSA (RDKit)', 'description': 'TPSA (RDKit)'}, {'id': 'chemaxon.calculators.apka', 'name': 'Acidic pKa (CXN)', 'description': 'Acidic pKa (CXN)'}, {'id': 'ocl.calculators.verify', 'name': 'Verify structure (OCL)', 'description': 'Verify structure (OCL)'}, {'id': 'core.dataset.filter.random.v1', 'name': 'Dataset random selector', 'description': 'Dataset random selector'}, {'id': 'chemaxon.screening.pharmacophore', 'name': 'Pharmacophore Screen (CXN)', 'description': 'Pharmacophore Screen (CXN)'}, {'id': 'pipelines.rdkit.conformer.constrained', 'name': 'RDKitConstrainedConformers', 'description': 'RDKitConstrainedConformers'}, {'id': 'core.dataset.enricher.v1', 'name': 'DatasetEnricher', 'description': 'DatasetEnricher'}, {'id': 'rdkit.chemcentral.multisearch', 'name': 'ChemCentral multi search', 'description': 'ChemCentral multi search'}, {'id': 'chemaxon.screening.ecpf4', 'name': 'ECFP4 Screen (CXN)', 'description': 'ECFP4 Screen (CXN)'}, {'id': 'rdkit.calculators.reos', 'name': 'REOS (RDKit)', 'description': 'REOS (RDKit)'}, {'id': 'rdkit.calculators.veber', 'name': 'Veber filter (RDKit)', 'description': 'Veber filter (RDKit)'}, {'id': 'rdkit.chemcentral.search.structure', 'name': 'ChemCentral structure search', 'description': 'ChemCentral structure search'}, {'id': 'smartcyp.predict', 'name': 'SMARTCyp', 'description': 'SMARTCyp'}, {'id': 'pipelines.rdkit.cluster.butina.matrix.v1', 'name': 'RDKitClusterMatrix', 'description': 'RDKitClusterMatrix'}, {'id': 'pipelines.rdkit.screen.basic', 'name': 'RDKitSimilarityScreening', 'description': 'RDKitSimilarityScreening'}, {'id': 'pipelines.xchem.obabel.prepare.pdb.v1', 'name': 'OBabelPreparePDB', 'description': 'OBabelPreparePDB'}, {'id': 'rdkit.calculators.ruleofthree', 'name': 'Rule of 3 (RDKit)', 'description': 'Rule of 3 (RDKit)'}, {'id': 'pipelines.docking.smog2016.v1', 'name': 'SMoG2016 docking scoring', 'description': 'SMoG2016 docking scoring'}, {'id': 'pipelines.xchem.pbf_ev.v1', 'name': 'PBF EV analyisis', 'description': 'PBF EV analyisis'}, {'id': 'chemaxon.calculators.kidsMpo', 'name': 'KiDS MPO (CXN)', 'description': 'KiDS MPO (CXN)'}, {'id': 'cdk.calculators.verify', 'name': 'Verify structure (CDK)', 'description': 'Verify structure (CDK)'}, {'id': 'pipelines.rdkit.cluster.subsetpicker.simple', 'name': 'RDKitDiverseSubsetPicker', 'description': 'RDKitDiverseSubsetPicker'}, {'id': 'rdkit.calculators.lipinski', 'name': 'Lipinski (RDKit)', 'description': 'Lipinski (RDKit)'}, {'id': 'pipelines.xchem.reaction.maker.v1', 'name': 'XChemReactionMaker', 'description': 'XChemReactionMaker'}, {'id': 'chemaxon.calculators.ruleofthreefilter', 'name': 'Rule of 3 Filter (CXN)', 'description': 'Rule of 3 Filter (CXN)'}, {'id': 'ocl.logp', 'name': 'LogP (OpenChemLib)', 'description': 'LogP (OpenChemLib)'}, {'id': 'ocl.logs', 'name': 'LogS (OpenChemLib)', 'description': 'LogS (OpenChemLib)'}, {'id': 'pipelines.xchem.sanify.standardise.molvs.v1', 'name': 'MolVSStandardiser', 'description': 'MolVSStandardiser'}, {'id': 'docker.generic.dataset.v1', 'name': 'GenericDockerProcessDataset', 'description': 'GenericDockerProcessDataset'}, {'id': 'rdkit.calculators.verify', 'name': 'Verify structure (RDKit)', 'description': 'Verify structure (RDKit)'}, {'id': 'pipelines.rdkit.conformer.basic', 'name': 'RDKitConformers', 'description': 'RDKitConformers'}]

Getting details of a particular service


In [89]:
# find the service ID from the list in the list services cell
#service_id = 'core.dataset.filter.slice.v1'
#service_id = 'pipelines.rdkit.conformer.basic'
service_id = 'pipelines.rdkit.o3da.basic'

url = services_url + '/' + service_id
print("Requesting GET " + url)
jobs_resp = requests.get(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
print(json)


Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/services/pipelines.rdkit.o3da.basic
Response Code: 200
{'id': 'pipelines.rdkit.o3da.basic', 'name': 'RDKitOpen3DAlign', 'description': 'Generate 3D alignments using Open3DAlign in RDKit', 'tags': ['rdkit', 'conformer', 'alignment', 'open3dalign', '3d', 'docker'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}, {'name': 'queryMol', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.qmolidx', 'label': 'Query mol index', 'description': 'Query molecule index', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Float'}, 'key': 'arg.threshold', 'label': 'O3DAlign score threshold', 'description': 'Keep molecules with O3DAlign scores within this range of the score for aligning the query to itself', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Boolean'}, 'key': 'arg.crippen', 'label': 'Use Crippen (logP) contributions', 'description': 'Use Crippen (logP) contributions', 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.num', 'label': 'Number of conformers', 'description': 'Number of conformers to generate if not already 3D', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}], 'executorClassName': 'org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep'}

List all jobs


In [90]:
# Result of the request is an array of JobStatus objects.
# The job ID and status are listed

print("Requesting GET " + jobexecutor_url)
jobs_resp = requests.get(jobexecutor_url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
print(str(len(json)) + " jobs found")
for status in json:
    print(status['jobId'] + ' ' + status['status'])


Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs
Response Code: 200
2 jobs found
c7f8199b-02cf-43e0-86b4-3bab89781c7f RESULTS_READY
1a46a1fc-193a-4f72-acd7-c81de1f096d4 RESULTS_READY

Execute the 'Dataset Slice' service


In [91]:
# The 'Datast slice' takes a slice through a dataset specified by the number of records to skip and then the number to include.
# This is one of Squonk's 'internal' services.
# The job ID is stored in the job_id variable.

url = jobexecutor_url + '/core.dataset.filter.slice.v1'

data = {
    'options': '{"skip":2,"count":3}',
    'input_data': ('input_data', open('nci10.data', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('nci10.metadata', 'rb'), 'application/x-squonk-dataset-metadata+json')
}

print("Requesting POST " + jobexecutor_url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)


Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs
Response Code: 201
{'jobId': 'fd0bc3b5-a9e8-4121-afd1-5d410d8297df', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572358867584, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DefaultServiceDescriptor', 'serviceConfig': {'id': 'core.dataset.filter.slice.v1', 'name': 'Dataset slice selector', 'description': 'Generate a defined slice of the dataset', 'tags': ['filter', 'slice', 'dataset'], 'icon': 'icons/filter.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-basic+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.BasicObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-basic+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.BasicObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'skip', 'label': 'Number to skip', 'description': 'The number of records to skip', 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'count', 'label': 'Number to include', 'description': 'The number of records to include after skipping', 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}], 'status': 'ACTIVE', 'statusLastChecked': 1571071494011, 'executorClassName': 'org.squonk.execution.steps.impl.DatasetSelectSliceStep'}}, 'options': {'skip': 2, 'count': 3}, 'jobId': 'fd0bc3b5-a9e8-4121-afd1-5d410d8297df'}, 'events': []}

JobID: fd0bc3b5-a9e8-4121-afd1-5d410d8297df

Get the status of the current job


In [92]:
# The job is defined by the job_id variable and is probably the last job executed
url = jobexecutor_url + '/' + job_id + '/status'
print("Requesting GET " + url )
jobs_resp = requests.get(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
json


Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/fd0bc3b5-a9e8-4121-afd1-5d410d8297df/status
Response Code: 200
Out[92]:
{'jobId': 'fd0bc3b5-a9e8-4121-afd1-5d410d8297df',
 'username': 'user1',
 'status': 'RESULTS_READY',
 'totalCount': 0,
 'processedCount': -3,
 'errorCount': -3,
 'started': 1572358867584,
 'completed': 1572358868113,
 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition',
  'serviceDescriptor': {'@class': 'org.squonk.core.DefaultServiceDescriptor',
   'serviceConfig': {'id': 'core.dataset.filter.slice.v1',
    'name': 'Dataset slice selector',
    'description': 'Generate a defined slice of the dataset',
    'tags': ['filter', 'slice', 'dataset'],
    'icon': 'icons/filter.png',
    'inputDescriptors': [{'name': 'input',
      'mediaType': 'application/x-squonk-dataset-basic+json',
      'primaryType': 'org.squonk.dataset.Dataset',
      'secondaryType': 'org.squonk.types.BasicObject'}],
    'outputDescriptors': [{'name': 'output',
      'mediaType': 'application/x-squonk-dataset-basic+json',
      'primaryType': 'org.squonk.dataset.Dataset',
      'secondaryType': 'org.squonk.types.BasicObject'}],
    'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor',
      'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor',
       'type': 'java.lang.Integer'},
      'key': 'skip',
      'label': 'Number to skip',
      'description': 'The number of records to skip',
      'visible': True,
      'editable': True,
      'minValues': 1,
      'maxValues': 1,
      'modes': ['User']},
     {'@class': 'org.squonk.options.OptionDescriptor',
      'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor',
       'type': 'java.lang.Integer'},
      'key': 'count',
      'label': 'Number to include',
      'description': 'The number of records to include after skipping',
      'visible': True,
      'editable': True,
      'minValues': 1,
      'maxValues': 1,
      'modes': ['User']}],
    'status': 'ACTIVE',
    'statusLastChecked': 1571071494011,
    'executorClassName': 'org.squonk.execution.steps.impl.DatasetSelectSliceStep'}},
  'options': {'skip': 2, 'count': 3},
  'jobId': 'fd0bc3b5-a9e8-4121-afd1-5d410d8297df'},
 'events': ['Results ready']}

Get the results of a job.


In [93]:
# The job is defined by the job_id variable and is probably the last job executed.
# The status of the job needs to be 'RESULTS_READY'
# The response is a multipart response, typically containing the job status, the results metadata and the results data.
# This method can be called for a job any number of times until the job is deleted.

url = jobexecutor_url + '/' + job_id + '/results'
print("Requesting GET " + url )
jobs_resp = requests.get(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
multipart_data = decoder.MultipartDecoder.from_response(jobs_resp)
for part in multipart_data.parts:
    print(part.content)
    print(part.headers)


Requesting GET https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/fd0bc3b5-a9e8-4121-afd1-5d410d8297df/results
Response Code: 200
b'{"jobId":"fd0bc3b5-a9e8-4121-afd1-5d410d8297df","username":"user1","status":"RESULTS_READY","totalCount":0,"processedCount":-3,"errorCount":-3,"started":1572358867584,"completed":1572358868113,"jobDefinition":{"@class":"org.squonk.jobdef.ExternalJobDefinition","serviceDescriptor":{"@class":"org.squonk.core.DefaultServiceDescriptor","serviceConfig":{"id":"core.dataset.filter.slice.v1","name":"Dataset slice selector","description":"Generate a defined slice of the dataset","tags":["filter","slice","dataset"],"icon":"icons/filter.png","inputDescriptors":[{"name":"input","mediaType":"application/x-squonk-dataset-basic+json","primaryType":"org.squonk.dataset.Dataset","secondaryType":"org.squonk.types.BasicObject"}],"outputDescriptors":[{"name":"output","mediaType":"application/x-squonk-dataset-basic+json","primaryType":"org.squonk.dataset.Dataset","secondaryType":"org.squonk.types.BasicObject"}],"optionDescriptors":[{"@class":"org.squonk.options.OptionDescriptor","typeDescriptor":{"@class":"org.squonk.options.SimpleTypeDescriptor","type":"java.lang.Integer"},"key":"skip","label":"Number to skip","description":"The number of records to skip","visible":true,"editable":true,"minValues":1,"maxValues":1,"modes":["User"]},{"@class":"org.squonk.options.OptionDescriptor","typeDescriptor":{"@class":"org.squonk.options.SimpleTypeDescriptor","type":"java.lang.Integer"},"key":"count","label":"Number to include","description":"The number of records to include after skipping","visible":true,"editable":true,"minValues":1,"maxValues":1,"modes":["User"]}],"status":"ACTIVE","statusLastChecked":1571071494011,"executorClassName":"org.squonk.execution.steps.impl.DatasetSelectSliceStep"}},"options":{"skip":2,"count":3},"jobId":"fd0bc3b5-a9e8-4121-afd1-5d410d8297df"},"events":["Results ready"]}'
{b'Content-Type': b'text/plain', b'Content-Transfer-Encoding': b'8bit'}
b'{"type":"org.squonk.types.MoleculeObject","size":3,"valueClassMappings":{"Name":"java.lang.String"},"fieldMetaProps":[{"fieldName":"Name","values":{"created":"31-Jan-2019 18:03:58 UTC","source":"User provided name","description":"Name provided by user with smiles"}}],"properties":{"created":"31-Jan-2019 18:03:58 UTC","source":"User provided Smiles","description":"Read from user provided Smiles","history":"[31-Jan-2019 18:03:58 UTC] Added field Name"}}'
{b'Content-Type': b'application/x-squonk-dataset-metadata+json', b'Content-Transfer-Encoding': b'8bit', b'Content-Disposition': b'attachment; filename=output_metadata'}
b'[{"uuid":"9077e1c3-d0fc-4e93-9081-ed2714abafc0","source":"OC1=C(Cl)C=C(C=C1[N+]([O-])=O)[N+]([O-])=O","format":"smiles","values":{"Name":"3"}},{"uuid":"323a0cd0-400f-4806-8a9d-914601fad27f","source":"[O-][N+](=O)C1=CNC(=N)S1","format":"smiles","values":{"Name":"4"}},{"uuid":"03418bcb-672c-47ab-a3c1-a1eb3a13e00d","source":"NC1=CC2=C(C=C1)C(=O)C3=C(C=CC=C3)C2=O","format":"smiles","values":{"Name":"5"}}]'
{b'Content-Type': b'application/x-squonk-molecule-object+json', b'Content-Transfer-Encoding': b'8bit', b'Content-Disposition': b'attachment; filename=output_output.data.gz'}

Delete the job


In [94]:
#  Once you have fetched the results you MUST delete the job.
# The job is defined by the job_id variable and is probably the last job executed.

url = jobexecutor_url + '/' + job_id
print("Requesting DELETE " + url)
jobs_resp = requests.delete(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
if 'status' in json and json['status'] == 'COMPLETED':
    print('Job deleted')
else:
    print('Problem deleting job')


Requesting DELETE https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/fd0bc3b5-a9e8-4121-afd1-5d410d8297df
Response Code: 200
Job deleted

Delete all jobs

This is to help clean up if you get into a mess!


In [95]:
# Delete all jobs

# First get the current jobs
jobs_resp = requests.get(jobexecutor_url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
json = jobs_resp.json()
print('Found ' + str(len(json)) + ' jobs')

# Now go through them and delete
# If successful the status of the job will then be COMPLETED.
for job in json:
    id = job['jobId']
    url = jobexecutor_url + '/' + id
    print("Deleting " + url)
    jobs_resp = requests.delete(url, headers={'Authorization':  'bearer ' + token}, verify=tls_verify)
    j = jobs_resp.json()
    print("Status: " + j['status'])


Response Code: 200
Found 2 jobs
Deleting https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/c7f8199b-02cf-43e0-86b4-3bab89781c7f
Status: COMPLETED
Deleting https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/1a46a1fc-193a-4f72-acd7-c81de1f096d4
Status: COMPLETED

Other services

In addition to the simple 'dataset slice' service many more meaningful ones are available.

Here are some examples illustrating the different categories of Squonk services:

  1. Built in services running within the job executor Java process. These are limited to very simple and very fast operations
  2. HTTP services running in the chemservices module that stream results and are designed for relatively short term execution (seconds or at most a few minutes) with the results being streamed immediately back to the requester.
  3. Services running in a Docker container given the input data as files and writing the results as files. These are designed for more flexible implementation of services that can take longer to execute.
  4. Nextflow services. Similar to Docker services, but defined as a Nextflow workflow that typically allows parallel execution on the K8S cluster or potentionally on an external cluster.

Execute one of these instead of the dataset slice one above.


In [69]:
# The 'Lipinski filter' takes calculates the classical rule of five properties and allows to filter based on these.
# We have implementations for ChemAxon and RDKit. Here we use the RDKit one.
# The default filter is the classical drug-likeness one defined by Lipinski but you can specify your owwn criteria instaead.
# This is one of Squonk's 'HTTP' services.
# The job ID is stored in the job_id variable.

url = jobexecutor_url + '/rdkit.calculators.lipinski'

data = {
    'options': '{"filterMode":"INCLUDE_PASS"}',
    'input_data': ('input_data', open('nci10.data', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('nci10.metadata', 'rb'), 'application/x-squonk-dataset-metadata+json')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)


Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/rdkit.calculators.lipinski
Response Code: 201
{'jobId': 'af9060da-89a3-45dc-b46e-ad8861cc95c5', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572351678988, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.HttpServiceDescriptor', 'serviceConfig': {'id': 'rdkit.calculators.lipinski', 'name': 'Lipinski (RDKit)', 'description': 'Lipinski rule of 5 filter using RDKit', 'tags': ['lipinski', 'ruleoffive', 'ro5', 'hbond', 'donors', 'acceptors', 'logp', 'molecularweight', 'druglike', 'molecularproperties', 'filter', 'rdkit'], 'resourceUrl': '/docs/cells/Lipinski%20filter%20(RDKit)/', 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.String'}, 'key': 'query.filterMode', 'label': 'Filter mode', 'description': 'How to filter results', 'values': ['INCLUDE_PASS', 'INCLUDE_FAIL', 'INCLUDE_ALL'], 'defaultValue': 'INCLUDE_PASS', 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'query.filterThreshold', 'label': 'Number of violations', 'description': 'Number of violations to accept', 'defaultValue': 1, 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Float'}, 'key': 'query.ExactMW_RDKit', 'label': 'Mol weight', 'description': 'Molecular weight', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Float', 'minValue': 0.0, 'maxValue': 500.0}, 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Float'}, 'key': 'query.LogP_RDKit', 'label': 'LogP', 'description': 'LogP partition coefficient', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Float', 'maxValue': 5.0}, 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Integer'}, 'key': 'query.HBD_Lip_RDKit', 'label': 'HBD count', 'description': 'H-bond donor count', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Integer', 'minValue': 0, 'maxValue': 5}, 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Integer'}, 'key': 'query.HBA_LIP_RDKit', 'label': 'HBA count', 'description': 'H-bond acceptor count', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Integer', 'minValue': 0, 'maxValue': 10}, 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}], 'status': 'ACTIVE', 'statusLastChecked': 1572351417534, 'executorClassName': 'org.squonk.execution.steps.impl.DatasetHttpExecutorStep'}, 'thinDescriptors': [{'input': 'input', 'output': 'output', 'filtering': True}], 'executionEndpoint': 'http://chemservices:8080/chem-services-rdkit-basic/rest/v1/calculators/lipinski'}, 'options': {'arg.filterMode': 'INCLUDE_PASS'}, 'jobId': 'af9060da-89a3-45dc-b46e-ad8861cc95c5'}, 'events': []}

JobID: af9060da-89a3-45dc-b46e-ad8861cc95c5

In [23]:
# passing data as SDF

url = jobexecutor_url + '/rdkit.calculators.lipinski'

data = {
    'options': '{"filterMode":"INCLUDE_PASS"}',
    'input': ('input', open('Kinase_inhibs.sdf', 'rb'), 'chemical/x-mdl-sdfile')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)


Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/rdkit.calculators.lipinski
Response Code: 201
{'jobId': '86d744c4-f0a5-4a4f-9418-b3dd0dba1f6f', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572349391457, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.HttpServiceDescriptor', 'serviceConfig': {'id': 'rdkit.calculators.lipinski', 'name': 'Lipinski (RDKit)', 'description': 'Lipinski rule of 5 filter using RDKit', 'tags': ['lipinski', 'ruleoffive', 'ro5', 'hbond', 'donors', 'acceptors', 'logp', 'molecularweight', 'druglike', 'molecularproperties', 'filter', 'rdkit'], 'resourceUrl': '/docs/cells/Lipinski%20filter%20(RDKit)/', 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.String'}, 'key': 'query.filterMode', 'label': 'Filter mode', 'description': 'How to filter results', 'values': ['INCLUDE_PASS', 'INCLUDE_FAIL', 'INCLUDE_ALL'], 'defaultValue': 'INCLUDE_PASS', 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'query.filterThreshold', 'label': 'Number of violations', 'description': 'Number of violations to accept', 'defaultValue': 1, 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Float'}, 'key': 'query.ExactMW_RDKit', 'label': 'Mol weight', 'description': 'Molecular weight', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Float', 'minValue': 0.0, 'maxValue': 500.0}, 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Float'}, 'key': 'query.LogP_RDKit', 'label': 'LogP', 'description': 'LogP partition coefficient', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Float', 'maxValue': 5.0}, 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Integer'}, 'key': 'query.HBD_Lip_RDKit', 'label': 'HBD count', 'description': 'H-bond donor count', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Integer', 'minValue': 0, 'maxValue': 5}, 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Integer'}, 'key': 'query.HBA_LIP_RDKit', 'label': 'HBA count', 'description': 'H-bond acceptor count', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Integer', 'minValue': 0, 'maxValue': 10}, 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}], 'status': 'ACTIVE', 'statusLastChecked': 1572348717384, 'executorClassName': 'org.squonk.execution.steps.impl.DatasetHttpExecutorStep'}, 'thinDescriptors': [{'input': 'input', 'output': 'output', 'filtering': True}], 'executionEndpoint': 'http://chemservices:8080/chem-services-rdkit-basic/rest/v1/calculators/lipinski'}, 'options': {'filterMode': 'INCLUDE_PASS'}, 'jobId': '86d744c4-f0a5-4a4f-9418-b3dd0dba1f6f'}, 'events': []}

JobID: 86d744c4-f0a5-4a4f-9418-b3dd0dba1f6f

In [26]:
# sucos scoring passing 2 inputs as SDF

url = jobexecutor_url + '/pipelines.rdkit.sucos.basic'

data = {
    'options': '{}',
    'input': ('input', open('mols.sdf', 'rb'), 'chemical/x-mdl-sdfile'),
    'target': ('target', open('benzene.sdf', 'rb'), 'chemical/x-mdl-sdfile')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)


Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/pipelines.rdkit.sucos.basic
Response Code: 201
{'jobId': '19e7a237-1980-4c3c-9efc-7ce804410275', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572349506478, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DockerServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.sucos.basic', 'name': 'RDKitSuCOS', 'description': 'Generate 3D overlay using SuCOS in RDKit', 'tags': ['rdkit', 'alignment', 'sucos', '3d', 'docker'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}, {'name': 'target', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.targetidx', 'label': 'Target mol index', 'description': 'Target molecule index (default is the first)', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}], 'executorClassName': 'org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep'}, 'thinDescriptors': [{'input': 'input'}], 'inputRoutes': [{'route': 'FILE'}, {'route': 'FILE'}], 'outputRoutes': [{'route': 'FILE'}], 'imageName': 'informaticsmatters/rdkit_pipelines', 'command': "python -m pipelines.rdkit.sucos --target ${PIN}target.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json ${binding.variables.containsKey('targetidx') ? '--targetidx ' + targetidx : ''} --meta"}, 'options': {}, 'jobId': '19e7a237-1980-4c3c-9efc-7ce804410275'}, 'events': []}

JobID: 19e7a237-1980-4c3c-9efc-7ce804410275

In [74]:
# open3dAlign scoring passing 2 inputs as SDF
# passing the queryMol as pyrimethamine.mol does not work - it needs tob e converted to SDF

url = jobexecutor_url + '/pipelines.rdkit.o3da.basic'

data = {
    'options': '{"arg.crippen":"false"}',
    'input': ('input', open('dhfr_3d.sdf', 'rb'), 'chemical/x-mdl-sdfile'),
    'queryMol': ('queryMol', open('pyrimethamine.sdf', 'rb'), 'chemical/x-mdl-sdfile')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)


Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/pipelines.rdkit.o3da.basic
Response Code: 201
{'jobId': '0660dae6-5553-4553-8533-3d9e9353e8d4', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572351791974, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DockerServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.o3da.basic', 'name': 'RDKitOpen3DAlign', 'description': 'Generate 3D alignments using Open3DAlign in RDKit', 'tags': ['rdkit', 'conformer', 'alignment', 'open3dalign', '3d', 'docker'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}, {'name': 'queryMol', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.qmolidx', 'label': 'Query mol index', 'description': 'Query molecule index', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Float'}, 'key': 'arg.threshold', 'label': 'O3DAlign score threshold', 'description': 'Keep molecules with O3DAlign scores within this range of the score for aligning the query to itself', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Boolean'}, 'key': 'arg.crippen', 'label': 'Use Crippen (logP) contributions', 'description': 'Use Crippen (logP) contributions', 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.num', 'label': 'Number of conformers', 'description': 'Number of conformers to generate if not already 3D', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}], 'executorClassName': 'org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep'}, 'thinDescriptors': [{'input': 'input'}], 'inputRoutes': [{'route': 'FILE'}, {'route': 'FILE'}], 'outputRoutes': [{'route': 'FILE'}], 'imageName': 'informaticsmatters/rdkit_pipelines', 'command': "python -m pipelines.rdkit.o3dAlign queryMol.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json ${binding.variables.containsKey('qmolidx') ? '--qmolidx ' + qmolidx : ''} ${binding.variables.containsKey('num') ? '--num ' + num : ''} ${binding.variables.containsKey('threshold') ? '--threshold ' + threshold : ''} ${crippen ? '--crippen' : ''} --meta"}, 'options': {'arg.crippen': 'false'}, 'jobId': '0660dae6-5553-4553-8533-3d9e9353e8d4'}, 'events': []}

JobID: 0660dae6-5553-4553-8533-3d9e9353e8d4

In [64]:
# open3dAlign scoring passing inputs as dataset and query as SDF

url = jobexecutor_url + '/pipelines.rdkit.o3da.basic'

data = {
    'options': '{"arg.crippen":"false"}',
    'input_data': ('input_data', open('dhfr_3d.data.gz', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('dhfr_3d.metadata', 'rb'), 'application/x-squonk-dataset-metadata+json'),
    'queryMol': ('queryMol', open('pyrimethamine.sdf', 'rb'), 'chemical/x-mdl-sdfile')
}

print("Requesting POST " + url)
jobs_resp = requests.post(url, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)


Requesting POST https://jobexecutor.prod.openrisknet.org/jobexecutor/rest/v1/jobs/pipelines.rdkit.o3da.basic
Response Code: 201
{'jobId': 'c7300679-1a58-438b-b6d6-deaa434a0783', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572351520464, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DockerServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.o3da.basic', 'name': 'RDKitOpen3DAlign', 'description': 'Generate 3D alignments using Open3DAlign in RDKit', 'tags': ['rdkit', 'conformer', 'alignment', 'open3dalign', '3d', 'docker'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}, {'name': 'queryMol', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.qmolidx', 'label': 'Query mol index', 'description': 'Query molecule index', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Float'}, 'key': 'arg.threshold', 'label': 'O3DAlign score threshold', 'description': 'Keep molecules with O3DAlign scores within this range of the score for aligning the query to itself', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Boolean'}, 'key': 'arg.crippen', 'label': 'Use Crippen (logP) contributions', 'description': 'Use Crippen (logP) contributions', 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.num', 'label': 'Number of conformers', 'description': 'Number of conformers to generate if not already 3D', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}], 'executorClassName': 'org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep'}, 'thinDescriptors': [{'input': 'input'}], 'inputRoutes': [{'route': 'FILE'}, {'route': 'FILE'}], 'outputRoutes': [{'route': 'FILE'}], 'imageName': 'informaticsmatters/rdkit_pipelines', 'command': "python -m pipelines.rdkit.o3dAlign queryMol.data.gz -i ${PIN}input.data.gz -o ${POUT}output -of json ${binding.variables.containsKey('qmolidx') ? '--qmolidx ' + qmolidx : ''} ${binding.variables.containsKey('num') ? '--num ' + num : ''} ${binding.variables.containsKey('threshold') ? '--threshold ' + threshold : ''} ${crippen ? '--crippen' : ''} --meta"}, 'options': {'arg.crippen': 'false'}, 'jobId': 'c7300679-1a58-438b-b6d6-deaa434a0783'}, 'events': []}

JobID: c7300679-1a58-438b-b6d6-deaa434a0783

In [19]:
# The 'Conformer generator' used RDKit ETKDG conformer generation tool to generate a number of conformers for the input structures.
# This is one of Squonk's 'Docker' services.
# The job ID is stored in the job_id variable.

service_id = 'pipelines.rdkit.conformer.basic'

data = {
    'options': '{"arg.num":10,"arg.method":"RMSD"}',
    'input_data': ('input_data', open('nci10.data', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('nci10.metadata', 'rb'), 'application/x-squonk-dataset-metadata+json')
}

jobs_resp = requests.post(jobexecutor_url + '/' + service_id, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)


Response Code: 201
{'jobId': 'c00e90ba-8045-4c1c-9ab4-7e8d47e82f89', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1572348726866, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.DockerServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.conformer.basic', 'name': 'RDKitConformers', 'description': 'Generate 3D conformers using RDKit', 'tags': ['rdkit', 'conformer', '3d', 'docker'], 'icon': 'icons/molecule_generator.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.num', 'label': 'Number of conformers', 'description': 'Number of conformers to aim to generate', 'defaultValue': 1, 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.attempts', 'label': 'Number of attempts', 'description': 'Number of attempts to generate conformers', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Float'}, 'key': 'arg.prune', 'label': 'Prune RMSD threshold', 'description': 'Prune RMSD threshold for removing similar conformers', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.String'}, 'key': 'arg.method', 'label': 'Cluster method', 'description': 'Cluster method (RMSD or TFD)', 'values': ['RMSD', 'TFD'], 'defaultValue': 'RMSD', 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Float'}, 'key': 'arg.threshold', 'label': 'Cluster threshold', 'description': 'Cluster threshold', 'visible': True, 'editable': True, 'minValues': 0, 'maxValues': 1, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.Integer'}, 'key': 'arg.minimize', 'label': 'Energy minimization iterations', 'description': 'Number of energy minimization iterations', 'defaultValue': 0, 'visible': True, 'editable': True, 'minValues': 1, 'maxValues': 1, 'modes': ['User']}], 'executorClassName': 'org.squonk.execution.steps.impl.ThinDatasetDockerExecutorStep'}, 'thinDescriptors': [{'input': 'input'}], 'inputRoutes': [{'route': 'FILE'}], 'outputRoutes': [{'route': 'FILE'}], 'imageName': 'informaticsmatters/rdkit_pipelines', 'command': "python -m pipelines.rdkit.conformers -i ${PIN}input.data.gz -if json -o ${POUT}output -of json -n $num -c $method ${binding.variables.containsKey('attempts') ? ' -a ' + attempts : ''} ${binding.variables.containsKey('prune') ? ' -r ' + prune : ''} ${binding.variables.containsKey('threshold') ? ' -t ' + threshold : ''} ${binding.variables.containsKey('minimize') ? ' -e ' + minimize : ''} --meta"}, 'options': {'arg.num': 10, 'arg.method': 'RMSD'}, 'jobId': 'c00e90ba-8045-4c1c-9ab4-7e8d47e82f89'}, 'events': []}

JobID: c00e90ba-8045-4c1c-9ab4-7e8d47e82f89

In [171]:
# Similarity screening using RDKit.
# This is one of Squonk's 'Nextflow' services.
# The job ID is stored in the job_id variable.

# NOTE: THIS IS NOT WORKING AS THE QUERY STRUCTURE IS NOT BEING PASSED CORRECTLY

service_id = 'pipelines.rdkit.screen.basic'

data = {
    'options': '{"arg.query":{"source":"CC1=CC(=O)C=CC1=O","format":"smiles"},"arg.sim":{"minValue":0.5,"maxValue":1.0}}',
    'input_data': ('input_data', open('nci10_data.json', 'rb'), 'application/x-squonk-molecule-object+json'),
    'input_metadata': ('input_metadata', open('nci10_meta.json', 'rb'), 'application/x-squonk-dataset-metadata+json')
}

jobs_resp = requests.post(jobexecutor_url + '/' + service_id, files=data, headers = {'Authorization':  'bearer ' + token, 'Content-Type': 'multipart/form'}, verify=tls_verify)
print('Response Code: ' + str(jobs_resp.status_code))
job_status = jobs_resp.json()
job_id = job_status['jobId']
print(job_status)
print("\nJobID: " + job_id)


Response Code: 201
{'jobId': '4a67ebdb-603f-4458-99fb-b23263edd176', 'username': 'user1', 'status': 'RUNNING', 'totalCount': 0, 'processedCount': 0, 'errorCount': 0, 'started': 1556724503937, 'completed': None, 'jobDefinition': {'@class': 'org.squonk.jobdef.ExternalJobDefinition', 'serviceDescriptor': {'@class': 'org.squonk.core.NextflowServiceDescriptor', 'serviceConfig': {'id': 'pipelines.rdkit.screen.basic', 'name': 'RDKitSimilarityScreening', 'description': 'RDKit Similarity Screening', 'tags': ['rdkit', 'screening', 'similarity', 'docker', 'nextflow'], 'icon': 'icons/filter_molecules.png', 'inputDescriptors': [{'name': 'input', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'outputDescriptors': [{'name': 'output', 'mediaType': 'application/x-squonk-dataset-molecule+json', 'primaryType': 'org.squonk.dataset.Dataset', 'secondaryType': 'org.squonk.types.MoleculeObject'}], 'optionDescriptors': [{'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.MoleculeTypeDescriptor', 'molType': 'DISCRETE', 'formats': ['smiles'], 'type': 'org.squonk.options.types.Structure'}, 'key': 'arg.query', 'label': 'Query molecule', 'description': 'Query molecule as smiles', 'visible': True, 'editable': True, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'org.squonk.types.NumberRange$Float'}, 'key': 'arg.sim', 'label': 'Similarity', 'description': 'Similarity threshold (1.0 is identical)', 'defaultValue': {'@class': 'org.squonk.types.NumberRange$Float', 'minValue': 0.7, 'maxValue': 1.0}, 'visible': True, 'editable': True, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.String'}, 'key': 'arg.descriptor', 'label': 'Descriptor', 'description': 'Descriptor/fingerprint to use', 'values': ['maccs', 'morgan2', 'morgan3', 'rdkit'], 'defaultValue': 'rdkit', 'visible': True, 'editable': True, 'modes': ['User']}, {'@class': 'org.squonk.options.OptionDescriptor', 'typeDescriptor': {'@class': 'org.squonk.options.SimpleTypeDescriptor', 'type': 'java.lang.String'}, 'key': 'arg.metric', 'label': 'Metric', 'description': 'Similarity metric to use', 'values': ['asymmetric', 'braunblanquet', 'cosine', 'dice', 'kulczynski', 'mcconnaughey', 'rogotgoldberg', 'russel', 'sokal', 'tanimoto'], 'defaultValue': 'tanimoto', 'visible': True, 'editable': True, 'modes': ['User']}], 'executorClassName': 'org.squonk.execution.steps.impl.ThinDatasetNextflowInDockerExecutorStep'}, 'thinDescriptors': [{'input': 'input', 'output': 'output', 'filtering': True}], 'inputRoutes': [{'route': 'FILE'}], 'outputRoutes': [{'route': 'FILE'}], 'nextflowFile': '#!/usr/bin/env nextflow\n\nparams.input = "$baseDir/input.data.gz"\nparams.qsmiles\nparams.simmin = 0.7\nparams.simmax = 1.0\nparams.descriptor = \'rdkit\'\nparams.metric = \'tanimoto\'\nparams.chunk = 2500\nparams.limit = 0\nparams.digits = 4\n\ntarget = file(params.input)\n\nprocess splitter {\n\n    container \'informaticsmatters/rdkit_pipelines:latest\'\n\n    input:\n    file target\n\n    output:\n    file \'target_part*.sdf.gz\' into target_parts mode flatten\n    file \'target_part_metrics.txt\' into splitter_metrics\n\n    """\n    python -m pipelines_utils_rdkit.filter -i $target -c $params.chunk -l $params.limit -d $params.digits -o target_part -of sdf --meta\n    """\n}\n\nprocess rdkitScreen {\n\n    container \'informaticsmatters/rdkit_pipelines\'\n\n\tinput:\n    file part from target_parts\n\n    output:\n    file \'screened_part*.sdf.gz\' into screened_parts\n\n    """\n    python -m pipelines.rdkit.screen --qsmiles \'$params.qsmiles\' --simmin $params.simmin --simmax $params.simmax -d $params.descriptor -m $params.metric -i $part -o ${part.name.replace(\'target\', \'screened\')[0..-8]} -of sdf\n    """\n}\n\nprocess joiner {\n\n    container \'informaticsmatters/rdkit_pipelines:latest\'\n\n    publishDir "$baseDir/results", mode: \'move\'\n\n    input:\n    file \'splitter_metrics.txt\' from splitter_metrics\n\tfile parts from screened_parts.collect()\n\n\toutput:\n\tfile \'output_metrics.txt\'\n\tfile \'output.data.gz\'\n\tfile \'output.metadata\'\n\n\t"""\n\tzcat $parts | python -m pipelines_utils_rdkit.filter -if sdf -of json -o output --meta\n\tmv output_metrics.txt joiner_metrics.txt\n\tgrep \'__InputCount__\' splitter_metrics.txt | sed s/__InputCount__/RDKitScreen/ > output_metrics.txt\n    grep \'__InputCount__\' splitter_metrics.txt >> output_metrics.txt\n    grep \'__OutputCount__\' joiner_metrics.txt >> output_metrics.txt\n\t"""\n}\n', 'nextflowConfig': '// Intentionally Empty', 'nextflowParams': "params.qsmiles = '$query_source'\nparams.simmin = $sim.minValue\nparams.simmax = $sim.maxValue\nparams.descriptor = '$descriptor'\nparams.metric = '$metric'\n"}, 'options': {'arg.sim': {'maxValue': 1.0, 'minValue': 0.5}, 'arg.query': {'format': 'smiles', 'source': 'CC1=CC(=O)C=CC1=O'}}, 'jobId': '4a67ebdb-603f-4458-99fb-b23263edd176'}, 'events': []}

JobID: 4a67ebdb-603f-4458-99fb-b23263edd176

In [ ]: