In [1]:
import cwltool
In [2]:
# Path to the go-blast CWL tool description that later cells open and parse.
# NOTE(review): absolute path under one user's home directory; adjust it when
# running this notebook on another machine.
tool = '/Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl'
In [3]:
%%sh
# Invoke cwltool with --print-pre on the go-blast tool description.
# The literal path below is the same one stored in the Python variable `tool`;
# keep the two in sync.
cwltool --print-pre /Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl
In [4]:
import yaml
In [5]:
# Read the tool description as plain YAML into `y`.
# yaml.safe_load is used instead of yaml.load: it only builds standard Python
# objects, whereas yaml.load without an explicit Loader can construct arbitrary
# objects and is rejected by recent PyYAML releases.
y = None
with open(tool) as f:
    y = yaml.safe_load(f)
In [6]:
# find if y has a hint that is a docker requirement
In [7]:
import dpath.util
In [8]:
# Glob search over the raw YAML: look for a 'dockerImageId' entry matching the
# pattern '*/*/dockerImageId' (any first segment, any second segment).
dpath.util.search(y,'*/*/dockerImageId')
Out[8]:
In [9]:
# parse function
import urlparse
from schema_salad import schema
from cwltool import process, update
def parse(cwlpath):
    """Load a CWL document from disk, run it through cwltool's updater, and validate it.

    Args:
        cwlpath: Filesystem path of the CWL file. Relative paths are resolved
            against the current working directory before the file URI is built.

    Returns:
        The first element of the pair returned by
        ``schema.load_and_validate`` (the accompanying metadata is discarded).
    """
    import os  # local import so this cell does not depend on an import made elsewhere

    document_loader, avsc_names, _schema_metadata = process.get_schema()

    # A file URI needs an absolute path: gluing a relative path onto 'file://'
    # would turn its first component into the URI's host part.
    fileuri = 'file://' + os.path.abspath(cwlpath)
    workflowobj = document_loader.fetch(fileuri)

    # This updates from draft2 to draft3
    workflowobj = update.update(workflowobj, document_loader, fileuri)

    # Empty the loader's index before validation.
    # NOTE(review): presumably so the updated document is re-indexed by
    # load_and_validate rather than clashing with the fetched one - confirm.
    document_loader.idx.clear()

    # If strict is true, names are required everywhere (among other requirements)
    strict = False
    processobj, _metadata = schema.load_and_validate(
        document_loader, avsc_names, workflowobj, strict)
    return processobj
In [10]:
import json
# Pretty-print the parsed tool as indented JSON.
# The parenthesised single-argument form behaves the same as the Python 2
# print statement and as the Python 3 print function.
print(json.dumps(parse(tool), indent=2))
In [11]:
# Parse mmap.cwl with the same helper; `workflow` is reused by the cells below.
# NOTE(review): absolute, machine-specific path.
workflow = parse('/Users/dcl9/Code/python/mmap-cwl/mmap.cwl')
Yes, that works
In [84]:
# This function will find dockerImageId (or any other key) anywhere in the tree
def find_key(d, key, path=None):
    """Yield the '/'-joined path of every occurrence of ``key`` in a nested structure.

    Lists and dicts are walked recursively; any other value ends the descent.
    List positions appear in the path as their index, dict keys as their
    string form. When a dict contains ``key``, its path is yielded first and
    the walk then continues into all of that dict's values (including the
    value stored under ``key``), so nested occurrences are reported too.

    Args:
        d: The structure to search (list, dict, or a leaf value).
        key: The dict key to look for.
        path: Path segments leading to ``d``; callers normally omit it.

    Yields:
        One '/'-separated path string per dict that contains ``key``.
    """
    if path is None:
        path = []
    if isinstance(d, list):
        for i, v in enumerate(d):
            for found in find_key(v, key, path + [str(i)]):
                yield found
    elif isinstance(d, dict):
        if key in d:
            # '%s' % (k,) stringifies non-string keys (join would otherwise
            # raise TypeError) while leaving text keys unchanged.
            yield '/'.join(path + ['%s' % (key,)])
        for k, v in d.items():
            for found in find_key(v, key, path + ['%s' % (k,)]):
                yield found
In [97]:
# Could adapt to find class: DockerRequirement instead
for x in find_key(workflow, 'dockerImageId'):
    # Build one string so the "path value" output is the same under the
    # Python 2 print statement and the Python 3 print function.
    print('%s %s' % (x, dpath.util.get(workflow, x)))
In [94]:
# Inspect a single node by its '/'-separated path: the first entry under
# 'hints' of the first step's run inside the first top-level step's run.
dpath.util.get(workflow, 'steps/0/run/steps/0/run/hints/0')
Out[94]:
In [104]:
def image_names(workflow):
    """Return the distinct ``dockerImageId`` values found in ``workflow``.

    Every path reported by ``find_key`` is resolved with ``dpath.util.get``;
    each value is kept once, in the order it is first encountered.

    Args:
        workflow: Nested structure to search.

    Returns:
        A list of the unique values stored under 'dockerImageId'.
    """
    unique_ids = []
    for id_path in find_key(workflow, 'dockerImageId'):
        candidate = dpath.util.get(workflow, id_path)
        if candidate in unique_ids:
            # Already recorded; keep only the first occurrence.
            continue
        unique_ids.append(candidate)
    return unique_ids
In [105]:
# Display the distinct dockerImageId values found in the parsed workflow.
image_names(workflow)
Out[105]:
In [108]:
import docker
def docker_hashes(image_ids):
    """Print each entry of ``image_ids`` on its own line.

    Despite the name, no hashes are computed or looked up here; the function
    only echoes the image names it is given.

    Args:
        image_ids: Iterable of image names.

    Returns:
        None.
    """
    for name in image_ids:
        # Single-argument print(...) gives the same output under Python 2 and 3.
        print(name)
# Print the image names collected from the parsed workflow, one per line.
docker_hashes(image_names(workflow))
In [112]:
# Load the docker-machine "default" environment into this kernel's process.
# A %%sh cell runs in a child shell, so variables set there by
# `eval $(docker-machine env default)` disappear when the cell finishes and
# never reach Python. Parsing the same output here and storing it in
# os.environ makes the settings visible to later cells.
import os
import re
import subprocess

_env_script = subprocess.check_output(
    ['docker-machine', 'env', 'default'], universal_newlines=True)
for _line in _env_script.splitlines():
    # Only lines of the form  export NAME="value"  are applied; comment lines
    # in the output do not match and are skipped.
    _match = re.match(r'\s*export\s+(\w+)="?(.*?)"?\s*$', _line)
    if _match:
        os.environ[_match.group(1)] = _match.group(2)
In [146]:
import docker_io
In [143]:
# Fetch metadata for 'dukegcb/xgenovo' and hand each result to write_image
# together with the target directory '/tmp/images'.
# NOTE(review): `client`, `get_image_metadata` and `write_image` are not
# defined in any cell shown here (`import docker_io` does not bind these bare
# names) - presumably they come from a cell that was removed or run out of
# order; confirm before running on a fresh kernel.
images = get_image_metadata(client, 'dukegcb/xgenovo')
for img in images:
write_image(client, img, '/tmp/images')
In [134]:
# NOTE(review): `md` is not assigned in any cell shown here; presumably left
# over from an earlier kernel session - confirm or remove this cell.
md
Out[134]:
In [ ]: