In [1]:
import cwltool

In [2]:
# Path to the CWL tool description that later cells load and parse.
# NOTE(review): absolute, machine-specific path — adjust before re-running elsewhere.
tool = '/Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl'

In [3]:
%%sh 
# Run the cwltool CLI with --print-pre on the tool document; the recorded output
# shows the document as JSON with ids expanded to absolute file:// URIs.
# NOTE(review): this repeats the path stored in `tool` — keep the two in sync.
cwltool --print-pre /Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl


{
    "cwlVersion": "https://w3id.org/cwl/cwl#draft-3.dev1", 
    "inputs": [
        {
            "type": "string", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#db_name", 
            "description": "Prefix of the BLAST db files"
        }, 
        {
            "type": "File", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#db_dir", 
            "description": "Directory containing the BLAST database"
        }, 
        {
            "default": "", 
            "inputBinding": {
                "position": 1, 
                "prefix": "-db", 
                "valueFrom": {
                    "engine": "file:///Users/dcl9/Code/python/mmap-cwl/py-expr-engine/py-expr-engine.cwl", 
                    "script": "os.path.join(self.job['db_dir']['path'], self.job['db_name'])\n"
                }
            }, 
            "type": "string", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#db"
        }, 
        {
            "inputBinding": {
                "position": 2, 
                "prefix": "-query"
            }, 
            "type": "File", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#query", 
            "description": "Input file containing sequences to query"
        }, 
        {
            "default": "goblast-terms.out", 
            "inputBinding": {
                "position": 3, 
                "prefix": "-out"
            }, 
            "type": "string", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#results_file_name", 
            "description": "Output file to put BLAST results"
        }, 
        {
            "default": "1e-15", 
            "inputBinding": {
                "position": 4, 
                "prefix": "-evalue"
            }, 
            "type": "string", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#evalue"
        }, 
        {
            "default": "yes", 
            "inputBinding": {
                "position": 5, 
                "prefix": "-seg"
            }, 
            "type": "string", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#seg"
        }, 
        {
            "default": "10 std stitle", 
            "inputBinding": {
                "position": 6, 
                "prefix": "-outfmt"
            }, 
            "type": "string", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#outfmt"
        }
    ], 
    "requirements": [
        {
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/py-expr-engine/py-expr-engine.cwl", 
            "engineCommand": "py-expr-engine/engine.py", 
            "class": "ExpressionEngineRequirement", 
            "name": "file:///Users/dcl9/Code/python/mmap-cwl/py-expr-engine/py-expr-engine.cwl"
        }
    ], 
    "name": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl", 
    "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl", 
    "outputs": [
        {
            "outputBinding": {
                "glob": {
                    "engine": "https://w3id.org/cwl/cwl#JsonPointer", 
                    "script": "job/results_file_name"
                }
            }, 
            "type": "File", 
            "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#results_file"
        }
    ], 
    "hints": [
        {
            "dockerImageId": "dukegcb/go-blast", 
            "class": "DockerRequirement"
        }
    ], 
    "baseCommand": "blastx", 
    "class": "CommandLineTool"
}
/Users/dcl9/Code/python/cwl-freeze/env/bin/cwltool 1.0.20151112194920

In [4]:
import yaml

In [5]:
# Read the raw CWL tool description into plain Python data.
# safe_load replaces yaml.load: it only constructs basic YAML types, so parsing
# the file cannot instantiate arbitrary Python objects, and it does not depend
# on yaml.load's Loader argument.
y = None
with open(tool) as f:
    y = yaml.safe_load(f)

In [6]:
# find if y has a hint that is a docker requirement

In [7]:
import dpath.util

In [8]:
# Glob search with two wildcard segments before the key; in the recorded output
# this matches the hints -> first entry -> dockerImageId location.
dpath.util.search(y,'*/*/dockerImageId')


Out[8]:
{'hints': [{'dockerImageId': 'dukegcb/go-blast'}]}

In [9]:
# parse function
import urlparse
from schema_salad import schema
from cwltool import process, update
def parse(cwlpath):
    """Load a CWL document from disk and return its validated object tree.

    Args:
        cwlpath: Filesystem path of the CWL document. Relative paths are
            resolved against the current working directory.

    Returns:
        The process object returned by ``schema.load_and_validate`` (the
        accompanying metadata is discarded).
    """
    import os  # local import so this cell does not rely on another cell's imports

    # Only the loader and the Avro names are needed; the schema metadata is unused.
    document_loader, avsc_names, _schema_metadata = process.get_schema()
    # A file:// URI needs an absolute path; prefixing a relative path directly
    # would produce a malformed URI such as 'file://tool.cwl'.
    fileuri = 'file://' + os.path.abspath(cwlpath)
    workflowobj = document_loader.fetch(fileuri)
    # This updates from draft2 to draft3
    workflowobj = update.update(workflowobj, document_loader, fileuri)
    # Empty the loader's index before validating.
    # NOTE(review): presumably so entries indexed during fetch/update are not
    # reused by load_and_validate — confirm against schema_salad.
    document_loader.idx.clear()
    # If strict is true, names are required everywhere (among other requirements)
    strict = False
    processobj, _metadata = schema.load_and_validate(document_loader, avsc_names, workflowobj, strict)
    return processobj

In [10]:
import json
# Pretty-print the parsed tool document as JSON (two-space indent).
print(json.dumps(parse(tool), indent=2))


{
  "cwlVersion": "https://w3id.org/cwl/cwl#draft-3.dev1", 
  "inputs": [
    {
      "type": "string", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#db_name", 
      "description": "Prefix of the BLAST db files"
    }, 
    {
      "type": "File", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#db_dir", 
      "description": "Directory containing the BLAST database"
    }, 
    {
      "default": "", 
      "inputBinding": {
        "position": 1, 
        "prefix": "-db", 
        "valueFrom": {
          "engine": "file:///Users/dcl9/Code/python/mmap-cwl/py-expr-engine/py-expr-engine.cwl", 
          "script": "os.path.join(self.job['db_dir']['path'], self.job['db_name'])\n"
        }
      }, 
      "type": "string", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#db"
    }, 
    {
      "inputBinding": {
        "position": 2, 
        "prefix": "-query"
      }, 
      "type": "File", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#query", 
      "description": "Input file containing sequences to query"
    }, 
    {
      "default": "goblast-terms.out", 
      "inputBinding": {
        "position": 3, 
        "prefix": "-out"
      }, 
      "type": "string", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#results_file_name", 
      "description": "Output file to put BLAST results"
    }, 
    {
      "default": "1e-15", 
      "inputBinding": {
        "position": 4, 
        "prefix": "-evalue"
      }, 
      "type": "string", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#evalue"
    }, 
    {
      "default": "yes", 
      "inputBinding": {
        "position": 5, 
        "prefix": "-seg"
      }, 
      "type": "string", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#seg"
    }, 
    {
      "default": "10 std stitle", 
      "inputBinding": {
        "position": 6, 
        "prefix": "-outfmt"
      }, 
      "type": "string", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#outfmt"
    }
  ], 
  "requirements": [
    {
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/py-expr-engine/py-expr-engine.cwl", 
      "engineCommand": "py-expr-engine/engine.py", 
      "class": "ExpressionEngineRequirement", 
      "name": "file:///Users/dcl9/Code/python/mmap-cwl/py-expr-engine/py-expr-engine.cwl"
    }
  ], 
  "name": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl", 
  "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl", 
  "outputs": [
    {
      "outputBinding": {
        "glob": {
          "engine": "https://w3id.org/cwl/cwl#JsonPointer", 
          "script": "job/results_file_name"
        }
      }, 
      "type": "File", 
      "id": "file:///Users/dcl9/Code/python/mmap-cwl/go-blast/go-blasttool.cwl#results_file"
    }
  ], 
  "hints": [
    {
      "dockerImageId": "dukegcb/go-blast", 
      "class": "DockerRequirement"
    }
  ], 
  "baseCommand": "blastx", 
  "class": "CommandLineTool"
}

Questions

  1. Could this be a CWL compiler?
  2. Will it take a root document and return the whole structure?
  3. Can I find the dockerRequirement anywhere in the doc?
  4. Can I find the dockerRequirement using the schema?

1. CWL Docker Compiler

What does that mean? Abstractly, it would read an input document, look for all Docker requirements and hints, pull the images, and then write a shell script to reload everything.

2. Root document and return whole structure?


In [11]:
# Parse the root workflow document; later cells search the returned structure
# for Docker image ids.
# NOTE(review): absolute, machine-specific path — adjust before re-running elsewhere.
workflow = parse('/Users/dcl9/Code/python/mmap-cwl/mmap.cwl')

Yes, that works


In [84]:
# This function will find a key (e.g. dockerImageId) anywhere in the tree
def _path_segment(part):
    """Return ``part`` as text that can be joined into a '/'-separated path."""
    # Text (str, and unicode on Python 2) passes through untouched; anything
    # else, such as an int or bool dictionary key, is stringified so that
    # '/'.join() does not raise TypeError.
    if isinstance(part, (str, type(u''))):
        return part
    return str(part)


def find_key(d, key, path=None):
    """Yield the '/'-joined path of every occurrence of ``key`` in ``d``.

    Nested lists and dicts are walked depth-first. List positions appear in
    the path as their decimal index; dict keys appear as themselves
    (non-string keys are converted with ``str``). Values that are neither a
    list nor a dict are leaves and yield nothing.

    Args:
        d: The structure to search (typically nested dicts and lists).
        key: The dictionary key to look for.
        path: Optional sequence of segments to prefix every yielded path
            with. It is copied, never mutated.

    Yields:
        One string per dict that contains ``key``, e.g.
        ``'hints/0/dockerImageId'``. A dict's own match is yielded before
        matches found inside its values.
    """
    # Copy (and normalise) the prefix so the caller's sequence is never shared.
    prefix = [] if path is None else [_path_segment(p) for p in path]
    if isinstance(d, list):
        for i, v in enumerate(d):
            for f in find_key(v, key, prefix + [str(i)]):
                yield f
    elif isinstance(d, dict):
        if key in d:
            yield '/'.join(prefix + [_path_segment(key)])
        for k, v in d.items():
            for f in find_key(v, key, prefix + [_path_segment(k)]):
                yield f

In [97]:
# Could adapt to find class: DockerRequirement instead
# Print every path at which 'dockerImageId' occurs, followed by the value stored there.
for x in find_key(workflow, 'dockerImageId'):
    print('%s %s' % (x, dpath.util.get(workflow, x)))


steps/0/run/steps/0/run/hints/0/dockerImageId dukegcb/xgenovo
steps/0/run/steps/1/run/hints/0/dockerImageId dukegcb/xgenovo
steps/1/run/steps/0/run/hints/0/dockerImageId dukegcb/glimmer
steps/1/run/steps/1/run/hints/0/dockerImageId dukegcb/glimmer
steps/1/run/steps/2/run/hints/0/dockerImageId dukegcb/glimmer
steps/2/run/steps/0/run/hints/0/dockerImageId dukegcb/go-blast
steps/2/run/steps/1/run/hints/0/dockerImageId dukegcb/extract-go-terms

In [94]:
# Look up the hint object that contains the first printed dockerImageId path
# (the same path without its trailing /dockerImageId segment).
dpath.util.get(workflow, 'steps/0/run/steps/0/run/hints/0')


Out[94]:
{'class': 'DockerRequirement', 'dockerImageId': 'dukegcb/xgenovo'}

extract docker image names


In [104]:
def image_names(workflow):
    """Return the distinct ``dockerImageId`` values found in ``workflow``.

    Each path reported by ``find_key`` is resolved with ``dpath.util.get``.
    The result is a list that keeps the order in which each value was first
    seen, with repeats dropped.
    """
    unique_ids = []
    for location in find_key(workflow, 'dockerImageId'):
        candidate = dpath.util.get(workflow, location)
        if candidate in unique_ids:
            # Already recorded from an earlier step.
            continue
        unique_ids.append(candidate)
    return unique_ids

In [105]:
# Distinct Docker image names referenced in the parsed workflow, in first-seen order.
image_names(workflow)


Out[105]:
['dukegcb/xgenovo',
 'dukegcb/glimmer',
 'dukegcb/go-blast',
 'dukegcb/extract-go-terms']

In [108]:
import docker

def docker_hashes(image_ids):
    """Print each entry of ``image_ids`` on its own line.

    NOTE(review): despite the name, no hashes are looked up here — the body
    only echoes the names it is given.
    """
    for image_name in image_ids:
        print(image_name)

# Feed the workflow's distinct image names to docker_hashes, which prints them.
docker_hashes(image_names(workflow))


dukegcb/xgenovo
dukegcb/glimmer
dukegcb/go-blast
dukegcb/extract-go-terms

Docker IO

Query docker for the sha of the docker image id


In [112]:
%%sh

# Evaluate the shell exports printed by `docker-machine env default`.
# NOTE(review): a %%sh cell runs in its own shell process, so these variables
# presumably do not reach the Python kernel — confirm how the Docker client
# used later is actually configured.
eval $(docker-machine env default)

In [146]:
import docker_io

In [143]:
# NOTE(review): `client`, `get_image_metadata` and `write_image` are not defined in
# any visible cell; the only related statement is `import docker_io`, which binds
# just the module name. Presumably they come from docker_io or from hidden kernel
# state — confirm and define them explicitly so the notebook survives a fresh kernel.
images = get_image_metadata(client, 'dukegcb/xgenovo')
for img in images:
    # The recorded output shows an image.json and an image.tar written under
    # /tmp/images/<image id>/ for the image.
    write_image(client, img, '/tmp/images')


Writing image metadata to /tmp/images/1396a7cc9e06ffca92a8673584fe62d2b457c201b9a6a41738f6dfd9b5d28e11/image.json
Writing image tar to /tmp/images/1396a7cc9e06ffca92a8673584fe62d2b457c201b9a6a41738f6dfd9b5d28e11/image.tar

In [134]:
# NOTE(review): `md` is not assigned in any visible cell (hidden kernel state).
# The recorded output looks like Docker image metadata for dukegcb/xgenovo —
# confirm where the value came from and assign it explicitly.
md


Out[134]:
[{u'Created': 1447784969,
  u'Id': u'1396a7cc9e06ffca92a8673584fe62d2b457c201b9a6a41738f6dfd9b5d28e11',
  u'Labels': {},
  u'ParentId': u'475b615fbaa0cb06573a95d8c9adb77d32ea3b54cc399c2fdaa59bd7b222a758',
  u'RepoDigests': [],
  u'RepoTags': [u'dukegcb/xgenovo:latest'],
  u'Size': 0,
  u'VirtualSize': 298635224}]

In [ ]: