Skip to content

Commit d6ef9c0

Browse files
authored
Validate that output files actually exist (#374)
* Add visit_class and refactor redundant traversals when both adjustFileObjs and adjustDirObjs are called on the same objects with the same function. * Validate file/directory locations on output. * Fix output of outdir directly. * Add test that outputs are better checked.
1 parent 0b9086e commit d6ef9c0

File tree

12 files changed

+130
-89
lines changed

12 files changed

+130
-89
lines changed

cwltool/builder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from . import expression
1010
from .errors import WorkflowException
11-
from .pathmapper import PathMapper, adjustFileObjs, normalizeFilesDirs, get_listing
11+
from .pathmapper import PathMapper, normalizeFilesDirs, get_listing, visit_class
1212
from .stdfsaccess import StdFsAccess
1313
from .utils import aslist
1414

@@ -144,7 +144,7 @@ def _capture_files(f):
144144
self.files.append(f)
145145
return f
146146

147-
adjustFileObjs(datum.get("secondaryFiles", []), _capture_files)
147+
visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)
148148

149149
if schema["type"] == "Directory":
150150
ll = self.loadListing or (binding and binding.get("loadListing"))

cwltool/draft2tool.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
from schema_salad.sourceline import SourceLine, indent
1717
from typing import Any, Callable, cast, Generator, Text, Union
1818

19-
from .builder import CONTENT_LIMIT, substitute, Builder, adjustFileObjs
20-
from .pathmapper import adjustDirObjs
19+
from .builder import CONTENT_LIMIT, substitute, Builder
20+
from .pathmapper import adjustFileObjs, adjustDirObjs, visit_class
2121
from .errors import WorkflowException
2222
from .job import CommandLineJob
2323
from .pathmapper import PathMapper, get_listing, trim_listing
@@ -106,6 +106,8 @@ def revmap_file(builder, outdir, f):
106106
revmap_f = builder.pathmapper.reversemap(path)
107107
if revmap_f:
108108
f["location"] = revmap_f[1]
109+
elif path == builder.outdir:
110+
f["location"] = outdir
109111
elif path.startswith(builder.outdir):
110112
f["location"] = builder.fs_access.join(outdir, path[len(builder.outdir) + 1:])
111113
return f
@@ -156,6 +158,13 @@ def check_adjust(builder, f):
156158
raise WorkflowException("Invalid filename: '%s' contains illegal characters" % (f["basename"]))
157159
return f
158160

161+
def check_valid_locations(fs_access, ob):
162+
if ob["location"].startswith("_:"):
163+
pass
164+
if ob["class"] == "File" and not fs_access.isfile(ob["location"]):
165+
raise validate.ValidationException("Does not exist or is not a File: '%s'" % ob["location"])
166+
if ob["class"] == "Directory" and not fs_access.isdir(ob["location"]):
167+
raise validate.ValidationException("Does not exist or is not a Directory: '%s'" % ob["location"])
159168

160169
class CommandLineTool(Process):
161170
def __init__(self, toolpath_object, **kwargs):
@@ -190,10 +199,9 @@ def job(self,
190199
cachebuilder.stagedir,
191200
separateDirs=False)
192201
_check_adjust = partial(check_adjust, cachebuilder)
193-
adjustFileObjs(cachebuilder.files, _check_adjust)
194-
adjustFileObjs(cachebuilder.bindings, _check_adjust)
195-
adjustDirObjs(cachebuilder.files, _check_adjust)
196-
adjustDirObjs(cachebuilder.bindings, _check_adjust)
202+
203+
visit_class([cachebuilder.files, cachebuilder.bindings],
204+
("File", "Directory"), _check_adjust)
197205
cmdline = flatten(map(cachebuilder.generate_arg, cachebuilder.bindings))
198206
(docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
199207
if docker_req and kwargs.get("use_container") is not False:
@@ -290,10 +298,7 @@ def rm_pending_output_callback(output_callbacks, jobcachepending,
290298

291299
_check_adjust = partial(check_adjust, builder)
292300

293-
adjustFileObjs(builder.files, _check_adjust)
294-
adjustFileObjs(builder.bindings, _check_adjust)
295-
adjustDirObjs(builder.files, _check_adjust)
296-
adjustDirObjs(builder.bindings, _check_adjust)
301+
visit_class([builder.files, builder.bindings], ("File", "Directory"), _check_adjust)
297302

298303
if self.tool.get("stdin"):
299304
with SourceLine(self.tool, "stdin", validate.ValidationException):
@@ -419,21 +424,19 @@ def collect_output_ports(self, ports, builder, outdir, compute_checksum=True):
419424
% (shortname(port["id"]), indent(u(str(e)))))
420425

421426
if ret:
427+
revmap = partial(revmap_file, builder, outdir)
422428
adjustDirObjs(ret, trim_listing)
423-
adjustFileObjs(ret,
424-
cast(Callable[[Any], Any], # known bug in mypy
425-
# https://github.com/python/mypy/issues/797
426-
partial(revmap_file, builder, outdir)))
427-
adjustFileObjs(ret, remove_path)
428-
adjustDirObjs(ret, remove_path)
429+
visit_class(ret, ("File", "Directory"), cast(Callable[[Any], Any], revmap))
430+
visit_class(ret, ("File", "Directory"), remove_path)
429431
normalizeFilesDirs(ret)
432+
visit_class(ret, ("File", "Directory"), partial(check_valid_locations, fs_access))
430433
if compute_checksum:
431434
adjustFileObjs(ret, partial(compute_checksums, fs_access))
432435

433436
validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
434437
return ret if ret is not None else {}
435438
except validate.ValidationException as e:
436-
raise WorkflowException("Error validating output record, " + Text(e) + "\n in " + json.dumps(ret, indent=4))
439+
raise WorkflowException("Error validating output record. " + Text(e) + "\n in " + json.dumps(ret, indent=4))
437440

438441
def collect_output(self, schema, builder, outdir, fs_access, compute_checksum=True):
439442
# type: (Dict[Text, Any], Builder, Text, StdFsAccess, bool) -> Union[Dict[Text, Any], List[Union[Dict[Text, Any], Text]]]

cwltool/job.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -339,10 +339,6 @@ def linkoutdir(src, tgt):
339339
_logger.debug(u"[job %s] Removing temporary directory %s", self.name, self.tmpdir)
340340
shutil.rmtree(self.tmpdir, True)
341341

342-
if move_outputs == "move" and empty_subtree(self.outdir):
343-
_logger.debug(u"[job %s] Removing empty output directory %s", self.name, self.outdir)
344-
shutil.rmtree(self.outdir, True)
345-
346342

347343
def _job_popen(
348344
commands, # type: List[str]

cwltool/load_tool.py

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -68,39 +68,36 @@ def _convert_stdstreams_to_files(workflowobj):
6868
# type: (Union[Dict[Text, Any], List[Dict[Text, Any]]]) -> None
6969

7070
if isinstance(workflowobj, dict):
71-
if ('class' in workflowobj
72-
and workflowobj['class'] == 'CommandLineTool'):
73-
if 'outputs' in workflowobj:
74-
for out in workflowobj['outputs']:
75-
for streamtype in ['stdout', 'stderr']:
76-
if out['type'] == streamtype:
77-
if 'outputBinding' in out:
78-
raise ValidationException(
79-
"Not allowed to specify outputBinding when"
80-
" using %s shortcut." % streamtype)
81-
if streamtype in workflowobj:
82-
filename = workflowobj[streamtype]
83-
else:
84-
filename = Text(uuid.uuid4())
85-
workflowobj[streamtype] = filename
86-
out['type'] = 'File'
87-
out['outputBinding'] = {'glob': filename}
88-
if 'inputs' in workflowobj:
89-
for inp in workflowobj['inputs']:
90-
if inp['type'] == 'stdin':
91-
if 'inputBinding' in inp:
71+
if workflowobj.get('class') == 'CommandLineTool':
72+
for out in workflowobj.get('outputs', []):
73+
for streamtype in ['stdout', 'stderr']:
74+
if out.get('type') == streamtype:
75+
if 'outputBinding' in out:
9276
raise ValidationException(
93-
"Not allowed to specify inputBinding when"
94-
" using stdin shortcut.")
95-
if 'stdin' in workflowobj:
96-
raise ValidationException(
97-
"Not allowed to specify stdin path when"
98-
" using stdin type shortcut.")
77+
"Not allowed to specify outputBinding when"
78+
" using %s shortcut." % streamtype)
79+
if streamtype in workflowobj:
80+
filename = workflowobj[streamtype]
9981
else:
100-
workflowobj['stdin'] = \
101-
"$(inputs.%s.path)" % \
102-
inp['id'].rpartition('#')[2]
103-
inp['type'] = 'File'
82+
filename = Text(uuid.uuid4())
83+
workflowobj[streamtype] = filename
84+
out['type'] = 'File'
85+
out['outputBinding'] = {'glob': filename}
86+
for inp in workflowobj.get('inputs', []):
87+
if inp.get('type') == 'stdin':
88+
if 'inputBinding' in inp:
89+
raise ValidationException(
90+
"Not allowed to specify inputBinding when"
91+
" using stdin shortcut.")
92+
if 'stdin' in workflowobj:
93+
raise ValidationException(
94+
"Not allowed to specify stdin path when"
95+
" using stdin type shortcut.")
96+
else:
97+
workflowobj['stdin'] = \
98+
"$(inputs.%s.path)" % \
99+
inp['id'].rpartition('#')[2]
100+
inp['type'] = 'File'
104101
else:
105102
for entry in itervalues(workflowobj):
106103
_convert_stdstreams_to_files(entry)

cwltool/main.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from . import draft2tool
2222
from . import workflow
23-
from .pathmapper import adjustDirObjs, get_listing, adjustFileObjs, trim_listing
23+
from .pathmapper import adjustDirObjs, get_listing, adjustFileObjs, trim_listing, visit_class
2424
from .cwlrdf import printrdf, printdot
2525
from .errors import WorkflowException, UnsupportedRequirement
2626
from .load_tool import fetch_document, validate_document, make_tool
@@ -503,8 +503,7 @@ def pathToLoc(p):
503503
p["location"] = p["path"]
504504
del p["path"]
505505

506-
adjustDirObjs(job_order_object, pathToLoc)
507-
adjustFileObjs(job_order_object, pathToLoc)
506+
visit_class(job_order_object, ("File", "Directory"), pathToLoc)
508507
adjustDirObjs(job_order_object, trim_listing)
509508
normalizeFilesDirs(job_order_object)
510509

@@ -548,8 +547,7 @@ def loadref(b, u):
548547
else:
549548
raise Exception(u"Unknown relative_deps %s" % relative_deps)
550549

551-
adjustFileObjs(deps, functools.partial(makeRelative, base))
552-
adjustDirObjs(deps, functools.partial(makeRelative, base))
550+
visit_class(deps, ("File", "Directory"), functools.partial(makeRelative, base))
553551

554552
stdout.write(json.dumps(deps, indent=4))
555553

@@ -774,8 +772,7 @@ def locToPath(p):
774772
if p["location"].startswith("file://"):
775773
p["path"] = uri_file_path(p["location"])
776774

777-
adjustDirObjs(out, locToPath)
778-
adjustFileObjs(out, locToPath)
775+
visit_class(out, ("File", "Directory"), locToPath)
779776

780777
if isinstance(out, basestring):
781778
stdout.write(out)

cwltool/pathmapper.py

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import schema_salad.validate as validate
99
from schema_salad.ref_resolver import uri_file_path
1010
from schema_salad.sourceline import SourceLine
11-
from typing import Any, Callable, Set, Text, Tuple, Union
11+
from typing import Any, Callable, Set, Text, Tuple, Union, Iterable
1212
from six.moves import urllib
1313

1414
from .stdfsaccess import abspath, StdFsAccess
@@ -30,33 +30,26 @@ def adjustFiles(rec, op): # type: (Any, Union[Callable[..., Any], partial[Any]]
3030
for d in rec:
3131
adjustFiles(d, op)
3232

33-
34-
def adjustFileObjs(rec, op): # type: (Any, Union[Callable[..., Any], partial[Any]]) -> None
35-
"""Apply an update function to each File object in the object `rec`."""
33+
def visit_class(rec, cls, op): # type: (Any, Iterable, Union[Callable[..., Any], partial[Any]]) -> None
34+
"""Apply a function to with "class" in cls."""
3635

3736
if isinstance(rec, dict):
38-
if rec.get("class") == "File":
37+
if rec.get("class") in cls:
3938
op(rec)
4039
for d in rec:
41-
adjustFileObjs(rec[d], op)
40+
visit_class(rec[d], cls, op)
4241
if isinstance(rec, list):
4342
for d in rec:
44-
adjustFileObjs(d, op)
43+
visit_class(d, cls, op)
4544

45+
def adjustFileObjs(rec, op): # type: (Any, Union[Callable[..., Any], partial[Any]]) -> None
46+
"""Apply an update function to each File object in the object `rec`."""
47+
visit_class(rec, ("File",), op)
4648

4749
def adjustDirObjs(rec, op):
4850
# type: (Any, Union[Callable[..., Any], partial[Any]]) -> None
4951
"""Apply an update function to each Directory object in the object `rec`."""
50-
51-
if isinstance(rec, dict):
52-
if rec.get("class") == "Directory":
53-
op(rec)
54-
for key in rec:
55-
adjustDirObjs(rec[key], op)
56-
if isinstance(rec, list):
57-
for d in rec:
58-
adjustDirObjs(d, op)
59-
52+
visit_class(rec, ("Directory",), op)
6053

6154
def normalizeFilesDirs(job):
6255
# type: (Union[List[Dict[Text, Any]], Dict[Text, Any]]) -> None
@@ -84,8 +77,7 @@ def addLocation(d):
8477
if "basename" not in d:
8578
d["basename"] = os.path.basename(urllib.request.url2pathname(path))
8679

87-
adjustFileObjs(job, addLocation)
88-
adjustDirObjs(job, addLocation)
80+
visit_class(job, ("File", "Directory"), addLocation)
8981

9082

9183
def dedup(listing): # type: (List[Any]) -> List[Any]

cwltool/process.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626
from typing import (Any, AnyStr, Callable, cast, Dict, List, Generator, Text,
2727
Tuple, Union)
2828

29-
from .builder import Builder, adjustFileObjs
29+
from .builder import Builder
3030
from .pathmapper import adjustDirObjs, get_listing
3131
from .errors import WorkflowException, UnsupportedRequirement
32-
from .pathmapper import PathMapper, normalizeFilesDirs
32+
from .pathmapper import PathMapper, normalizeFilesDirs, visit_class
3333
from .stdfsaccess import StdFsAccess
3434
from .utils import aslist, get_feature
3535

@@ -189,7 +189,7 @@ def stageFiles(pm, stageFunc, ignoreWritable=False):
189189
continue
190190
if not os.path.exists(os.path.dirname(p.target)):
191191
os.makedirs(os.path.dirname(p.target), 0o0755)
192-
if p.type in ("File", "Directory") and p.resolved.startswith("/"):
192+
if p.type in ("File", "Directory") and (p.resolved.startswith("/") or p.resolved.startswith("file:///")):
193193
stageFunc(p.resolved, p.target)
194194
elif p.type == "Directory" and not os.path.exists(p.target) and p.resolved.startswith("_:"):
195195
os.makedirs(p.target, 0o0755)
@@ -234,7 +234,10 @@ def moveIt(src, dst):
234234
shutil.move(src, dst)
235235
return
236236
_logger.debug("Copying %s to %s", src, dst)
237-
shutil.copy(src, dst)
237+
if os.path.isdir(src):
238+
shutil.copytree(src, dst)
239+
else:
240+
shutil.copy(src, dst)
238241

239242
outfiles = [] # type: List[Dict[Text, Any]]
240243
collectFilesAndDirs(outputObj, outfiles)
@@ -245,12 +248,11 @@ def _check_adjust(f):
245248
f["location"] = file_uri(pm.mapper(f["location"])[1])
246249
if "contents" in f:
247250
del f["contents"]
248-
if f["class"] == "File":
249-
compute_checksums(fs_access, f)
250251
return f
251252

252-
adjustFileObjs(outputObj, _check_adjust)
253-
adjustDirObjs(outputObj, _check_adjust)
253+
visit_class(outputObj, ("File", "Directory"), _check_adjust)
254+
255+
visit_class(outputObj, ("File",), functools.partial(compute_checksums, fs_access))
254256

255257
return outputObj
256258

tests/test_check.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import unittest
2+
3+
import cwltool.expression as expr
4+
import cwltool.factory
5+
import cwltool.pathmapper
6+
import cwltool.process
7+
import cwltool.workflow
8+
from .util import get_data
9+
from cwltool.main import main
10+
11+
class TestCheck(unittest.TestCase):
12+
def test_output_checking(self):
13+
self.assertEquals(main([get_data('tests/wf/badout1.cwl')]), 1)
14+
self.assertEquals(main([get_data('tests/wf/badout2.cwl')]), 1)
15+
self.assertEquals(main([get_data('tests/wf/badout3.cwl')]), 1)

tests/test_pack.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import cwltool.workflow
88
from cwltool.load_tool import fetch_document, validate_document
99
from cwltool.main import makeRelative
10-
from cwltool.process import adjustFileObjs, adjustDirObjs
10+
from cwltool.pathmapper import adjustFileObjs, adjustDirObjs
1111
from .util import get_data
1212

1313
class TestPack(unittest.TestCase):

tests/wf/badout1.cwl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
class: CommandLineTool
2+
cwlVersion: v1.0
3+
baseCommand: touch
4+
arguments: [file1]
5+
requirements:
6+
InlineJavascriptRequirement: {}
7+
inputs: []
8+
outputs:
9+
out:
10+
type: File
11+
outputBinding:
12+
outputEval: |
13+
$({"class": "File", "path": runtime.outdir+"/file2"})

0 commit comments

Comments
 (0)