diff --git a/src/dcqc/file.py b/src/dcqc/file.py
index a8be041..197d833 100644
--- a/src/dcqc/file.py
+++ b/src/dcqc/file.py
@@ -334,7 +334,9 @@ def stage(
         if self._local_path is not None:
             return self._local_path
         else:
-            destination_str = mkdtemp()
+            # TODO: This prefix is used by nf-dcqc to easily find the staged file.
+            #       It might be worth using a DCQCTMPDIR to avoid hard-coding this.
+            destination_str = mkdtemp(prefix="dcqc-staged-")
             destination = Path(destination_str)
 
         # By this point, destination is defined (not None)
diff --git a/src/dcqc/main.py b/src/dcqc/main.py
index 5b2af3f..2129281 100644
--- a/src/dcqc/main.py
+++ b/src/dcqc/main.py
@@ -1,7 +1,7 @@
 import sys
 from csv import DictWriter
 from pathlib import Path
-from typing import List, Optional
+from typing import List
 
 from typer import Argument, Exit, Option, Typer
 
@@ -28,11 +28,8 @@
 
 # Common options
 overwrite_opt = Option(False, "--overwrite", "-f", help="Ignore existing files")
-required_tests_opt = Option(None, "--required-tests", "-rt", help="Required tests")
-skipped_tests_opt = Option(None, "--skipped-tests", "-st", help="Skipped tests")
-stage_files_opt = Option(False, "--stage-files", "-sf", help="Stage remote files.")
-prt_help = "Update paths to be relative to given directory upon serialization."
-paths_relative_to_opt = Option(None, "--paths-relative-to", "-prt", help=prt_help)
+required_tests_opt = Option(None, "--required-tests", "-r", help="Required tests")
+skipped_tests_opt = Option(None, "--skipped-tests", "-s", help="Skipped tests")
 
 
 @app.callback()
@@ -48,12 +45,11 @@ def create_targets(
     input_csv: Path = input_path_arg,
     output_dir: Path = output_dir_path_arg,
     overwrite: bool = overwrite_opt,
-    stage_files: bool = stage_files_opt,
 ):
     """Create target JSON files from a targets CSV file"""
     output_dir.mkdir(parents=True, exist_ok=True)
 
-    parser = CsvParser(input_csv, stage_files)
+    parser = CsvParser(input_csv)
     targets = parser.create_targets()
 
     # Naming the targets by index to ensure no clashes
@@ -63,24 +59,6 @@ def create_targets(
     report.save_many(named_targets, output_dir.as_posix(), overwrite)
 
 
-@app.command()
-def stage_target(
-    input_json: Path = input_path_arg,
-    output_json: str = output_arg,
-    output_dir: Path = output_dir_path_arg,
-    overwrite: bool = overwrite_opt,
-    paths_relative_to: Optional[Path] = paths_relative_to_opt,
-):
-    """Create local file copies from a target JSON file"""
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    target = JsonParser.parse_object(input_json, Target)
-    target.stage(output_dir, overwrite)
-
-    report = JsonReport(paths_relative_to)
-    report.save(target, output_json, overwrite)
-
-
 @app.command()
 def create_tests(
     input_json: Path = input_path_arg,
@@ -93,8 +71,8 @@
     output_dir.mkdir(parents=True, exist_ok=True)
 
     # Interpret empty lists from CLI as None (to auto-generate values)
-    required_tests_maybe = required_tests if len(required_tests) > 0 else None
-    skipped_tests_maybe = skipped_tests if len(skipped_tests) > 0 else None
+    required_tests_maybe = required_tests if required_tests else None
+    skipped_tests_maybe = skipped_tests if skipped_tests else None
 
     target = JsonParser.parse_object(input_json, Target)
     suite = SuiteABC.from_target(target, required_tests_maybe, skipped_tests_maybe)
@@ -150,8 +128,8 @@ def create_suite(
 ):
     """Create a suite from a set of test JSON files sharing the same target"""
     # Interpret empty lists from CLI as None (to auto-generate values)
-    required_tests_maybe = required_tests if len(required_tests) > 0 else None
-    skipped_tests_maybe = skipped_tests if len(skipped_tests) > 0 else None
+    required_tests_maybe = required_tests if required_tests else None
+    skipped_tests_maybe = skipped_tests if skipped_tests else None
 
     tests = [JsonParser.parse_object(test_json, TestABC) for test_json in input_jsons]
     suite = SuiteABC.from_tests(tests, required_tests_maybe, skipped_tests_maybe)
diff --git a/src/dcqc/tests/test_abc.py b/src/dcqc/tests/test_abc.py
index d737a04..7463e21 100644
--- a/src/dcqc/tests/test_abc.py
+++ b/src/dcqc/tests/test_abc.py
@@ -69,8 +69,40 @@ def get_status(self, compute_ok: bool = True) -> TestStatus:
             self._status = self.compute_status()
         return self._status
 
-    def _get_single_target_file(self) -> File:
-        files = self.target.files
+    def get_files(self, staged: bool = True) -> list[File]:
+        """Get and stage files for target.
+
+        Args:
+            staged: Whether to make sure that the files are staged.
+                Defaults to True.
+
+        Returns:
+            Staged target files.
+        """
+        files = []
+        for file in self.target.files:
+            if staged:
+                file.stage()
+            files.append(file)
+        return files
+
+    def get_file(self, staged: bool = True) -> File:
+        """Get and stage file for single-file target.
+
+        Args:
+            staged: Whether to make sure that the files are staged.
+                Defaults to True.
+
+        Raises:
+            ValueError: If the target has multiple files.
+
+        Returns:
+            Staged target file.
+        """
+        files = self.get_files(staged)
+        if len(files) != 1:
+            message = "This method only supports single-file targets."
+            raise ValueError(message)
         return files[0]
 
     @classmethod
diff --git a/src/dcqc/tests/tests.py b/src/dcqc/tests/tests.py
index b06ab5d..ab6cf5c 100644
--- a/src/dcqc/tests/tests.py
+++ b/src/dcqc/tests/tests.py
@@ -1,7 +1,7 @@
 import hashlib
 import json
+from pathlib import Path
 
-from dcqc.file import File
 from dcqc.tests.test_abc import ExternalTestMixin, Process, TestABC, TestStatus
 
 
@@ -11,7 +11,7 @@ class FileExtensionTest(TestABC):
 
     def compute_status(self) -> TestStatus:
         status = TestStatus.PASS
-        for file in self.target.files:
+        for file in self.get_files(staged=False):
             file_type = file.get_file_type()
             file_extensions = file_type.file_extensions
             if not file.name.endswith(file_extensions):
@@ -26,18 +26,17 @@ class Md5ChecksumTest(TestABC):
 
     def compute_status(self) -> TestStatus:
         status = TestStatus.PASS
-        for file in self.target.files:
+        for file in self.get_files():
             expected_md5 = file.get_metadata("md5_checksum")
-            actual_md5 = self._compute_md5_checksum(file)
+            actual_md5 = self._compute_md5_checksum(file.local_path)
             if expected_md5 != actual_md5:
                 status = TestStatus.FAIL
                 break
         return status
 
-    def _compute_md5_checksum(self, file: File) -> str:
-        local_path = file.local_path
+    def _compute_md5_checksum(self, path: Path) -> str:
         hash_md5 = hashlib.md5()
-        with local_path.open("rb") as infile:
+        with path.open("rb") as infile:
             for chunk in iter(lambda: infile.read(4096), b""):
                 hash_md5.update(chunk)
         actual_md5 = hash_md5.hexdigest()
@@ -50,16 +49,15 @@ class JsonLoadTest(TestABC):
 
     def compute_status(self) -> TestStatus:
         status = TestStatus.PASS
-        for file in self.target.files:
-            if not self._can_be_loaded(file):
+        for file in self.get_files():
+            if not self._can_be_loaded(file.local_path):
                 status = TestStatus.FAIL
                 break
         return status
 
-    def _can_be_loaded(self, file: File) -> bool:
+    def _can_be_loaded(self, path: Path) -> bool:
         success = True
-        local_path = file.local_path
-        with local_path.open("r") as infile:
+        with path.open("r") as infile:
             try:
                 json.load(infile)
             except Exception:
@@ -73,19 +71,18 @@ class JsonLdLoadTest(TestABC):
 
     def compute_status(self) -> TestStatus:
         status = TestStatus.PASS
-        for file in self.target.files:
-            if not self._can_be_loaded(file):
+        for file in self.get_files():
+            if not self._can_be_loaded(file.local_path):
                 status = TestStatus.FAIL
                 break
         return status
 
-    def _can_be_loaded(self, file: File) -> bool:
+    def _can_be_loaded(self, path: Path) -> bool:
         rdflib = self.import_module("rdflib")
         graph = rdflib.Graph()
 
         success = True
-        local_path = file.local_path
-        with local_path.open("r") as infile:
+        with path.open("r") as infile:
             try:
                 graph.parse(infile, format="json-ld")
             except Exception:
@@ -97,9 +94,8 @@ class LibTiffInfoTest(ExternalTestMixin, TestABC):
     tier = 2
 
     def generate_process(self) -> Process:
-        file = self._get_single_target_file()
-        path = file.local_path.as_posix()
-        command_args = ["tiffinfo", path]
+        file = self.get_file()
+        command_args = ["tiffinfo", file.local_path.as_posix()]
         process = Process(
             container="quay.io/sagebionetworks/libtiff:2.0",
             command_args=command_args,
@@ -111,14 +107,13 @@ class BioFormatsInfoTest(ExternalTestMixin, TestABC):
     tier = 2
 
    def generate_process(self) -> Process:
-        file = self._get_single_target_file()
-        path = file.local_path.as_posix()
+        file = self.get_file()
         command_args = [
             "/opt/bftools/showinf",
             "-nopix",
             "-novalid",
             "-nocore",
-            path,
+            file.local_path.as_posix(),
         ]
         process = Process(
             container="quay.io/sagebionetworks/bftools:latest",
@@ -131,11 +126,10 @@ class OmeXmlSchemaTest(ExternalTestMixin, TestABC):
     tier = 2
 
     def generate_process(self) -> Process:
-        file = self._get_single_target_file()
-        path = file.local_path.as_posix()
+        file = self.get_file()
         command_args = [
             "/opt/bftools/xmlvalid",
-            path,
+            file.local_path.as_posix(),
         ]
         process = Process(
             container="quay.io/sagebionetworks/bftools:latest",
diff --git a/tests/test_main.py b/tests/test_main.py
index 7b4840b..c263a01 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -55,27 +55,13 @@ def test_create_targets(get_data, get_output):
     assert len(list(output_dir.iterdir())) > 0
 
 
-def test_stage_target(get_data, get_output):
-    input_json = get_data("target.json")
-    output_json = get_output("stage_target/target.staged.json")
-    output_dir = get_output("stage_target/targets")
-    output_json.unlink(missing_ok=True)
-    shutil.rmtree(output_dir, ignore_errors=True)
-
-    assert not output_dir.exists()
-    args = ["stage-target", "-prt", ".", input_json, output_json, output_dir]
-    result = run_command(args)
-    check_command_result(result)
-    assert len(list(output_dir.iterdir())) > 0
-
-
 def test_create_tests(get_data, get_output):
     input_json = get_data("target.json")
     output_dir = get_output("create_tests")
     shutil.rmtree(output_dir, ignore_errors=True)
 
     assert not output_dir.exists()
-    args = ["create-tests", "-rt", "Md5ChecksumTest", input_json, output_dir]
+    args = ["create-tests", "-r", "Md5ChecksumTest", input_json, output_dir]
     result = run_command(args)
     check_command_result(result)
     assert len(list(output_dir.iterdir())) > 0
diff --git a/tests/test_tests.py b/tests/test_tests.py
index cf0b9db..03be178 100644
--- a/tests/test_tests.py
+++ b/tests/test_tests.py
@@ -190,3 +190,11 @@ def test_that_a_process_can_be_serialized_and_deserialized():
     process_dict = process.to_dict()
     process_from_dict = Process.from_dict(process_dict)
     assert process_dict == process_from_dict.to_dict()
+
+
+def test_for_an_error_when_getting_one_file_from_multi_file_target(test_files):
+    file = test_files["good"]
+    target = Target(file, file)
+    test = tests.FileExtensionTest(target)
+    with pytest.raises(ValueError):
+        test.get_file()
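
Below is a minimal usage sketch (not part of the diff above) of how a new single-file check could be written against the refactored TestABC helpers. The NonEmptyFileTest class, its tier value, and the size check are hypothetical; TestABC, TestStatus, get_file(), and File.local_path come from the changes shown above.

# Hypothetical example, not included in this change set: a single-file test
# built on the new TestABC.get_file() helper from src/dcqc/tests/test_abc.py.
from dcqc.tests.test_abc import TestABC, TestStatus


class NonEmptyFileTest(TestABC):
    """Hypothetical tier-1 test that passes when the single target file is non-empty."""

    tier = 1

    def compute_status(self) -> TestStatus:
        # get_file() stages the file if needed and raises ValueError for
        # multi-file targets, mirroring the built-in single-file tests above.
        file = self.get_file()
        if file.local_path.stat().st_size > 0:
            return TestStatus.PASS
        return TestStatus.FAIL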