diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index 40967aa19..bf4de73a4 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -38,7 +38,7 @@ from pydicom import dcmread import orthanc -from pixl_dcmd.main import anonymise_dicom, write_dataset_to_bytes +from pixl_dcmd.main import anonymise_dicom, should_exclude_series, write_dataset_to_bytes if TYPE_CHECKING: from typing import Any @@ -281,6 +281,12 @@ def ReceivedInstanceCallback(receivedDicom: bytes, origin: str) -> Any: # Read the bytes as DICOM/ dataset = dcmread(BytesIO(receivedDicom)) + # Do before anonymisation in case someone decides to delete the + # Series Description tag as part of anonymisation. + if should_exclude_series(dataset): + orthanc.LogWarning("DICOM instance discarded due to its series description") + return orthanc.ReceivedInstanceAction.DISCARD, None + # Attempt to anonymise and drop the study if any exceptions occur try: dataset = anonymise_dicom(dataset) diff --git a/orthanc/orthanc-raw/plugin/pixl.py b/orthanc/orthanc-raw/plugin/pixl.py index 307952fc3..a9fe62c95 100644 --- a/orthanc/orthanc-raw/plugin/pixl.py +++ b/orthanc/orthanc-raw/plugin/pixl.py @@ -27,12 +27,13 @@ import os import traceback from io import BytesIO -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from core.dicom_tags import DICOM_TAG_PROJECT_NAME, add_private_tag -from pydicom import Dataset, dcmread, dcmwrite +from pydicom import dcmread import orthanc +from pixl_dcmd.main import write_dataset_to_bytes from pixl_dcmd.tagrecording import record_dicom_headers if TYPE_CHECKING: @@ -50,7 +51,17 @@ def OnChange(changeType, level, resourceId): # noqa: ARG001 """ if changeType == orthanc.ChangeType.STABLE_STUDY and should_auto_route(): print("Sending study: %s" % resourceId) # noqa: T201 - orthanc.RestApiPost("/modalities/PIXL-Anon/store", resourceId) + # Although this can throw, since we have nowhere to report errors + # back to (eg. an HTTP client), don't try to handle anything here. + # The client will have to detect that it hasn't happened and retry. + orthanc_anon_store_study(resourceId) + + +def orthanc_anon_store_study(resource_id): + """Call the API to send the specified resource (study) to the orthanc anon server.""" + # RestApiPost raises an orthanc.OrthancException if it fails + orthanc.RestApiPost("/modalities/PIXL-Anon/store", resource_id) + orthanc.LogInfo(f"Successfully sent study to anon modality: {resource_id}") def OnHeartBeat(output, uri, **request): # noqa: ARG001 @@ -82,19 +93,6 @@ def should_auto_route(): return os.environ.get("ORTHANC_AUTOROUTE_RAW_TO_ANON", "false").lower() == "true" -def write_dataset_to_bytes(dataset: Dataset) -> bytes: - """ - Write pydicom DICOM dataset to byte array - - Original from: - https://pydicom.github.io/pydicom/stable/auto_examples/memory_dataset.html - """ - with BytesIO() as buffer: - dcmwrite(buffer, dataset) - buffer.seek(0) - return buffer.read() - - def modify_dicom_tags(receivedDicom: bytes, origin: str) -> Any: """ A new incoming DICOM file needs to have the project name private tag added here, so @@ -107,6 +105,11 @@ def modify_dicom_tags(receivedDicom: bytes, origin: str) -> Any: logger.debug("modify_dicom_tags - doing nothing as change triggered by API") return orthanc.ReceivedInstanceAction.KEEP_AS_IS, None dataset = dcmread(BytesIO(receivedDicom)) + # See the orthanc.json config file for where this tag is given a nickname + # The private block is the first free block >= 0x10. + # We can't directly control it, but the orthanc config requires it to be + # hardcoded to 0x10 + # https://dicom.nema.org/dicom/2013/output/chtml/part05/sect_7.8.html # Add project name as private tag, at this point, the value is unknown private_block = add_private_tag(dataset, DICOM_TAG_PROJECT_NAME) @@ -114,35 +117,66 @@ def modify_dicom_tags(receivedDicom: bytes, origin: str) -> Any: logger.debug( "modify_dicom_tags - added new private block starting at 0x%x", private_block.block_start ) - if not DICOM_TAG_PROJECT_NAME.acceptable_private_block(private_block.block_start >> 8): - print( # noqa: T201 - "ERROR: The private block does not match the value hardcoded in the orthanc " - "config. This can be because there was an unexpected pre-existing private block" - f"in group {DICOM_TAG_PROJECT_NAME.group_id}" - ) return orthanc.ReceivedInstanceAction.MODIFY, write_dataset_to_bytes(dataset) +def log_and_return_http( + output, http_code: int, http_message: str, log_message: Optional[str] = None +): + """ + Log and make an HTTP response in case of success or failure. For failure, log + a stack/exception trace as well. + + :param output: the orthanc output object as given to the callback function + :param http_code: HTTP code to return + :param http_message: message to return in HTTP body + :param log_message: message to log, if different to http_message. + If None, do not log at all if success + """ + http_json_str = json.dumps({"Message": http_message}) + if http_code == 200: # noqa: PLR2004 + if log_message: + orthanc.LogInfo(log_message) + output.AnswerBuffer(http_json_str, "application/json") + else: + orthanc.LogWarning(f"{log_message or http_message}:\n{traceback.format_exc()}") + # length needed in bytes not chars + output.SendHttpStatus(http_code, http_json_str, len(http_json_str.encode())) + + def SendResourceToAnon(output, uri, **request): # noqa: ARG001 """Send an existing study to the anon modality""" orthanc.LogWarning(f"Received request to send study to anon modality: {request}") if not should_auto_route(): - orthanc.LogWarning("Auto-routing is not enabled, dropping request {request}") - output.AnswerBuffer( - json.dumps({"Message": "Auto-routing is not enabled"}), "application/json" + log_and_return_http( + output, + 200, + "Auto-routing is not enabled", + f"Auto-routing is not enabled, dropping request {request}", ) return try: body = json.loads(request["body"]) resource_id = body["ResourceId"] - orthanc.RestApiPost("/modalities/PIXL-Anon/store", resource_id) - output.AnswerBuffer(json.dumps({"Message": "OK"}), "application/json") - orthanc.LogInfo(f"Succesfully sent study to anon modality: {resource_id}") - except: # noqa: E722 - orthanc.LogWarning(f"Failed to send study to anon:\n{traceback.format_exc()}") - output.AnswerBuffer( - json.dumps({"Message": "Failed to send study to anon"}), "application/json" - ) + except (json.decoder.JSONDecodeError, KeyError): + err_str = "Body needs to be JSON with key ResourceId" + log_and_return_http(output, 400, err_str) + except: + err_str = "Other error decoding request" + log_and_return_http(output, 500, err_str) + raise + + try: + orthanc_anon_store_study(resource_id) + except orthanc.OrthancException: + err_str = "Failed contacting downstream server" + log_and_return_http(output, 502, err_str) + except: + err_str = "Misc error sending study to anon" + log_and_return_http(output, 500, err_str) + raise + else: + log_and_return_http(output, 200, "OK") orthanc.RegisterOnChangeCallback(OnChange) diff --git a/pixl_core/src/core/dicom_tags.py b/pixl_core/src/core/dicom_tags.py index ef9dab989..6f370481a 100644 --- a/pixl_core/src/core/dicom_tags.py +++ b/pixl_core/src/core/dicom_tags.py @@ -26,6 +26,8 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Optional +from pydicom import Dataset + if TYPE_CHECKING: from pydicom.dataset import Dataset, PrivateBlock @@ -41,6 +43,8 @@ class PrivateDicomTag: it's not 0x10. """ + PLACEHOLDER_VALUE = "__pixl_unknown_value__" + group_id: int offset_id: int required_private_block: int @@ -70,26 +74,38 @@ def acceptable_private_block(self, actual_private_block: int) -> bool: offset_id=0x01, creator_string="UCLH PIXL", tag_nickname="UCLHPIXLProjectName", - vr="LO", + vr="LO", # LO = Long string max 64 unknown_value="__pixl_unknown_value__", ) -def add_private_tag(dataset: Dataset, private_tag: PrivateDicomTag) -> PrivateBlock: +def add_private_tag( + dataset: Dataset, private_tag: PrivateDicomTag, value: Optional[str] = None +) -> PrivateBlock: """ Add a private tag to an existing DICOM dataset. This uses pydicom.Dataset.private_block - :param ds: The DICOM dataset to add the private tags to. - :type ds: pydicom.Dataset + :param dataset: The DICOM dataset to add the private tags to. + :type dataset: pydicom.Dataset :param private_tag: A custom tag to add to the DICOM dataset. + :param value: Optional value string. If None, use the default placeholder value. """ private_block = dataset.private_block( private_tag.group_id, private_tag.creator_string, create=True ) - private_block.add_new(private_tag.offset_id, private_tag.vr, private_tag.unknown_value) + if value is None: + value = private_tag.unknown_value + private_block.add_new(private_tag.offset_id, private_tag.vr, value) + if not private_tag.acceptable_private_block(private_block.block_start >> 8): + err_str = ( + "The private block does not match the value hardcoded in the orthanc " + "config. This can be because there was an unexpected pre-existing private block " + f"in group {private_tag.group_id}" + ) + raise RuntimeError(err_str) return private_block diff --git a/pixl_core/src/core/project_config/pixl_config_model.py b/pixl_core/src/core/project_config/pixl_config_model.py index 70955f2de..5dd0664b9 100644 --- a/pixl_core/src/core/project_config/pixl_config_model.py +++ b/pixl_core/src/core/project_config/pixl_config_model.py @@ -125,5 +125,20 @@ class PixlConfig(BaseModel): """Project-specific configuration for Pixl.""" project: _Project + series_filters: Optional[list[str]] = None tag_operation_files: TagOperationFiles destination: _Destination + + def is_series_excluded(self, series_description: str) -> bool: + """ + Return whether this config excludes the series with the given description + :param series_description: the series description to test + :returns: True if it should be excluded, False if not + """ + if self.series_filters is None: + return False + # Do a simple case-insensitive substring check - this data is ultimately typed by a human, + # and different image sources may have different conventions for case conversion. + return any( + series_description.upper().find(filt.upper()) != -1 for filt in self.series_filters + ) diff --git a/pixl_core/tests/test_project_config.py b/pixl_core/tests/test_project_config.py index 44bc4b08c..ba415cb71 100644 --- a/pixl_core/tests/test_project_config.py +++ b/pixl_core/tests/test_project_config.py @@ -135,3 +135,39 @@ def test_invalid_base_tags_fails(invalid_base_tags): """Test that invalid base tags raise an error.""" with pytest.raises(ValidationError): load_tag_operations(invalid_base_tags) + + +FILTER_SET_0 = None +FILTER_SET_1 = [] +FILTER_SET_2 = ["nak", "Badg"] +FILTER_SET_BROKEN = ["", "Badg"] + + +@pytest.mark.parametrize( + ("series_filters", "test_series_desc", "expect_exclude"), + [ + # Missing or empty filter set: allow everything + (FILTER_SET_0, "Snake", False), + (FILTER_SET_0, "Badger", False), + (FILTER_SET_0, "Mushroom", False), + (FILTER_SET_1, "Snake", False), + (FILTER_SET_1, "Badger", False), + (FILTER_SET_1, "Mushroom", False), + # A non-empty filter set, a match to any in the set means exclude + (FILTER_SET_2, "Snake", True), + (FILTER_SET_2, "Badger", True), + (FILTER_SET_2, "Mushroom", False), + # And then some weird cases. + # Empty series string never gets excluded + (FILTER_SET_2, "", False), + # Empty exclude string matches everything - not ideal but let's fix it when we decide + # what to do about regexes etc. + (FILTER_SET_BROKEN, "Mushroom", True), + ], +) +def test_series_filtering(base_yaml_data, series_filters, test_series_desc, expect_exclude): + """Check that series filters work, including some edge cases. No regexes yet.""" + if series_filters is not None: + base_yaml_data["series_filters"] = series_filters + cfg = PixlConfig.model_validate(base_yaml_data) + assert cfg.is_series_excluded(test_series_desc) == expect_exclude diff --git a/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py b/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py new file mode 100644 index 000000000..5f2a54867 --- /dev/null +++ b/pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py @@ -0,0 +1,39 @@ +# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Helper functions for DICOM data.""" + +from __future__ import annotations + +from pydicom import Dataset +from logging import getLogger + +from core.dicom_tags import DICOM_TAG_PROJECT_NAME + +logger = getLogger(__name__) + + +def get_project_name_as_string(dataset: Dataset) -> str: + raw_slug = dataset.get_private_item( + DICOM_TAG_PROJECT_NAME.group_id, + DICOM_TAG_PROJECT_NAME.offset_id, + DICOM_TAG_PROJECT_NAME.creator_string, + ).value + # Get both strings and bytes, which is fun + if isinstance(raw_slug, bytes): + logger.debug(f"Bytes slug {raw_slug!r}") + slug = raw_slug.decode("utf-8").strip() + else: + logger.debug(f"String slug '{raw_slug}'") + slug = raw_slug + return slug diff --git a/pixl_dcmd/src/pixl_dcmd/main.py b/pixl_dcmd/src/pixl_dcmd/main.py index caf8b4058..ffdc8cff2 100644 --- a/pixl_dcmd/src/pixl_dcmd/main.py +++ b/pixl_dcmd/src/pixl_dcmd/main.py @@ -21,13 +21,12 @@ from core.exceptions import PixlSkipMessageError from core.project_config import load_project_config -from core.dicom_tags import DICOM_TAG_PROJECT_NAME - import requests from core.project_config import load_tag_operations from decouple import config from pydicom import Dataset, dcmwrite +from pixl_dcmd._dicom_helpers import get_project_name_as_string from pixl_dcmd._tag_schemes import merge_tag_schemes from pixl_dcmd._database import add_hashed_identifier_and_save, query_db from pixl_dcmd._datetime import combine_date_time, format_date_time @@ -51,6 +50,17 @@ def write_dataset_to_bytes(dataset: Dataset) -> bytes: return buffer.read() +def should_exclude_series(dataset: Dataset) -> bool: + slug = get_project_name_as_string(dataset) + + series_description = dataset.get("SeriesDescription") + cfg = load_project_config(slug) + if cfg.is_series_excluded(series_description): + logger.warning("FILTERING OUT series description: %s", series_description) + return True + return False + + def anonymise_dicom(dataset: Dataset) -> Dataset: """ Anonymises a DICOM dataset as Received by Orthanc. @@ -61,18 +71,7 @@ def anonymise_dicom(dataset: Dataset) -> Dataset: - applying tag operations based on the config file Returns anonymised dataset. """ - raw_slug = dataset.get_private_item( - DICOM_TAG_PROJECT_NAME.group_id, - DICOM_TAG_PROJECT_NAME.offset_id, - DICOM_TAG_PROJECT_NAME.creator_string, - ).value - # Get both strings and bytes, which is fun - if isinstance(raw_slug, bytes): - logger.debug(f"Bytes slug {raw_slug!r}") - slug = raw_slug.decode("utf-8").strip() - else: - logger.debug(f"String slug '{raw_slug}'") - slug = raw_slug + slug = get_project_name_as_string(dataset) project_config = load_project_config(slug) logger.debug(f"Received instance for project {slug}") diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 5b7533680..717796335 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -30,15 +30,19 @@ ) from core.project_config import load_project_config, load_tag_operations from decouple import config +from pydicom.data import get_testdata_file + +from pydicom.dataset import Dataset + +from pytest_pixl.dicom import generate_dicom_dataset +from pytest_pixl.helpers import run_subprocess + from pixl_dcmd.main import ( anonymise_dicom, apply_tag_scheme, remove_overlays, + should_exclude_series, ) -from pydicom.data import get_testdata_file -from pydicom.dataset import Dataset -from pytest_pixl.dicom import generate_dicom_dataset -from pytest_pixl.helpers import run_subprocess PROJECT_CONFIGS_DIR = Path(config("PROJECT_CONFIGS_DIR")) TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm" @@ -207,6 +211,43 @@ def test_pseudo_identifier_processing(rows_in_session, tag_scheme): assert image.hashed_identifier == fake_hash +@pytest.fixture() +def dicom_series_to_keep() -> list[Dataset]: + series = [ + "", + "whatever", + ] + return [_make_dicom(s) for s in series] + + +@pytest.fixture() +def dicom_series_to_exclude() -> list[Dataset]: + series = [ + "positioning", + "foo_barpositioning", + "positioningla", + "scout", + "localiser", + "localizer", + # Matching should be case insensitive + "lOcALIsER", + ] + return [_make_dicom(s) for s in series] + + +def _make_dicom(series_description) -> Dataset: + ds = generate_dicom_dataset(SeriesDescription=series_description) + add_private_tag(ds, DICOM_TAG_PROJECT_NAME, "test-extract-uclh-omop-cdm") + return ds + + +def test_should_exclude_series(dicom_series_to_exclude, dicom_series_to_keep): + for s in dicom_series_to_keep: + assert not should_exclude_series(s) + for s in dicom_series_to_exclude: + assert should_exclude_series(s) + + def test_can_nifti_convert_post_anonymisation( row_for_dicom_testing, tmp_path, directory_of_mri_dicoms, tag_scheme ): diff --git a/pixl_imaging/src/pixl_imaging/_orthanc.py b/pixl_imaging/src/pixl_imaging/_orthanc.py index 4f8d4459d..a86299916 100644 --- a/pixl_imaging/src/pixl_imaging/_orthanc.py +++ b/pixl_imaging/src/pixl_imaging/_orthanc.py @@ -15,7 +15,9 @@ import logging from abc import ABC, abstractmethod +from asyncio import sleep from json import JSONDecodeError +from time import time from typing import Any, Optional import requests @@ -91,6 +93,18 @@ def retrieve_from_remote(self, query_id: str) -> str: ) return str(response["ID"]) + async def wait_for_job_success(self, query_id: str, timeout: float) -> None: + job_id = self.retrieve_from_remote(query_id=query_id) # C-Move + job_state = "Pending" + start_time = time() + + while job_state != "Success": + if (time() - start_time) > timeout: + raise TimeoutError + + await sleep(0.1) + job_state = self.job_state(job_id=job_id) + def job_state(self, job_id: str) -> str: # See: https://book.orthanc-server.com/users/advanced-rest.html#jobs-monitoring return str(self._get(f"/jobs/{job_id}")["State"]) diff --git a/pixl_imaging/src/pixl_imaging/_processing.py b/pixl_imaging/src/pixl_imaging/_processing.py index 852b72d8d..67b0e71a0 100644 --- a/pixl_imaging/src/pixl_imaging/_processing.py +++ b/pixl_imaging/src/pixl_imaging/_processing.py @@ -14,9 +14,7 @@ from __future__ import annotations import logging -from asyncio import sleep from dataclasses import dataclass -from time import time from typing import TYPE_CHECKING, Any from core.dicom_tags import DICOM_TAG_PROJECT_NAME @@ -31,7 +29,11 @@ async def process_message(message: Message) -> None: - """Process message from queue.""" + """ + Process message from queue by retrieving a study with the given Patient and Accession Number. + We may receive multiple messages with same Patient + Acc Num, either as retries or because + they are needed for multiple projects. + """ logger.debug("Processing: %s", message) study = ImagingStudy.from_message(message) @@ -47,21 +49,13 @@ async def process_message(message: Message) -> None: logger.error("Failed to find %s in the VNA", study) raise RuntimeError - # Get image from VNA for patient and accession number - job_id = orthanc_raw.retrieve_from_remote(query_id=query_id) # C-Move - job_state = "Pending" - start_time = time() - - while job_state != "Success": - if (time() - start_time) > config("PIXL_DICOM_TRANSFER_TIMEOUT", cast=float): - msg = ( - f"Failed to transfer {message} within " - f"{config('PIXL_DICOM_TRANSFER_TIMEOUT')} seconds" - ) - raise TimeoutError(msg) - - await sleep(0.1) - job_state = orthanc_raw.job_state(job_id=job_id) + # Wait for query to complete + timeout: float = config("PIXL_DICOM_TRANSFER_TIMEOUT", cast=float) + try: + await orthanc_raw.wait_for_job_success(query_id, timeout) + except TimeoutError as te: + msg = f"Failed to transfer {message} within {timeout} seconds" + raise TimeoutError(msg) from te # Now that instance has arrived in orthanc raw, we can set its project name tag via the API studies_with_tags = orthanc_raw.query_local(study.orthanc_query_dict) @@ -76,13 +70,17 @@ def _update_or_resend_existing_study_( project_name: str, orthanc_raw: PIXLRawOrthanc, study: ImagingStudy ) -> bool: """ - If study exists in orthanc_raw, add project name or send directly to orthanc raw. + If study does not yet exist in orthanc raw, do nothing. + If study exists in orthanc raw and has the wrong project name, update it. + If study exists in orthanc raw and has the correct project name, send to orthanc anon. - Return True if exists, otherwise False. + Return True if study exists in orthanc raw, otherwise False. """ existing_resources = study.query_local(orthanc_raw, project_tag=True) if len(existing_resources) == 0: return False + + # Check whether study already has the correct project name different_project: list[str] = [] for resource in existing_resources: project_tags = ( @@ -91,10 +89,7 @@ def _update_or_resend_existing_study_( "Unknown Tag & Data" ), # Fallback for testing where we're not using the entire plugin, remains undefined ) - try: - if project_name not in project_tags: - different_project.append(resource["ID"]) - except KeyError: + if project_name not in project_tags: different_project.append(resource["ID"]) if different_project: diff --git a/pixl_imaging/tests/docker-compose.yml b/pixl_imaging/tests/docker-compose.yml index 743e0d24b..e84236bb9 100644 --- a/pixl_imaging/tests/docker-compose.yml +++ b/pixl_imaging/tests/docker-compose.yml @@ -64,7 +64,7 @@ services: image: rabbitmq:3.12.9-management healthcheck: test: rabbitmq-diagnostics -q check_running - interval: 5s + interval: 7s timeout: 5s retries: 20 ports: diff --git a/pixl_imaging/tests/test_imaging_processing.py b/pixl_imaging/tests/test_imaging_processing.py index 7a2f5caac..9a152b25c 100644 --- a/pixl_imaging/tests/test_imaging_processing.py +++ b/pixl_imaging/tests/test_imaging_processing.py @@ -16,8 +16,8 @@ from __future__ import annotations import datetime -import os import pathlib +import shlex import pytest from core.patient_queue.message import Message @@ -26,6 +26,7 @@ from pixl_imaging._processing import ImagingStudy, process_message from pydicom import dcmread from pydicom.data import get_testdata_file +from pytest_pixl.helpers import run_subprocess pytest_plugins = ("pytest_asyncio",) @@ -49,9 +50,11 @@ def aet(self) -> str: return "VNAQR" def upload(self, filename: str) -> None: - os.system( - f"curl -u {self._username}:{self._password} " # noqa: S605 - f"-X POST {self._url}/instances --data-binary @{filename}" + run_subprocess( + shlex.split( + f"curl -u {self._username}:{self._password} " + f"-X POST {self._url}/instances --data-binary @{filename}" + ) ) @@ -112,6 +115,7 @@ async def test_existing_message_sent_twice(orthanc_raw) -> None: """ study = ImagingStudy.from_message(message) + assert not study.query_local(orthanc_raw) await process_message(message) assert study.query_local(orthanc_raw) diff --git a/projects/configs/test-extract-uclh-omop-cdm.yaml b/projects/configs/test-extract-uclh-omop-cdm.yaml index cc4685e98..582e1f60a 100644 --- a/projects/configs/test-extract-uclh-omop-cdm.yaml +++ b/projects/configs/test-extract-uclh-omop-cdm.yaml @@ -22,6 +22,12 @@ tag_operation_files: - "test-extract-uclh-omop-cdm.yaml" manufacturer_overrides: "mri-diffusion.yaml" +series_filters: + - "localizer" + - "localiser" + - "scout" + - "positioning" + destination: dicom: "ftps" parquet: "ftps" diff --git a/pytest-pixl/src/pytest_pixl/helpers.py b/pytest-pixl/src/pytest_pixl/helpers.py index 01c2ad37a..cf0299fe5 100644 --- a/pytest-pixl/src/pytest_pixl/helpers.py +++ b/pytest-pixl/src/pytest_pixl/helpers.py @@ -21,13 +21,18 @@ from typing import TYPE_CHECKING, Callable, Optional if TYPE_CHECKING: + from collections.abc import Sequence from pathlib import Path logger = logging.getLogger(__name__) def run_subprocess( - cmd: list[str], working_dir: Optional[Path] = None, *, shell: bool = False, timeout: int = 360 + cmd: Sequence[Path | str], + working_dir: Optional[Path] = None, + *, + shell: bool = False, + timeout: int = 360, ) -> subprocess.CompletedProcess[bytes]: """ Run a command but capture the stderr and stdout better than the CalledProcessError @@ -60,6 +65,7 @@ def wait_for_condition( *, seconds_max: int = 1, seconds_interval: int = 1, + seconds_condition_stays_true_for: Optional[int] = None, progress_string_fn: Optional[Callable[..., str]] = None, ) -> None: """ @@ -68,6 +74,8 @@ def wait_for_condition( the logging output so recommended to name it well. :param seconds_max: maximum seconds to wait for condition to be true :param seconds_interval: time to sleep in between attempts + :param seconds_condition_stays_true_for: if not None, check that the condition is still + true this many seconds after first becoming true :param progress_string_fn: callable to generate a status string (eg. partial success) that will be part of the log message at each attempt :raises AssertionError: if the condition doesn't occur during the specified period @@ -78,6 +86,17 @@ def wait_for_condition( progress_str = ": " + progress_string_fn() if progress_string_fn is not None else "" if success: logger.info("Achieved condition '%s' %s", test_condition.__name__, progress_str) + if seconds_condition_stays_true_for is not None: + # This is intended for the case where data may be dripping in and the correct + # set of data may have been temporarily achieved, only to be joined by some + # incorrect data. So we have the option to check it's stably true. + logger.info( + "Checking that condition '%s' is still true in %s seconds", + test_condition.__name__, + seconds_condition_stays_true_for, + ) + sleep(seconds_condition_stays_true_for) + wait_for_condition(test_condition, progress_string_fn=progress_string_fn) return logger.info( "Waiting for condition '%s' (%s seconds out of %s) %s", diff --git a/test/conftest.py b/test/conftest.py index 85f81b540..b3151ca75 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -13,11 +13,14 @@ # limitations under the License. """System/E2E test setup""" +# ruff: noqa: C408 dict() makes test data easier to read and write from collections.abc import Generator from pathlib import Path from typing import Any import pytest +import requests +from pytest_pixl.dicom import generate_dicom_dataset from pytest_pixl.ftpserver import PixlFTPServer from pytest_pixl.helpers import run_subprocess from pytest_pixl.plugin import FtpHostAddress @@ -37,13 +40,106 @@ def host_export_root_dir() -> Path: RESOURCES_OMOP_DIR = RESOURCES_DIR / "omop" +def _upload_to_vna(image_filename: Path) -> None: + with image_filename.open("rb") as dcm: + data = dcm.read() + requests.post( + "http://localhost:8043/instances", + auth=("orthanc", "orthanc"), + data=data, + timeout=60, + ) + + +@pytest.fixture(scope="session") +def _populate_vna(tmp_path_factory: pytest.TempPathFactory) -> None: + dicom_dir = tmp_path_factory.mktemp("dicom_series") + # more detailed series testing is found in pixl_dcmd tests, but here + # we just stick an instance to each study, one of which is expected to be propagated through + + # studies are also defined by the StudyID, the StudyInstanceUID + def study_instance_uid(offset: int) -> dict[str, str]: + baseline = "1.3.46.670589.11.38023.5.0.14068.2023012517090166000" + offset_str = f"{offset:04d}" + return dict(StudyInstanceUID=baseline[: -len(offset_str)] + offset_str) + + def series_instance_uid(offset: int) -> dict[str, str]: + baseline = "1.3.46.670589.11.38023.5.0.7404.2023012517551898153" + offset_str = f"{offset:04d}" + return dict(SeriesInstanceUID=baseline[: -len(offset_str)] + offset_str) + + def sop_instance_uid(offset: int) -> dict[str, str]: + baseline = "1.3.46.670589.11.38023.5.0.7404.2023012517580650156" + offset_str = f"{offset:04d}" + return dict(SOPInstanceUID=baseline[: -len(offset_str)] + offset_str) + + study_1 = dict( + AccessionNumber="AA12345601", + PatientID="987654321", + StudyID="12340001", + **study_instance_uid(1), + ) + study_2 = dict( + AccessionNumber="AA12345605", + PatientID="987654321", + StudyID="12340002", + **study_instance_uid(2), + ) + + # Series are also defined by the SeriesInstanceUID and SeriesNumber. + # SeriesNumber doesn't have to be globally unique, only within a study, + # however I'm assuming SeriesInstanceUID does. So that must be generated when + # a series is attached to a study. + series_0 = dict(SeriesDescription="AP", SeriesNumber=900, Modality="DX") + series_1 = dict(SeriesDescription="include123", SeriesNumber=901, Modality="DX") + # excluded by modality filter + series_exclude_2 = dict(SeriesDescription="exclude123", SeriesNumber=902, Modality="MR") + # excluded by series description + series_exclude_3 = dict(SeriesDescription="positioning", SeriesNumber=903, Modality="DX") + + # instances are also defined by the SOPInstanceUID + + # to replace Dicom1.dcm + instance_0 = dict(**study_1, **series_0, **series_instance_uid(0), **sop_instance_uid(0)) + + instance_1 = dict(**study_1, **series_1, **series_instance_uid(1), **sop_instance_uid(1)) + instance_2 = dict( + **study_1, **series_exclude_2, **series_instance_uid(2), **sop_instance_uid(2) + ) + instance_3 = dict( + **study_1, **series_exclude_3, **series_instance_uid(3), **sop_instance_uid(3) + ) + instance_4 = dict(**study_2, **series_1, **series_instance_uid(4), **sop_instance_uid(4)) + instance_5 = dict( + **study_2, **series_exclude_3, **series_instance_uid(5), **sop_instance_uid(5) + ) + + _upload_dicom_instance(dicom_dir, **instance_0) + _upload_dicom_instance(dicom_dir, **instance_1) + _upload_dicom_instance(dicom_dir, **instance_2) + _upload_dicom_instance(dicom_dir, **instance_3) + _upload_dicom_instance(dicom_dir, **instance_4) + _upload_dicom_instance(dicom_dir, **instance_5) + + +def _upload_dicom_instance(dicom_dir: Path, **kwargs: Any) -> None: + ds = generate_dicom_dataset(**kwargs) + test_dcm_file = ( + dicom_dir + / f"{kwargs['PatientID']}_{kwargs['AccessionNumber']}_{kwargs['SeriesDescription']}.dcm" + ) + ds.save_as(str(test_dcm_file), write_like_original=False) + # I think we can skip writing to disk! + _upload_to_vna(test_dcm_file) + + @pytest.fixture(scope="session") -def _setup_pixl_cli(ftps_server: PixlFTPServer) -> Generator: +def _setup_pixl_cli(ftps_server: PixlFTPServer, _populate_vna: None) -> Generator: """Run pixl populate/start. Cleanup intermediate export dir on exit.""" # CLI calls need to have CWD = test dir so they can find the pixl_config.yml file run_subprocess(["pixl", "populate", str(RESOURCES_OMOP_DIR.absolute())], TEST_DIR) # poll here for two minutes to check for imaging to be processed, printing progress - wait_for_stable_orthanc_anon(121, 5) + wait_for_stable_orthanc_anon(121, 5, 15) yield run_subprocess( [ diff --git a/test/scripts/insert_test_data.sh b/test/scripts/insert_test_data.sh index 29ddb70b8..807b7e57d 100755 --- a/test/scripts/insert_test_data.sh +++ b/test/scripts/insert_test_data.sh @@ -38,11 +38,3 @@ insert into star.lab_result(lab_result_id, lab_order_id, lab_test_definition_id, " docker exec system-test-fake-star-db /bin/bash -c "psql -U postgres -d emap -c \"$_sql_command\"" - -# Uses an accession number of "AA12345601" for MRN 987654321 -curl -X POST -u "orthanc:orthanc" "http://localhost:8043/instances" \ - --data-binary @"$SCRIPT_DIR/../resources/Dicom1.dcm" -# Uses an accession number of "AA12345605" for MRN 987654321, already has project name added -# Send to orthanc raw to ensure that we can resend an existing message without querying VNA again -curl -X POST -u "orthanc_raw_username:orthanc_raw_password" "http://localhost:7005/instances" \ - --data-binary @"$SCRIPT_DIR/../resources/Dicom2.dcm" diff --git a/test/system_test.py b/test/system_test.py index 5da4dcff8..bf7e63d46 100644 --- a/test/system_test.py +++ b/test/system_test.py @@ -72,14 +72,14 @@ def test_ftps_parquet_upload(self) -> None: @pytest.mark.usefixtures("_extract_radiology_reports") def test_ftps_dicom_upload(self, tmp_path_factory: pytest.TempPathFactory) -> None: """Test whether DICOM images have been uploaded""" - zip_files: list[str] = [] + zip_files: list[Path] = [] def zip_file_list() -> str: return f"zip files found: {zip_files}" def two_zip_files_present() -> bool: nonlocal zip_files - zip_files = [str(x) for x in TestFtpsUpload.expected_output_dir.glob("*.zip")] + zip_files = list(TestFtpsUpload.expected_output_dir.glob("*.zip")) # We expect 2 DICOM image studies to be uploaded return len(zip_files) == 2 @@ -87,30 +87,64 @@ def two_zip_files_present() -> bool: two_zip_files_present, seconds_max=121, seconds_interval=5, + seconds_condition_stays_true_for=15, progress_string_fn=zip_file_list, ) - + expected_studies = { + "a971b114b9133c81c03fb88c6a958f7d95eb1387f04c17ad7ff9ba7cf684c392": { + # tuple made up of (AccessionNumber, SeriesDescription) + # hash of AA12345601 + ("ad630a8a84d72d71", "include123"), + ("ad630a8a84d72d71", "AP"), + }, + "f71b228fa97d6c87db751e0bb35605fd9d4c1274834be4bc4bb0923ab8029b2a": { + # hash of AA12345605, + ("c2f4b59b0291c6fe", "include123"), + }, + } assert zip_files for z in zip_files: unzip_dir = tmp_path_factory.mktemp("unzip_dir", numbered=True) - self._check_dcm_tags_from_zip(z, unzip_dir) + self._check_dcm_tags_from_zip(z, unzip_dir, expected_studies) - def _check_dcm_tags_from_zip(self, zip_path: str, unzip_dir: Path) -> None: + def _check_dcm_tags_from_zip( + self, zip_path: Path, unzip_dir: Path, expected_studies: dict[str, set[tuple[str, str]]] + ) -> None: """Check that private tag has survived anonymisation with the correct value.""" + expected_instances = expected_studies[zip_path.stem] run_subprocess( ["unzip", zip_path], working_dir=unzip_dir, ) - all_dicom = list(unzip_dir.rglob("*.dcm")) - assert len(all_dicom) == 1 - dcm = pydicom.dcmread(all_dicom[0]) - block = dcm.private_block( - DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string - ) - tag_offset = DICOM_TAG_PROJECT_NAME.offset_id - private_tag = block[tag_offset] - assert private_tag is not None - assert private_tag.value == TestFtpsUpload.project_slug + dicom_in_zip = list(unzip_dir.rglob("*.dcm")) + + # One zip file == one study. + # There can be multiple instances in the zip file, one per file + logging.info("In zip file, %s DICOM files: %s", len(dicom_in_zip), dicom_in_zip) + actual_instances = set() + for dcm_file in dicom_in_zip: + dcm = pydicom.dcmread(dcm_file) + # The actual dicom filename and dir structure isn't checked - should it be? + assert dcm.get("PatientID") == zip_path.stem # PatientID stores study id post anon + actual_instances.add((dcm.get("AccessionNumber"), dcm.get("SeriesDescription"))) + block = dcm.private_block( + DICOM_TAG_PROJECT_NAME.group_id, DICOM_TAG_PROJECT_NAME.creator_string + ) + tag_offset = DICOM_TAG_PROJECT_NAME.offset_id + private_tag = block[tag_offset] + assert private_tag is not None + if isinstance(private_tag.value, bytes): + # Allow this for the time being, until it has been investigated + # See https://github.com/UCLH-Foundry/PIXL/issues/363 + logging.error( + "TEMPORARILY IGNORE: tag value %s should be of type str, but is of type bytes", + private_tag.value, + ) + assert private_tag.value.decode() == TestFtpsUpload.project_slug + else: + assert private_tag.value == TestFtpsUpload.project_slug + # check the basic info about the instances exactly matches + assert actual_instances == expected_instances @pytest.mark.usefixtures("_setup_pixl_cli") diff --git a/test/utils.py b/test/utils.py index 8db48ca78..307fc16a4 100644 --- a/test/utils.py +++ b/test/utils.py @@ -21,7 +21,9 @@ from pytest_pixl.helpers import wait_for_condition -def wait_for_stable_orthanc_anon(seconds_max: int, seconds_interval: int) -> None: +def wait_for_stable_orthanc_anon( + seconds_max: int, seconds_interval: int, seconds_condition_stays_true_for: int +) -> None: """ Query the orthanc-anon REST API to check that the correct number of instances have been received. @@ -29,7 +31,7 @@ def wait_for_stable_orthanc_anon(seconds_max: int, seconds_interval: int) -> Non """ instances = [] - def are_two_instances() -> bool: + def are_three_instances() -> bool: nonlocal instances instances_cmd = shlex.split( "docker exec system-test-orthanc-anon-1 " @@ -38,14 +40,15 @@ def are_two_instances() -> bool: ) instances_output = subprocess.run(instances_cmd, capture_output=True, check=True, text=True) # noqa: S603 instances = json.loads(instances_output.stdout) - return len(instances) == 2 + return len(instances) == 3 def list_instances() -> str: return f"orthanc-anon instances: {instances}" wait_for_condition( - are_two_instances, + are_three_instances, seconds_max=seconds_max, seconds_interval=seconds_interval, progress_string_fn=list_instances, + seconds_condition_stays_true_for=seconds_condition_stays_true_for, )