Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Series filtering out based on description string #351

Merged
merged 34 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a8aa826
Move placeholder value to central location
jeremyestein Mar 20, 2024
397d658
Return proper HTTP errors inside orthanc raw "/send-to-anon" API call
jeremyestein Mar 21, 2024
ab6371c
Check error code from shell - should use requests API but never mind
jeremyestein Mar 21, 2024
589ee29
Refactor waiting for query into orthanc code
jeremyestein Mar 22, 2024
855f371
Merge branch 'main' into jeremy/series-filtering
jeremyestein Mar 22, 2024
2aea37b
The "correct" test data wasn't being validated to ensure it was actually
jeremyestein Mar 22, 2024
0efb732
Merge branch 'jeremy/config-fixes' into jeremy/series-filtering
jeremyestein Mar 22, 2024
a894f13
Remove unnecessary try except
jeremyestein Mar 25, 2024
1916a3a
Merge branch 'main' into jeremy/series-filtering
jeremyestein Mar 25, 2024
61520d5
Add explanatory comments
jeremyestein Mar 27, 2024
31d34f5
Move private tag creation code to library
jeremyestein Mar 27, 2024
3d1d7ba
Merge branch 'main' into jeremy/series-filtering
jeremyestein Mar 27, 2024
a0afd82
Merge branch 'main' into jeremy/series-filtering
jeremyestein Mar 27, 2024
de4356b
Reuse existing method
jeremyestein Mar 27, 2024
1997d6a
Add series filtering feature and some unit tests (system tests to come)
jeremyestein Mar 27, 2024
958e3b6
Speculative fix for occasional rabbitmq crashes
jeremyestein Mar 27, 2024
be7e094
I think this check got missed
jeremyestein Mar 27, 2024
0ce17ca
First attempt at a system test. The data is not behaving though.
jeremyestein Mar 27, 2024
706c4d4
The fake VNA was not seeing the test instances as part of the same
jeremyestein Mar 28, 2024
a6f5a85
Modality was causing some test instances to get filtered out. Also de…
jeremyestein Mar 28, 2024
684855a
Tidy up system test and move dicom data set up from shell script to
jeremyestein Apr 9, 2024
6702a3d
Merge branch 'main' into jeremy/series-filtering
jeremyestein Apr 9, 2024
4a05bf5
Fix the merge; de-dupe the VR field
jeremyestein Apr 10, 2024
a2a1f51
Some typing fixes needed now that we have mypy enabled
jeremyestein Apr 10, 2024
6625174
Merge branch 'main' into jeremy/series-filtering
jeremyestein Apr 10, 2024
a844cee
Pytest classes cannot have an __init__ method; several tests were being
jeremyestein Apr 10, 2024
6e4b75a
Merge branch 'jeremy/fix-system-test-class' into jeremy/series-filtering
jeremyestein Apr 10, 2024
ad4aaa4
Return accurate content type
jeremyestein Apr 10, 2024
493fd66
Make series description test case-insensitive
jeremyestein Apr 10, 2024
21f7f1b
Remove defunct comment
jeremyestein Apr 10, 2024
7e9bd41
Tidy up logging
jeremyestein Apr 10, 2024
0eec011
Merge branch 'main' into jeremy/series-filtering
jeremyestein Apr 10, 2024
debf08c
Get the media type actually right this time.
jeremyestein Apr 10, 2024
2d71237
Move method to utility module
jeremyestein Apr 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion orthanc/orthanc-anon/plugin/pixl.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from pydicom import dcmread

import orthanc
from pixl_dcmd.main import anonymise_dicom, write_dataset_to_bytes
from pixl_dcmd.main import anonymise_dicom, should_exclude_series, write_dataset_to_bytes

if TYPE_CHECKING:
from typing import Any
Expand Down Expand Up @@ -281,6 +281,12 @@ def ReceivedInstanceCallback(receivedDicom: bytes, origin: str) -> Any:
# Read the bytes as DICOM/
dataset = dcmread(BytesIO(receivedDicom))

# Do before anonymisation in case someone decides to delete the
# Series Description tag as part of anonymisation.
if should_exclude_series(dataset):
orthanc.LogWarning("DICOM instance discarded due to its series description")
return orthanc.ReceivedInstanceAction.DISCARD, None

# Attempt to anonymise and drop the study if any exceptions occur
try:
dataset = anonymise_dicom(dataset)
Expand Down
100 changes: 67 additions & 33 deletions orthanc/orthanc-raw/plugin/pixl.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@
import os
import traceback
from io import BytesIO
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Optional

from core.dicom_tags import DICOM_TAG_PROJECT_NAME, add_private_tag
from pydicom import Dataset, dcmread, dcmwrite
from pydicom import dcmread

import orthanc
from pixl_dcmd.main import write_dataset_to_bytes
from pixl_dcmd.tagrecording import record_dicom_headers

if TYPE_CHECKING:
Expand All @@ -50,7 +51,17 @@ def OnChange(changeType, level, resourceId): # noqa: ARG001
"""
if changeType == orthanc.ChangeType.STABLE_STUDY and should_auto_route():
print("Sending study: %s" % resourceId) # noqa: T201
orthanc.RestApiPost("/modalities/PIXL-Anon/store", resourceId)
# Although this can throw, since we have nowhere to report errors
# back to (eg. an HTTP client), don't try to handle anything here.
# The client will have to detect that it hasn't happened and retry.
orthanc_anon_store_study(resourceId)


def orthanc_anon_store_study(resource_id):
"""Call the API to send the specified resource (study) to the orthanc anon server."""
# RestApiPost raises an orthanc.OrthancException if it fails
orthanc.RestApiPost("/modalities/PIXL-Anon/store", resource_id)
orthanc.LogInfo(f"Successfully sent study to anon modality: {resource_id}")


def OnHeartBeat(output, uri, **request): # noqa: ARG001
Expand Down Expand Up @@ -82,19 +93,6 @@ def should_auto_route():
return os.environ.get("ORTHANC_AUTOROUTE_RAW_TO_ANON", "false").lower() == "true"


def write_dataset_to_bytes(dataset: Dataset) -> bytes:
"""
Write pydicom DICOM dataset to byte array

Original from:
https://pydicom.github.io/pydicom/stable/auto_examples/memory_dataset.html
"""
with BytesIO() as buffer:
dcmwrite(buffer, dataset)
buffer.seek(0)
return buffer.read()


def modify_dicom_tags(receivedDicom: bytes, origin: str) -> Any:
"""
A new incoming DICOM file needs to have the project name private tag added here, so
Expand All @@ -107,42 +105,78 @@ def modify_dicom_tags(receivedDicom: bytes, origin: str) -> Any:
logger.debug("modify_dicom_tags - doing nothing as change triggered by API")
return orthanc.ReceivedInstanceAction.KEEP_AS_IS, None
dataset = dcmread(BytesIO(receivedDicom))
# See the orthanc.json config file for where this tag is given a nickname
# The private block is the first free block >= 0x10.
# We can't directly control it, but the orthanc config requires it to be
# hardcoded to 0x10
# https://dicom.nema.org/dicom/2013/output/chtml/part05/sect_7.8.html

# Add project name as private tag, at this point, the value is unknown
private_block = add_private_tag(dataset, DICOM_TAG_PROJECT_NAME)

logger.debug(
"modify_dicom_tags - added new private block starting at 0x%x", private_block.block_start
)
if not DICOM_TAG_PROJECT_NAME.acceptable_private_block(private_block.block_start >> 8):
print( # noqa: T201
"ERROR: The private block does not match the value hardcoded in the orthanc "
"config. This can be because there was an unexpected pre-existing private block"
f"in group {DICOM_TAG_PROJECT_NAME.group_id}"
)
return orthanc.ReceivedInstanceAction.MODIFY, write_dataset_to_bytes(dataset)


def log_and_return_http(
output, http_code: int, http_message: str, log_message: Optional[str] = None
):
"""
Log and make an HTTP response in case of success or failure. For failure, log
a stack/exception trace as well.

:param output: the orthanc output object as given to the callback function
:param http_code: HTTP code to return
:param http_message: message to return in HTTP body
:param log_message: message to log, if different to http_message.
If None, do not log at all if success
"""
http_json_str = json.dumps({"Message": http_message})
if http_code == 200: # noqa: PLR2004
if log_message:
orthanc.LogInfo(log_message)
output.AnswerBuffer(http_json_str, "application/json")
else:
orthanc.LogWarning(f"{log_message or http_message}:\n{traceback.format_exc()}")
# length needed in bytes not chars
output.SendHttpStatus(http_code, http_json_str, len(http_json_str.encode()))


def SendResourceToAnon(output, uri, **request): # noqa: ARG001
"""Send an existing study to the anon modality"""
orthanc.LogWarning(f"Received request to send study to anon modality: {request}")
if not should_auto_route():
orthanc.LogWarning("Auto-routing is not enabled, dropping request {request}")
output.AnswerBuffer(
json.dumps({"Message": "Auto-routing is not enabled"}), "application/json"
log_and_return_http(
output,
200,
"Auto-routing is not enabled",
f"Auto-routing is not enabled, dropping request {request}",
)
return
try:
body = json.loads(request["body"])
resource_id = body["ResourceId"]
orthanc.RestApiPost("/modalities/PIXL-Anon/store", resource_id)
output.AnswerBuffer(json.dumps({"Message": "OK"}), "application/json")
orthanc.LogInfo(f"Succesfully sent study to anon modality: {resource_id}")
except: # noqa: E722
orthanc.LogWarning(f"Failed to send study to anon:\n{traceback.format_exc()}")
output.AnswerBuffer(
json.dumps({"Message": "Failed to send study to anon"}), "application/json"
)
except (json.decoder.JSONDecodeError, KeyError):
err_str = "Body needs to be JSON with key ResourceId"
log_and_return_http(output, 400, err_str)
except:
err_str = "Other error decoding request"
log_and_return_http(output, 500, err_str)
raise

try:
orthanc_anon_store_study(resource_id)
except orthanc.OrthancException:
err_str = "Failed contacting downstream server"
log_and_return_http(output, 502, err_str)
except:
err_str = "Misc error sending study to anon"
log_and_return_http(output, 500, err_str)
raise
else:
log_and_return_http(output, 200, "OK")


orthanc.RegisterOnChangeCallback(OnChange)
Expand Down
26 changes: 21 additions & 5 deletions pixl_core/src/core/dicom_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Optional

from pydicom import Dataset

if TYPE_CHECKING:
from pydicom.dataset import Dataset, PrivateBlock

Expand All @@ -41,6 +43,8 @@ class PrivateDicomTag:
it's not 0x10.
"""

PLACEHOLDER_VALUE = "__pixl_unknown_value__"

group_id: int
offset_id: int
required_private_block: int
Expand Down Expand Up @@ -70,26 +74,38 @@ def acceptable_private_block(self, actual_private_block: int) -> bool:
offset_id=0x01,
creator_string="UCLH PIXL",
tag_nickname="UCLHPIXLProjectName",
vr="LO",
vr="LO", # LO = Long string max 64
unknown_value="__pixl_unknown_value__",
)


def add_private_tag(dataset: Dataset, private_tag: PrivateDicomTag) -> PrivateBlock:
def add_private_tag(
dataset: Dataset, private_tag: PrivateDicomTag, value: Optional[str] = None
) -> PrivateBlock:
"""
Add a private tag to an existing DICOM dataset.

This uses pydicom.Dataset.private_block

:param ds: The DICOM dataset to add the private tags to.
:type ds: pydicom.Dataset
:param dataset: The DICOM dataset to add the private tags to.
:type dataset: pydicom.Dataset
:param private_tag: A custom tag to add to the DICOM dataset.
:param value: Optional value string. If None, use the default placeholder value.

"""
private_block = dataset.private_block(
private_tag.group_id, private_tag.creator_string, create=True
)
private_block.add_new(private_tag.offset_id, private_tag.vr, private_tag.unknown_value)
if value is None:
value = private_tag.unknown_value
private_block.add_new(private_tag.offset_id, private_tag.vr, value)
if not private_tag.acceptable_private_block(private_block.block_start >> 8):
err_str = (
"The private block does not match the value hardcoded in the orthanc "
"config. This can be because there was an unexpected pre-existing private block "
f"in group {private_tag.group_id}"
)
raise RuntimeError(err_str)
return private_block


Expand Down
15 changes: 15 additions & 0 deletions pixl_core/src/core/project_config/pixl_config_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,20 @@ class PixlConfig(BaseModel):
"""Project-specific configuration for Pixl."""

project: _Project
series_filters: Optional[list[str]] = None
tag_operation_files: TagOperationFiles
destination: _Destination

def is_series_excluded(self, series_description: str) -> bool:
"""
Return whether this config excludes the series with the given description
:param series_description: the series description to test
:returns: True if it should be excluded, False if not
"""
if self.series_filters is None:
return False
# Do a simple case-insensitive substring check - this data is ultimately typed by a human,
# and different image sources may have different conventions for case conversion.
return any(
series_description.upper().find(filt.upper()) != -1 for filt in self.series_filters
)
36 changes: 36 additions & 0 deletions pixl_core/tests/test_project_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,39 @@ def test_invalid_base_tags_fails(invalid_base_tags):
"""Test that invalid base tags raise an error."""
with pytest.raises(ValidationError):
load_tag_operations(invalid_base_tags)


FILTER_SET_0 = None
FILTER_SET_1 = []
FILTER_SET_2 = ["nak", "Badg"]
FILTER_SET_BROKEN = ["", "Badg"]


@pytest.mark.parametrize(
("series_filters", "test_series_desc", "expect_exclude"),
[
# Missing or empty filter set: allow everything
(FILTER_SET_0, "Snake", False),
(FILTER_SET_0, "Badger", False),
(FILTER_SET_0, "Mushroom", False),
(FILTER_SET_1, "Snake", False),
(FILTER_SET_1, "Badger", False),
(FILTER_SET_1, "Mushroom", False),
# A non-empty filter set, a match to any in the set means exclude
(FILTER_SET_2, "Snake", True),
(FILTER_SET_2, "Badger", True),
(FILTER_SET_2, "Mushroom", False),
# And then some weird cases.
# Empty series string never gets excluded
(FILTER_SET_2, "", False),
# Empty exclude string matches everything - not ideal but let's fix it when we decide
# what to do about regexes etc.
(FILTER_SET_BROKEN, "Mushroom", True),
],
)
def test_series_filtering(base_yaml_data, series_filters, test_series_desc, expect_exclude):
"""Check that series filters work, including some edge cases. No regexes yet."""
if series_filters is not None:
base_yaml_data["series_filters"] = series_filters
cfg = PixlConfig.model_validate(base_yaml_data)
assert cfg.is_series_excluded(test_series_desc) == expect_exclude
39 changes: 39 additions & 0 deletions pixl_dcmd/src/pixl_dcmd/_dicom_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright (c) 2022 University College London Hospitals NHS Foundation Trust
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions for DICOM data."""

from __future__ import annotations

from pydicom import Dataset
from logging import getLogger

from core.dicom_tags import DICOM_TAG_PROJECT_NAME

logger = getLogger(__name__)


def get_project_name_as_string(dataset: Dataset) -> str:
raw_slug = dataset.get_private_item(
DICOM_TAG_PROJECT_NAME.group_id,
DICOM_TAG_PROJECT_NAME.offset_id,
DICOM_TAG_PROJECT_NAME.creator_string,
).value
# Get both strings and bytes, which is fun
if isinstance(raw_slug, bytes):
logger.debug(f"Bytes slug {raw_slug!r}")
slug = raw_slug.decode("utf-8").strip()
else:
logger.debug(f"String slug '{raw_slug}'")
slug = raw_slug
return slug
27 changes: 13 additions & 14 deletions pixl_dcmd/src/pixl_dcmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@
from core.exceptions import PixlSkipMessageError
from core.project_config import load_project_config

from core.dicom_tags import DICOM_TAG_PROJECT_NAME

import requests
from core.project_config import load_tag_operations
from decouple import config
from pydicom import Dataset, dcmwrite

from pixl_dcmd._dicom_helpers import get_project_name_as_string
from pixl_dcmd._tag_schemes import merge_tag_schemes
from pixl_dcmd._database import add_hashed_identifier_and_save, query_db
from pixl_dcmd._datetime import combine_date_time, format_date_time
Expand All @@ -51,6 +50,17 @@ def write_dataset_to_bytes(dataset: Dataset) -> bytes:
return buffer.read()


def should_exclude_series(dataset: Dataset) -> bool:
slug = get_project_name_as_string(dataset)

series_description = dataset.get("SeriesDescription")
cfg = load_project_config(slug)
if cfg.is_series_excluded(series_description):
logger.warning("FILTERING OUT series description: %s", series_description)
return True
return False


def anonymise_dicom(dataset: Dataset) -> Dataset:
"""
Anonymises a DICOM dataset as Received by Orthanc.
Expand All @@ -61,18 +71,7 @@ def anonymise_dicom(dataset: Dataset) -> Dataset:
- applying tag operations based on the config file
Returns anonymised dataset.
"""
raw_slug = dataset.get_private_item(
DICOM_TAG_PROJECT_NAME.group_id,
DICOM_TAG_PROJECT_NAME.offset_id,
DICOM_TAG_PROJECT_NAME.creator_string,
).value
# Get both strings and bytes, which is fun
if isinstance(raw_slug, bytes):
logger.debug(f"Bytes slug {raw_slug!r}")
slug = raw_slug.decode("utf-8").strip()
else:
logger.debug(f"String slug '{raw_slug}'")
slug = raw_slug
slug = get_project_name_as_string(dataset)

project_config = load_project_config(slug)
logger.debug(f"Received instance for project {slug}")
Expand Down
Loading