Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding finish of function to derive groups (values, fields)
Browse files Browse the repository at this point in the history
Signed-off-by: vsoch <vsochat@stanford.edu>
vsoch committed Mar 10, 2020
1 parent 6fc5e53 commit 94160bb
Showing 11 changed files with 303 additions and 32 deletions.
2 changes: 1 addition & 1 deletion deid/config/__init__.py
Original file line number Diff line number Diff line change
@@ -167,7 +167,7 @@ def has_actions(self):

def listof(self, section):
"""return a list of keys for a section"""
listing = self._get_section(section)
listing = self._get_section(section) or {}
return list(listing.keys())

def ls_filters(self):
13 changes: 10 additions & 3 deletions deid/config/utils.py
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@
# pylint: skip-file

from deid.logger import bot
from deid.utils import read_file
from deid.utils import read_file, get_installdir
from deid.data import data_base
from deid.config.standards import (
formats,
@@ -195,7 +195,7 @@ def load_deid(path=None):
section=section, section_name=section_name, line=line, config=config
)
else:
bot.debug("%s not recognized to be in valid format, skipping." % line)
bot.warning("%s not recognized to be in valid format, skipping." % line)
return config


@@ -208,6 +208,9 @@ def find_deid(path=None):
path: a path on the filesystem. If not provided, will assume PWD.
"""
# A default deid will be loaded if all else fails
default_deid = os.path.join(get_installdir(), "data", "deid.dicom")

if path is None:
path = os.getcwd()

@@ -218,7 +221,11 @@ def find_deid(path=None):
]

if len(contenders) == 0:
bot.exit("No deid settings files found in %s, exiting." % (path))
bot.warning(
"No deid settings files found in %s, will use default dicom.deid."
% path
)
contenders.append(default_deid)

elif len(contenders) > 1:
bot.warning("Multiple deid files found in %s, will use first." % (path))
1 change: 1 addition & 0 deletions deid/dicom/actions.py
Original file line number Diff line number Diff line change
@@ -51,6 +51,7 @@ def perform_action(dicom, action, item=None, fields=None, return_seen=False):
"action" (eg, REPLACE) what to do with the field
"value": if needed, the field from the response to replace with
"""
print(action)
field = action.get("field") # e.g: PatientID, endswith:ID, values:name, fields:name
value = action.get("value") # "suid" or "var:field"
action = action.get("action") # "REPLACE"
5 changes: 4 additions & 1 deletion deid/dicom/fields.py
Original file line number Diff line number Diff line change
@@ -91,6 +91,9 @@ def find_by_values(values, dicom):
"""Given a list of values, find fields in the dicom that contain any
of those values, as determined by a regular expression search.
"""
# Values must be strings
values = [str(x) for x in values]

fields = []
contenders = get_fields(dicom)

@@ -127,7 +130,7 @@ def expand_field_expression(field, dicom, contenders=None):
fields = contenders
return fields

# Case 2: The field is a specific field OR an axpander with argument (A:B)
# Case 2: The field is a specific field OR an expander with argument (A:B)
fields = field.split(":")
if len(fields) == 1:
return fields
14 changes: 8 additions & 6 deletions deid/dicom/groups.py
Original file line number Diff line number Diff line change
@@ -38,7 +38,7 @@ def extract_values_list(dicom, actions):
always returns a list intended to update some lookup to be used
to further process the dicom.
"""
values = []
values = set()
fields = get_fields(dicom)
for action in actions:

@@ -47,7 +47,7 @@ def extract_values_list(dicom, actions):
subset = expand_field_expression(
field=action["field"], dicom=dicom, contenders=fields
)
[values.append(dicom.get(field)) for field in subset]
[values.add(dicom.get(field)) for field in subset if field in dicom]

# Split action, can optionally have a "by" and/or minlength parameter
elif action["action"] == "SPLIT":
@@ -63,6 +63,8 @@ def extract_values_list(dicom, actions):
if "value" in action:
for param in action["value"].split(";"):
param_name, param_val = param.split("=")
param_name = param_name.strip()
param_val = param_val.strip()

# Set a custom parameter legnth
if param_name == "minlength":
@@ -73,17 +75,17 @@ def extract_values_list(dicom, actions):
bot.debug("Splitting value set to %s" % split_by)

for field in subset:
new_values = dicom.get(field, "").split(split_by)
new_values = str(dicom.get(field, "")).split(split_by)
for new_value in new_values:
if len(new_value) > minlength:
values.append(new_value)
if len(new_value) >= minlength:
values.add(new_value)

else:
bot.warning(
"Unrecognized action %s for values list extraction." % action["action"]
)

return values
return list(values)


def extract_fields_list(dicom, actions):
42 changes: 25 additions & 17 deletions deid/dicom/header.py
Original file line number Diff line number Diff line change
@@ -28,19 +28,17 @@
from deid.logger import bot
from deid.utils import read_json

from .tags import remove_sequences
from .groups import extract_values_list, extract_fields_list

from deid.dicom.tags import get_private
from deid.config import DeidRecipe

from pydicom import read_file

from .utils import save_dicom
from .actions import perform_action
from pydicom.dataset import Dataset
from deid.dicom.utils import save_dicom
from deid.dicom.actions import perform_action
from deid.dicom.tags import remove_sequences, get_private
from deid.dicom.groups import extract_values_list, extract_fields_list
from deid.dicom.fields import get_fields

from .fields import get_fields
from pydicom.dataset import Dataset

import os

@@ -137,8 +135,6 @@ def get_identifiers(
skip_fields: if not None, added fields to skip
"""
bot.debug("Extracting identifiers for %s dicom" % (len(dicom_files)))

if config is None:
config = "%s/config.json" % here

@@ -149,6 +145,7 @@ def get_identifiers(
if not isinstance(dicom_files, list):
dicom_files = [dicom_files]

bot.debug("Extracting identifiers for %s dicom" % len(dicom_files))
ids = dict() # identifiers

# We will skip PixelData
@@ -159,7 +156,13 @@ def get_identifiers(
skip = skip + skip_fields

for dicom_file in dicom_files:
dicom = read_file(dicom_file, force=True)

# TODO: this should have shared reader class to hold dicom, ids, etc.
if isinstance(dicom_file, Dataset):
dicom = dicom_file
dicom_file = dicom.filename
else:
dicom = read_file(dicom_file, force=force)

if dicom_file not in ids:
ids[dicom_file] = dict()
@@ -253,7 +256,12 @@ def replace_identifiers(
# Parse through dicom files, update headers, and save
updated_files = []
for _, dicom_file in enumerate(dicom_files):
dicom = read_file(dicom_file, force=force)

if isinstance(dicom_file, Dataset):
dicom = dicom_file
dicom_file = dicom.filename
else:
dicom = read_file(dicom_file, force=force)
dicom_name = os.path.basename(dicom_file)
fields = dicom.dir()

@@ -266,19 +274,19 @@ def replace_identifiers(
if dicom_file in ids:

# Prepare additional lists of values and fields (updates item)
if deid.has_values_lists():
for group, actions in deid.get_values_lists().items():
if recipe.has_values_lists():
for group, actions in recipe.get_values_lists().items():
ids[dicom_file][group] = extract_values_list(
dicom=dicom, actions=actions
)

if deid.has_fields_lists():
for group, actions in deid.get_fields_lists().items():
if recipe.has_fields_lists():
for group, actions in recipe.get_fields_lists().items():
ids[dicom_file][group] = extract_fields_list(
dicom=dicom, actions=actions
)

for action in deid.get_actions():
for action in recipe.get_actions():
dicom = perform_action(
dicom=dicom, item=ids[dicom_file], action=action
)
7 changes: 5 additions & 2 deletions deid/tests/test_config.py
Original file line number Diff line number Diff line change
@@ -56,11 +56,14 @@ def test_load_deid(self):
config = load_deid(os.path.dirname(self.deid))
self.assertTrue("format" in config)

print("Case 3: Testing error on non-existing load")
print("Case 3: Testing error on non-existing load of file")
with self.assertRaises(SystemExit) as cm:
config = load_deid(self.tmpdir)
config = load_deid(os.path.join(self.tmpdir, "deid.doesnt-exist"))
self.assertEqual(cm.exception.code, 1)

print("Case 4: Testing load of default deid.")
config = load_deid(self.tmpdir)

def test_find_deid(self):

print("Testing finding deid file, referencing directly.")
133 changes: 133 additions & 0 deletions deid/tests/test_deid_recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#!/usr/bin/env python

"""
Test DeidRecipe class
Copyright (c) 2020 Vanessa Sochat
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import unittest
import tempfile
import shutil
import json
import os

from deid.config import DeidRecipe
from deid.utils import get_installdir


class TestDeidRecipe(unittest.TestCase):
def setUp(self):
self.pwd = get_installdir()
self.deid = os.path.abspath("%s/../examples/deid/deid.dicom" % self.pwd)
self.tmpdir = tempfile.mkdtemp()
print("\n######################START######################")

def tearDown(self):
shutil.rmtree(self.tmpdir)
print("\n######################END########################")

def test_load_recipe(self):

print("Case 1: Test loading default DeidRecipe")

recipe = DeidRecipe()

self.assertTrue(isinstance(recipe.deid, dict))

print("Checking basic sections are loaded")
print(recipe.deid.keys())
for section in ["header", "format", "filter"]:
self.assertTrue(section in recipe.deid)

print("Case 2: Loading from file")
recipe = DeidRecipe(self.deid)

def test_get_functions(self):

recipe = DeidRecipe(self.deid)

# Format
self.assertEqual(recipe.get_format(), "dicom")

# Actions for header
print("Testing get_actions")
actions = recipe.get_actions()
self.assertTrue(isinstance(actions, list))
for key in ["action", "field", "value"]:
self.assertTrue(key in actions[0])
self.assertTrue(recipe.has_actions())

# Filters
print("Testing get_filters")
filters = recipe.get_filters()
self.assertTrue(isinstance(filters, dict))

# whitelist, blacklist, graylist
for key in recipe.ls_filters():
self.assertTrue(key in filters)

recipe = DeidRecipe()
filters = recipe.get_filters()
self.assertTrue(isinstance(filters["whitelist"], list))

# Test that each filter has a set of filters, coords, name
for key in ["filters", "coordinates", "name"]:
self.assertTrue(key in filters["whitelist"][0])

# Each filter is a list of actions, name is string, coords are list
self.assertTrue(isinstance(filters["whitelist"][0]["filters"], list))
self.assertTrue(isinstance(filters["whitelist"][0]["name"], str))
self.assertTrue(isinstance(filters["whitelist"][0]["coordinates"], list))

# Check content of the first filter
for key in ["action", "field", "operator", "InnerOperators", "value"]:
self.assertTrue(key in filters["whitelist"][0]["filters"][0])

# Fields and Values
print("Testing get_fields_lists and get_values_lists")
self.assertEqual(recipe.get_fields_lists(), None)
self.assertEqual(recipe.get_values_lists(), None)
self.assertEqual(recipe.ls_fieldlists(), [])
self.assertEqual(recipe.ls_valuelists(), [])
self.assertTrue(not recipe.has_fields_lists())
self.assertTrue(not recipe.has_values_lists())

# Load in recipe with values and fields
deid = os.path.abspath("%s/../examples/deid/deid.dicom-groups" % self.pwd)
recipe = DeidRecipe(deid)

assert "values" in recipe.deid
assert "fields" in recipe.deid
self.assertTrue(isinstance(recipe.deid["values"], dict))
self.assertTrue(isinstance(recipe.deid["fields"], dict))

self.assertTrue(recipe.get_fields_lists() is not None)
self.assertTrue(recipe.get_values_lists() is not None)
self.assertEqual(recipe.ls_fieldlists(), ["instance_fields"])
self.assertEqual(recipe.ls_valuelists(), ["cookie_names", "operator_names"])
self.assertTrue(recipe.has_fields_lists())
self.assertTrue(recipe.has_values_lists())


if __name__ == "__main__":
unittest.main()
114 changes: 114 additions & 0 deletions deid/tests/test_dicom_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/usr/bin/env python

"""
Testing groups for a deid recipe (values and fields)
Copyright (c) 2020 Vanessa Sochat
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

import unittest
import tempfile
import shutil
import json
import os

from deid.utils import get_installdir
from deid.data import get_dataset
from deid.config import DeidRecipe
from deid.dicom.fields import get_fields
from deid.dicom import get_identifiers, replace_identifiers


class TestDicomGroups(unittest.TestCase):
def setUp(self):
self.pwd = get_installdir()
self.deid = os.path.abspath("%s/../examples/deid/deid.dicom-groups" % self.pwd)
self.dataset = get_dataset("dicom-cookies")
self.tmpdir = tempfile.mkdtemp()
print("\n######################START######################")

def tearDown(self):
shutil.rmtree(self.tmpdir)
print("\n######################END########################")

def test_extract_groups(self):
print("Test deid.dicom.groups extract_values_list")
from deid.dicom.groups import extract_values_list, extract_fields_list

dicom = get_dicom(self.dataset)
fields = get_fields(dicom) # removes empty / null

# Test split action
actions = [
{"action": "SPLIT", "field": "PatientID", "value": 'by="^";minlength=4'}
]
expected_names = dicom.get("PatientID").split("^")
actual = extract_values_list(dicom, actions)
self.assertEqual(actual, expected_names)

# Test field action
actions = [{"action": "FIELD", "field": "startswith:Operator"}]
expected_operator = [dicom.get(x) for x in fields if x.startswith("Operator")]
actual = extract_values_list(dicom, actions)
self.assertEqual(actual, expected_operator)

print("Test deid.dicom.groups extract_fields_list")
actions = [{"action": "FIELD", "field": "contains:Instance"}]
expected = [x for x in fields if "Instance" in x]
actual = extract_fields_list(dicom, actions)
self.assertEqual(actual, expected)

# Get identifiers for file
ids = get_identifiers(dicom)
self.assertTrue(isinstance(ids, dict))

# Add keys to be used for replace to ids - these first are for values
ids[dicom.filename]["cookie_names"] = expected_names
ids[dicom.filename]["operator_names"] = expected_operator

# This is for fields
ids[dicom.filename]["instance_fields"] = expected
ids[dicom.filename]["id"] = "new-cookie-id"
ids[dicom.filename]["source_id"] = "new-operator-id"

replaced = replace_identifiers(dicom, ids=ids, save=False, deid=self.deid)
cleaned = replaced.pop()
self.assertEqual(cleaned.get("PatientID"), "new-cookie-id")
self.assertEqual(cleaned.get("OperatorsName"), "new-operator-id")

# Currently we don't well handle tag types, so we convert to string
for field in expected_operator:
self.assertTrue(str(field) not in cleaned)


def get_dicom(dataset):
"""helper function to load a dicom
"""
from deid.dicom import get_files
from pydicom import read_file

dicom_files = get_files(dataset)
return read_file(next(dicom_files))


if __name__ == "__main__":
unittest.main()
2 changes: 1 addition & 1 deletion docs/_docs/examples/recipe.md
Original file line number Diff line number Diff line change
@@ -343,7 +343,7 @@ in a recipe:
FORMAT dicom
%values cookie_names
SPLIT PatientID by=" ";minlength=4
SPLIT PatientID by=" ";minlength=3
%values operator_names
FIELD startswith:Operator
2 changes: 1 addition & 1 deletion docs/_docs/user-docs/recipe-groups.md
Original file line number Diff line number Diff line change
@@ -89,7 +89,7 @@ This of course means that the actions supported for the `%fields` section includ

It could be that you want to generate a list of _values_ extracted from the dicom
to use as flags for checking other fields. For example, if I know that the Patient's ID
is in PatiendID, I would want to extract the patient's name from that field,
is in PatientID, I would want to extract the patient's name from that field,
and then search across fields looking for any instance of a first or last name.
This is the purpose of the `%values` group. Instead of defining rules to create
a list of fields, we write rules to extract values. Let's take a look at an

0 comments on commit 94160bb

Please sign in to comment.