Skip to content

Commit

Permalink
Merge pull request #120 from pydicom/add/tag-groups
Browse files Browse the repository at this point in the history
Adding tag groups for values and fields
  • Loading branch information
vsoch authored Mar 12, 2020
2 parents 3b924bb + da477c5 commit 5e7175f
Show file tree
Hide file tree
Showing 20 changed files with 908 additions and 80 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
Referenced versions in headers are tagged on Github, in parentheses are for pypi.

## [vxx](https://github.com/pydicom/deid/tree/master) (master)
- adding support for tag groups (values, fields) (0.1.4)
- Adding option to provide function to remove (must return boolean) (0.1.38)
- removing matplotlib version requirement (0.1.37)
- Matplotlib dependency >= 2.1.2 (0.1.36)
Expand Down
64 changes: 54 additions & 10 deletions deid/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,28 +91,40 @@ def _get_section(self, name):
"""
section = None
if self.deid is not None:
if name in self.deid:
section = self.deid[name]
section = self.deid.get(name)
return section

# Get Sections

def get_format(self):
"""return the format of the loaded deid, if one exists
"""
return self._get_section("format")

def _get_named_section(self, section_name, name=None):
"""a helper function to return an entire section, or if a name is
provided, a named section under it. If the section is not
defined, we appropriately return None.
"""
section = self._get_section(section_name)
if name is not None and section is not None:
section = section.get(name, [])
return section

def get_filters(self, name=None):
"""return all filters for a deid recipe, or a set based on a name
"""
filters = self._get_section("filter")
if name is not None and filters is not None:
filters = filters[name]
return filters
return self._get_named_section("filter", name)

def ls_filters(self):
"""list names of filter groups
def get_values_lists(self, name=None):
"""return a values list by name
"""
filters = self._get_section("filter")
return list(filters.keys())
return self._get_named_section("values", name)

def get_fields_lists(self, name=None):
"""return a values list by name
"""
return self._get_named_section("fields", name)

def get_actions(self, action=None, field=None):
"""get deid actions to perform on a header, or a subset based on a type
Expand All @@ -137,6 +149,38 @@ def get_actions(self, action=None, field=None):

return header

# Boolean properties

def _has_list_content(self, name):
return len(self.deid.get(name, [])) > 0

def has_fields_lists(self):
return self._has_list_content("fields")

def has_values_lists(self):
return self._has_list_content("values")

def has_actions(self):
return self._has_list_content("header")

# Listing

def listof(self, section):
"""return a list of keys for a section"""
listing = self._get_section(section) or {}
return list(listing.keys())

def ls_filters(self):
return self.listof("filter")

def ls_valuelists(self):
return self.listof("values")

def ls_fieldlists(self):
return self.listof("fields")

# Init

def _init_deid(self, deid=None, base=False, default_base="dicom"):
"""initalize the recipe with one or more deids, optionally including
the default. This function is called at init time. If you need to add
Expand Down
7 changes: 6 additions & 1 deletion deid/config/standards.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,15 @@
formats = ["dicom"]

# Supported Sections
sections = ["header", "labels", "filter"]
sections = ["header", "labels", "filter", "values", "fields"]

# Supported Header Actions
actions = ("ADD", "BLANK", "JITTER", "KEEP", "REPLACE", "REMOVE", "LABEL")

# Supported Group actions (SPLIT only supported for values)
groups = ["values", "fields"]
group_actions = ("FIELD", "SPLIT")

# Valid actions for a filter action
filters = (
"contains",
Expand Down
157 changes: 125 additions & 32 deletions deid/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,16 @@
# pylint: skip-file

from deid.logger import bot
from deid.utils import read_file
from deid.utils import read_file, get_installdir
from deid.data import data_base
from deid.config.standards import formats, actions, sections, filters

from deid.config.standards import (
formats,
actions,
sections,
filters,
groups,
group_actions,
)
from collections import OrderedDict
import os
import re
Expand Down Expand Up @@ -130,7 +136,7 @@ def load_deid(path=None):
config = OrderedDict()
section = None

while len(spec) > 0:
while spec:

# Clean up white trailing/leading space
line = spec.pop(0).strip()
Expand All @@ -139,15 +145,9 @@ def load_deid(path=None):
if line.startswith("#"):
continue

# Starts with Format?
elif bool(re.match("format", line, re.I)):
fmt = re.sub("FORMAT|(\s+)", "", line).lower()
if fmt not in formats:
bot.exit("%s is not a valid format." % fmt)

# Set format
config["format"] = fmt
bot.debug("FORMAT set to %s" % fmt)
# Set format
elif bool(re.match("^format", line, re.I)):
config["format"] = parse_format(line)

# A new section?
elif line.startswith("%"):
Expand All @@ -168,24 +168,18 @@ def load_deid(path=None):
config=config, section=section, section_name=section_name
)

# An action (replace, blank, remove, keep, jitter)
# A %fields action (only field allowed), %values allows split
elif line.upper().startswith(group_actions) and section in groups:
config = parse_group_action(
section=section, section_name=section_name, line=line, config=config
)

# An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
elif line.upper().startswith(actions):

# Start of a filter group
if line.upper().startswith("LABEL") and section == "filter":
members = []
keep_going = True
while keep_going is True:
next_line = spec[0]
if next_line.upper().strip().startswith("LABEL"):
keep_going = False
elif next_line.upper().strip().startswith("%"):
keep_going = False
else:
new_member = spec.pop(0)
members.append(new_member)
if len(spec) == 0:
keep_going = False
members = parse_filter_group(spec)

# Add the filter label to the config
config = parse_label(
Expand All @@ -201,7 +195,7 @@ def load_deid(path=None):
section=section, section_name=section_name, line=line, config=config
)
else:
bot.debug("%s not recognized to be in valid format, skipping." % line)
bot.warning("%s not recognized to be in valid format, skipping." % line)
return config


Expand All @@ -214,6 +208,9 @@ def find_deid(path=None):
path: a path on the filesystem. If not provided, will assume PWD.
"""
# A default deid will be loaded if all else fails
default_deid = os.path.join(get_installdir(), "data", "deid.dicom")

if path is None:
path = os.getcwd()

Expand All @@ -224,7 +221,11 @@ def find_deid(path=None):
]

if len(contenders) == 0:
bot.exit("No deid settings files found in %s, exiting." % (path))
bot.warning(
"No deid settings files found in %s, will use default dicom.deid."
% path
)
contenders.append(default_deid)

elif len(contenders) > 1:
bot.warning("Multiple deid files found in %s, will use first." % (path))
Expand All @@ -238,6 +239,48 @@ def find_deid(path=None):
return path


def parse_format(line):
"""given a line that starts with FORMAT, parse the format of the
file and check that it is supported. If not, exit on error. If yes,
return the format.
Parameters
==========
line: the line that starts with format.
"""
fmt = re.sub("FORMAT|(\s+)", "", line).lower()
if fmt not in formats:
bot.exit("%s is not a valid format." % fmt)
bot.debug("FORMAT set to %s" % fmt)
return fmt


def parse_filter_group(spec):
"""given the specification (a list of lines) continue parsing lines
until the filter group ends, as indicated by the start of a new LABEL,
(case 1), the start of a new section (case 2) or the end of the spec
file (case 3). Returns a list of members (lines) that belong to the
filter group. The list (by way of using pop) is updated in the calling
function.
Parameters
==========
spec: unparsed lines of the deid recipe file
"""
members = []
keep_going = True
while keep_going and spec:
next_line = spec[0]
if next_line.upper().strip().startswith("LABEL"):
keep_going = False
elif next_line.upper().strip().startswith("%"):
keep_going = False
else:
new_member = spec.pop(0)
members.append(new_member)
return members


def parse_label(section, config, section_name, members, label=None):
"""parse label will add a (optionally named) label to the filter
section, including one or more criteria
Expand Down Expand Up @@ -289,7 +332,10 @@ def parse_label(section, config, section_name, members, label=None):


def parse_member(members, operator=None):

"""a parsing function for a filter member. Will return a single member
with fields, values, and an operator. In the case of multiple and/or
statements that are chained, will instead return a list.
"""
main_operator = operator

actions = []
Expand Down Expand Up @@ -382,7 +428,7 @@ def add_section(config, section, section_name=None):
if section is None:
bot.exit("You must define a section (e.g. %header) before any action.")

if section == "filter" and section_name is None:
if section in ["filter", "values", "fields"] and section_name is None:
bot.exit("You must provide a name for a filter section.")

if section not in sections:
Expand Down Expand Up @@ -415,6 +461,54 @@ def _remove_comments(parts):
return value.split("#")[0] # remove comments


def parse_group_action(section, line, config, section_name):
"""parse a group action, either FIELD or SPLIT, which must belong to
either a fields or values section.
Parameters
=========
section: a valid section name from the deid config file
line: the line content to parse for the section/action
config: the growing/current config dictionary
section_name: optionally, a section name
"""
if not line.upper().startswith(group_actions):
bot.exit("%s is not a valid group action." % line)

if not line.upper().startswith("FIELD") and section == "fields":
bot.exit("%fields only supports FIELD actions.")

# We may have to deal with cases of spaces
bot.debug("%s: adding %s" % (section, line))
parts = line.split(" ")
action = parts.pop(0).replace(" ", "")

# Both require some parts
if not parts:
bot.exit("%s action %s requires additional arguments" % (section, action))

# For both, the second is always a field or field expander
field = parts.pop(0)

# Fields supports one or more fields with expanders (no third arguments)
if section == "fields":
config[section][section_name].append({"action": action, "field": field})

# Values supports FIELD or SPLIT
elif section == "values":

# If we have a third set of arguments
if parts:
value = _remove_comments(parts)
config[section][section_name].append(
{"action": action, "field": field, "value": value}
)
else:
config[section][section_name].append({"action": action, "field": field})

return config


def parse_config_action(section, line, config, section_name=None):
"""add action will take a line from a deid config file, a config (dictionary), and
an active section name (eg header) and add an entry to the config file to perform
Expand All @@ -428,7 +522,6 @@ def parse_config_action(section, line, config, section_name=None):
section_name: optionally, a section name
"""

if not line.upper().startswith(actions):
bot.exit("%s is not a valid action line." % line)

Expand Down
2 changes: 0 additions & 2 deletions deid/dicom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,5 @@
)

from .utils import get_files

from .fields import extract_sequence

from .pixels import has_burned_pixels, DicomCleaner
Loading

0 comments on commit 5e7175f

Please sign in to comment.