Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding tag groups for values and fields #120

Merged
merged 3 commits into from
Mar 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
Versions referenced in headers are tagged on GitHub; versions in parentheses are the corresponding PyPI releases.

## [vxx](https://github.com/pydicom/deid/tree/master) (master)
- adding support for tag groups (values, fields) (0.1.4)
- Adding option to provide function to remove (must return boolean) (0.1.38)
- removing matplotlib version requirement (0.1.37)
- Matplotlib dependency >= 2.1.2 (0.1.36)
Expand Down
64 changes: 54 additions & 10 deletions deid/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,28 +91,40 @@ def _get_section(self, name):
"""
section = None
if self.deid is not None:
if name in self.deid:
section = self.deid[name]
section = self.deid.get(name)
return section

# Get Sections

def get_format(self):
    """look up the "format" section of the loaded deid and return it.
    The result is whatever _get_section returns for that key.
    """
    loaded_format = self._get_section("format")
    return loaded_format

def _get_named_section(self, section_name, name=None):
    """return a whole section, or one named entry inside of it.

    Parameters
    ==========
    section_name: the section to look up with _get_section
    name: optional key under the section; when given and the section
          exists, the entry for that key is returned (an empty list if
          the key is missing). When the section is not defined the
          result of _get_section (None) is passed through.
    """
    found = self._get_section(section_name)

    # Nothing to narrow down: no section, or no name was requested
    if found is None or name is None:
        return found

    return found.get(name, [])

def get_filters(self, name=None):
"""return all filters for a deid recipe, or a set based on a name
"""
filters = self._get_section("filter")
if name is not None and filters is not None:
filters = filters[name]
return filters
return self._get_named_section("filter", name)

def ls_filters(self):
"""list names of filter groups
def get_values_lists(self, name=None):
"""return a values list by name
"""
filters = self._get_section("filter")
return list(filters.keys())
return self._get_named_section("values", name)

def get_fields_lists(self, name=None):
    """return all fields lists from the "fields" section, or a single
    fields list when a name is provided.

    Parameters
    ==========
    name: optional name of one fields list; passed through to
          _get_named_section together with the "fields" section name.
    """
    return self._get_named_section("fields", name)

def get_actions(self, action=None, field=None):
"""get deid actions to perform on a header, or a subset based on a type
Expand All @@ -137,6 +149,38 @@ def get_actions(self, action=None, field=None):

return header

# Boolean properties

def _has_list_content(self, name):
    """return True if the loaded deid has a non-empty section called name.

    A recipe that has not been loaded (self.deid is None), a section that
    is missing, and a section that is empty or None all give False rather
    than raising.

    Parameters
    ==========
    name: the key of the section in self.deid (e.g., "fields")
    """
    # self.deid may be None, the same case _get_section guards against
    if self.deid is None:
        return False
    return bool(self.deid.get(name))

def has_fields_lists(self):
    """report whether the "fields" section has any content, as decided
    by _has_list_content.
    """
    present = self._has_list_content("fields")
    return present

def has_values_lists(self):
    """report whether the "values" section has any content, as decided
    by _has_list_content.
    """
    present = self._has_list_content("values")
    return present

def has_actions(self):
    """report whether the "header" section (where actions are stored)
    has any content, as decided by _has_list_content.
    """
    present = self._has_list_content("header")
    return present

# Listing

def listof(self, section):
    """list the keys found under a section; a section that is missing
    or empty gives an empty list.
    """
    contents = self._get_section(section)
    if not contents:
        return []
    return list(contents.keys())

def ls_filters(self):
    """list the names of the groups under the "filter" section"""
    names = self.listof("filter")
    return names

def ls_valuelists(self):
    """list the names of the groups under the "values" section"""
    names = self.listof("values")
    return names

def ls_fieldlists(self):
    """list the names of the groups under the "fields" section"""
    names = self.listof("fields")
    return names

# Init

def _init_deid(self, deid=None, base=False, default_base="dicom"):
"""initalize the recipe with one or more deids, optionally including
the default. This function is called at init time. If you need to add
Expand Down
7 changes: 6 additions & 1 deletion deid/config/standards.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,15 @@
formats = ["dicom"]

# Supported Sections
sections = ["header", "labels", "filter"]
sections = ["header", "labels", "filter", "values", "fields"]

# Supported Header Actions
actions = ("ADD", "BLANK", "JITTER", "KEEP", "REPLACE", "REMOVE", "LABEL")

# Supported Group actions (SPLIT only supported for values)
groups = ["values", "fields"]
group_actions = ("FIELD", "SPLIT")

# Valid actions for a filter action
filters = (
"contains",
Expand Down
157 changes: 125 additions & 32 deletions deid/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,16 @@
# pylint: skip-file

from deid.logger import bot
from deid.utils import read_file
from deid.utils import read_file, get_installdir
from deid.data import data_base
from deid.config.standards import formats, actions, sections, filters

from deid.config.standards import (
formats,
actions,
sections,
filters,
groups,
group_actions,
)
from collections import OrderedDict
import os
import re
Expand Down Expand Up @@ -130,7 +136,7 @@ def load_deid(path=None):
config = OrderedDict()
section = None

while len(spec) > 0:
while spec:

# Clean up white trailing/leading space
line = spec.pop(0).strip()
Expand All @@ -139,15 +145,9 @@ def load_deid(path=None):
if line.startswith("#"):
continue

# Starts with Format?
elif bool(re.match("format", line, re.I)):
fmt = re.sub("FORMAT|(\s+)", "", line).lower()
if fmt not in formats:
bot.exit("%s is not a valid format." % fmt)

# Set format
config["format"] = fmt
bot.debug("FORMAT set to %s" % fmt)
# Set format
elif bool(re.match("^format", line, re.I)):
config["format"] = parse_format(line)

# A new section?
elif line.startswith("%"):
Expand All @@ -168,24 +168,18 @@ def load_deid(path=None):
config=config, section=section, section_name=section_name
)

# An action (replace, blank, remove, keep, jitter)
# A %fields action (only field allowed), %values allows split
elif line.upper().startswith(group_actions) and section in groups:
vsoch marked this conversation as resolved.
Show resolved Hide resolved
config = parse_group_action(
section=section, section_name=section_name, line=line, config=config
)

# An action (ADD, BLANK, JITTER, KEEP, REPLACE, REMOVE, LABEL)
elif line.upper().startswith(actions):

# Start of a filter group
if line.upper().startswith("LABEL") and section == "filter":
members = []
keep_going = True
while keep_going is True:
next_line = spec[0]
if next_line.upper().strip().startswith("LABEL"):
keep_going = False
elif next_line.upper().strip().startswith("%"):
keep_going = False
else:
new_member = spec.pop(0)
members.append(new_member)
if len(spec) == 0:
keep_going = False
members = parse_filter_group(spec)

# Add the filter label to the config
config = parse_label(
Expand All @@ -201,7 +195,7 @@ def load_deid(path=None):
section=section, section_name=section_name, line=line, config=config
)
else:
bot.debug("%s not recognized to be in valid format, skipping." % line)
bot.warning("%s not recognized to be in valid format, skipping." % line)
return config


Expand All @@ -214,6 +208,9 @@ def find_deid(path=None):
path: a path on the filesystem. If not provided, will assume PWD.

"""
# A default deid will be loaded if all else fails
default_deid = os.path.join(get_installdir(), "data", "deid.dicom")

if path is None:
path = os.getcwd()

Expand All @@ -224,7 +221,11 @@ def find_deid(path=None):
]

if len(contenders) == 0:
bot.exit("No deid settings files found in %s, exiting." % (path))
bot.warning(
"No deid settings files found in %s, will use default dicom.deid."
% path
)
contenders.append(default_deid)

elif len(contenders) > 1:
bot.warning("Multiple deid files found in %s, will use first." % (path))
Expand All @@ -238,6 +239,48 @@ def find_deid(path=None):
return path


def parse_format(line):
    """given a line that starts with FORMAT, parse the format of the
    file and check that it is supported. If not, exit on error (via
    bot.exit). If yes, return the format.

    The FORMAT keyword is matched case-insensitively and only at the
    start of the line, so a line such as "format dicom" is parsed the
    same way as "FORMAT dicom". All whitespace is removed from the
    remainder and the result is lowercased before it is checked.

    Parameters
    ==========
    line: the line that starts with format.
    """
    # Strip the leading keyword (any case) and every run of whitespace
    fmt = re.sub(r"^\s*format|\s+", "", line, flags=re.I).lower()
    if fmt not in formats:
        bot.exit("%s is not a valid format." % fmt)
    bot.debug("FORMAT set to %s" % fmt)
    return fmt


def parse_filter_group(spec):
    """given the specification (a list of lines) collect the lines that
    belong to the current filter group. The group ends at the start of a
    new LABEL (case 1), the start of a new section, a line starting with
    % (case 2), or the end of the spec (case 3). Both checks ignore case
    and surrounding whitespace.

    The collected lines are removed from the front of spec in place, so
    the calling function continues parsing at the line that ended the
    group. Members are returned exactly as they appear in spec (they are
    not stripped).

    Parameters
    ==========
    spec: unparsed lines of the deid recipe file (a list, modified in place)
    """
    # Find the first line that ends the group; default to end of spec
    end = len(spec)
    for index, candidate in enumerate(spec):
        if candidate.strip().upper().startswith(("LABEL", "%")):
            end = index
            break

    # One slice and one in-place delete instead of repeated pop(0)
    members = spec[:end]
    del spec[:end]
    return members


def parse_label(section, config, section_name, members, label=None):
"""parse label will add a (optionally named) label to the filter
section, including one or more criteria
Expand Down Expand Up @@ -289,7 +332,10 @@ def parse_label(section, config, section_name, members, label=None):


def parse_member(members, operator=None):

"""a parsing function for a filter member. Will return a single member
with fields, values, and an operator. In the case of multiple and/or
statements that are chained, will instead return a list.
"""
main_operator = operator

actions = []
Expand Down Expand Up @@ -382,7 +428,7 @@ def add_section(config, section, section_name=None):
if section is None:
bot.exit("You must define a section (e.g. %header) before any action.")

if section == "filter" and section_name is None:
if section in ["filter", "values", "fields"] and section_name is None:
bot.exit("You must provide a name for a filter section.")

if section not in sections:
Expand Down Expand Up @@ -415,6 +461,54 @@ def _remove_comments(parts):
return value.split("#")[0] # remove comments


def parse_group_action(section, line, config, section_name):
    """parse a group action, either FIELD or SPLIT, which must belong to
    either a fields or values section. The parsed entry is appended to
    config[section][section_name] and the config is returned.

    The action and field are separated on any run of whitespace, so
    repeated spaces or tabs between them do not produce an empty field.
    Anything after the field is handed to _remove_comments and stored
    under "value" (values sections only).

    Parameters
    =========
    section: a valid section name from the deid config file
    line: the line content to parse for the section/action
    config: the growing/current config dictionary
    section_name: the name of the group under the section
    """
    if not line.upper().startswith(group_actions):
        bot.exit("%s is not a valid group action." % line)

    if not line.upper().startswith("FIELD") and section == "fields":
        bot.exit("%fields only supports FIELD actions.")

    bot.debug("%s: adding %s" % (section, line))

    # At most three pieces: action, field, and the untouched remainder
    tokens = line.split(None, 2)
    action = tokens[0]

    # Both actions require at least a field
    if len(tokens) < 2:
        bot.exit("%s action %s requires additional arguments" % (section, action))

    # The second token is always a field or field expander
    field = tokens[1]

    # The remainder is split on single spaces before being passed on,
    # as _remove_comments is given a list of parts
    parts = tokens[2].split(" ") if len(tokens) > 2 else []

    entry = {"action": action, "field": field}

    # Only values supports a third set of arguments (fields ignores it)
    if section == "values" and parts:
        entry["value"] = _remove_comments(parts)

    if section in ("fields", "values"):
        config[section][section_name].append(entry)

    return config


def parse_config_action(section, line, config, section_name=None):
"""add action will take a line from a deid config file, a config (dictionary), and
an active section name (eg header) and add an entry to the config file to perform
Expand All @@ -428,7 +522,6 @@ def parse_config_action(section, line, config, section_name=None):
section_name: optionally, a section name

"""

if not line.upper().startswith(actions):
bot.exit("%s is not a valid action line." % line)

Expand Down
2 changes: 0 additions & 2 deletions deid/dicom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,5 @@
)

from .utils import get_files

from .fields import extract_sequence

from .pixels import has_burned_pixels, DicomCleaner
Loading