Skip to content

Commit

Permalink
Merge pull request pydicom#106 from pydicom/add/sequence-replacement
Browse files Browse the repository at this point in the history
Add/sequence replacement
  • Loading branch information
vsoch authored Jun 24, 2019
2 parents a2b402d + ebe0907 commit 2b69ddb
Show file tree
Hide file tree
Showing 14 changed files with 438 additions and 48 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
Referenced versions in headers are tagged on Github, in parentheses are for pypi.

## [vxx](https://github.com/pydicom/deid/tree/master) (master)
- Adding option to recursively replace sequences (0.1.34)
- adding pylint to clean up code (0.1.33)
- removing dependency that isn't used (simplejson) (0.1.31)
- updating cleaner to use pixel array (0.1.30)
Expand Down
110 changes: 96 additions & 14 deletions deid/dicom/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@
parse_value
)

from pydicom.dataset import Dataset
from pydicom.sequence import Sequence
from .tags import (
add_tag,
update_tag,
blank_tag,
remove_tag
)

import re

# Actions

Expand All @@ -61,20 +64,45 @@ def perform_action(dicom, action, item=None, fields=None, return_seen=False):
value = action.get('value') # "suid" or "var:field"
action = action.get('action') # "REPLACE"

# Validate the action
if action not in valid_actions:
bot.warning('%s in not a valid choice. Defaulting to blanked.' % action)
action = "BLANK"

# If there is an expander applied to field, we iterate over
fields = expand_field_expression(field=field,
dicom=dicom,
contenders=fields)

# Keep track of fields we have seen
seen = []

# An expanded field must END with that field
expanded_regexp = "__%s$" % field

for field in fields:
seen.append(field)
dicom = _perform_action(dicom=dicom,
field=field,
item=item,
action=action,
value=value)

# Handle top level field
_perform_action(dicom=dicom,
field=field,
item=item,
action=action,
value=value)


# Expand sequences
if item:
expanded_fields = [x for x in item if re.search(expanded_regexp, x)]

# FieldA__FieldB
for expanded_field in expanded_fields:
_perform_expanded_action(dicom=dicom,
expanded_field=expanded_field,
item=item,
action=action,
value=value)

if return_seen:
return dicom, seen
return dicom
Expand All @@ -86,13 +114,7 @@ def _perform_action(dicom, field, action, value=None, item=None):
and perform_addition is typically done via an addition in a config
Both result in a call to this function. If an action fails or is not
done, None is returned, and the calling function should handle this.
'''
if action not in valid_actions:
bot.warning('''%s in not a valid choice [%s].
Defaulting to blanked.''' %(action,
".".join(valid_actions)))
action = "BLANK"

'''
if field in dicom and action != "ADD":

# Blank the value
Expand All @@ -109,7 +131,7 @@ def _perform_action(dicom, field, action, value=None, item=None):
field=field,
value=value)
else:
bot.warning("REPLACE %s unsuccessful" %field)
bot.warning("REPLACE %s unsuccessful" % field)

# Code the value with something in the response
elif action == "JITTER":
Expand All @@ -134,8 +156,68 @@ def _perform_action(dicom, field, action, value=None, item=None):
if value is not None:
dicom = add_tag(dicom, field, value, quiet=True)

return dicom

def _find_sequence_element(sequence, keyword):
    '''search the items of a sequence for an element with a given keyword.

       Parameters
       ==========
       sequence: an iterable of items, each of which iterates over elements
                 that expose a "keyword" attribute
       keyword: the keyword to look for (e.g., FieldB)

       Returns the matching element, or None if no item holds one. If more
       than one item holds the keyword, the element from the last such item
       is returned.
    '''
    match = None
    for sequence_item in sequence:
        for element in sequence_item:
            if element.keyword == keyword:
                # Only leave the inner loop: later items are still searched,
                # so the last item holding the keyword wins.
                match = element
                break
    return match


def _perform_expanded_action(dicom, expanded_field, action, value=None, item=None):
    '''akin to _perform_action, but we expect to be dealing with an expanded
       sequence, and need to step into the Dicom data structure.
       Add, jitter, and delete are currently not supported.
       The field is expected to have the format FieldA__FieldB where the
       last one is where we want to do the replacement.

       Parameters
       ==========
       dicom: the object to start stepping from (a Dataset or Sequence)
       expanded_field: the flattened field name, parts separated by "__"
       action: only "BLANK" and "REPLACE" change the element found here,
               any other action leaves it untouched
       value: the value from the recipe, given to parse_value for REPLACE
       item: the lookup given to parse_value (and on to _perform_action)

       Returns the result of _perform_action if stepping ends on a Dataset,
       otherwise None (the element found is updated in place).
    '''
    field = expanded_field.split('__')[-1]

    # Strip one leading name per pass, until only the final field is left
    while field != expanded_field:
        next_field, expanded_field = expanded_field.split('__', 1)

        # Case 1: we have a Dataset
        if isinstance(dicom, Dataset):
            dicom = dicom.get(next_field)

        # Case 2: we have a Sequence, and keep it if nothing matches
        elif isinstance(dicom, Sequence):
            found = _find_sequence_element(dicom, next_field)
            if found is not None:
                dicom = found

        # NOTE(review): an element found inside a sequence is presumably
        # neither a Dataset nor a Sequence, so further levels of nesting
        # are not stepped into here -- confirm against pydicom.

    # Field should be equal to expanded_field, and in dicom
    if isinstance(dicom, Dataset):
        return _perform_action(dicom=dicom,
                               field=field,
                               item=item,
                               action=action,
                               value=value)

    if isinstance(dicom, Sequence):
        found = _find_sequence_element(dicom, field)
        if found is not None:
            dicom = found

    if not dicom:
        return

    # We can still be holding something that is not the element we want
    # (e.g., a Sequence without a match, which has no keyword), so look up
    # the keyword defensively and warn instead of raising AttributeError.
    found_keyword = getattr(dicom, 'keyword', None)
    if found_keyword != field:
        bot.warning('Early return, looking for %s, found %s' %(field, found_keyword))
        return

    # Blank the value
    if action == "BLANK":
        # NOTE(review): presumably US and SS are skipped because they are
        # numeric and cannot hold an empty string -- confirm.
        if dicom.VR not in ['US', 'SS']:
            dicom.value = ''

    # Code the value with something in the response
    elif action == "REPLACE":

        value = parse_value(item, value, field)
        if value is not None:
            dicom.value = value

    # elif "KEEP" --> Do nothing. Keep the original

# Timestamps

Expand Down
82 changes: 62 additions & 20 deletions deid/dicom/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,67 @@
'''

from deid.logger import bot
from pydicom.sequence import Sequence
from pydicom.dataset import RawDataElement

from pydicom.dataset import (
RawDataElement,
Dataset
)
import re

def extract_item(item, prefix=None, entry=None):
    '''flatten one element of a sequence item into a lookup dictionary,
       handing nested sequences over to extract_sequence.

       Parameters
       ==========
       item: an item from a sequence.
       prefix: the parent name, joined to the item keyword with "__"
       entry: the lookup to add to, a new one is created if None
    '''
    # First call, start from an empty lookup
    lookup = {} if entry is None else entry

    # Raw data elements are skipped
    if isinstance(item, RawDataElement):
        return lookup

    # Without a header (keyword) there is nothing to file the value under
    name = item.keyword
    if name in [None, '']:
        return lookup

    if prefix is not None:
        name = "%s__%s" %(prefix, name)

    content = item.value
    if isinstance(content, bytes):
        content = content.decode('utf-8')

    # A nested sequence is flattened on its own, under the prefixed name
    if isinstance(content, Sequence):
        return extract_sequence(content, prefix=name)

    lookup[name] = content
    return lookup

def extract_sequence(sequence, prefix=None):
'''return a pydicom.sequence.Sequence recursively
as a list of dictionary items
as a flattened list of items. For example, a nested FieldA and FieldB
would return as:
{'FieldA__FieldB': '111111'}
Parameters
==========
sequence: the sequence to extract, should be pydicom.sequence.Sequence
prefix: the parent name
'''
items = []
items = {}
for item in sequence:
for _, val in item.items():
if not isinstance(val, RawDataElement):
header = val.keyword
if prefix is not None:
header = "%s__%s" %(prefix, header)
value = val.value
if isinstance(value, bytes):
value = value.decode('utf-8')
if isinstance(value, Sequence):
items += extract_sequence(value, prefix=header)
continue
entry = {"key": header, "value": value}
items.append(entry)

# If it's a Dataset, we need to further unwrap it
if isinstance(item, Dataset):
for subitem in item:
items.update(extract_item(subitem, prefix=prefix))
else:
bot.warning("Unrecognized type %s in extract sequences, skipping." % type(item))
return items


Expand Down Expand Up @@ -100,11 +137,18 @@ def expand_field_expression(field, dicom, contenders=None):
def get_fields(dicom, skip=None, expand_sequences=True):
'''get fields is a simple function to extract a dictionary of fields
(non empty) from a dicom file.
Parameters
==========
dicom: the dicom file to get fields for.
skip: an optional list of fields to skip
expand_sequences: if True, expand values that are sequences.
'''
if skip is None:
skip = []
if not isinstance(skip, list):
skip = [skip]

fields = dict()
contenders = dicom.dir()
for contender in contenders:
Expand All @@ -116,9 +160,7 @@ def get_fields(dicom, skip=None, expand_sequences=True):

# Adding expanded sequences
if isinstance(value, Sequence) and expand_sequences is True:
sequence_fields = extract_sequence(value, prefix=contender)
for sf in sequence_fields:
fields[sf['key']] = sf['value']
fields.update(extract_sequence(value, prefix=contender))
else:
if value not in [None, ""]:
if isinstance(value, bytes):
Expand Down
18 changes: 12 additions & 6 deletions deid/dicom/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,14 @@ def get_shared_identifiers(dicom_files,
# We will skip PixelData
skip = config['skip']
for dicom_file in dicom_files:

dicom = read_file(dicom_file, force=True)

# Get list of fields, expanded sequences are flattened
fields = get_fields(dicom,
skip=skip,
expand_sequences=expand_sequences)

for key, val in fields.items():

# If it's there, only keep if the same
Expand Down Expand Up @@ -125,6 +129,7 @@ def get_identifiers(dicom_files,
config=None,
expand_sequences=True,
skip_fields=None):

''' extract all identifiers from a dicom image.
This function returns a lookup by file name
Expand All @@ -133,14 +138,14 @@ def get_identifiers(dicom_files,
dicom_files: the dicom file(s) to extract from
force: force reading the file (default True)
config: if None, uses default in provided module folder
expand_sequences: if True, expand sequences. otherwise, skips
expand_sequences: if True, expand sequences. Otherwise, skips
skip_fields: if not None, added fields to skip
'''
bot.debug('Extracting identifiers for %s dicom' %(len(dicom_files)))

if config is None:
config = "%s/config.json" %(here)
config = "%s/config.json" % here

if not os.path.exists(config):
bot.error("Cannot find config %s, exiting" %(config))
Expand All @@ -165,8 +170,8 @@ def get_identifiers(dicom_files,
ids[dicom_file] = dict()

ids[dicom_file] = get_fields(dicom,
skip=skip,
expand_sequences=expand_sequences)
skip=skip,
expand_sequences=expand_sequences)
return ids


Expand Down Expand Up @@ -242,8 +247,9 @@ def replace_identifiers(dicom_files,
remove_private=True):

'''replace identifiers using pydicom, can be slow when writing
and saving new files'''

and saving new files. If you want to replace sequences, they need
to be extracted with get_identifiers and expand_sequences to True.
'''
dicom_files, recipe, config = _prepare_replace_config(dicom_files,
deid=deid,
config=config)
Expand Down
6 changes: 6 additions & 0 deletions deid/dicom/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ def _filter_tags(tags, idx, fields=None):


def remove_sequences(dicom):
'''remove sequences from a dicom by removing the associated tag.
Parameters
==========
dicom: the loaded dicom to remove sequences
'''
for field in dicom.dir():
if isinstance(dicom.get(field), Sequence):
dicom = remove_tag(dicom, field)
Expand Down
6 changes: 5 additions & 1 deletion deid/main/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ def get_parser():
help="use verbose logging to debug.",
default=False, action='store_true')

parser.add_argument('--version', dest="version",
help="print version and exit.",
default=False, action='store_true')

parser.add_argument("--outfolder", "-o", dest='outfolder',
help="full path to save output, will use temporary folder if not specified",
type=str, default=None)
Expand Down Expand Up @@ -118,7 +122,7 @@ def main():
except:
sys.exit(0)

if args.command == "version":
if args.command == "version" or args.version:
print(__version__)
sys.exit(0)

Expand Down
2 changes: 1 addition & 1 deletion deid/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
'''

__version__ = "0.1.33"
__version__ = "0.1.34"
AUTHOR = 'Vanessa Sochat'
AUTHOR_EMAIL = '[email protected]'
NAME = 'deid'
Expand Down
Loading

0 comments on commit 2b69ddb

Please sign in to comment.