Skip to content

Commit

Permalink
Merge nodes that have identical paths
Browse files Browse the repository at this point in the history
- Parse conditional statements from macro row "include" statements
- Add conditionals to attribute descriptions where necessary
- Add new processing module to merge duplicate nodes
- Issue #9
  • Loading branch information
russellkan committed Apr 27, 2020
1 parent a27c69d commit c9382e1
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 14 deletions.
7 changes: 5 additions & 2 deletions dicom_standard/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ dist/ciod_to_modules.json: tmp/raw_ciod_module_tables.json
dist/ciod_to_func_group_macros.json: tmp/raw_ciod_func_group_macro_tables.json
$(PYTHONPATH_PREFIX) python3 process_ciod_func_group_macro_relationship.py $< > $@

dist/module_to_attributes.json: tmp/modules_attributes_partial_references.json dist/references.json
$(PYTHONPATH_PREFIX) python3 postprocess_update_reference_links.py $^ > $@
dist/module_to_attributes.json: tmp/modules_attributes_updated_references.json
$(PYTHONPATH_PREFIX) python3 postprocess_merge_duplicate_nodes.py $< > $@

dist/macro_to_attributes.json: tmp/macros_attributes_partial_references.json dist/references.json
$(PYTHONPATH_PREFIX) python3 postprocess_update_reference_links.py $^ > $@
Expand All @@ -52,6 +52,9 @@ dist/references.json: tmp/modules_attributes_partial_references.json tmp/raw_sec
$(PYTHONPATH_PREFIX) python3 postprocess_save_references.py $^ > $@


tmp/modules_attributes_updated_references.json: tmp/modules_attributes_partial_references.json dist/references.json
$(PYTHONPATH_PREFIX) python3 postprocess_update_reference_links.py $^ > $@

tmp/modules_attributes_partial_references.json: tmp/modules_attributes_no_references.json
$(PYTHONPATH_PREFIX) python3 postprocess_mark_references.py $< > $@

Expand Down
2 changes: 1 addition & 1 deletion dicom_standard/hierarchy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

def get_hierarchy_markers(name: str) -> str:
clean_name = name.strip().replace('\n', '')
_, *split = re.split('^(>+)', clean_name)
_, *split = re.split(r'^(>+)', clean_name)
return '' if split == [] else split[0]


Expand Down
19 changes: 12 additions & 7 deletions dicom_standard/macro_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Utility functions for expanding macros in the module-attribute
relationship tables.
'''
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional
import re
from copy import deepcopy

Expand All @@ -15,22 +15,24 @@
MacrosType = Dict[str, MetadataTableType]


def expand_macro_rows(table: Tag, macros: MacrosType) -> List[Dict[str, str]]:
def expand_macro_rows(table: Tag, macros: MacrosType, conditional: Optional[str] = None) -> List[Dict[str, str]]:
# This variable is used to stop an infinite macro reference
# loop in the standard at the SR Document Content module.
table_id = get_id_from_link(table['linkToStandard'])
attribute_insertion_lists = [get_attributes_to_insert(attr, macros, table_id)
attribute_insertion_lists = [get_attributes_to_insert(attr, macros, table_id, conditional)
for attr in table['attributes']]
new_table = flatten_one_layer(attribute_insertion_lists)
# Removes divider or stylistic rows
return [attribute for attribute in new_table if attribute['tag'] != 'None']


def get_attributes_to_insert(attribute: AttributeType, macros: MacrosType, table_id: str) -> List[Dict[str, str]]:
def get_attributes_to_insert(attribute: AttributeType, macros: MacrosType, table_id: str, conditional: Optional[str] = None) -> List[Dict[str, str]]:
if is_macro_row(attribute):
new_attributes = get_macro_attributes(attribute, macros, table_id)
new_attributes = get_macro_attributes(attribute, macros, table_id, conditional)
return new_attributes if new_attributes is not None else []
else:
if conditional:
attribute['conditional'] = conditional
return [attribute]


Expand All @@ -46,12 +48,15 @@ def is_macro_row(attribute: AttributeType) -> bool:

# Note that this function *recursively expands* macro references using
# the `expand_macro_rows` function.
def get_macro_attributes(attribute: AttributeType, macros: MacrosType, table_id: str) -> List[AttributeType]:
def get_macro_attributes(attribute: AttributeType, macros: MacrosType, table_id: str, conditional: Optional[str] = None) -> List[AttributeType]:
macro_id = referenced_macro_id_from_include_statement(attribute['name'])
parsed_name = BeautifulSoup(attribute['name'], 'html.parser').get_text()
hierarchy_marker = get_hierarchy_markers(parsed_name)
conditional_match = re.search("if +.*", parsed_name.strip())
if conditional_match:
conditional = conditional_match.group()
if table_id != macro_id:
return expand_macro_rows(get_macros_by_id(macro_id, macros, hierarchy_marker), macros)
return expand_macro_rows(get_macros_by_id(macro_id, macros, hierarchy_marker), macros, conditional)
return []


Expand Down
61 changes: 61 additions & 0 deletions dicom_standard/postprocess_merge_duplicate_nodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
'''
Merge nodes that have identical paths by adding conditional statements to the attribute descriptions.
'''
import re
import sys
from collections import Counter

from bs4 import BeautifulSoup

from dicom_standard import parse_lib as pl

DUPLICATE_PATH_EXCEPTIONS = ['rt-segment-annotation:00700084']


def get_description_text(raw_description):
    """Return the text content of an HTML description string, with markup removed.

    The string is parsed with BeautifulSoup's ``html.parser`` backend and the
    text of the resulting document is returned.
    """
    return BeautifulSoup(raw_description, "html.parser").getText()


def add_conditional_to_description(node):
    """Prepend the node's conditional statement to its description as a bold paragraph.

    A trailing period on the conditional is replaced with a colon and the first
    character is upper-cased. ``node['description']`` is updated in place;
    nothing is returned.

    Args:
        node: Mapping holding a ``'description'`` HTML string, a ``'path'``
            (used only in the error message) and a ``'conditional'`` string.

    Raises:
        AssertionError: If the node has no conditional statement, or an empty one.
    """
    conditional = node.get('conditional')
    # Raised explicitly instead of via ``assert`` so the check is not stripped
    # under ``python -O``. An empty string is rejected as well: it would
    # otherwise fail below with an unhelpful IndexError on ``conditional[0]``.
    if not conditional:
        raise AssertionError(f'Duplicate attribute (path: {node["path"]}) has no conditional statement.')
    # Turn "if X is present." into "if X is present:" so it reads as a heading.
    conditional = re.sub(r'\.$', ':', conditional)
    formatted_conditional = f'<p style="font-weight: bold">{conditional[0].upper()}{conditional[1:]}</p>'
    node['description'] = formatted_conditional + node['description']


# Collapse entries of module_attr_list that share the same 'path' into one node per path
# and return the values of the path -> node mapping as a list.
# Nodes are modified in place: the first node seen for a path is the object that is stored,
# later duplicates have their 'description' and 'externalReferences' folded into it, and the
# 'conditional' key is popped from stored nodes.
# NOTE(review): block indentation is not visible in this view, so the exact nesting of the
# statements under each `if`/`else` must be confirmed against the original file -- in particular
# whether the description concatenation happens only when the duplicate descriptions differ, and
# what happens to nodes whose path is listed in DUPLICATE_PATH_EXCEPTIONS.
def merge_duplicate_nodes(module_attr_list):
# Every path in input order, then the subset of paths that occur more than once.
path_list = [d['path'] for d in module_attr_list]
duplicate_paths = [k for k, v in Counter(path_list).items() if v > 1]
# Maps each path to the single node kept for it.
path_to_node = {}
for node in module_attr_list:
path = node['path']
# Standard workaround: Catch inconsistency in Table C.36.8-1 where "Content Creator's Name" attribute
# appears twice in same hierarchy without a conditional
# http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_C.36.8.html
if path not in DUPLICATE_PATH_EXCEPTIONS:
if path in path_to_node:
# A node with this path was already stored: this node is a later duplicate.
# Add conditional to description only if the duplicates do not have identical descriptions
# `instances` and `descriptions` are lazy iterators; set() below consumes them, re-scanning
# the whole input list and re-parsing each matching description for this one check.
# NOTE(review): descriptions are read at their current state, so earlier in-place edits
# to nodes with this path are included in the comparison -- confirm this is intended.
instances = filter(lambda n: n['path'] == path, module_attr_list)
descriptions = map(lambda n: get_description_text(n['description']), instances)
if len(set(descriptions)) > 1:
add_conditional_to_description(node)
# Fold this duplicate's description and references into the node already stored for the path.
path_to_node[path]['description'] += node['description']
path_to_node[path]['externalReferences'].extend(node['externalReferences'])
else:
if path in duplicate_paths:
# First occurrence of a path that is known to repeat later in the list.
# Add conditional to description only if the duplicates do not have identical descriptions
instances = filter(lambda n: n['path'] == path, module_attr_list)
descriptions = map(lambda n: get_description_text(n['description']), instances)
if len(set(descriptions)) > 1:
add_conditional_to_description(node)
# Store the node under its path and drop its 'conditional' key (no error if the key is absent).
path_to_node[path] = node
path_to_node[path].pop('conditional', None)
return list(path_to_node.values())


if __name__ == "__main__":
    # Script entry point: load the node list from the JSON file named by the
    # first command-line argument, merge duplicate-path nodes, and hand the
    # result to the project's pretty-JSON writer.
    merged_nodes = merge_duplicate_nodes(pl.read_json_data(sys.argv[1]))
    pl.write_pretty_json(merged_nodes)
6 changes: 4 additions & 2 deletions dicom_standard/preprocess_modules_with_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def preprocess_attribute_fields(tables):


def preprocess_single_table(table):
# Catch exception in Table F.3-3 where an attribute has an invalid tag: http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_F.3.2.2.html#table_F.3-3
# Standard workaround: Catch exception in Table F.3-3 where an attribute has an invalid tag
# http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_F.3.2.2.html#table_F.3-3
table['attributes'] = [attr for attr in list(map(preprocess_attribute, table['attributes'])) if attr]
return table

Expand All @@ -49,7 +50,8 @@ def preprocess_attribute(attr):
'tag': pl.text_from_html_string(attr['tag']),
'type': 'None' if 'type' not in attr.keys()
else pl.text_from_html_string(attr['type']),
'description': attr['description']
'description': attr['description'],
'conditional': attr.get('conditional'),
}
# Return empty dict if tag is invalid (exception in Table F.3-3)
if cleaned_attribute['tag'] == 'See F.5':
Expand Down
3 changes: 2 additions & 1 deletion dicom_standard/process_module_attribute_relationship.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ def module_attr_relationship_table(module_attr_list):
'tag': attribute['tag'],
'type': attribute['type'],
'linkToStandard': get_standard_link(module, attribute),
'description': attribute['description']
'description': attribute['description'],
'conditional': attribute.get('conditional'),
})
return entries

Expand Down
10 changes: 9 additions & 1 deletion tests/end_to_end_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_valid_ciod_names(sops, ciods):
assert any(d['name'] == pair['ciod'] for d in ciods)


@pytest.mark.endotend
@pytest.mark.endtoend
def test_vertical_samples_from_standard(ciods, modules, attributes):
test_ciod = {
"name": "US Multi-frame Image",
Expand Down Expand Up @@ -312,3 +312,11 @@ def test_no_duplicate_modules(self, modules):

def test_no_duplicate_sops(self, sops):
assert not self.get_duplicate_ids(sops)


@pytest.mark.endtoend
def test_no_duplicate_paths(module_attribute_relationship):
    """Check that no 'path' value occurs more than once in the module-attribute entries."""
    occurrences = Counter(entry['path'] for entry in module_attribute_relationship)
    duplicates = [path for path, count in occurrences.items() if count > 1]
    # Printed so the offending paths appear in pytest's captured output on failure.
    print(duplicates)
    assert not duplicates

0 comments on commit c9382e1

Please sign in to comment.