Skip to content

Commit

Permalink
Merge duplicate nodes in macro_to_attributes
Browse files (browse the repository at this point in the history)
- Add duplicate node/key tests to e2e test file
- Issues #21 and #22
  • Loading branch information
russellkan committed May 7, 2020
1 parent ff9a32e commit 815793e
Show file tree
Hide file tree
Showing 6 changed files with 734 additions and 885 deletions.
7 changes: 5 additions & 2 deletions dicom_standard/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ dist/ciod_to_func_group_macros.json: tmp/raw_ciod_func_group_macro_tables.json
dist/module_to_attributes.json: tmp/module_to_attributes_no_duplicates.json dist/macros.json dist/ciod_to_func_group_macros.json dist/macro_to_attributes.json
$(PYTHONPATH_PREFIX) python3 postprocess_integrate_func_group_macros.py $^ > $@

dist/macro_to_attributes.json: tmp/macros_attributes_partial_references.json dist/references.json
$(PYTHONPATH_PREFIX) python3 postprocess_update_reference_links.py $^ > $@
dist/macro_to_attributes.json: tmp/macros_attributes_updated_references.json
$(PYTHONPATH_PREFIX) python3 postprocess_merge_duplicate_nodes.py $< > $@

dist/references.json: tmp/modules_attributes_partial_references.json tmp/raw_section_tables.json
$(PYTHONPATH_PREFIX) python3 postprocess_save_references.py $^ > $@
Expand All @@ -59,6 +59,9 @@ tmp/module_to_attributes_no_duplicates.json: tmp/modules_attributes_updated_refe
tmp/modules_attributes_updated_references.json: tmp/modules_attributes_partial_references.json dist/references.json
$(PYTHONPATH_PREFIX) python3 postprocess_update_reference_links.py $^ > $@

tmp/macros_attributes_updated_references.json: tmp/macros_attributes_partial_references.json dist/references.json
$(PYTHONPATH_PREFIX) python3 postprocess_update_reference_links.py $^ > $@

tmp/modules_attributes_partial_references.json: tmp/modules_attributes_no_references.json
$(PYTHONPATH_PREFIX) python3 postprocess_mark_references.py $< > $@

Expand Down
20 changes: 10 additions & 10 deletions dicom_standard/postprocess_merge_duplicate_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,39 +18,39 @@ def add_conditional_to_description(node):
node['description'] = formatted_conditional + node['description']


def is_duplicate_node(path, module_attr_list):
instances = filter(lambda n: n['path'] == path, module_attr_list)
def is_duplicate_node(path, node_list):
instances = filter(lambda n: n['path'] == path, node_list)
descriptions = map(lambda n: n['description'], instances)
return len(set(descriptions)) > 1


def merge_duplicate_nodes(module_attr_list):
path_list = [d['path'] for d in module_attr_list]
def merge_duplicate_nodes(node_list):
path_list = [d['path'] for d in node_list]
duplicate_paths = [k for k, v in Counter(path_list).items() if v > 1]
path_to_node = {}
for node in module_attr_list:
for node in node_list:
path = node['path']
if path in path_to_node:
# Standard workaround: Catch inconsistency in Table C.36.8-1 where "Content Creator's Name" attribute
# appears twice in same hierarchy without a conditional
# http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_C.36.8.html
if path not in DUPLICATE_PATH_EXCEPTIONS:
# Add conditional to description only if the duplicates do not have identical descriptions
if is_duplicate_node(path, module_attr_list):
if is_duplicate_node(path, node_list):
add_conditional_to_description(node)
path_to_node[path]['description'] += node['description']
path_to_node[path]['externalReferences'].extend(node['externalReferences'])
else:
if path in duplicate_paths and path not in DUPLICATE_PATH_EXCEPTIONS:
# Add conditional to description only if the duplicates do not have identical descriptions
if is_duplicate_node(path, module_attr_list):
if is_duplicate_node(path, node_list):
add_conditional_to_description(node)
path_to_node[path] = node
path_to_node[path].pop('conditional', None)
return list(path_to_node.values())


if __name__ == "__main__":
module_attr_list = pl.read_json_data(sys.argv[1])
processed_module_attr_list = merge_duplicate_nodes(module_attr_list)
pl.write_pretty_json(processed_module_attr_list)
node_list = pl.read_json_data(sys.argv[1])
processed_node_list = merge_duplicate_nodes(node_list)
pl.write_pretty_json(processed_node_list)
3 changes: 2 additions & 1 deletion dicom_standard/process_macro_attribute_relationship.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def macro_attr_relationship_table(macro_attr_list):
'tag': attribute['tag'],
'type': attribute['type'],
'linkToStandard': get_standard_link(macro, attribute),
'description': attribute['description']
'description': attribute['description'],
'conditional': attribute.get('conditional'),
})
return entries

Expand Down
Loading

0 comments on commit 815793e

Please sign in to comment.