-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathmacro_utils.py
102 lines (77 loc) · 4.37 KB
/
macro_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
'''
Utility functions for expanding macros in the module-attribute
relationship tables.
'''
from typing import Any, Dict, List, Optional
import re
from copy import deepcopy
from bs4 import BeautifulSoup, Tag
from dicom_standard.hierarchy_utils import get_hierarchy_markers
# A single attribute row. The functions in this module read its 'tag' and
# 'name' keys and may add a 'conditional' key.
AttributeType = Dict[str, str]
# A table record. The functions in this module read its 'linkToStandard'
# and 'attributes' keys.
MetadataTableType = Dict[str, Any]
# Macro tables keyed by macro ID.
MacrosType = Dict[str, MetadataTableType]
def expand_macro_rows(table: MetadataTableType, macros: MacrosType, conditional: Optional[str] = None) -> List[AttributeType]:
    '''
    Return the attributes of `table` with each macro include row replaced
    by the attributes of the macro it references.

    `table` is a table record (a dict providing 'linkToStandard' and
    'attributes'), which is also what `get_macros_by_id` hands back when
    this function is re-entered for a nested macro. `macros` maps macro IDs
    to macro tables. `conditional`, when given, is forwarded to
    `get_attributes_to_insert` for every row of `table`.

    Rows whose 'tag' is the string 'None' are dropped from the result.
    '''
    # The table's own ID is passed down so a macro that references itself
    # is not expanded again. This stops an infinite macro reference
    # loop in the standard at the SR Document Content module.
    table_id = get_id_from_link(table['linkToStandard'])
    attribute_insertion_lists = [
        get_attributes_to_insert(attr, macros, table_id, conditional)
        for attr in table['attributes']
    ]
    new_table = flatten_one_layer(attribute_insertion_lists)
    # Removes divider or stylistic rows
    return [attribute for attribute in new_table if attribute['tag'] != 'None']
def get_attributes_to_insert(attribute: AttributeType, macros: MacrosType, table_id: str, conditional: Optional[str] = None) -> List[Dict[str, str]]:
    '''
    Return the rows that should take the place of `attribute`.

    A macro include row is replaced by the attributes obtained from
    `get_macro_attributes`. Any other row is returned as a one-element
    list; when `conditional` is truthy it is first stored on the row
    itself under the 'conditional' key.
    '''
    if not is_macro_row(attribute):
        if conditional:
            attribute['conditional'] = conditional
        return [attribute]
    expanded = get_macro_attributes(attribute, macros, table_id, conditional)
    if expanded is None:
        return []
    return expanded
def is_macro_row(attribute: AttributeType) -> bool:
    '''
    Return True when `attribute` is a row that includes another table.

    A row qualifies when its 'tag' is the string 'None', its 'name' HTML
    contains an `<a class="xref">` anchor, and that anchor's text starts
    with 'Table'.
    '''
    # Only rows without a real tag can be include rows, so ordinary rows
    # are rejected before paying for an HTML parse of the name cell.
    if attribute['tag'] != 'None':
        return False
    reference_anchor_tag = BeautifulSoup(attribute['name'], 'html.parser').find('a', class_='xref')
    if reference_anchor_tag is None:
        return False
    # This check guards against a one-off reference in the standard
    # where a link actually points to prose instead of a table.
    return reference_anchor_tag.get_text().startswith('Table')
# Note that this function *recursively expands* macro references using
# the `expand_macro_rows` function.
def get_macro_attributes(attribute: AttributeType, macros: MacrosType, table_id: str, conditional: Optional[str] = None) -> List[AttributeType]:
    '''
    Return the expanded attributes of the macro referenced by the include
    row `attribute`.

    The hierarchy marker is derived from the text of the include row and
    applied to the macro's attributes. If that text contains a condition
    (the text from 'if ' onwards), it replaces the `conditional` passed in.
    An empty list is returned when the referenced macro is the table with
    ID `table_id` itself.
    '''
    include_html = attribute['name']
    macro_id = referenced_macro_id_from_include_statement(include_html)
    include_text = BeautifulSoup(include_html, 'html.parser').get_text()
    hierarchy_marker = get_hierarchy_markers(include_text)
    own_condition = re.search("if +.*", include_text.strip())
    if own_condition is not None:
        conditional = own_condition.group()
    # A table that includes itself must not be expanded again.
    if table_id == macro_id:
        return []
    referenced_macro = get_macros_by_id(macro_id, macros, hierarchy_marker)
    return expand_macro_rows(referenced_macro, macros, conditional)
def flatten_one_layer(nested_element_list: List[List[Any]]) -> List[Any]:
    '''
    Concatenate the inner lists of `nested_element_list` into one new list,
    keeping their order. Only the outermost level of nesting is removed.
    '''
    flattened: List[Any] = []
    for inner_list in nested_element_list:
        flattened.extend(inner_list)
    return flattened
def referenced_macro_id_from_include_statement(macro_reference_html: str) -> str:
    '''
    Return the ID of the macro that an include statement links to.

    The ID is the 'href' of the first `<a class="xref">` anchor in
    `macro_reference_html` with its first character removed.
    '''
    soup = BeautifulSoup(macro_reference_html, 'html.parser')
    href = soup.find('a', class_='xref').get('href')
    # Drop the leading character, which is expected to be the '#' of a
    # same-page link.
    return href[1:]
def get_macros_by_id(macro_id: str, macros: MacrosType, hierarchy_marker: str) -> MetadataTableType:
    '''
    Return a private copy of the macro stored under `macro_id` whose
    attributes have had `hierarchy_marker` applied to them.
    '''
    # Work on a deep copy so that local modifications to attributes
    # (i.e. hierarchy marker modifications) don't persist in `macros`.
    macro_copy = deepcopy(macros[macro_id])
    marked_attributes = update_attribute_hierarchy_markers(macro_copy['attributes'], hierarchy_marker)
    macro_copy['attributes'] = marked_attributes
    return macro_copy
def update_attribute_hierarchy_markers(attributes: List[AttributeType], marker: str) -> List[AttributeType]:
    '''
    Apply `add_marker_to_attr` with `marker` to each entry of `attributes`
    and return the results as a new list, in the same order.
    '''
    marked_attributes = []
    for attribute in attributes:
        marked_attributes.append(add_marker_to_attr(attribute, marker))
    return marked_attributes
def add_marker_to_attr(attribute: Dict[str, str], marker: str) -> Dict[str, str]:
    '''
    Prefix the name cell of `attribute` with `marker`.

    The 'name' HTML is parsed and its first `<td>` element is used, or its
    first `<th>` element when there is no `<td>`. `attribute['name']` is
    overwritten with the marked cell's HTML and the same dict is returned.
    '''
    soup = BeautifulSoup(attribute['name'], 'html.parser')
    name_cell = None
    # Prefer a data cell; fall back to a header cell.
    for cell_tag in ('td', 'th'):
        name_cell = soup.find(cell_tag)
        if name_cell is not None:
            break
    attribute['name'] = prepend_marker_to_attribute_name(name_cell, marker)
    return attribute
def prepend_marker_to_attribute_name(new_attr_to_insert: Tag, marker: str) -> str:
    '''
    Insert `marker` at position 0 of `new_attr_to_insert`, modifying the
    element in place, and return the element converted to a string.
    '''
    first_position = 0
    new_attr_to_insert.insert(first_position, marker)
    marked_html = str(new_attr_to_insert)
    return marked_html
def get_id_from_link(link: str) -> str:
    '''
    Return the fragment identifier of `link`, i.e. the text after its '#'.

    Raises ValueError when `link` does not contain exactly one '#'.
    '''
    parts = link.split('#')
    # Checked explicitly so that a malformed link is named in the error
    # instead of surfacing as an opaque tuple-unpacking failure.
    if len(parts) != 2:
        raise ValueError(
            "Expected a link containing exactly one '#', got: {!r}".format(link)
        )
    return parts[1]