-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathextract_conf_profile_attributes.py
77 lines (62 loc) · 3.04 KB
/
extract_conf_profile_attributes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
'''
Extract the listing of all attributes given in table E.1-1 from part 15 of the DICOM Standard.
'''
from typing import cast, Dict, List, Union
import sys
from bs4 import BeautifulSoup
from dicom_standard import parse_lib as pl
from dicom_standard.table_utils import TableDictType, table_to_list, table_to_dict
# Keys assigned to the columns of the parsed table, in column order
# (handed to table_to_dict as the column titles).
COLUMN_TITLES = [
    'name',
    'tag',
    'retired',
    'stdCompIOD',
    'basicProfile',
    'rtnSafePrivOpt',
    'rtnUIDsOpt',
    'rtnDevIdOpt',
    'rtnInstIdOpt',
    'rtnPatCharsOpt',
    'rtnLongFullDatesOpt',
    'rtnLongModifDatesOpt',
    'cleanDescOpt',
    'cleanStructContOpt',
    'cleanGraphOpt',
]

# HTML id used to locate the confidentiality profile table in the parsed standard.
TABLE_ID = 'table_E.1-1'

# Attribute names whose retirement status is known to disagree between tables;
# these are exempted from the consistency check.
RETIREMENT_MISMATCH_ATTRIBUTES = [
    'Referenced Patient Alias Sequence',
]

# Shape of the attribute records the parsed table is checked against.
AttrTableType = List[
    Dict[str, Union[str, bool]]
]
def ignore_retirement_mismatch(attr_name: str) -> bool:
    """Tell whether a retirement mismatch for this attribute should be ignored.

    Works around known inconsistencies in the standard: the attribute names
    to skip are hardcoded in RETIREMENT_MISMATCH_ATTRIBUTES.

    Args:
        attr_name (str): Name of the attribute being checked.

    Returns:
        bool: True if the name is one of the known mismatches, False otherwise.
    """
    is_known_mismatch = attr_name in RETIREMENT_MISMATCH_ATTRIBUTES
    return is_known_mismatch
def get_conf_profile_table(standard: BeautifulSoup) -> List[TableDictType]:
    """Locate the table identified by TABLE_ID and convert it to dict rows.

    Args:
        standard (BeautifulSoup): Parsed HTML document to search.

    Returns:
        List[TableDictType]: Result of table_to_dict, keyed by COLUMN_TITLES
        and built with omit_empty=True.
    """
    # Candidate tables are the <div class="table"> elements of the document.
    table_divs = standard.find_all('div', class_='table')
    profile_div = pl.find_tdiv_by_id(table_divs, TABLE_ID)
    return table_to_dict(table_to_list(profile_div), COLUMN_TITLES, omit_empty=True)
def table_to_json(table: List[TableDictType]) -> List[TableDictType]:
    """Add an 'id' to every row and upper-case its 'tag'.

    The rows are modified in place; the returned list is a new list holding
    those same row objects in their original order.

    Args:
        table (List[TableDictType]): Rows that each carry a 'tag' entry.

    Returns:
        List[TableDictType]: The updated rows.
    """
    converted = []
    for row in table:
        original_tag = row['tag']
        # The slug is built from the tag as it was before upper-casing.
        row['id'] = pl.create_slug(original_tag)
        row['tag'] = original_tag.upper()
        converted.append(row)
    return converted
def verify_table_integrity(parsed_table_data: List[TableDictType], attributes: AttrTableType):
    """Check that retirement status agrees between the two attribute tables.

    Each row of ``parsed_table_data`` (Table E.1-1) is compared by name with
    the retired entries of ``attributes`` (referred to as Table 6-1 in the
    error text). Names accepted by ignore_retirement_mismatch are skipped.

    Args:
        parsed_table_data (List[TableDictType]): Rows with 'name', 'tag' and
            'retired' entries; 'retired' equal to 'Y' marks a retired row.
        attributes (AttrTableType): Records with 'name' and 'retired' entries;
            'retired' equal to 'Y' marks a retired attribute.

    Raises:
        Exception: If at least one attribute is retired in one table but not
            in the other. The message lists every mismatch, one per line.
    """
    # A set gives constant-time membership tests inside the loop below.
    retired_names = {entry['name'] for entry in attributes if entry['retired'] == 'Y'}
    errors = []
    for attr in parsed_table_data:
        attr_name = attr['name']
        retired_in_profile = attr['retired'] == 'Y'
        retired_in_attributes = attr_name in retired_names
        if retired_in_profile == retired_in_attributes:
            continue  # Both tables agree.
        if ignore_retirement_mismatch(attr_name):
            continue  # Known mismatch that is deliberately tolerated.
        if retired_in_profile:
            errors.append(f'Attribute "{attr_name}" {attr["tag"]} is retired in Table '
                          'E.1-1 but not in Table 6-1.')
        else:
            errors.append(f'Attribute "{attr_name}" {attr["tag"]} is retired in Table '
                          '6-1 but not in Table E.1-1.')
    if errors:
        header = 'One or more attributes in tables 6-1 and E.1-1 have inconsistent properties between tables:'
        raise Exception('\n'.join([header] + errors))
if __name__ == '__main__':
    # argv[1]: HTML file to parse; argv[2]: JSON file with the attribute records
    # used for the retirement consistency check.
    standard = pl.parse_html_file(sys.argv[1])
    attributes = pl.read_json_data(sys.argv[2])
    parsed_table_data = table_to_json(get_conf_profile_table(standard))
    # Raises if the retirement status of any attribute disagrees between tables.
    verify_table_integrity(parsed_table_data, cast(AttrTableType, attributes))
    # 'retired' is only needed for the check above, so it is removed before writing.
    for attr in parsed_table_data:
        attr.pop('retired')
    pl.write_pretty_json(parsed_table_data)