-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathpreprocess_modules_with_attributes.py
64 lines (47 loc) · 2.47 KB
/
preprocess_modules_with_attributes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
'''
Performs preprocessing tasks on the module-attribute relationship JSON tables.
All of these steps are required before any further data processing can occur.
Specific processing steps are:
1. Inline expansion of macros (preserving hierarchy markers)
2. Expand out hierarchy markers and embed order in the attribute ID
3. Clean up and format data fields
'''
from typing import cast, Dict, List
import sys
from dicom_standard import parse_lib as pl
from dicom_standard.macro_utils import MetadataTableType, expand_macro_rows, get_id_from_link
from dicom_standard.hierarchy_utils import record_hierarchy_for_module
def key_tables_by_id(table_list: List[MetadataTableType]) -> Dict[str, MetadataTableType]:
return {get_id_from_link(table['linkToStandard']): table for table in table_list}
def expand_all_macros(module_attr_tables, macros):
expanded_attribute_lists = [expand_macro_rows(table, macros)
for table in module_attr_tables]
return map(add_expanded_attributes_to_tables, zip(module_attr_tables, expanded_attribute_lists))
def add_expanded_attributes_to_tables(table_with_attributes):
table, attributes = table_with_attributes
table['attributes'] = attributes
return table
def preprocess_attribute_fields(tables):
return [preprocess_single_table(table) for table in tables]
def preprocess_single_table(table):
table['attributes'] = [attr for attr in list(map(preprocess_attribute, table['attributes'])) if attr]
return table
def preprocess_attribute(attr):
return {
'name': pl.text_from_html_string(attr['name']),
'tag': pl.text_from_html_string(attr['tag']),
'type': 'None' if 'type' not in attr.keys()
else pl.text_from_html_string(attr['type']),
'description': attr['description'],
'conditional': attr.get('conditional'),
}
def expand_hierarchy(tables):
return [record_hierarchy_for_module(table) for table in tables]
if __name__ == '__main__':
module_macro_attr_tables = cast(List[MetadataTableType], pl.read_json_data(sys.argv[1]))
id_to_table = key_tables_by_id(module_macro_attr_tables)
module_attr_tables = [table for table in module_macro_attr_tables if not table['isMacro']]
expanded_tables = expand_all_macros(module_attr_tables, id_to_table)
preprocessed_tables = preprocess_attribute_fields(expanded_tables)
tables_with_hierarchy = expand_hierarchy(preprocessed_tables)
pl.write_pretty_json(tables_with_hierarchy)