Skip to content

Commit

Permalink
Fix and add type hints
Browse files: browse the repository at this point in the history
- Rename read_json_to_dict to read_json_data since it can return either a dict or a list of dicts
- Use 'Y'/'N' string values instead of booleans to make the 'retired' property consistent across JSON files
- Update standard and e2e tests
  • Loading branch information
russellkan committed Apr 4, 2020
1 parent 53c1885 commit 11c5500
Show file tree
Hide file tree
Showing 26 changed files with 5,669 additions and 5,756 deletions.
13 changes: 8 additions & 5 deletions dicom_standard/extract_attributes.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
'''
Extract the listing of all attributes given in PS3.6 of the DICOM Standard.
'''
from typing import List
import sys

from bs4 import BeautifulSoup, Tag

from dicom_standard import parse_lib as pl
from dicom_standard import parse_relations as pr
from dicom_standard.table_utils import table_to_dict
from dicom_standard.table_utils import TableDictType, table_to_dict

COLUMN_TITLES = ['tag', 'name', 'keyword', 'valueRepresentation', 'valueMultiplicity', 'retired']
ATTR_TABLE_IDS = ['table_6-1', 'table_7-1', 'table_8-1', 'table_9-1']


def get_attribute_table(standard):
def get_attribute_table(standard: BeautifulSoup) -> List[TableDictType]:
attr_dict_list = []
all_tables = standard.find_all('div', class_='table')
for table_id in ATTR_TABLE_IDS:
Expand All @@ -22,17 +25,17 @@ def get_attribute_table(standard):
return attr_dict_list


def attribute_table_to_list(table_div):
def attribute_table_to_list(table_div: Tag) -> List[List[str]]:
return [[cell.text.strip() for cell in row.find_all('td')]
for row in pr.table_rows(table_div)]


def attribute_table_to_json(table):
def attribute_table_to_json(table: List[TableDictType]) -> List[TableDictType]:
attributes = []
for attr in table:
attr['id'] = pl.create_slug(attr['tag'])
attr['tag'] = attr['tag'].upper()
attr['retired'] = True if 'RET' in attr['retired'] else False
attr['retired'] = 'Y' if 'RET' in attr['retired'] else 'N'
attributes.append(attr)
return attributes

Expand Down
14 changes: 10 additions & 4 deletions dicom_standard/extract_ciod_functional_group_macro_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,18 @@
Load the CIOD to functional group macro tables from DICOM Standard PS3.3, Annex A.
Output the data from the tables in JSON format, one entry per CIOD.
'''
from typing import List, Match, Tuple
import sys
import re

from bs4 import Tag

from dicom_standard import parse_lib as pl
from dicom_standard import parse_relations as pr
from dicom_standard.macro_utils import MetadataTableType
from dicom_standard.table_utils import (
TableListType,
TableDictType,
get_chapter_tables,
tables_to_json,
get_short_standard_link,
Expand All @@ -24,22 +30,22 @@

# Add missing "Image" to title of Table A.52.4.3-1
# http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.52.4.3.html#table_A.52.4.3-1
def clean_macro_table_name(table_name):
def clean_macro_table_name(table_name: str) -> str:
clean_name = pl.clean_table_name(table_name)
if clean_name == 'Ophthalmic Tomography':
clean_name = 'Ophthalmic Tomography Image'
return clean_name


def is_valid_macro_table(table_div):
def is_valid_macro_table(table_div: Tag) -> Match:
return TABLE_SUFFIX.match(pr.table_name(table_div))


def macro_table_to_dict(table):
def macro_table_to_dict(table: TableListType) -> List[TableDictType]:
return table_to_dict(table, COLUMN_TITLES)


def get_table_with_metadata(table_with_tdiv):
def get_table_with_metadata(table_with_tdiv: Tuple[List[TableDictType], Tag]) -> MetadataTableType:
table, tdiv = table_with_tdiv
clean_name = clean_macro_table_name(pr.table_name(tdiv))
table_description = get_table_description(tdiv)
Expand Down
9 changes: 7 additions & 2 deletions dicom_standard/extract_ciod_module_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
All CIOD tables are defined in chapter A of the DICOM Standard.
Output the tables in JSON format, one entry per CIOD.
'''
from typing import List, Match, Tuple
import sys
import re

from bs4 import Tag

from dicom_standard import parse_lib as pl
from dicom_standard import parse_relations as pr
from dicom_standard.macro_utils import MetadataTableType
from dicom_standard.table_utils import (
TableDictType,
get_chapter_tables,
tables_to_json,
get_short_standard_link,
Expand All @@ -23,15 +28,15 @@
COLUMN_TITLES = ['informationEntity', 'module', 'reference_fragment', 'usage']


def is_valid_ciod_table(table_div):
def is_valid_ciod_table(table_div: Tag) -> Match:
return TABLE_SUFFIX.match(pr.table_name(table_div))


def ciod_table_to_dict(table):
return table_to_dict(table, COLUMN_TITLES)


def get_table_with_metadata(table_with_tdiv):
def get_table_with_metadata(table_with_tdiv: Tuple[List[TableDictType], Tag]) -> MetadataTableType:
table, tdiv = table_with_tdiv
clean_name = pl.clean_table_name(pr.table_name(tdiv))
table_description = get_table_description(tdiv)
Expand Down
15 changes: 8 additions & 7 deletions dicom_standard/extract_conf_profile_attributes.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
'''
Extract the listing of all attributes given in table E.1-1 from part 15 of the DICOM Standard.
'''
from typing import List
from typing import cast, Dict, List, Union
import sys

from bs4 import BeautifulSoup

from dicom_standard import parse_lib as pl
from dicom_standard.extract_attributes import attribute_table_to_list
from dicom_standard.table_utils import AttributeDictType, table_to_dict
from dicom_standard.table_utils import TableDictType, table_to_dict

COLUMN_TITLES = [
'name', 'tag', 'retired', 'stdCompIOD', 'basicProfile', 'rtnSafePrivOpt',
Expand All @@ -18,14 +18,14 @@
TABLE_ID = 'table_E.1-1'


def get_conf_profile_table(standard: BeautifulSoup) -> List[AttributeDictType]:
def get_conf_profile_table(standard: BeautifulSoup) -> List[TableDictType]:
all_tables = standard.find_all('div', class_='table')
html_table = pl.find_tdiv_by_id(all_tables, TABLE_ID)
list_table = attribute_table_to_list(html_table)
return table_to_dict(list_table, COLUMN_TITLES, omit_empty=True)


def table_to_json(table: List[AttributeDictType]) -> List[AttributeDictType]:
def table_to_json(table: List[TableDictType]) -> List[TableDictType]:
attributes = []
for attr in table:
attr['id'] = pl.create_slug(attr['tag'])
Expand All @@ -34,8 +34,9 @@ def table_to_json(table: List[AttributeDictType]) -> List[AttributeDictType]:
return attributes


def verify_table_integrity(parsed_table_data: List[AttributeDictType], attributes: List[AttributeDictType]):
retired_attrs = [d['name'] for d in attributes if d['retired']]
def verify_table_integrity(parsed_table_data: List[TableDictType], attributes: pl.JsonDataType):
attributes = cast(List[Dict[str, Union[str, bool]]], attributes)
retired_attrs = [d['name'] for d in attributes if d['retired'] == 'Y']
errors = []
for attr in parsed_table_data:
attr_name = attr['name']
Expand All @@ -54,7 +55,7 @@ def verify_table_integrity(parsed_table_data: List[AttributeDictType], attribute

if __name__ == '__main__':
standard = pl.parse_html_file(sys.argv[1])
attributes = pl.read_json_to_dict(sys.argv[2])
attributes = pl.read_json_data(sys.argv[2])
table = get_conf_profile_table(standard)
parsed_table_data = table_to_json(table)
verify_table_integrity(parsed_table_data, attributes)
Expand Down
37 changes: 0 additions & 37 deletions dicom_standard/extract_macros_with_attributes.py

This file was deleted.

10 changes: 6 additions & 4 deletions dicom_standard/extract_modules_macros_with_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@
Load the module-attribute tables from DICOM Standard PS3.3.
Output the tables in JSON format, one entry per attribute.
'''
from typing import Tuple, List, Dict, Any
from typing import List, Match, Tuple, Union
import sys
import re

from bs4 import BeautifulSoup, Tag

from dicom_standard import parse_lib as pl
from dicom_standard import parse_relations as pr
from dicom_standard.macro_utils import MetadataTableType
from dicom_standard.table_utils import (
TableListType,
TableDictType,
tdiv_to_table_list,
table_to_dict,
get_short_standard_link,
Expand All @@ -31,18 +33,18 @@ def get_module_macro_tables(standard: BeautifulSoup) -> Tuple[List[TableListType
return (table_lists, table_divs)


def is_valid_table(table_div):
def is_valid_table(table_div: Tag) -> Union[Match, bool]:
table_name = pr.table_name(table_div)
return TABLE_SUFFIX.match(table_name) and 'Example' not in table_name


def module_table_to_dict(table: TableListType) -> List[Dict[str, List[Tag]]]:
def module_table_to_dict(table: TableListType) -> List[TableDictType]:
has_type_column = len(table[0]) > 3
column_titles = COLUMN_TITLES_WITH_TYPE if has_type_column else COLUMN_TITLES_NO_TYPE
return table_to_dict(table, column_titles)


def get_table_with_metadata(table_with_tdiv: Tuple[TableListType, Tag]) -> Dict[str, Any]:
def get_table_with_metadata(table_with_tdiv: Tuple[List[TableDictType], Tag]) -> MetadataTableType:
table, tdiv = table_with_tdiv
table_name = pr.table_name(tdiv)
clean_name = pl.clean_table_name(table_name)
Expand Down
73 changes: 0 additions & 73 deletions dicom_standard/extract_modules_with_attributes.py

This file was deleted.

6 changes: 3 additions & 3 deletions dicom_standard/extract_sops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from dicom_standard import parse_lib as pl
from dicom_standard.extract_attributes import attribute_table_to_list
from dicom_standard.table_utils import AttributeDictType, table_to_dict
from dicom_standard.table_utils import TableDictType, table_to_dict

COLUMN_TITLES = ['name', 'id', 'ciod']
TABLE_ID = 'table_B.5-1'
Expand All @@ -25,7 +25,7 @@
}


def get_table_and_tdiv(standard: BeautifulSoup) -> Tuple[List[AttributeDictType], Tag]:
def get_table_and_tdiv(standard: BeautifulSoup) -> Tuple[List[TableDictType], Tag]:
all_tables = standard.find_all('div', class_='table')
html_table = pl.find_tdiv_by_id(all_tables, TABLE_ID)
list_table = attribute_table_to_list(html_table)
Expand All @@ -38,7 +38,7 @@ def generate_ciod_id(name: str) -> str:
return IOD_ABBREVIATIONS.get(cleaned_name, cleaned_name)


def table_to_json(table: List[AttributeDictType], tdiv: Tag) -> List[AttributeDictType]:
def table_to_json(table: List[TableDictType], tdiv: Tag) -> List[TableDictType]:
attributes = []
for row in table:
row['ciod'] = generate_ciod_id(row['ciod'])
Expand Down
Loading

0 comments on commit 11c5500

Please sign in to comment.