diff --git a/README.md b/README.md index b9ac64d..58bc0c2 100644 --- a/README.md +++ b/README.md @@ -229,7 +229,7 @@ To run a specific test, run `tox -e `. Test environments include: Certain parts of the DICOM Standard site cause errors when running the parser, often due to typos or formatting inconsistencies. -When we find one of these issues, we add a fix in the relevant file and add a comment starting with 'Standard workaround' that describes the issue and links to its location in the Standard. +When we find one of these issues, we add a hard-coded fix in the relevant file and add a comment starting with 'Standard workaround' that describes the issue and links to its location in the Standard. To be aware when these fixes are obsolete, we add a unit test that fails once the issue no longer exists. Current standard workarounds (as of rev.2020b): | *Issue description* | *Workaround location* | @@ -237,14 +237,16 @@ Current standard workarounds (as of rev.2020b): | [Table A.39.19-1](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.35.19.3.html) ends its title with an upper case "S" | `extract_ciod_module_tables.py`
`parse_lib.py` | | [Table A.32.9-2](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.32.9.3.4.html#table_A.32.9-2) has "Functional Groups Macros" in its title while other tables use "Functional Group Macros" | `extract_ciod_func_group_macro_tables.py`
`parse_lib.py` | | [Table A.52.4.3-1](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.52.4.3.html#table_A.52.4.3-1) is missing "Image" from the IOD name portion of the title | `extract_ciod_func_group_macro_tables.py` | -| [The Enhanced MR Color Image IOD](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.36.4.4.html) references the Enhanced MR Image IOD's functional group macros table instead of having its own (they would be identical tables) | `extract_ciod_func_group_macro_tables.py` | -| [Table C.8.25.16-8](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.8.25.16.8.html) has an include statement with an extra hierarchy marker (two instead of one) | `hierarchy_utils.py` | | [Table C.8-125](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.8.15.3.9.html#table_C.8-125) has an extra "Sequence" in its title (should be "CT X-Ray Details", not "CT X-Ray Details Sequence") | `parse_lib.py` | -| Certain subsections are located within the base section rather than having their own section (C.7.16.2.5.1 should be within C.7.16.2.5, but `sect_C.7.16.2.5.html` is invalid) | `parse_lib.py` | -| [Table TID 1004](http://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_A.html#sect_TID_1004) has a section URL pattern ("sect_TID_1004") that doesn't exist within the HTML version of the standard | `parse_lib.py` | -| The "Content Creator's Name" attribute appears twice in [Table C.36.8-1](http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_C.36.8.html#table_C.36.8-1) with the same hierarchy without a conditional statement | `postprocess_merge_duplicate_nodes.py` | -| [Table F.3-3](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_F.3.2.2.html#table_F.3-3) contains a "Record Selection Keys" attribute with an invalid tag ("See F.5") | `preprocess_modules_with_attributes.py` | | [Table A.84.3.2-1](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.84.3.2.html#table_A.84.3.2-1) contains a macro that has an extra "Macro" in its name ("Frame VOI LUT With LUT Macro") | `process_ciod_func_group_macro_relationship.py` | +| [Table C.8.25.16-8](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_C.8.25.16.8.html) has an include statement with an extra hierarchy marker (two instead of one) | `hierarchy_utils.py` | +| [Table TID 1004](http://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_A.html#sect_TID_1004) has a section URL pattern ("sect_TID_1004") that doesn't exist within the HTML version of the standard | `parse_lib.py` | +| Certain subsections are located within the base section rather than having their own section (C.7.16.2.5.1 should be within C.7.16.2.5, but `sect_C.7.16.2.5.html` is invalid) | `parse_lib.py` | +| \*[The Enhanced MR Color Image IOD](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.36.4.4.html) references the Enhanced MR Image IOD's functional group macros table instead of having its own (they would be identical tables) | `extract_ciod_func_group_macro_tables.py` | +| \*The "Content Creator's Name" attribute appears twice in [Table C.36.8-1](http://dicom.nema.org/medical/dicom/2019c/output/chtml/part03/sect_C.36.8.html#table_C.36.8-1) with the same hierarchy without a conditional statement | `postprocess_merge_duplicate_nodes.py` | +| \*[Table F.3-3](http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_F.3.2.2.html#table_F.3-3) contains a "Record Selection Keys" attribute with an invalid tag ("See F.5") | `preprocess_modules_with_attributes.py` | + +(\*) This issue is not caused by a typo or error in the Standard but rather an exception from the normal format and thus does not have a unit test for a fix. ## Contact diff --git a/dicom_standard/parse_lib.py b/dicom_standard/parse_lib.py index 689783d..08eb525 100644 --- a/dicom_standard/parse_lib.py +++ b/dicom_standard/parse_lib.py @@ -83,8 +83,8 @@ def clean_table_name(name: str) -> str: Table C.7-5b. Clinical Trial Series Module Attributes --> Clinical Trial Series ''' _, _, title = re.split('\u00a0', name) - # Standard workaround: Include upper case "S" at end of "IOD Modules" to catch typo in Table A.39.19-1 - # http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.35.19.3.html + # Standard workaround: Include upper case "S" at end of "IOD Modules" to catch typo in Table A.35.19-1 + # http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.35.19.3.html#table_A.35.19-1 # Standard workaround: Include optional "s" at end of "Functional Group" to catch Table A.32.9-2 # http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.32.9.3.4.html#table_A.32.9-2 possible_table_suffixes = r'(IOD Module[Ss])|(Module Attributes)|((Functional Group)? Macro Attributes)|(Module Table)|(Functional Groups? Macros)' diff --git a/tests/standard_workarounds_test.py b/tests/standard_workarounds_test.py new file mode 100644 index 0000000..f9994c7 --- /dev/null +++ b/tests/standard_workarounds_test.py @@ -0,0 +1,66 @@ +import pytest +import requests + +from dicom_standard.parse_lib import NONSTANDARD_SECTION_IDS, SHORT_DICOM_URL_PREFIX, parse_html_file +from dicom_standard.table_utils import get_table_rows_from_ids + + +@pytest.fixture(scope='module') +def part03(): + return parse_html_file('dicom_standard/standard/part03.html') + + +def get_table_title_from_id(standard, table_id): + table_id_element = standard.find('a', {'id': table_id}) + return table_id_element.find_next('p').text.strip() + + +def test_upper_case_S_in_table_a_39_19_1(part03): + table_title = get_table_title_from_id(part03, 'table_A.35.19-1') + assert table_title.endswith('S'), 'Table title no longer ends with upper case "S"' + + +def test_extra_s_in_table_a_32_9_2(part03): + table_title = get_table_title_from_id(part03, 'table_A.32.9-2') + assert 'Groups' in table_title, 'Table title no longer contains extra "s" at end of "Group"' + + +def test_missing_word_in_table_a_52_4_3_1(part03): + table_title = get_table_title_from_id(part03, 'table_A.52.4.3-1') + assert 'Image' not in table_title, 'Table title now contains full IOD name ("Ophthalmic Tomography Image")' + + +def test_extra_word_in_table_c_8_125(part03): + table_title = get_table_title_from_id(part03, 'table_C.8-125') + assert 'Sequence' in table_title, 'Table title no longer contains extra "Sequence"' + + +def test_extra_word_in_table_a_84_3_2_1(part03): + rows = get_table_rows_from_ids(part03, ['table_A.84.3.2-1'], col_titles=['macro_name']) + macro_name = 'Frame VOI LUT With LUT' + row = next(filter(lambda r: macro_name in r['macro_name'], rows)) + assert 'Macro' in row['macro_name'], 'Row no longer contains extra "Macro"' + + +def test_extra_hierarchy_marker_in_table_c_8_25_16_8(part03): + rows = get_table_rows_from_ids(part03, ['table_C.8.25.16-8'], col_titles=['attribute_name']) + attr_name_substr = 'Include Table' + row = next(filter(lambda r: attr_name_substr in r['attribute_name'], rows)) + assert '>>' in row['attribute_name'], 'Row no longer contains two hierarchy markers' + + +def test_sect_tid_1004_invalid_url(): + test_url = 'http://dicom.nema.org/medical/dicom/current/output/chtml/part16/sect_TID_1004.html#sect_TID_1004' + status_code = requests.get(test_url).status_code + assert status_code == 404, 'Section TID 1004 now has a URL format consistent with the other sections' + + +def test_nonstandard_sections_invalid_url(): + standard_sections = [] + for sect_id in NONSTANDARD_SECTION_IDS: + test_url = f'{SHORT_DICOM_URL_PREFIX}{sect_id}.2.html' + status_code = requests.get(test_url).status_code + if status_code != 404: + standard_sections.append(sect_id) + sections_str = ', '.join(standard_sections) + assert not standard_sections, f'Section(s) {sections_str} have at least one valid subsection URL'