Skip to content

Commit

Permalink
Enable tox testing and make flake8 compliant
Browse files Browse the repository at this point in the history
  • Loading branch information
ReeceStevens committed Jul 3, 2017
1 parent 1d5fa0a commit bcd5f67
Show file tree
Hide file tree
Showing 39 changed files with 245 additions and 137 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
'''
import sys

import parse_lib as pl
import parse_relations as pr
from table_utils import table_to_dict
from . import parse_lib as pl
from . import parse_relations as pr
from .table_utils import table_to_dict

COLUMN_TITLES = ['tag', 'name', 'keyword', 'valueRepresentation', 'valueMultiplicity', 'retired']
ATTR_TABLE_ID = 'table_6-1'


def get_attribute_table(standard):
all_tables = standard.find_all('div', class_='table')
html_table = pl.find_tdiv_by_id(all_tables, ATTR_TABLE_ID)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
import sys
import re

import parse_lib as pl
import parse_relations as pr
from table_utils import expand_spans, table_to_dict, stringify_table, tdiv_to_table_list
from . import parse_lib as pl
from . import parse_relations as pr
from .table_utils import expand_spans, table_to_dict, stringify_table, tdiv_to_table_list

CHAPTER_ID = 'chapter_A'
TABLE_SUFFIX = re.compile(".*IOD Modules$")
COLUMN_TITLES = ['informationEntity', 'module', 'reference_fragment', 'usage']


def get_ciod_tables(standard):
chapter_A_table_divs = pl.all_tdivs_in_chapter(standard, CHAPTER_ID)
ciod_table_divs = list(filter(is_valid_ciod_table, chapter_A_table_divs))
Expand Down Expand Up @@ -61,6 +62,7 @@ def get_ciod_description(tdiv):
except AttributeError:
return None


if __name__ == "__main__":
standard = pl.parse_html_file(sys.argv[1])
tables, tdivs = get_ciod_tables(standard)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

from bs4 import BeautifulSoup, Tag

import parse_lib as pl
import parse_relations as pr
from table_utils import expand_spans, stringify_table, tdiv_to_table_list, TableListType
from macro_utils import get_id_from_link, MetadataTableType
from . import parse_lib as pl
from . import parse_relations as pr
from .table_utils import expand_spans, stringify_table, tdiv_to_table_list, TableListType
from .macro_utils import get_id_from_link, MetadataTableType

# Macros and modules require the same metadata and formatting,
# so they can share these two functions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
import sys
import re

import parse_lib as pl
import parse_relations as pr
from . import parse_lib as pl
from . import parse_relations as pr
from .table_utils import expand_spans, table_to_dict, stringify_table, tdiv_to_table_list

CHAPTER_ID = 'chapter_C'
TABLE_SUFFIX = re.compile("(.*Module Attributes$)|(.*Module Table$)")
COLUMN_TITLES_WITH_TYPE = ['name', 'tag', 'type', 'description']
COLUMN_TITLES_NO_TYPE = ['name', 'tag', 'description']


def get_module_tables(standard):
chapter_C_table_divs = pl.all_tdivs_in_chapter(standard, CHAPTER_ID)
module_table_divs = list(filter(is_valid_module_table, chapter_C_table_divs))
Expand Down Expand Up @@ -64,6 +65,7 @@ def clean_table_description(description):
table_link.string = 'This module '
return description


if __name__ == '__main__':
standard = pl.parse_html_file(sys.argv[1])
tables, tdivs = get_module_tables(standard)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import sys
import re
import os
from bs4 import BeautifulSoup

from parse_lib import parse_html_file, write_pretty_json
from .parse_lib import parse_html_file, write_pretty_json

REFERENCED_IDS_RE = re.compile(r'(sect.*)|(figure.*)|(biblio.*)|(table.*)|(note.*)')

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

from bs4 import Tag

import parse_lib as pl
from . import parse_lib as pl


def get_hierarchy_markers(name: str) -> str:
clean_name = name.strip().replace('\n', '')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@

from bs4 import BeautifulSoup, Tag

import parse_lib as pl
from hierarchy_utils import get_hierarchy_markers
from .hierarchy_utils import get_hierarchy_markers

MetadataTableType = Dict[str, Any]
MacrosType = Dict[str, MetadataTableType]


def expand_macro_rows(table: Tag, macros: MacrosType) -> List[Dict[str, str]]:
# This variable is used to stop an infinite macro reference
# loop in the standard at the SR Document Content module.
Expand Down Expand Up @@ -62,7 +62,7 @@ def flatten_one_layer(nested_element_list: List[List[Any]]) -> List[Any]:
def referenced_macro_id_from_include_statement(macro_reference_html: str) -> str:
parsed_reference = BeautifulSoup(macro_reference_html, 'html.parser')
id_anchor = parsed_reference.find('a', class_='xref')
return id_anchor.get('href')[1:] # Remove the first '#' character
return id_anchor.get('href')[1:] # Remove the first '#' character


def get_macros_by_id(macro_id: str, macros: MacrosType, hierarchy_marker: str) -> MetadataTableType:
Expand Down
3 changes: 2 additions & 1 deletion dicom-standard/parse_lib.py → dicom_standard/parse_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@

from bs4 import BeautifulSoup, NavigableString, Tag

import parse_relations as pr
from . import parse_relations as pr

BASE_DICOM_URL = "http://dicom.nema.org/medical/dicom/current/output/html/"
BASE_SHORT_DICOM_SECTION_URL = "http://dicom.nema.org/medical/dicom/current/output/chtml/"
SHORT_DICOM_URL_PREFIX = "http://dicom.nema.org/medical/dicom/current/output/chtml/part03/"

allowed_attributes = ["href", "src", "type", "data", "colspan", "rowspan"]


def parse_html_file(filepath: str) -> BeautifulSoup:
with open(filepath, 'r') as html_file:
return BeautifulSoup(html_file, 'html.parser')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import List
from bs4 import Tag


def table_rows(table_div: Tag) -> List[Tag]:
return table_div.find('tbody').find_all('tr')

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@

from bs4 import BeautifulSoup

import parse_lib as pl
from . import parse_lib as pl

IGNORED_REFERENCES_RE = re.compile(r'(.*ftp.*)|(.*http.*)|(.*part05.*)|(.*chapter.*)|(.*PS3.*)|(.*DCM.*)|(.*glossentry.*)')
IGNORED_REFS_RE = re.compile(r'(.*ftp.*)|(.*http.*)|(.*part05.*)|(.*chapter.*)|(.*PS3.*)|(.*DCM.*)|(.*glossentry.*)')


def get_valid_reference_anchors(parsed_html):
anchor_tags = parsed_html.find_all('a', href=True)
return [a for a in anchor_tags if not re.match(IGNORED_REFERENCES_RE, a['href'])]
return [a for a in anchor_tags if not re.match(IGNORED_REFS_RE, a['href'])]


def record_references_inside_pairs(module_attr_pairs):
Expand All @@ -37,6 +37,7 @@ def record_reference_in_pair(pair):
def finalize_descriptions(pair):
pair['description'] = pl.clean_html(pair['description'])


def reference_structure_from_anchor(reference):
return {
"sourceUrl": reference.get('href'),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@

from bs4 import BeautifulSoup

import parse_lib as pl
from macro_utils import flatten_one_layer
from . import parse_lib as pl


def find_reference_html_in_sections(pairs, section_listing):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys

import parse_lib as pl
from . import parse_lib as pl


def update_sourceurls(module_attr_pairs, references):
for pair in module_attr_pairs:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
'''
import sys

from bs4 import BeautifulSoup
from . import parse_lib as pl
from .macro_utils import expand_macro_rows
from .hierarchy_utils import record_hierarchy_for_module

import parse_lib as pl
from macro_utils import expand_macro_rows
from hierarchy_utils import record_hierarchy_for_module

def expand_all_macros(module_attr_tables, macros):
expanded_attribute_lists = [expand_macro_rows(table, macros)
Expand Down Expand Up @@ -46,7 +45,6 @@ def preprocess_attribute(attr):
return cleaned_attribute



def expand_hierarchy(tables):
return [record_hierarchy_for_module(table) for table in tables]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@
'''
import sys

import parse_lib as pl
from . import parse_lib as pl


def define_all_relationships(ciod_module_list):
all_relationships = []
for table in ciod_module_list:
ciod = table['name']
modules = table['modules']
all_relationships.extend([define_ciod_module_relationship(ciod, module) for module in modules])
all_relationships.extend([define_ciod_module_relationship(ciod, module)
for module in modules])
return all_relationships


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
'''
import sys

import parse_lib as pl
from . import parse_lib as pl


def ciods_from_extracted_list(ciod_module_list):
ciods = {}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys

import parse_lib as pl
from . import parse_lib as pl


def module_attr_relationship_table(module_attr_relationship_list):
entries = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
'''
import sys

import parse_lib as pl
from . import parse_lib as pl


def modules_from_tables(tables):
modules = {}
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
11 changes: 6 additions & 5 deletions dicom-standard/table_utils.py → dicom_standard/table_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
from copy import copy
from bs4 import Tag

import parse_relations as pr
from . import parse_relations as pr

TableListType = List[List[Tag]]


def table_to_dict(table: TableListType, row_names: List[str]) -> List[Dict[str, List[Tag]]]:
return [dict(zip(row_names, row)) for row in table]

Expand Down Expand Up @@ -37,7 +38,7 @@ def expand_rows(table: TableListType) -> TableListType:
communicated between each row (the rowspan information).
'''
extended_table = []
row_expansion = [] # Format: [(bs_html_object, row_index)]
row_expansion = [] # Format: [(bs_html_object, row_index)]
for row in table:
expanded_row, row_expansion = expand_rowspans(row, row_expansion)
extended_table.append(expanded_row)
Expand Down Expand Up @@ -71,7 +72,7 @@ def slide_down(start_idx: int, row: List[Tag], num_slides: int = 1) -> List[Tag]
'''
try:
sliding_rows = row[start_idx:len(row)]
new_row = row[0:len(row)-len(sliding_rows)]
new_row = row[0:len(row) - len(sliding_rows)]
for i in range(num_slides):
new_row.append(None)
new_row.extend(sliding_rows)
Expand Down Expand Up @@ -105,7 +106,7 @@ def is_new_rowspan_cell(cell: Tag, idx: int, row_expansion: List[Tuple[Tag, int]


def remove_completed_rowspans(row_expansion: List[Tuple[Tag, int]]) -> List[Tuple[Tag, int]]:
return [(cell,idx) for (cell, idx) in row_expansion
return [(cell, idx) for (cell, idx) in row_expansion
if has_rowspans_to_expand(cell)]


Expand All @@ -126,6 +127,6 @@ def expand_cell_colspan(cell: Tag) -> Tag:
if colspan_count is not None:
colspans = int(colspan_count)
cell['colspan'] = 1
for i in range(colspans-1):
for i in range(colspans - 1):
expanded_cell.append(None)
return expanded_cell
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
],

extras_require={
'dev': [],
'dev': ['check-manifest'],
'test': ['pytest'],
},

Expand Down
Loading

0 comments on commit bcd5f67

Please sign in to comment.