diff --git a/pycaption/base.py b/pycaption/base.py index 19c3fcd0..563d7f89 100644 --- a/pycaption/base.py +++ b/pycaption/base.py @@ -212,7 +212,7 @@ def __repr__(self): f'{self.format_start()} --> {self.format_end()}\n{self.get_text()}' ) - def get_text(self): + def get_text_nodes(self): """ Get the text of the caption. """ @@ -224,7 +224,10 @@ def get_text_for_node(node): return '\n' return '' - text_nodes = [get_text_for_node(node) for node in self.nodes] + return [get_text_for_node(node) for node in self.nodes] + + def get_text(self): + text_nodes = self.get_text_nodes() return ''.join(text_nodes).strip() def _format_timestamp(self, microseconds, msec_separator=None): diff --git a/pycaption/exceptions.py b/pycaption/exceptions.py index 661f8465..0474c05d 100644 --- a/pycaption/exceptions.py +++ b/pycaption/exceptions.py @@ -35,3 +35,9 @@ class RelativizationError(Exception): class InvalidInputError(RuntimeError): """Error raised when the input is invalid (i.e. a unicode string)""" + + +class CaptionLineLengthError(CaptionReadError): + """ + Error raised when a Caption has a line longer than 32 characters. + """ diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index 39326f45..8d40f297 100644 --- a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -88,7 +88,7 @@ BaseReader, BaseWriter, CaptionSet, CaptionNode, ) from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError, \ - CaptionReadTimingError + CaptionReadTimingError, CaptionLineLengthError from .constants import ( HEADER, COMMANDS, SPECIAL_CHARS, EXTENDED_CHARS, CHARACTERS, MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE, @@ -232,6 +232,22 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0): captions = CaptionSet({lang: self.caption_stash.get_all()}) # check captions for incorrect lengths + lines = [] + for caption in self.caption_stash._collection: + caption_text = "".join(caption.to_real_caption().get_text_nodes()) + lines.extend(caption_text.split("\n")) + lines_too_long = [line for line in lines if len(line) >= 32] + + if bool(lines_too_long): + msg = "" + for line in lines_too_long: + msg += line + f" - Length { len(line)}" + "\n" + raise CaptionLineLengthError( + f"32 character limit for caption cue in scc file.\n" + f"Lines longer than 32:\n" + f"{msg}" + ) + for cap in captions.get_captions(lang): # if there's an end time on a caption and the difference is # less than .05s kill it (this is likely caused by a standalone @@ -526,13 +542,7 @@ def write(self, caption_set): # Wrap lines at 32 chars @staticmethod def _layout_line(caption): - def caption_node_to_text(caption_node): - if caption_node.type_ == CaptionNode.TEXT: - return caption_node.content - elif caption_node.type_ == CaptionNode.BREAK: - return '\n' - caption_text = ''.join( - [caption_node_to_text(node) for node in caption.nodes]) + caption_text = "".join(caption.get_text_nodes()) inner_lines = caption_text.split('\n') inner_lines_laid_out = [textwrap.fill(x, 32) for x in inner_lines] return '\n'.join(inner_lines_laid_out) diff --git a/tests/conftest.py b/tests/conftest.py index 55b785fb..2e361fb8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -60,7 +60,8 @@ sample_scc_with_ampersand_character, sample_scc_multiple_formats, sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters, sample_scc_tab_offset, sample_scc_with_unknown_commands, - sample_scc_special_and_extended_characters + sample_scc_special_and_extended_characters, + sample_scc_with_line_too_long ) from tests.fixtures.srt import ( # noqa: F401 sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty, diff --git a/tests/fixtures/scc.py b/tests/fixtures/scc.py index 9744c6c0..8cd650ae 100644 --- a/tests/fixtures/scc.py +++ b/tests/fixtures/scc.py @@ -425,4 +425,21 @@ def sample_scc_special_and_extended_characters(): 00:20:19;12 1326 13a7 13a8 1329 132a 13ab 132c 13ad 13ae 132f 13b0 1331 1332 00:24:39;28 13b3 1334 13b5 13b6 1337 1338 13b9 13ba 133b 13bc 133d 133e 13bf -""" \ No newline at end of file +""" + + +@pytest.fixture(scope="session") +def sample_scc_with_line_too_long(): + return """\ +Scenarist_SCC V1.0 + +00:00:00;03 942c + +00:00:01;45 9420 91f4 cb45 4c4c d920 4ac1 cd45 d3ba 20c8 eff7 9254 f468 e520 7368 eff7 2073 f461 f2f4 e564 942c 8080 8080 942f + +00:00:02;55 9420 91e0 9723 f761 7320 4361 ec20 c4e5 6ee9 73ef 6e2c 2061 20e6 f2e9 e56e 6480 9240 9723 efe6 20ef 75f2 732c 20f7 6173 2064 efe9 6e67 206d 7920 43c4 73ae 942c 8080 8080 942f + +00:00:06;57 9420 94e0 c16e 6420 68e5 2073 61e9 642c 2049 20e3 616e 2064 ef20 6120 54d6 2073 68ef f7ae 942c 8080 8080 942f + +00:00:08;58 9420 9452 4920 ea75 73f4 20f7 616e f4e5 6420 ef6e e520 7368 eff7 2c80 94f2 ea75 73f4 20f4 ef20 6861 76e5 2061 7320 6120 ece9 f4f4 ece5 942c 8080 8080 942f +""" diff --git a/tests/test_scc.py b/tests/test_scc.py index ec3133aa..f573bb45 100644 --- a/tests/test_scc.py +++ b/tests/test_scc.py @@ -1,7 +1,7 @@ import pytest from pycaption import SCCReader, CaptionReadNoCaptions, CaptionNode -from pycaption.exceptions import CaptionReadTimingError +from pycaption.exceptions import CaptionReadTimingError, CaptionLineLengthError from pycaption.geometry import ( UnitEnum, HorizontalAlignmentEnum, VerticalAlignmentEnum, ) @@ -239,6 +239,14 @@ def test_flashing_cue(self, sample_scc_flashing_cue): assert exc_info.value.args[0].startswith( "Unsupported cue duration around 00:00:20.433") + def test_line_too_long(self, sample_scc_with_line_too_long): + with pytest.raises(CaptionLineLengthError) as exc_info: + SCCReader().read(sample_scc_with_line_too_long) + + assert exc_info.value.args[0].startswith( + "32 character limit for caption cue in scc file.") + assert "And he said, I can do a TV show. - Length 32" in exc_info.value.args[0].split("\n") + class TestCoverageOnly: """In order to refactor safely, we need coverage of 95% or more.