Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCTO-10458-line_length #316

Merged
merged 2 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pycaption/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def __repr__(self):
f'{self.format_start()} --> {self.format_end()}\n{self.get_text()}'
)

def get_text(self):
def get_text_nodes(self):
"""
Get the text of the caption.
"""
Expand All @@ -224,7 +224,10 @@ def get_text_for_node(node):
return '\n'
return ''

text_nodes = [get_text_for_node(node) for node in self.nodes]
return [get_text_for_node(node) for node in self.nodes]

def get_text(self):
    """
    Get the caption's text as one string.

    Concatenates the pieces returned by ``get_text_nodes()`` and strips
    leading and trailing whitespace from the result.
    """
    return ''.join(self.get_text_nodes()).strip()

def _format_timestamp(self, microseconds, msec_separator=None):
Expand Down
6 changes: 6 additions & 0 deletions pycaption/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,9 @@ class RelativizationError(Exception):

class InvalidInputError(RuntimeError):
"""Error raised when the input is invalid (i.e. a unicode string)"""


class CaptionLineLengthError(CaptionReadError):
    """
    Error raised when a Caption has a line longer than 32 characters.

    The SCC reader raises it after parsing; the message lists every
    offending line together with its length.
    """
    # NOTE(review): the SCC reader's check is `len(line) >= 32`, so a line of
    # exactly 32 characters is rejected as well -- confirm whether the limit
    # is meant to be inclusive, since the docstring says "longer than 32".
26 changes: 18 additions & 8 deletions pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
BaseReader, BaseWriter, CaptionSet, CaptionNode,
)
from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError, \
CaptionReadTimingError
CaptionReadTimingError, CaptionLineLengthError
from .constants import (
HEADER, COMMANDS, SPECIAL_CHARS, EXTENDED_CHARS, CHARACTERS,
MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE,
Expand Down Expand Up @@ -232,6 +232,22 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
captions = CaptionSet({lang: self.caption_stash.get_all()})

# check captions for incorrect lengths
lines = []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be a separate method. The `read` method is already very crowded, and it would be easier to follow if this new code block were replaced with a method call.

for caption in self.caption_stash._collection:
caption_text = "".join(caption.to_real_caption().get_text_nodes())
lines.extend(caption_text.split("\n"))
lines_too_long = [line for line in lines if len(line) >= 32]

if bool(lines_too_long):
msg = ""
for line in lines_too_long:
msg += line + f" - Length { len(line)}" + "\n"
raise CaptionLineLengthError(
f"32 character limit for caption cue in scc file.\n"
f"Lines longer than 32:\n"
f"{msg}"
)

for cap in captions.get_captions(lang):
# if there's an end time on a caption and the difference is
# less than .05s kill it (this is likely caused by a standalone
Expand Down Expand Up @@ -526,13 +542,7 @@ def write(self, caption_set):
# Wrap lines at 32 chars
@staticmethod
def _layout_line(caption):
def caption_node_to_text(caption_node):
if caption_node.type_ == CaptionNode.TEXT:
return caption_node.content
elif caption_node.type_ == CaptionNode.BREAK:
return '\n'
caption_text = ''.join(
[caption_node_to_text(node) for node in caption.nodes])
caption_text = "".join(caption.get_text_nodes())
inner_lines = caption_text.split('\n')
inner_lines_laid_out = [textwrap.fill(x, 32) for x in inner_lines]
return '\n'.join(inner_lines_laid_out)
Expand Down
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@
sample_scc_with_ampersand_character, sample_scc_multiple_formats,
sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters,
sample_scc_tab_offset, sample_scc_with_unknown_commands,
sample_scc_special_and_extended_characters
sample_scc_special_and_extended_characters,
sample_scc_with_line_too_long
)
from tests.fixtures.srt import ( # noqa: F401
sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty,
Expand Down
19 changes: 18 additions & 1 deletion tests/fixtures/scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,4 +425,21 @@ def sample_scc_special_and_extended_characters():
00:20:19;12 1326 13a7 13a8 1329 132a 13ab 132c 13ad 13ae 132f 13b0 1331 1332

00:24:39;28 13b3 1334 13b5 13b6 1337 1338 13b9 13ba 133b 13bc 133d 133e 13bf
"""
"""


@pytest.fixture(scope="session")
def sample_scc_with_line_too_long():
    """Return an SCC document used to exercise the reader's line-length check.

    The SCC reader test that consumes this fixture expects
    ``CaptionLineLengthError`` and looks for the line
    "And he said, I can do a TV show." reported with length 32.
    """
    return """\
Scenarist_SCC V1.0

00:00:00;03 942c

00:00:01;45 9420 91f4 cb45 4c4c d920 4ac1 cd45 d3ba 20c8 eff7 9254 f468 e520 7368 eff7 2073 f461 f2f4 e564 942c 8080 8080 942f

00:00:02;55 9420 91e0 9723 f761 7320 4361 ec20 c4e5 6ee9 73ef 6e2c 2061 20e6 f2e9 e56e 6480 9240 9723 efe6 20ef 75f2 732c 20f7 6173 2064 efe9 6e67 206d 7920 43c4 73ae 942c 8080 8080 942f

00:00:06;57 9420 94e0 c16e 6420 68e5 2073 61e9 642c 2049 20e3 616e 2064 ef20 6120 54d6 2073 68ef f7ae 942c 8080 8080 942f

00:00:08;58 9420 9452 4920 ea75 73f4 20f7 616e f4e5 6420 ef6e e520 7368 eff7 2c80 94f2 ea75 73f4 20f4 ef20 6861 76e5 2061 7320 6120 ece9 f4f4 ece5 942c 8080 8080 942f
"""
10 changes: 9 additions & 1 deletion tests/test_scc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from pycaption import SCCReader, CaptionReadNoCaptions, CaptionNode
from pycaption.exceptions import CaptionReadTimingError
from pycaption.exceptions import CaptionReadTimingError, CaptionLineLengthError
from pycaption.geometry import (
UnitEnum, HorizontalAlignmentEnum, VerticalAlignmentEnum,
)
Expand Down Expand Up @@ -239,6 +239,14 @@ def test_flashing_cue(self, sample_scc_flashing_cue):
assert exc_info.value.args[0].startswith(
"Unsupported cue duration around 00:00:20.433")

def test_line_too_long(self, sample_scc_with_line_too_long):
    """Reading an SCC sample with an over-long line raises CaptionLineLengthError.

    Checks both the fixed headline of the error message and that one
    specific offending line is listed with its length.
    """
    with pytest.raises(CaptionLineLengthError) as exc_info:
        SCCReader().read(sample_scc_with_line_too_long)

    # The message starts with a fixed headline ...
    assert exc_info.value.args[0].startswith(
        "32 character limit for caption cue in scc file.")
    # ... followed by one "<line> - Length <n>" entry per offending line.
    # NOTE(review): this pins a line of exactly 32 characters as an error --
    # confirm that is intended given the "longer than 32" wording.
    assert "And he said, I can do a TV show. - Length 32" in exc_info.value.args[0].split("\n")


class TestCoverageOnly:
"""In order to refactor safely, we need coverage of 95% or more.
Expand Down
Loading