Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for parsing HLS WebVTT X-TIMESTAMP-MAP tag #25

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions tests/subtitles/sample_hls.vtt
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:1086:59:52.424,MPEGTS:183000

1088:42:00.500 --> 1088:42:07.000
Caption text #1

1088:42:07.000 --> 1088:42:11.890
Caption text #2

1088:42:11.890 --> 1088:42:16.320
Caption text #3

1088:42:16.320 --> 1088:42:21.580
Caption text #4

1088:42:21.580 --> 1088:42:23.880
Caption text #5

1088:42:23.880 --> 1088:42:27.280
Caption text #6

1088:42:27.280 --> 1088:42:30.280
Caption text #7

1088:42:30.280 --> 1088:42:36.510
Caption text #8

1088:42:36.510 --> 1088:42:38.870
Caption text #9

1088:42:38.870 --> 1088:42:45.000
Caption text #10

1088:42:45.000 --> 1088:42:47.000
Caption text #11

1088:42:47.000 --> 1088:42:50.970
Caption text #12

1088:42:50.970 --> 1088:42:54.440
Caption text #13

1088:42:54.440 --> 1088:42:58.600
Caption text #14

1088:42:58.600 --> 1088:43:01.350
Caption text #15

1088:43:01.350 --> 1088:43:04.300
Caption text #16
37 changes: 36 additions & 1 deletion tests/webvtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
import io
from shutil import rmtree, copy

from webvtt.webvtt import HlsWebVTT
import webvtt
from webvtt.structures import Caption, Style
from webvtt.structures import Caption, Style, TimestampMap
from .generic import GenericParserTestCase
from webvtt.errors import MalformedFileError

Expand Down Expand Up @@ -244,6 +245,40 @@ def test_read_malformed_buffer(self):
with self.assertRaises(MalformedFileError):
webvtt.read_buffer(buffer)

def test_timestamp_map(self):
    # The tag may list LOCAL and MPEGTS in either order; both orderings
    # must expose the same attribute values.
    local_first = TimestampMap(
        'X-TIMESTAMP-MAP=LOCAL:1086:59:52.424,MPEGTS:183000'
    )
    mpegts_first = TimestampMap(
        'X-TIMESTAMP-MAP=MPEGTS:183000,LOCAL:1086:59:52.424'
    )

    expectations = (
        ('local', '1086:59:52.424'),
        ('mpegts', '183000'),
    )
    # Check one attribute at a time across both orderings.
    for attribute, expected in expectations:
        for parsed in (local_first, mpegts_first):
            self.assertEqual(getattr(parsed, attribute), expected)

def test_read_hls_webvtt(self):
    # Reading the HLS sample must yield a caption list and a timestamp
    # map whose LOCAL and MPEGTS parts are both non-empty.
    sample_path = self._get_file('sample_hls.vtt')
    parsed = HlsWebVTT.read(sample_path)
    self.assertIsInstance(parsed.captions, list)

    ts_map = parsed.timestamp_map
    self.assertIsInstance(ts_map, TimestampMap)
    for component in (ts_map.local, ts_map.mpegts):
        self.assertTrue(component)

def test_read_non_hls_webvtt_with_hls_webvtt_parser(self):
    # A plain WebVTT file read through the HLS reader still produces
    # captions, and reports no timestamp map.
    sample_path = self._get_file('sample.vtt')
    parsed = HlsWebVTT.read(sample_path)
    self.assertIsInstance(parsed.captions, list)
    self.assertIsNone(parsed.timestamp_map)

def test_captions(self):
vtt = webvtt.read(self._get_file('sample.vtt'))
Expand Down
4 changes: 4 additions & 0 deletions webvtt/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ class InvalidCaptionsError(Exception):

class MissingFilenameError(Exception):
"""Error raised when saving a file without filename."""

class HlsTimeMapError(Exception):
    """Raised when an HLS X-TIMESTAMP-MAP tag is malformed."""

14 changes: 13 additions & 1 deletion webvtt/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import codecs

from .errors import MalformedFileError, MalformedCaptionError
from .structures import Block, Style, Caption
from .structures import Block, Style, Caption, TimestampMap


class TextBasedParser(object):
Expand Down Expand Up @@ -249,6 +249,18 @@ def _is_style_block(self, block):
"""Returns True if it is a style block"""
return re.match(self.STYLE_PATTERN, block.lines[0])

class HlsWebVTTParser(WebVTTParser):
    """
    WebVTT parser that also recognises the HLS X-TIMESTAMP-MAP tag.

    After parsing, ``timestamp_map`` holds a TimestampMap built from the
    first line that starts with the tag, or None when no line does.
    """

    def _parse(self, lines):
        # The tag name is compared case-insensitively; the first matching
        # line wins. All lines are still handed to the base parser.
        tag_line = next(
            (candidate for candidate in lines
             if candidate.upper().startswith('X-TIMESTAMP-MAP')),
            None,
        )
        if tag_line is None:
            self.timestamp_map = None
        else:
            self.timestamp_map = TimestampMap(tag_line)
        super(HlsWebVTTParser, self)._parse(lines)

class SBVParser(TextBasedParser):
"""
Expand Down
51 changes: 50 additions & 1 deletion webvtt/structures.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,60 @@
import re

from .errors import MalformedCaptionError
from .errors import MalformedCaptionError, HlsTimeMapError

# All patterns are raw strings so that regex escapes such as \d, \= and \:
# reach the regex engine untouched instead of going through Python's own
# string-escape processing (which warns about unknown escapes).

# Timestamp: optional leading digits (hours), two-digit minutes and seconds,
# then three-digit milliseconds after '.' or ','.
TIMESTAMP_PATTERN = re.compile(r'(\d+)?:?(\d{2}):(\d{2})[.,](\d{3})')

# LOCAL attribute of an X-TIMESTAMP-MAP tag; the named group ``local``
# captures the whole timestamp. Matching is case-insensitive.
TIMESTAMP_MAP_LOCAL_PATTERN = re.compile(
    r'X-TIMESTAMP-MAP\=.*?LOCAL\:(?P<local>(\d+)?:?(\d{2}):(\d{2})[.,](\d{3}))',
    re.IGNORECASE
)

# MPEGTS attribute of an X-TIMESTAMP-MAP tag; the named group ``mpegts``
# captures the run of digits. Matching is case-insensitive.
TIMESTAMP_MAP_MPEGTS_PATTERN = re.compile(
    r'X-TIMESTAMP-MAP\=.*?MPEGTS\:(?P<mpegts>(\d+))',
    re.IGNORECASE
)
__all__ = ['Caption']

class TimestampMap(object):
    """Read-only view of an HLS ``X-TIMESTAMP-MAP`` WebVTT header tag.

    The raw tag line is stored as given; ``local`` and ``mpegts`` are
    extracted from it on each access with the module-level patterns.

    :param raw_timestamp_map: the tag line, e.g.
        ``'X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000'``.
    :raises HlsTimeMapError: if the text does not contain the tag name.
    """

    # The tag name is matched case-insensitively so that this check agrees
    # with the IGNORECASE attribute patterns used by the properties below.
    _TAG_PATTERN = re.compile(r'X-TIMESTAMP-MAP', re.IGNORECASE)

    def __init__(self, raw_timestamp_map):
        if self._TAG_PATTERN.search(raw_timestamp_map) is None:
            raise HlsTimeMapError('X-TIMESTAMP-MAP tag not found')
        self._raw = raw_timestamp_map

    def __str__(self):
        """Return the tag in canonical ``LOCAL,MPEGTS`` order.

        When either attribute could not be extracted, the original text is
        returned unchanged rather than a tag containing the word ``None``.
        """
        local, mpegts = self.local, self.mpegts
        if local is None or mpegts is None:
            return self._raw
        return 'X-TIMESTAMP-MAP=LOCAL:%s,MPEGTS:%s' % (local, mpegts)

    @property
    def local(self):
        """The LOCAL timestamp as a string, or None if it is absent."""
        found = TIMESTAMP_MAP_LOCAL_PATTERN.search(self._raw)
        # The named group always exists once the pattern has matched.
        return found.group('local') if found is not None else None

    @property
    def mpegts(self):
        """The MPEGTS value as a string of digits, or None if it is absent."""
        found = TIMESTAMP_MAP_MPEGTS_PATTERN.search(self._raw)
        return found.group('mpegts') if found is not None else None


class Caption(object):

Expand Down
34 changes: 33 additions & 1 deletion webvtt/webvtt.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os

from .parsers import WebVTTParser, SRTParser, SBVParser
from .parsers import WebVTTParser, HlsWebVTTParser, SRTParser, SBVParser
from .writers import WebVTTWriter, SRTWriter
from .errors import MissingFilenameError

Expand Down Expand Up @@ -131,3 +131,35 @@ def total_length(self):
@property
def styles(self):
return self._styles

class HlsWebVTT(WebVTT):
    """WebVTT document that additionally carries an HLS timestamp map.

    ``timestamp_map`` is whatever the HLS parser produced: a TimestampMap
    when the file contained an X-TIMESTAMP-MAP tag, otherwise None.
    """

    def __init__(self, file='', captions=None, styles=None, timestamp_map=None):
        self._timestamp_map = timestamp_map
        super(HlsWebVTT, self).__init__(file, captions, styles)

    @property
    def timestamp_map(self):
        """The timestamp map given at construction, or None."""
        return self._timestamp_map

    @classmethod
    def read(cls, file):
        """Reads a WebVTT captions file, keeping its X-TIMESTAMP-MAP tag."""
        parser = HlsWebVTTParser().read(file)
        return cls(
            file=file,
            captions=parser.captions,
            styles=parser.styles,
            timestamp_map=parser.timestamp_map,
        )

    @classmethod
    def read_buffer(cls, buffer):
        """Reads a WebVTT captions from a file-like object.
        Such file-like object may be the return of an io.open call,
        io.StringIO object, tempfile.TemporaryFile object, etc."""
        parser = HlsWebVTTParser().read_from_buffer(buffer)
        return cls(
            captions=parser.captions,
            styles=parser.styles,
            timestamp_map=parser.timestamp_map,
        )

    def write(self, f, format='vtt'):
        """Write the captions to the file-like object ``f``.

        For ``format='vtt'`` the header is ``WEBVTT`` followed by the
        timestamp map tag, which is included only when a map is present.
        For ``format='srt'`` the captions are written with SRTWriter.
        Any other format value writes nothing.
        """
        # NOTE(review): self._captions is presumably set by WebVTT.__init__
        # -- confirm against the base class.
        if format == 'vtt':
            header_lines = ['WEBVTT']
            # Without this guard a document with no map (the default, and
            # the result of reading a non-HLS file) would get a header
            # line containing the text "None".
            if self.timestamp_map is not None:
                header_lines.append(str(self.timestamp_map))
            WebVTTWriter(header_lines=header_lines).write(self._captions, f)
        elif format == 'srt':
            SRTWriter().write(self._captions, f)

6 changes: 5 additions & 1 deletion webvtt/writers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@

class WebVTTWriter(object):
def __init__(self, header_lines=['WEBVTT']):
self._header_lines = header_lines

def write(self, captions, f):
f.write('WEBVTT\n')
for line in self._header_lines:
f.write(line)
f.write('\n')
for c in captions:
if c.identifier:
f.write('\n' + c.identifier)
Expand Down