From 81a136b80f3d29c73884bb116f869df44bfd6fa1 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 8 Sep 2021 16:10:10 +0530 Subject: [PATCH] [WebVTT] Adjust parser to accommodate PBS subtitles (#922) Closes #921 --- yt_dlp/webvtt.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index eee2a4a2d..cd936e7e5 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -89,8 +89,12 @@ class ParseError(Exception): )) +# While the specification +# prescribes that hours must be *2 or more* digits, timestamps with a single +# digit for the hour part have been seen in the wild. +# See https://github.com/yt-dlp/yt-dlp/issues/921 _REGEX_TS = re.compile(r'''(?x) - (?:([0-9]{2,}):)? + (?:([0-9]{1,}):)? ([0-9]{2}): ([0-9]{2})\. ([0-9]{3})? @@ -172,6 +176,7 @@ class Magic(HeaderBlock): _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=') _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:') _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)') + _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*') @classmethod def __parse_tsmap(cls, parser): @@ -194,7 +199,7 @@ class Magic(HeaderBlock): raise ParseError(parser) else: raise ParseError(parser) - if parser.consume(','): + if parser.consume(cls._REGEX_TSMAP_SEP): continue if parser.consume(_REGEX_NL): break