From eaedbfd97e860214399b0028fc47a487762e8294 Mon Sep 17 00:00:00 2001
From: siikamiika
Date: Tue, 11 Aug 2020 00:05:32 +0300
Subject: [PATCH] fix ytInitialData parsing

---
 youtube_dl/downloader/youtube_live_chat.py | 10 ++++++++--
 youtube_dl/extractor/youtube.py            |  3 ++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py
index f7478c3366..697e525500 100644
--- a/youtube_dl/downloader/youtube_live_chat.py
+++ b/youtube_dl/downloader/youtube_live_chat.py
@@ -28,8 +28,14 @@ class YoutubeLiveChatReplayFD(FragmentFD):
             return self._download_fragment(ctx, url, info_dict, headers)
 
         def parse_yt_initial_data(data):
-            raw_json = re.search(b'window\\["ytInitialData"\\]\\s*=\\s*(.*);', data).group(1)
-            return json.loads(raw_json)
+            window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?);'
+            var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?);'
+            for patt in window_patt, var_patt:
+                try:
+                    raw_json = re.search(patt, data).group(1)
+                    return json.loads(raw_json)
+                except AttributeError:
+                    continue
 
         self._prepare_and_start_frag_download(ctx)
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index d6c35fab4d..e143bbee7b 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _get_yt_initial_data(self, video_id, webpage):
         config = self._search_regex(
-            r'window\["ytInitialData"\]\s*=\s*(.*);',
+            (r'window\["ytInitialData"\]\s*=\s*(.*);',
+             r'var\s+ytInitialData\s*=\s*(.*?);'),
             webpage, 'ytInitialData', default=None)
         if config:
             return self._parse_json(