From 3cfd000849208b58dab4f78d1486d3f24552009e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Sun, 22 Nov 2015 13:14:35 +0100 Subject: [PATCH 1/3] [youtube] More explicit player config JSON extraction (fixes #7468) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 687e0b4dbc..21731188af 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1074,7 +1074,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): age_gate = False video_info = None # Try looking directly into the video webpage - mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) + mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});ytplayer', video_webpage) if mobj: json_code = uppercase_escape(mobj.group(1)) ytplayer_config = json.loads(json_code) From 0e49d9a6b0216555c2a3ee063ae3d1c6d09edbd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Sun, 22 Nov 2015 13:49:33 +0100 Subject: [PATCH 2/3] [youtube] Fall back to the original regex for ytplayer.config --- youtube_dl/extractor/youtube.py | 39 ++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 21731188af..7e74d23684 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -674,7 +674,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): { 'url': 'http://vid.plus/FlRa-iH7PGw', 'only_matching': True, - } + }, + { + # Title with JS-like syntax "};" + 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', + 'info_dict': { + 'id': 'lsguqyKfVQg', + 'ext': 'mp4', + 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', + 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', + 'upload_date': '20151119', + 'uploader_id': 'IronSoulElf', + 'uploader': 'IronSoulElf', + }, + 'params': { + 'skip_download': True, + }, + }, ] def __init__(self, *args, **kwargs): @@ -858,16 +874,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return {} return sub_lang_list + def _get_ytplayer_config(self, webpage): + patterns = [ + r';ytplayer\.config\s*=\s*({.*?});ytplayer', + r';ytplayer\.config\s*=\s*({.*?});', + ] + for pattern in patterns: + config = self._search_regex(pattern, webpage, 'ytconfig.player', default=None) + if config is not None: + return json.loads(uppercase_escape(config)) + def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" self.to_screen('%s: Looking for automatic captions' % video_id) - mobj = re.search(r';ytplayer.config = ({.*?});', webpage) + player_config = self._get_ytplayer_config(webpage) err_msg = 'Couldn\'t find automatic captions for %s' % video_id - if mobj is None: + if player_config is None: self._downloader.report_warning(err_msg) return {} - player_config = json.loads(mobj.group(1)) try: args = player_config['args'] caption_url = args['ttsurl'] @@ -1074,10 +1099,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): age_gate = False video_info = None # Try looking directly into the video webpage - mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});ytplayer', video_webpage) - if mobj: - json_code = uppercase_escape(mobj.group(1)) - ytplayer_config = json.loads(json_code) + ytplayer_config = self._get_ytplayer_config(video_webpage) + if ytplayer_config is not None: args = ytplayer_config['args'] if args.get('url_encoded_fmt_stream_map'): # Convert to the same format returned by compat_parse_qs From b41631c4e6e56afb2427513c84df1b13681cf4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Sun, 22 Nov 2015 13:53:26 +0100 Subject: [PATCH 3/3] [youtube] Send the list of patterns directly to _search_regex --- youtube_dl/extractor/youtube.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7e74d23684..247769067c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -879,10 +879,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r';ytplayer\.config\s*=\s*({.*?});ytplayer', r';ytplayer\.config\s*=\s*({.*?});', ] - for pattern in patterns: - config = self._search_regex(pattern, webpage, 'ytconfig.player', default=None) - if config is not None: - return json.loads(uppercase_escape(config)) + config = self._search_regex(patterns, webpage, 'ytconfig.player', default=None) + if config is not None: + return json.loads(uppercase_escape(config)) def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an