From 0685d9727b9657fc8a31c96cb52c4155de29fcfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 9 Jul 2018 23:43:05 +0700 Subject: [PATCH] [utils] Share JSON-LD regex --- youtube_dl/extractor/common.py | 4 ++-- youtube_dl/utils.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 78f053f182..5d4db54d52 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -52,6 +52,7 @@ from ..utils import ( GeoUtils, int_or_none, js_to_json, + JSON_LD_RE, mimetype2ext, orderedSet, parse_codecs, @@ -1149,8 +1150,7 @@ class InfoExtractor(object): def _search_json_ld(self, html, video_id, expected_type=None, **kwargs): json_ld = self._search_regex( - r'(?s)]+type=(["\'])application/ld\+json\1[^>]*>(?P.+?)', - html, 'JSON-LD', group='json_ld', **kwargs) + JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs) default = kwargs.get('default', NO_DEFAULT) if not json_ld: return default if default is not NO_DEFAULT else {} diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 6a3199fb99..8c45166d76 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -184,6 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([ ]) PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" +JSON_LD_RE = r'(?is)]+type=(["\'])application/ld\+json\1[^>]*>(?P.+?)' def preferredencoding():