From 464c919ea82aefdf35f138a1ab2dd0bb8fb7fd0e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 24 Feb 2024 17:13:26 -0600 Subject: [PATCH] [ie/CloudflareStream] Improve embed detection (#9287) Partially addresses #7858 Authored by: bashonly --- yt_dlp/extractor/cloudflarestream.py | 32 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index 0c5f4fb40c..a812c24af8 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -4,27 +4,25 @@ from .common import InfoExtractor class CloudflareStreamIE(InfoExtractor): + _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' - _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE + _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo=' _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:[\w-]+\.)?%s/| - %s - ) - (?P%s) - ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE) - _EMBED_REGEX = [fr']+\bsrc=(["\'])(?P(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1'] + _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P{_ID_RE})' + _EMBED_REGEX = [ + rf']+\bsrc=(["\'])(?P(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1', + rf']+\bsrc=["\'](?Phttps?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', + ] _TESTS = [{ 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', 'info_dict': { 'id': '31c9291ab41fac05471db4e73aa11717', 'ext': 'mp4', 'title': '31c9291ab41fac05471db4e73aa11717', + 'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', }, 'params': { - 'skip_download': True, + 'skip_download': 'm3u8', }, }, { 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', @@ -39,6 +37,18 @@ class CloudflareStreamIE(InfoExtractor): 'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://upride.cc/incident/shoulder-pass-at-light/', + 'info_dict': { + 'id': 'eaef9dea5159cf968be84241b5cedfe7', + 'ext': 'mp4', + 'title': 'eaef9dea5159cf968be84241b5cedfe7', + 'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] def _real_extract(self, url): video_id = self._match_id(url)