From d4b99a233314bf31f9c842035ea9884673d5313a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 1 Jul 2024 15:55:18 -0500 Subject: [PATCH] [ie/vimeo] Support browser impersonation (#10327) Closes #10325 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 3 ++- yt_dlp/extractor/vimeo.py | 34 +++++++++++++++++++++++----------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 5dc46e3171..7d6e8439c6 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -316,7 +316,8 @@ class PatreonIE(PatreonBaseIE): r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '') if url_or_none(v_url) and self._request_webpage( - v_url, video_id, 'Checking Vimeo embed URL', headers=headers, fatal=False, errnote=False): + v_url, video_id, 'Checking Vimeo embed URL', headers=headers, + fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection entries.append(self.url_result( VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'), VimeoIE, url_transparent=True)) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 7e79032f28..a4ab7e24a9 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -829,21 +829,33 @@ class VimeoIE(VimeoBaseInfoExtractor): url = 'https://vimeo.com/' + video_id self._try_album_password(url) + is_secure = urllib.parse.urlparse(url).scheme == 'https' try: # Retrieve video webpage to extract further information webpage, urlh = self._download_webpage_handle( - url, video_id, headers=headers) + url, video_id, headers=headers, impersonate=is_secure) redirect_url = urlh.url - except ExtractorError as ee: - if isinstance(ee.cause, HTTPError) and ee.cause.status == 403: - errmsg = ee.cause.response.read() - if b'Because of its privacy settings, this video cannot be played here' in errmsg: - raise ExtractorError( - 'Cannot download embed-only video without embedding ' - 'URL. Please call yt-dlp with the URL of the page ' - 'that embeds this video.', - expected=True) - raise + except ExtractorError as error: + if not isinstance(error.cause, HTTPError) or error.cause.status not in (403, 429): + raise + errmsg = error.cause.response.read() + if b'Because of its privacy settings, this video cannot be played here' in errmsg: + raise ExtractorError( + 'Cannot download embed-only video without embedding URL. Please call yt-dlp ' + 'with the URL of the page that embeds this video.', expected=True) + # 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block + status = error.cause.status + dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked' + if target := error.cause.response.extensions.get('impersonate'): + raise ExtractorError( + f'Got HTTP Error {status} when using impersonate target "{target}". {dcip_msg}') + elif not is_secure: + raise ExtractorError(f'Got HTTP Error {status}. {dcip_msg}', expected=True) + raise ExtractorError( + 'This request has been blocked due to its TLS fingerprint. Install a ' + 'required impersonation dependency if possible, or else if you are okay with ' + f'{self._downloader._format_err("compromising your security/cookies", "light red")}, ' + f'try replacing "https:" with "http:" in the input URL. {dcip_msg}.', expected=True) if '://player.vimeo.com/video/' in url: config = self._search_json(