From 68898ac5cd96a130dc41b152ac0b29fccf7c1052 Mon Sep 17 00:00:00 2001 From: fireattack Date: Mon, 29 Apr 2024 17:25:03 +0800 Subject: [PATCH 1/4] [ie/bilibili] apply geo-verification-proxy to webpage download --- yt_dlp/extractor/bilibili.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index fee4b2994..8fffc6a07 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage, urlh = self._download_webpage_handle(url, video_id) + headers = self.geo_verification_headers() + webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers) if not self._match_valid_url(urlh.url): return self.url_result(urlh.url) @@ -666,14 +667,14 @@ class BiliBiliBangumiIE(BilibiliBaseIE): def _real_extract(self, url): episode_id = self._match_id(url) - webpage = self._download_webpage(url, episode_id) + headers = {'Referer': url, **self.geo_verification_headers()} + webpage = self._download_webpage(url, episode_id, headers=headers) if '您所在的地区无法观看本片' in webpage: raise GeoRestrictedError('This video is restricted') elif '正在观看预览,大会员免费看全片' in webpage: self.raise_login_required('This video is for premium members only') - headers = {'Referer': url, **self.geo_verification_headers()} play_info = self._download_json( 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, From 295ee673ffd3bd24bb05c089ae673c452b700973 Mon Sep 17 00:00:00 2001 From: fireattack Date: Mon, 29 Apr 2024 21:42:41 +0800 Subject: [PATCH 2/4] Update yt_dlp/extractor/bilibili.py Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/bilibili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 8fffc6a07..c72588bd3 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -667,7 +667,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): def _real_extract(self, url): episode_id = self._match_id(url) - headers = {'Referer': url, **self.geo_verification_headers()} + headers = self.geo_verification_headers() webpage = self._download_webpage(url, episode_id, headers=headers) if '您所在的地区无法观看本片' in webpage: From 7629901444a1954053a24f223d1083f7dc78893b Mon Sep 17 00:00:00 2001 From: fireattack Date: Mon, 29 Apr 2024 21:42:47 +0800 Subject: [PATCH 3/4] Update yt_dlp/extractor/bilibili.py Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/bilibili.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index c72588bd3..12bf4213c 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -675,6 +675,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): elif '正在观看预览,大会员免费看全片' in webpage: self.raise_login_required('This video is for premium members only') + headers['Referer'] = url play_info = self._download_json( 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, From 717a3b8a09b889cbc43eeaf80e405140d4ebfd22 Mon Sep 17 00:00:00 2001 From: fireattack Date: Mon, 29 Apr 2024 22:26:28 +0800 Subject: [PATCH 4/4] make sure all requests are using the headers --- yt_dlp/extractor/bilibili.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 12bf4213c..0dd4ed78b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor): return formats - def _download_playinfo(self, video_id, cid): + def _download_playinfo(self, video_id, cid, headers=None): return self._download_json( 'https://api.bilibili.com/x/player/playurl', video_id, query={'bvid': video_id, 'cid': cid, 'fnval': 4048}, - note=f'Downloading video formats for cid {cid}')['data'] + note=f'Downloading video formats for cid {cid}', headers=headers)['data'] def json2srt(self, json_data): srt_data = '' @@ -532,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE): self._download_json( 'https://api.bilibili.com/x/player/pagelist', video_id, fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'}, - note='Extracting videos in anthology'), + note='Extracting videos in anthology', headers=headers), 'data', expected_type=list) or [] is_anthology = len(page_list_json) > 1 @@ -553,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE): festival_info = {} if is_festival: - play_info = self._download_playinfo(video_id, cid) + play_info = self._download_playinfo(video_id, cid, headers=headers) festival_info = traverse_obj(initial_state, { 'uploader': ('videoInfo', 'upName'),