From 457f6d68668704c20debc40ca77768796656d98b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 22 Oct 2021 23:13:06 +0530 Subject: [PATCH] [vlive:channel] Fix extraction Based on https://github.com/ytdl-org/youtube-dl/pull/29866 Closes #749, #927, https://github.com/ytdl-org/youtube-dl/issues/29837 Authored by kikuyan, pukkandan --- yt_dlp/extractor/vlive.py | 217 +++++++++++++++++--------------------- 1 file changed, 98 insertions(+), 119 deletions(-) diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index 84f51a544c..681d959027 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -17,17 +17,65 @@ from ..utils import ( strip_or_none, try_get, urlencode_postdata, + url_or_none, ) class VLiveBaseIE(NaverBaseIE): - _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b' + _NETRC_MACHINE = 'vlive' + _logged_in = False + + def _real_initialize(self): + if not self._logged_in: + VLiveBaseIE._logged_in = self._login() + + def _login(self): + email, password = self._get_login_info() + if email is None: + return False + + LOGIN_URL = 'https://www.vlive.tv/auth/email/login' + self._request_webpage( + LOGIN_URL, None, note='Downloading login cookies') + + self._download_webpage( + LOGIN_URL, None, note='Logging in', + data=urlencode_postdata({'email': email, 'pwd': password}), + headers={ + 'Referer': LOGIN_URL, + 'Content-Type': 'application/x-www-form-urlencoded' + }) + + login_info = self._download_json( + 'https://www.vlive.tv/auth/loginInfo', None, + note='Checking login status', + headers={'Referer': 'https://www.vlive.tv/home'}) + + if not try_get(login_info, lambda x: x['message']['login'], bool): + raise ExtractorError('Unable to log in', expected=True) + return True + + def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None): + if note is None: + note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0] + query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 
'platformType': 'PC'} + if fields: + query['fields'] = fields + if query_add: + query.update(query_add) + try: + return self._download_json( + 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id, + note, headers={'Referer': 'https://www.vlive.tv/'}, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message']) + raise class VLiveIE(VLiveBaseIE): IE_NAME = 'vlive' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)' - _NETRC_MACHINE = 'vlive' _TESTS = [{ 'url': 'http://www.vlive.tv/video/1326', 'md5': 'cc7314812855ce56de70a06a27314983', @@ -81,53 +129,6 @@ class VLiveIE(VLiveBaseIE): 'playlist_mincount': 120 }] - def _real_initialize(self): - self._login() - - def _login(self): - email, password = self._get_login_info() - if None in (email, password): - return - - def is_logged_in(): - login_info = self._download_json( - 'https://www.vlive.tv/auth/loginInfo', None, - note='Downloading login info', - headers={'Referer': 'https://www.vlive.tv/home'}) - return try_get( - login_info, lambda x: x['message']['login'], bool) or False - - LOGIN_URL = 'https://www.vlive.tv/auth/email/login' - self._request_webpage( - LOGIN_URL, None, note='Downloading login cookies') - - self._download_webpage( - LOGIN_URL, None, note='Logging in', - data=urlencode_postdata({'email': email, 'pwd': password}), - headers={ - 'Referer': LOGIN_URL, - 'Content-Type': 'application/x-www-form-urlencoded' - }) - - if not is_logged_in(): - raise ExtractorError('Unable to log in', expected=True) - - def _call_api(self, path_template, video_id, fields=None, limit=None): - query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'} - if fields: - query['fields'] = fields - if limit: - query['limit'] = limit - try: - return self._download_json( - 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % 
video_id, video_id, - 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0], - headers={'Referer': 'https://www.vlive.tv/'}, query=query) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message']) - raise - def _real_extract(self, url): video_id = self._match_id(url) @@ -150,7 +151,7 @@ class VLiveIE(VLiveBaseIE): playlist_count = str_or_none(playlist.get('totalCount')) playlist = self._call_api( - 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', limit=playlist_count) + 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count}) entries = [] for video_data in playlist['data']: @@ -216,7 +217,7 @@ class VLiveIE(VLiveBaseIE): raise ExtractorError('Unknown status ' + status) -class VLivePostIE(VLiveIE): +class VLivePostIE(VLiveBaseIE): IE_NAME = 'vlive:post' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)' _TESTS = [{ @@ -238,8 +239,6 @@ class VLivePostIE(VLiveIE): 'playlist_count': 1, }] _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s' - _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo' - _INKEY_TMPL = _FVIDEO_TMPL % 'inKey' def _real_extract(self, url): post_id = self._match_id(url) @@ -266,7 +265,7 @@ class VLivePostIE(VLiveIE): entry = None if upload_type == 'SOS': download = self._call_api( - self._SOS_TMPL, video_id)['videoUrl']['download'] + self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download'] formats = [] for f_id, f_url in download.items(): formats.append({ @@ -284,7 +283,7 @@ class VLivePostIE(VLiveIE): vod_id = upload_info.get('videoId') if not vod_id: continue - inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey'] + inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey'] entry = self._extract_video_info(video_id, vod_id, inkey) if entry: entry['title'] = '%s_part%s' % (title, idx) @@ -295,7 +294,7 @@ class VLivePostIE(VLiveIE): class 
VLiveChannelIE(VLiveBaseIE): IE_NAME = 'vlive:channel' - _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)' + _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<channel_id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?' _TESTS = [{ 'url': 'http://channels.vlive.tv/FCD4B', 'info_dict': { @@ -306,78 +305,58 @@ class VLiveChannelIE(VLiveBaseIE): }, { 'url': 'https://www.vlive.tv/channel/FCD4B', 'only_matching': True, + }, { + 'url': 'https://www.vlive.tv/channel/FCD4B/board/3546', + 'info_dict': { + 'id': 'FCD4B-3546', + 'title': 'MAMAMOO - Star Board', + }, + 'playlist_mincount': 880 }] - def _call_api(self, path, channel_key_suffix, channel_value, note, query): - q = { - 'app_id': self._APP_ID, - 'channel' + channel_key_suffix: channel_value, - } - q.update(query) - return self._download_json( - 'http://api.vfan.vlive.tv/vproxy/channelplus/' + path, - channel_value, note='Downloading ' + note, query=q)['result'] - - def _real_extract(self, url): - channel_code = self._match_id(url) - - channel_seq = self._call_api( - 'decodeChannelCode', 'Code', channel_code, - 'decode channel code', {})['channelSeq'] - - channel_name = None - entries = [] + def _entries(self, posts_id, board_name): + if board_name: + posts_path = 'post/v1.0/board-%s/posts' + query_add = {'limit': 100, 'sortType': 'LATEST'} + else: + posts_path = 'post/v1.0/channel-%s/starPosts' + query_add = {'limit': 100} for page_num in itertools.count(1): video_list = self._call_api( - 'getChannelVideoList', 'Seq', channel_seq, - 'channel list page #%d' % page_num, { - # Large values of maxNumOfRows (~300 or above) may cause - # empty responses (see [1]), e.g. this happens for [2] that - # has more than 300 videos. - # 1. https://github.com/ytdl-org/youtube-dl/issues/13830 - # 2. http://channels.vlive.tv/EDBF. 
- 'maxNumOfRows': 100, - 'pageNo': page_num - } - ) + posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add, + note=f'Downloading playlist page {page_num}') - if not channel_name: - channel_name = try_get( - video_list, - lambda x: x['channelInfo']['channelName'], - compat_str) - - videos = try_get( - video_list, lambda x: x['videoList'], list) - if not videos: - break - - for video in videos: - video_id = video.get('videoSeq') - video_type = video.get('videoType') - - if not video_id or not video_type: + for video in try_get(video_list, lambda x: x['data'], list) or []: + video_id = str(video.get('postId')) + video_title = str_or_none(video.get('title')) + video_url = url_or_none(video.get('url')) + if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO': continue - video_id = compat_str(video_id) + channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str) + yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name) - if video_type in ('PLAYLIST'): - first_video_id = try_get( - video, - lambda x: x['videoPlaylist']['videoList'][0]['videoSeq'], int) + after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str) + if not after: + break + query_add['after'] = after - if not first_video_id: - continue + def _real_extract(self, url): + channel_id, posts_id = self._match_valid_url(url).groups() - entries.append( - self.url_result( - 'http://www.vlive.tv/video/%s' % first_video_id, - ie=VLiveIE.ie_key(), video_id=first_video_id)) - else: - entries.append( - self.url_result( - 'http://www.vlive.tv/video/%s' % video_id, - ie=VLiveIE.ie_key(), video_id=video_id)) + board_name = None + if posts_id: + board = self._call_api( + 'board/v1.0/board-%s', posts_id, 'title,boardType') + board_name = board.get('title') or 'Unknown' + if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'): + raise ExtractorError(f'Board {board_name!r} 
is not supported', expected=True) + + entries = self._entries(posts_id or channel_id, board_name) + first_video = next(entries) + channel_name = first_video['channel'] return self.playlist_result( - entries, channel_code, channel_name) + itertools.chain([first_video], entries), + f'{channel_id}-{posts_id}' if posts_id else channel_id, + f'{channel_name} - {board_name}' if channel_name and board_name else channel_name)