From 870efdee28860d7f6473c52bf7bb1bafb71aaeec Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 7 Apr 2022 16:19:36 +0900 Subject: [PATCH] [TVer] Fix extractor (#3268) Authored by: Lesmiscore --- yt_dlp/extractor/tver.py | 115 ++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 49 deletions(-) diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index 9ff3136e2..f23af1f14 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -1,77 +1,94 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, - remove_start, smuggle_url, + str_or_none, traverse_obj, ) class TVerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>corner|episode|feature|lp|tokyo2020/video)/(?P<id>[fc]?\d+)' - # videos are only available for 7 days + _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ - 'url': 'https://tver.jp/corner/f0062178', + 'skip': 'videos are only available for 7 days', + 'url': 'https://tver.jp/episodes/ephss8yveb', + 'info_dict': { + 'title': '#44 料理と値段と店主にびっくり オモてなしすぎウマい店 2時間SP', + 'description': 'md5:66985373a66fed8ad3cd595a3cfebb13', + }, + 'add_ie': ['BrightcoveNew'], + }, { + 'skip': 'videos are only available for 7 days', + 'url': 'https://tver.jp/lp/episodes/ep6f16g26p', + 'info_dict': { + # sorry but this is "correct" + 'title': '4月11日(月)23時06分 ~ 放送予定', + 'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b', + }, + 'add_ie': ['BrightcoveNew'], + }, { + 'url': 'https://tver.jp/corner/f0103888', 'only_matching': True, }, { - 'url': 'https://tver.jp/feature/f0062413', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/episode/79622438', - 'only_matching': True, - }, { - # subtitle = ' ' - 'url': 'https://tver.jp/corner/f0068870', - 'only_matching': True, - }, { - 'url': 
'https://tver.jp/lp/f0009694', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/lp/c0000239', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/tokyo2020/video/6264525510001', + 'url': 'https://tver.jp/lp/f0033031', 'only_matching': True, }] - _TOKEN = None BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' + _PLATFORM_UID = None + _PLATFORM_TOKEN = None def _real_initialize(self): - self._TOKEN = self._download_json( - 'https://tver.jp/api/access_token.php', None)['token'] + create_response = self._download_json( + 'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None, + note='Creating session', data=b'device_type=pc', headers={ + 'Origin': 'https://s.tver.jp', + 'Referer': 'https://s.tver.jp/', + 'Content-Type': 'application/x-www-form-urlencoded', + }) + self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid')) + self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token')) def _real_extract(self, url): - path, video_id = self._match_valid_url(url).groups() - if path == 'lp': - webpage = self._download_webpage(url, video_id) - redirect_path = self._search_regex(r'to_href="([^"]+)', webpage, 'redirect path') - path, video_id = self._match_valid_url(f'https://tver.jp{redirect_path}').groups() - api_response = self._download_json(f'https://api.tver.jp/v4/{path}/{video_id}', video_id, query={'token': self._TOKEN}) - p_id = traverse_obj(api_response, ('main', 'publisher_id')) - if not p_id: - error_msg, expected = traverse_obj(api_response, ('episode', 0, 'textbar', 0, ('text', 'longer')), get_all=False), True - if not error_msg: - error_msg, expected = 'Failed to extract publisher ID', False - raise ExtractorError(error_msg, expected=expected) - service = remove_start(traverse_obj(api_response, ('main', 'service')), 'ts_') + video_id, video_type = self._match_valid_url(url).group('id', 'type') + if video_type not in {'series', 'episodes'}: + 
webpage = self._download_webpage(url, video_id, note='Resolving to new URL') + video_id = self._match_id(self._search_regex( + (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'), + webpage, 'url regex')) + video_info = self._download_json( + f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, + query={'v': '5'}, headers={ + 'Origin': 'https://tver.jp', + 'Referer': 'https://tver.jp/', + }) + p_id = video_info['video']['accountID'] + r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False) + if not r_id: + raise ExtractorError('Failed to extract reference ID for Brightcove') + if not r_id.isdigit(): + r_id = f'ref:{r_id}' - r_id = traverse_obj(api_response, ('main', 'reference_id')) - if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'): - r_id = 'ref:' + r_id - bc_url = smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), - {'geo_countries': ['JP']}) + additional_info = self._download_json( + f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]', + video_id, fatal=False, + query={ + 'platform_uid': self._PLATFORM_UID, + 'platform_token': self._PLATFORM_TOKEN, + }, headers={ + 'x-tver-platform-type': 'web' + }) return { '_type': 'url_transparent', - 'description': traverse_obj(api_response, ('main', 'note', 0, 'text'), expected_type=compat_str), - 'episode_number': int_or_none(traverse_obj(api_response, ('main', 'ext', 'episode_number'), expected_type=compat_str)), - 'url': bc_url, + 'title': str_or_none(video_info.get('title')), + 'description': str_or_none(video_info.get('description')), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), + 'series': traverse_obj( + additional_info, ('result', ('episode', 'series'), 'content', ('seriesTitle', 'title')), + get_all=False), 'ie_key': 'BrightcoveNew', }