[globo] review fixes

Move video_view into a class constant, use traverse_obj where possible, and replace the subtitles methodology with subs_list_to_dict, etc.
This commit is contained in:
slipinthedove 2024-12-13 00:50:38 -03:00
parent 27a4a3cee7
commit 93e715c81e

View File

@ -6,13 +6,26 @@ from ..utils import (
float_or_none, float_or_none,
orderedSet, orderedSet,
str_or_none, str_or_none,
try_get,
) )
from ..utils.traversal import subs_list_to_dict, traverse_obj
class GloboIE(InfoExtractor): class GloboIE(InfoExtractor):
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})' _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/))(?P<id>\d{7,})'
_NETRC_MACHINE = 'globo' _NETRC_MACHINE = 'globo'
_VIDEO_VIEW = '''
query getVideoView($videoId: ID!) {
video(id: $videoId) {
duration
description
headline
title {
originProgramId
headline
}
}
}
'''
_TESTS = [{ _TESTS = [{
'url': 'https://globoplay.globo.com/v/3607726/', 'url': 'https://globoplay.globo.com/v/3607726/',
'info_dict': { 'info_dict': {
@ -47,30 +60,22 @@ class GloboIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_view = ''' info = self._download_json(
query getVideoView($videoId: ID!) { 'https://cloud-jarvis.globo.com/graphql', video_id,
video(id: $videoId) { query={'operationName': 'getVideoView',
duration 'variables': f'{{"videoId":{video_id}}}',
description 'query': self._VIDEO_VIEW},
headline headers={'content-type': 'application/json',
title { 'x-platform-id': 'web',
originProgramId 'x-device-id': 'desktop',
headline 'x-client-version': '2024.12-5'})['data']['video']
}
}
}
'''
video = self._download_json(
f'https://cloud-jarvis.globo.com/graphql?operationName=getVideoView&variables=%7B"videoId":"{video_id}"%7D&query={video_view}', video_id,
headers={'content-type': 'application/json', 'x-platform-id': 'web', 'x-device-id': 'desktop', 'x-client-version': '2024.12-5'})['data']['video']
title = video['headline']
uploader = video['title'].get('headline')
uploader_id = str_or_none(video['title'].get('originProgramId'))
formats = [] formats = []
security = self._download_json( video = self._download_json(
'https://playback.video.globo.com/v4/video-session', video_id, f'Downloading resource info for {video_id}', 'https://playback.video.globo.com/v4/video-session', video_id,
headers={'Content-Type': 'application/json'}, data=json.dumps({ f'Downloading resource info for {video_id}',
headers={'Content-Type': 'application/json'},
data=json.dumps({
'player_type': 'desktop', 'player_type': 'desktop',
'video_id': video_id, 'video_id': video_id,
'quality': 'max', 'quality': 'max',
@ -79,33 +84,27 @@ class GloboIE(InfoExtractor):
'tz': '-03:00', 'tz': '-03:00',
'version': 1, 'version': 1,
}).encode()) }).encode())
if traverse_obj(video, ('resource', 'drm_protection_enabled', {bool})): if traverse_obj(video, ('resource', 'drm_protection_enabled', {bool})):
self.report_drm(video_id) self.report_drm(video_id)
main_resource = security['sources'][0] main_source = video['sources'][0]
resource_url = main_resource['url'] resource_url = main_source['url']
fmts, subtitles = self._extract_m3u8_formats_and_subtitles( fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
resource_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) resource_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
formats.extend(fmts) formats.extend(fmts)
subs = try_get(security, lambda x: x['sources'][0]['text']) or {}
for sub in subs.items():
if sub['subtitle']:
subtitles.setdefault(sub or 'por', []).append({
'url': sub['subtitle']['srt'].get('url'),
})
duration = float_or_none(video.get('duration'), 1000)
return { return {
'id': video_id, 'id': video_id,
'title': title, **traverse_obj(info, {
'duration': duration, 'title': ('headline', {str}),
'uploader': uploader, 'duration': ('duration', {float_or_none(scale=1000)}),
'uploader_id': uploader_id, 'uploader': ('title', 'headline', {str_or_none}),
'uploader_id': ('title', 'originProgramId', {str_or_none}),
}),
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subs_list_to_dict(traverse_obj(main_source, ('text', {dict.items}))),
} }