[extractor/youtube:tab] Extract metadata from channel items (#5569)

Authored by: coletdjnz
This commit is contained in:
Matthew 2022-12-12 23:08:14 +00:00 committed by GitHub
parent 81388c0954
commit c733555106
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -4382,6 +4382,25 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
elif key.startswith('grid') and key.endswith('Renderer'): elif key.startswith('grid') and key.endswith('Renderer'):
return renderer return renderer
def _extract_channel_renderer(self, renderer):
    """Turn a channel renderer into a flat ``url``-type result dict.

    The channel ID is read with a plain subscript, so ``renderer`` must
    carry a ``channelId`` entry (presumably it is a dict, in which case a
    missing key raises ``KeyError`` -- verify against callers). All other
    fields are obtained through the ``_get_text``, ``_get_count`` and
    ``_extract_thumbnails`` helpers on ``self``.

    Returns a dict pointing at the channel's canonical
    ``https://www.youtube.com/channel/<id>`` URL, tagged with
    ``YoutubeTabIE``'s ie_key, plus the channel metadata read from the
    renderer (title, follower count, thumbnails, video count, description).
    """
    cid = renderer['channelId']
    display_name = self._get_text(renderer, 'title')
    canonical_url = f'https://www.youtube.com/channel/{cid}'

    # Identity fields first: the title doubles as the channel name and the
    # canonical URL doubles as the channel URL.
    entry = {
        '_type': 'url',
        'url': canonical_url,
        'id': cid,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': display_name,
        'channel_id': cid,
        'channel_url': canonical_url,
        'title': display_name,
    }

    # Remaining metadata is looked up in the renderer by key name.
    entry['channel_follower_count'] = self._get_count(renderer, 'subscriberCountText')
    entry['thumbnails'] = self._extract_thumbnails(renderer, 'thumbnail')
    entry['playlist_count'] = self._get_count(renderer, 'videoCountText')
    entry['description'] = self._get_text(renderer, 'descriptionSnippet')
    return entry
def _grid_entries(self, grid_renderer): def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']: for item in grid_renderer['items']:
if not isinstance(item, dict): if not isinstance(item, dict):
@ -4407,9 +4426,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
# channel # channel
channel_id = renderer.get('channelId') channel_id = renderer.get('channelId')
if channel_id: if channel_id:
yield self.url_result( yield self._extract_channel_renderer(renderer)
'https://www.youtube.com/channel/%s' % channel_id,
ie=YoutubeTabIE.ie_key(), video_title=title)
continue continue
# generic endpoint URL support # generic endpoint URL support
ep_url = urljoin('https://www.youtube.com/', try_get( ep_url = urljoin('https://www.youtube.com/', try_get(
@ -5762,7 +5779,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader': 'cole-dlp-test-acc', 'uploader': 'cole-dlp-test-acc',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc', 'channel': 'cole-dlp-test-acc',
'channel_follower_count': int,
}, },
'playlist_mincount': 1, 'playlist_mincount': 1,
'params': {'extractor_args': {'youtube': {'lang': ['ja']}}}, 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
@ -5930,7 +5946,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'cole-dlp-test-acc - Shorts', 'title': 'cole-dlp-test-acc - Shorts',
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA', 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc', 'channel': 'cole-dlp-test-acc',
'channel_follower_count': int,
'description': 'test description', 'description': 'test description',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
@ -5976,8 +5991,40 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel': str, 'channel': str,
} }
}], }],
'params': {'extract_flat': True}, 'params': {'extract_flat': True, 'playlist_items': '1'},
'playlist_mincount': 1 'playlist_mincount': 1
}, {
# Channel renderer metadata. Contains number of videos on the channel
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA',
'title': 'cole-dlp-test-acc - Channels',
'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel': 'cole-dlp-test-acc',
'description': 'test description',
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'tags': [],
'uploader': 'cole-dlp-test-acc',
'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
},
'playlist': [{
'info_dict': {
'_type': 'url',
'ie_key': 'YoutubeTab',
'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'title': 'PewDiePie',
'channel': 'PewDiePie',
'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
'thumbnails': list,
'channel_follower_count': int,
'playlist_count': int
}
}],
'params': {'extract_flat': True},
}] }]
@classmethod @classmethod
@ -6531,6 +6578,30 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
# 'title': '#cats', # 'title': '#cats',
# }], # }],
}, },
}, {
# Channel results
'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
'info_dict': {
'id': 'kurzgesagt',
'title': 'kurzgesagt',
},
'playlist': [{
'info_dict': {
'_type': 'url',
'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'ie_key': 'YoutubeTab',
'channel': 'Kurzgesagt In a Nutshell',
'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
'title': 'Kurzgesagt In a Nutshell',
'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
'playlist_count': int, # XXX: should have a way of saying > 1
'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
'thumbnails': list
}
}],
'params': {'extract_flat': True, 'playlist_items': '1'},
'playlist_mincount': 1,
}, { }, {
'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB', 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
'only_matching': True, 'only_matching': True,