mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-11-05 09:47:27 -05:00
[youtube:history] Fix extraction (fixes #5702)
It uses the same method as YoutubeSubscriptionsIE; if other feeds start using it, we should consider using a base class.
This commit is contained in:
parent
12675275a1
commit
2bc4330303
@ -1667,13 +1667,42 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE):
|
|||||||
return self._extract_playlist('WL')
|
return self._extract_playlist('WL')
|
||||||
|
|
||||||
|
|
||||||
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
|
class YoutubeHistoryIE(YoutubePlaylistIE):
|
||||||
IE_NAME = 'youtube:history'
|
IE_NAME = 'youtube:history'
|
||||||
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
|
IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
|
||||||
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
|
_VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
|
||||||
_FEED_NAME = 'history'
|
_TESTS = []
|
||||||
_PERSONAL_FEED = True
|
|
||||||
_PLAYLIST_TITLE = 'Youtube Watch History'
|
def _real_extract(self, url):
|
||||||
|
title = 'Youtube History'
|
||||||
|
page = self._download_webpage('https://www.youtube.com/feed/history', title)
|
||||||
|
|
||||||
|
# The extraction process is the same as for playlists, but the regex
|
||||||
|
# for the video ids doesn't contain an index
|
||||||
|
ids = []
|
||||||
|
more_widget_html = content_html = page
|
||||||
|
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
|
||||||
|
new_ids = orderedSet(matches)
|
||||||
|
ids.extend(new_ids)
|
||||||
|
|
||||||
|
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
|
||||||
|
if not mobj:
|
||||||
|
break
|
||||||
|
|
||||||
|
more = self._download_json(
|
||||||
|
'https://youtube.com/%s' % mobj.group('more'), title,
|
||||||
|
'Downloading page #%s' % page_num,
|
||||||
|
transform_source=uppercase_escape)
|
||||||
|
content_html = more['content_html']
|
||||||
|
more_widget_html = more['load_more_widget_html']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'title': title,
|
||||||
|
'entries': self._ids_to_results(ids),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
|
||||||
|
Loading…
Reference in New Issue
Block a user