[generic] Refactor _extract_rss

Closes #3738
This commit is contained in:
pukkandan 2022-05-18 04:14:13 +05:30
parent 7896214c42
commit d6bf1161db
No known key found for this signature in database
GPG Key ID: 7EEE9E1E817D0A39

View File

@ -129,6 +129,7 @@ from ..utils import (
sanitized_Request,
smuggle_url,
str_or_none,
try_call,
unescapeHTML,
unified_timestamp,
unsmuggle_url,
@ -2536,66 +2537,44 @@ class GenericIE(InfoExtractor):
self._downloader.write_debug(f'Identified a {name}')
def _extract_rss(self, url, video_id, doc):
    """Build a playlist result from a parsed RSS document.

    Every ``./channel/item`` element becomes one ``url_transparent`` entry.
    The entry URL is the ``url`` attribute of the first ``<enclosure>`` that
    actually carries one; when no enclosure provides a URL the item's
    ``<link>`` text is used instead. Items for which neither source yields a
    URL are skipped.

    @param url       URL of the feed; used as the playlist id
    @param video_id  Unused here; kept for interface compatibility
    @param doc       Root element of the parsed RSS XML
    @returns         A ``playlist`` info dict; title/description are None
                     when the channel does not provide them
    """
    NS_MAP = {
        'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
    }
    # Maps the lowercased <itunes:explicit> text to an age limit.
    # Any other (or missing) value yields None via dict.get.
    EXPLICIT_AGE_LIMITS = {'true': 18, 'yes': 18, 'false': 0, 'no': 0}

    entries = []
    for it in doc.findall('./channel/item'):
        # Skip enclosures without a usable "url" attribute instead of
        # stopping at the first enclosure; a URL-less first enclosure must
        # not prevent falling back to later enclosures or to <link>.
        next_url = next(
            (e.attrib['url'] for e in it.findall('./enclosure') if e.attrib.get('url')),
            None) or xpath_text(it, 'link', fatal=False)
        if not next_url:
            continue

        # try_call shields against a missing <guid> element
        # (it.find() returning None)
        guid = try_call(lambda: it.find('guid').text)
        if guid:
            next_url = smuggle_url(next_url, {'force_videoid': guid})

        def itunes(key):
            # Text of the <itunes:KEY> child of the current item, or None
            return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)

        entries.append({
            '_type': 'url_transparent',
            'url': next_url,
            'title': try_call(lambda: it.find('title').text),
            'description': xpath_text(it, 'description', default=None),
            'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
            'duration': parse_duration(itunes('duration')),
            'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
            'episode': itunes('title'),
            'episode_number': int_or_none(itunes('episode')),
            'season_number': int_or_none(itunes('season')),
            'age_limit': EXPLICIT_AGE_LIMITS.get((itunes('explicit') or '').lower()),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': try_call(lambda: doc.find('./channel/title').text),
        'description': try_call(lambda: doc.find('./channel/description').text),
        'entries': entries,
    }