More improvements to HLS/DASH external downloader code

* Fix error when there is no `protocol` in `info_dict`
* Move HLS byte range detection to `Aria2cFD` so that the download will fall back to the native downloader instead of ffmpeg
* Fix bug in DASH where no fragments were downloaded because the `fragments` list was overwritten with an empty list before being iterated
* Convert `check_results` in `can_download` to a generator
This commit is contained in:
pukkandan 2021-03-10 20:56:24 +05:30
parent e4edeb6226
commit 0a473f2f0f
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698
5 changed files with 44 additions and 22 deletions

View File

@ -2437,7 +2437,8 @@ class YoutubeDL(object):
else: else:
assert fixup_policy in ('ignore', 'never') assert fixup_policy in ('ignore', 'never')
if get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD': if ('protocol' in info_dict
and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
if fixup_policy == 'warn': if fixup_policy == 'warn':
self.report_warning('%s: malformed AAC bitstream detected.' % ( self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id'])) info_dict['id']))

View File

@ -326,6 +326,12 @@ class FileDownloader(object):
"""Report it was impossible to resume download.""" """Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume') self.to_screen('[download] Unable to resume')
@staticmethod
def supports_manifest(manifest):
    """Report whether this downloader can fetch the fragments of `manifest`.

    The base implementation returns None, which is falsy, so a caller that
    tests the result as a boolean treats the manifest as unsupported.
    Redefine in subclasses that are able to handle manifests.
    """
    return None
def download(self, filename, info_dict, subtitle=False): def download(self, filename, info_dict, subtitle=False):
"""Download to a filename using the info from info_dict """Download to a filename using the info from info_dict
Return True on success and False otherwise Return True on success and False otherwise

View File

@ -12,7 +12,8 @@ from ..utils import (
class DashSegmentsFD(FragmentFD): class DashSegmentsFD(FragmentFD):
""" """
Download segments in a DASH manifest Download segments in a DASH manifest. External downloaders can take over
the fragment downloads by supporting the 'frag_urls' protocol
""" """
FD_NAME = 'dashsegments' FD_NAME = 'dashsegments'
@ -37,7 +38,7 @@ class DashSegmentsFD(FragmentFD):
fragment_retries = self.params.get('fragment_retries', 0) fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
fragments = [] fragments_to_download = []
frag_index = 0 frag_index = 0
for i, fragment in enumerate(fragments): for i, fragment in enumerate(fragments):
frag_index += 1 frag_index += 1
@ -49,7 +50,7 @@ class DashSegmentsFD(FragmentFD):
fragment_url = urljoin(fragment_base_url, fragment['path']) fragment_url = urljoin(fragment_base_url, fragment['path'])
if real_downloader: if real_downloader:
fragments.append({ fragments_to_download.append({
'url': fragment_url, 'url': fragment_url,
}) })
continue continue
@ -92,7 +93,7 @@ class DashSegmentsFD(FragmentFD):
if real_downloader: if real_downloader:
info_copy = info_dict.copy() info_copy = info_dict.copy()
info_copy['fragments'] = fragments info_copy['fragments'] = fragments_to_download
fd = real_downloader(self.ydl, self.params) fd = real_downloader(self.ydl, self.params)
# TODO: Make progress updates work without hooking twice # TODO: Make progress updates work without hooking twice
# for ph in self._progress_hooks: # for ph in self._progress_hooks:

View File

@ -125,7 +125,7 @@ class ExternalFD(FileDownloader):
if 'fragments' in info_dict: if 'fragments' in info_dict:
file_list = [] file_list = []
dest, _ = sanitize_open(tmpfilename, 'wb') dest, _ = sanitize_open(tmpfilename, 'wb')
for [i, fragment] in enumerate(info_dict['fragments']): for i, fragment in enumerate(info_dict['fragments']):
file = '%s_%s.frag' % (tmpfilename, i) file = '%s_%s.frag' % (tmpfilename, i)
decrypt_info = fragment.get('decrypt_info') decrypt_info = fragment.get('decrypt_info')
src, _ = sanitize_open(file, 'rb') src, _ = sanitize_open(file, 'rb')
@ -242,6 +242,15 @@ class Aria2cFD(ExternalFD):
AVAILABLE_OPT = '-v' AVAILABLE_OPT = '-v'
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls') SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
@staticmethod
def supports_manifest(manifest):
    """Return True if `manifest` uses none of the features listed below.

    Each entry is a regular expression searched for in the manifest text;
    a single match is enough to report the manifest as unsupported.
    """
    unsupported_patterns = (
        # playlists composed of byte ranges of media files [1]
        # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
        r'#EXT-X-BYTERANGE',
    )
    return not any(re.search(pattern, manifest) for pattern in unsupported_patterns)
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c'] cmd = [self.exe, '-c']
dn = os.path.dirname(tmpfilename) dn = os.path.dirname(tmpfilename)
@ -264,7 +273,7 @@ class Aria2cFD(ExternalFD):
cmd += ['--uri-selector', 'inorder', '--download-result=hide'] cmd += ['--uri-selector', 'inorder', '--download-result=hide']
url_list_file = '%s.frag.urls' % tmpfilename url_list_file = '%s.frag.urls' % tmpfilename
url_list = [] url_list = []
for [i, fragment] in enumerate(info_dict['fragments']): for i, fragment in enumerate(info_dict['fragments']):
tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i) tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i)
url_list.append('%s\n\tout=%s' % (fragment['url'], tmpsegmentname)) url_list.append('%s\n\tout=%s' % (fragment['url'], tmpsegmentname))
stream, _ = sanitize_open(url_list_file, 'wb') stream, _ = sanitize_open(url_list_file, 'wb')

View File

@ -24,12 +24,16 @@ from ..utils import (
class HlsFD(FragmentFD): class HlsFD(FragmentFD):
""" A limited implementation that does not require ffmpeg """ """
Download segments in a m3u8 manifest. External downloaders can take over
the fragment downloads by supporting the 'frag_urls' protocol and
re-defining 'supports_manifest' function
"""
FD_NAME = 'hlsnative' FD_NAME = 'hlsnative'
@staticmethod @staticmethod
def can_download(manifest, info_dict, allow_unplayable_formats=False, real_downloader=None, with_crypto=can_decrypt_frag): def can_download(manifest, info_dict, allow_unplayable_formats=False, with_crypto=can_decrypt_frag):
UNSUPPORTED_FEATURES = [ UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
@ -53,16 +57,15 @@ class HlsFD(FragmentFD):
UNSUPPORTED_FEATURES += [ UNSUPPORTED_FEATURES += [
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
] ]
if real_downloader:
UNSUPPORTED_FEATURES += [ def check_results():
r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] yield not info_dict.get('is_live')
]
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
check_results.append(with_crypto or not is_aes128_enc) yield with_crypto or not is_aes128_enc
check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)) yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
check_results.append(not info_dict.get('is_live')) for feature in UNSUPPORTED_FEATURES:
return all(check_results) yield not re.search(feature, manifest)
return all(check_results())
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
man_url = info_dict['url'] man_url = info_dict['url']
@ -72,9 +75,7 @@ class HlsFD(FragmentFD):
man_url = urlh.geturl() man_url = urlh.geturl()
s = urlh.read().decode('utf-8', 'ignore') s = urlh.read().decode('utf-8', 'ignore')
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None) if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats'), real_downloader):
if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
self.report_error('pycryptodome not found. Please install it.') self.report_error('pycryptodome not found. Please install it.')
return False return False
@ -89,6 +90,10 @@ class HlsFD(FragmentFD):
# fd.add_progress_hook(ph) # fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict) return fd.real_download(filename, info_dict)
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
if real_downloader and not real_downloader.supports_manifest(s):
real_downloader = None
def is_ad_fragment_start(s): def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))