Review notes (text before the first "diff --git" line is ignored by patch tools):
  * utils._base_n_table: no longer raises when only `n` is given; a custom
    table may again be longer than `n` (it is truncated to the first n digits).
  * utils.encode_base_n: pass `table` to _base_n_table; divide by `base`
    instead of by the partially built result string.
  * utils.decode_base: deprecation message names yt_dlp.utils.decode_base_n
    and ends with a newline.
  * extractor/common.py: test the BaseURL element with `is not None`; an
    ElementTree Element without children is falsy.

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 2a4c8c883a..7e065daa1f 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -58,6 +58,7 @@ from .postprocessor import (
 from .update import detect_variant
 from .utils import (
     DEFAULT_OUTTMPL,
+    IDENTITY,
     LINK_TEMPLATES,
     NO_DEFAULT,
     NUMBER_RE,
@@ -1002,7 +1003,7 @@ class YoutubeDL:
         return self.params['outtmpl']
 
     def _parse_outtmpl(self):
-        sanitize = lambda x: x
+        sanitize = IDENTITY
         if self.params.get('restrictfilenames'):  # Remove spaces in the default template
             sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
 
@@ -2983,13 +2984,12 @@ class YoutubeDL:
                     info_dict['ext'] = os.path.splitext(file)[1][1:]
                 return file
 
-            success = True
-            merger, fd = FFmpegMergerPP(self), None
+            fd, success = None, True
             if info_dict.get('protocol') or info_dict.get('url'):
                 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                 if fd is not FFmpegFD and (
                         info_dict.get('section_start') or info_dict.get('section_end')):
-                    msg = ('This format cannot be partially downloaded' if merger.available
+                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                            else 'You have requested downloading the video partially, but ffmpeg is not installed')
                     self.report_error(f'{msg}. Aborting')
                     return
@@ -3048,6 +3048,7 @@ class YoutubeDL:
                 dl_filename = existing_video_file(full_filename, temp_filename)
                 info_dict['__real_download'] = False
 
+                merger = FFmpegMergerPP(self)
                 downloaded = []
                 if dl_filename is not None:
                     self.report_file_already_downloaded(dl_filename)
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index db34fe12a6..032856eb8a 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -12,6 +12,7 @@ import sys
 from .compat import compat_getpass, compat_shlex_quote
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
 from .downloader import FileDownloader
+from .downloader.external import get_external_downloader
 from .extractor import list_extractor_classes
 from .extractor.adobepass import MSO_INFO
 from .extractor.common import InfoExtractor
@@ -39,6 +40,7 @@ from .utils import (
     download_range_func,
     expand_path,
     float_or_none,
+    format_field,
     int_or_none,
     match_filter_func,
     parse_duration,
@@ -399,6 +401,10 @@ def validate_options(opts):
     if opts.no_sponsorblock:
         opts.sponsorblock_mark = opts.sponsorblock_remove = set()
 
+    for proto, path in opts.external_downloader.items():
+        if get_external_downloader(path) is None:
+            raise ValueError(
+                f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"')
     warnings, deprecation_warnings = [], []
 
     # Common mistake: -f best
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 383a05a569..3e63df6cb1 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -704,12 +704,10 @@ from .iqiyi import (
     IqIE,
     IqAlbumIE
 )
-
 from .itprotv import (
     ITProTVIE,
     ITProTVCourseIE
 )
-
 from .itv import (
     ITVIE,
     ITVBTCCIE,
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 1b9deeae84..81a6542c3a 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -16,7 +16,7 @@ from ..compat import compat_urllib_parse_urlparse, compat_urllib_request
 from ..utils import (
     ExtractorError,
     bytes_to_intlist,
-    decode_base,
+    decode_base_n,
     int_or_none,
     intlist_to_bytes,
     request_to_url,
@@ -123,7 +123,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
                 'Content-Type': 'application/json',
             })
 
-        res = decode_base(license_response['k'], self.STRTABLE)
+        res = decode_base_n(license_response['k'], table=self.STRTABLE)
         encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
 
         h = hmac.new(
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 3e3e557985..3e8ba5bdd3 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2817,7 +2817,7 @@ class InfoExtractor:
                     base_url = ''
                     for element in (representation, adaptation_set, period, mpd_doc):
                         base_url_e = element.find(_add_ns('BaseURL'))
-                        if base_url_e is not None:
+                        if base_url_e is not None and base_url_e.text:
                             base_url = base_url_e.text + base_url
                             if re.match(r'^https?://', base_url):
                                 break
diff --git a/yt_dlp/extractor/dailywire.py b/yt_dlp/extractor/dailywire.py
index 5a14761836..1f27797ada 100644
--- a/yt_dlp/extractor/dailywire.py
+++ b/yt_dlp/extractor/dailywire.py
@@ -73,9 +73,7 @@ class DailyWireIE(DailyWireBaseIE):
             'display_id': slug,
             'title': traverse_obj(episode_info, 'title', 'name'),
             'description': episode_info.get('description'),
-            'creator': join_nonempty(
-                traverse_obj(episode_info, ('createdBy','firstName')), traverse_obj(episode_info, ('createdBy','lastName')),
-                delim=' '),
+            'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'), from_dict=episode_info, delim=' '),
             'duration': float_or_none(episode_info.get('duration')),
             'is_live': episode_info.get('isLive'),
             'thumbnail': traverse_obj(episode_info, 'thumbnail', 'image', expected_type=url_or_none),
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 845ce5298c..b8c5be7a08 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -3116,6 +3116,7 @@ class GenericIE(InfoExtractor):
         wistia_urls = WistiaIE._extract_urls(webpage)
         if wistia_urls:
             playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+            playlist['entries'] = list(playlist['entries'])
             for entry in playlist['entries']:
                 entry.update({
                     '_type': 'url_transparent',
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index 8417c43c3d..059b62e2ab 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -37,7 +37,7 @@ def md5_text(text):
     return hashlib.md5(text.encode('utf-8')).hexdigest()
 
 
-class IqiyiSDK(object):
+class IqiyiSDK:
     def __init__(self, target, ip, timestamp):
         self.target = target
         self.ip = ip
@@ -131,7 +131,7 @@ class IqiyiSDK(object):
         self.target = self.digit_sum(self.timestamp) + chunks[0] + compat_str(sum(ip))
 
 
-class IqiyiSDKInterpreter(object):
+class IqiyiSDKInterpreter:
     def __init__(self, sdk_code):
         self.sdk_code = sdk_code
 
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 6abdca7880..b9c579cb67 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -146,6 +146,7 @@ USER_AGENTS = {
 
 
 NO_DEFAULT = object()
+IDENTITY = lambda x: x
 
 ENGLISH_MONTH_NAMES = [
     'January', 'February', 'March', 'April', 'May', 'June',
@@ -4744,22 +4745,42 @@ def pkcs1pad(data, length):
     return [0, 2] + pseudo_random + [0] + data
 
 
-def encode_base_n(num, n, table=None):
-    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
-    if not table:
-        table = FULL_TABLE[:n]
+def _base_n_table(n, table):
+    if not table and not n:
+        raise ValueError('Either table or n must be specified')
+    table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n or None]
+
+    if n and n != len(table):
+        raise ValueError(f'base {n} exceeds table length {len(table)}')
+    return table
 
-    if n > len(table):
-        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
 
-    if num == 0:
+def encode_base_n(num, n=None, table=None):
+    """Convert given int to a base-n string"""
+    table = _base_n_table(n, table)
+    if not num:
         return table[0]
 
-    ret = ''
+    result, base = '', len(table)
     while num:
-        ret = table[num % n] + ret
-        num = num // n
-    return ret
+        result = table[num % base] + result
+        num = num // base
+    return result
+
+
+def decode_base_n(string, n=None, table=None):
+    """Convert given base-n string to int"""
+    table = {char: index for index, char in enumerate(_base_n_table(n, table))}
+    result, base = 0, len(table)
+    for char in string:
+        result = result * base + table[char]
+    return result
+
+
+def decode_base(value, digits):
+    write_string('DeprecationWarning: yt_dlp.utils.decode_base is deprecated '
+                 'and may be removed in a future version. Use yt_dlp.utils.decode_base_n instead\n')
+    return decode_base_n(value, table=digits)
 
 
 def decode_packed_codes(code):
@@ -5062,11 +5083,11 @@ def to_high_limit_path(path):
     return path
 
 
-def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=None):
+def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
     val = traverse_obj(obj, *variadic(field))
-    if (not val and val != 0) if ignore is NO_DEFAULT else val in ignore:
+    if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
         return default
-    return template % (func(val) if func else val)
+    return template % func(val)
 
 
 def clean_podcast_url(url):
@@ -5207,10 +5228,8 @@ def traverse_obj(
 
     if isinstance(expected_type, type):
         type_test = lambda val: val if isinstance(val, expected_type) else None
-    elif expected_type is not None:
-        type_test = expected_type
     else:
-        type_test = lambda val: val
+        type_test = expected_type or IDENTITY
 
     for path in path_list:
         depth = 0
@@ -5243,17 +5262,6 @@ def variadic(x, allowed_types=(str, bytes, dict)):
     return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
 
 
-def decode_base(value, digits):
-    # This will convert given base-x string to scalar (long or int)
-    table = {char: index for index, char in enumerate(digits)}
-    result = 0
-    base = len(digits)
-    for chr in value:
-        result *= base
-        result += table[chr]
-    return result
-
-
 def time_seconds(**kwargs):
     t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
     return t.timestamp()
@@ -5327,7 +5335,7 @@ def number_of_digits(number):
 
 def join_nonempty(*values, delim='-', from_dict=None):
     if from_dict is not None:
-        values = map(from_dict.get, values)
+        values = (traverse_obj(from_dict, variadic(v)) for v in values)
     return delim.join(map(str, filter(None, values)))
 
 