From 901130bbcf799573255ade3e2882dec4670feda8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 29 Jul 2021 05:19:26 +0530 Subject: [PATCH] Expand and escape environment variables correctly in outtmpl Fixes: https://www.reddit.com/r/youtubedl/comments/otfmq3/ytdlp_same_parameters_different_results --- test/test_YoutubeDL.py | 19 +++++-- yt_dlp/YoutubeDL.py | 67 ++++++++++++++--------- yt_dlp/postprocessor/execafterdownload.py | 2 +- yt_dlp/postprocessor/metadatafromfield.py | 2 +- yt_dlp/utils.py | 7 ++- 5 files changed, 60 insertions(+), 37 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 555a516e6..e1287f222 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -13,7 +13,7 @@ import copy from test.helper import FakeYDL, assertRegexpMatches from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_str, compat_urllib_error +from yt_dlp.compat import compat_os_name, compat_setenv, compat_str, compat_urllib_error from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor.common import InfoExtractor from yt_dlp.postprocessor.common import PostProcessor @@ -663,7 +663,7 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(ydl.validate_outtmpl(tmpl), None) outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, info or self.outtmpl_info) - out = outtmpl % tmpl_dict + out = ydl.escape_outtmpl(outtmpl) % tmpl_dict fname = ydl.prepare_filename(info or self.outtmpl_info) if callable(expected): @@ -685,9 +685,15 @@ class TestYoutubeDL(unittest.TestCase): test('%(autonumber)s', '001', autonumber_size=3) # Escaping % + test('%', '%') test('%%', '%') test('%%%%', '%%') + test('%s', '%s') + test('%%%s', '%%s') + test('%d', '%d') + test('%abc%', '%abc%') test('%%(width)06d.%(ext)s', '%(width)06d.mp4') + test('%%%(height)s', '%1080') test('%(width)06d.%(ext)s', 'NA.mp4') test('%(width)06d.%%(ext)s', 'NA.%(ext)s') test('%%(width)06d.%(ext)s', '%(width)06d.mp4') @@ -702,12 +708,9 @@ class TestYoutubeDL(unittest.TestCase): test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'}) # Invalid templates - self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%'), ValueError)) self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError)) test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none') test('%()s', 'NA') - test('%s', '%s') - test('%d', '%d') # NA placeholder NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' @@ -741,6 +744,7 @@ class TestYoutubeDL(unittest.TestCase): # Internal formatting FORMATS = self.outtmpl_info['formats'] test('%(timestamp-1000>%H-%M-%S)s', '11-43-20') + test('%(title|%)s %(title|%%)s', '% %%') test('%(id+1-height+3)05d', '00158') test('%(width+100)05d', 'NA') test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % str(FORMATS[0]).replace(':', ' -'))) @@ -759,6 +763,11 @@ class TestYoutubeDL(unittest.TestCase): # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme # test('%(foo|)s', ('', '_')) # fixme + # Environment variable expansion for prepare_filename + compat_setenv('__yt_dlp_var', 'expanded') + envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' + test(envvar, (envvar, 'expanded')) + # Path expansion and escaping test('Hello %(title1)s', 'Hello $PATH') test('Hello %(title2)s', 'Hello %PATH%') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index c0bde4339..3350042c9 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -65,7 +65,8 @@ from .utils import ( float_or_none, format_bytes, format_field, - STR_FORMAT_RE, + STR_FORMAT_RE_TMPL, + STR_FORMAT_TYPES, formatSeconds, GeoRestrictedError, HEADRequest, @@ -845,20 +846,40 @@ class YoutubeDL(object): return sanitize_path(path, force=self.params.get('windowsfilenames')) @staticmethod - def validate_outtmpl(tmpl): + def _outtmpl_expandpath(outtmpl): + # expand_path translates '%%' into '%' and '$$' into '$' + # correspondingly that is not what we want since we need to keep + # '%%' intact for template dict substitution step. Working around + # with boundary-alike separator hack. + sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) + outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) + + # outtmpl should be expand_path'ed before template dict substitution + # because meta fields may contain env variables we don't want to + # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and + # title "Hello $PATH", we don't want `$PATH` to be expanded. + return expand_path(outtmpl).replace(sep, '') + + @staticmethod + def escape_outtmpl(outtmpl): + ''' Escape any remaining strings like %s, %abc% etc. ''' + return re.sub( + STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), + lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), + outtmpl) + + @classmethod + def validate_outtmpl(cls, outtmpl): ''' @return None or Exception object ''' + outtmpl = cls.escape_outtmpl(cls._outtmpl_expandpath(outtmpl)) try: - re.sub( - STR_FORMAT_RE.format(''), - lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0), - tmpl - ) % collections.defaultdict(int) + outtmpl % collections.defaultdict(int) return None except ValueError as err: return err def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): - """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)""" + """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """ info_dict = dict(info_dict) na = self.params.get('outtmpl_na_placeholder', 'NA') @@ -879,7 +900,7 @@ class YoutubeDL(object): } TMPL_DICT = {} - EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*')) + EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}]')) MATH_FUNCTIONS = { '+': float.__add__, '-': float.__sub__, @@ -938,10 +959,11 @@ class YoutubeDL(object): def create_key(outer_mobj): if not outer_mobj.group('has_key'): - return '%{}'.format(outer_mobj.group(0)) + return f'%{outer_mobj.group(0)}' + prefix = outer_mobj.group('prefix') key = outer_mobj.group('key') - fmt = outer_mobj.group('format') + original_fmt = fmt = outer_mobj.group('format') mobj = re.match(INTERNAL_FORMAT_RE, key) if mobj is None: value, default, mobj = None, na, {'fields': ''} @@ -965,6 +987,7 @@ class YoutubeDL(object): value = float_or_none(value) if value is None: value, fmt = default, 's' + if sanitize: if fmt[-1] == 'r': # If value is an object, sanitize might convert it to a string @@ -972,9 +995,10 @@ class YoutubeDL(object): value, fmt = repr(value), '%ss' % fmt[:-1] if fmt[-1] in 'csr': value = sanitize(mobj['fields'].split('.')[-1], value) - key += '\0%s' % fmt + + key = '%s\0%s' % (key.replace('%', '%\0'), original_fmt) TMPL_DICT[key] = value - return '%({key}){fmt}'.format(key=key, fmt=fmt) + return f'{prefix}%({key}){fmt}' return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT @@ -986,19 +1010,8 @@ class YoutubeDL(object): is_id=(k == 'id' or k.endswith('_id'))) outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']) outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize) - - # expand_path translates '%%' into '%' and '$$' into '$' - # correspondingly that is not what we want since we need to keep - # '%%' intact for template dict substitution step. Working around - # with boundary-alike separator hack. - sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) - outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) - - # outtmpl should be expand_path'ed before template dict substitution - # because meta fields may contain env variables we don't want to - # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and - # title "Hello $PATH", we don't want `$PATH` to be expanded. - filename = expand_path(outtmpl).replace(sep, '') % template_dict + outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl)) + filename = outtmpl % template_dict force_ext = OUTTMPL_TYPES.get(tmpl_type) if force_ext is not None: @@ -2344,7 +2357,7 @@ class YoutubeDL(object): if re.match(r'\w+$', tmpl): tmpl = '%({})s'.format(tmpl) tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict) - self.to_stdout(tmpl % info_copy) + self.to_stdout(self.escape_outtmpl(tmpl) % info_copy) print_mandatory('title') print_mandatory('id') diff --git a/yt_dlp/postprocessor/execafterdownload.py b/yt_dlp/postprocessor/execafterdownload.py index 336671d14..ce77c6e80 100644 --- a/yt_dlp/postprocessor/execafterdownload.py +++ b/yt_dlp/postprocessor/execafterdownload.py @@ -23,7 +23,7 @@ class ExecAfterDownloadPP(PostProcessor): def parse_cmd(self, cmd, info): tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info) if tmpl_dict: # if there are no replacements, tmpl_dict = {} - return tmpl % tmpl_dict + return self._downloader.escape_outtmpl(tmpl) % tmpl_dict # If no replacements are found, replace {} for backard compatibility if '{}' not in cmd: diff --git a/yt_dlp/postprocessor/metadatafromfield.py b/yt_dlp/postprocessor/metadatafromfield.py index d41ab4bfc..002794765 100644 --- a/yt_dlp/postprocessor/metadatafromfield.py +++ b/yt_dlp/postprocessor/metadatafromfield.py @@ -55,7 +55,7 @@ class MetadataFromFieldPP(PostProcessor): def run(self, info): for dictn in self._data: tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info) - data_to_parse = tmpl % tmpl_dict + data_to_parse = self._downloader.escape_outtmpl(tmpl) % tmpl_dict self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl'])) match = re.search(dictn['regex'], data_to_parse) if match is None: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 4ff53573f..2bd0925b6 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4438,8 +4438,8 @@ OUTTMPL_TYPES = { # As of [1] format syntax is: # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting -STR_FORMAT_RE = r'''(?x) - (?(?:%%)*) % (?P\((?P{0})\))? # mapping key (?P @@ -4447,10 +4447,11 @@ STR_FORMAT_RE = r'''(?x) (?:\d+)? # minimum field width (optional) (?:\.\d+)? # precision (optional) [hlL]? # length modifier (optional) - [diouxXeEfFgGcrs] # conversion type + {1} # conversion type ) ''' +STR_FORMAT_TYPES = 'diouxXeEfFgGcrs' def limit_length(s, length): """ Add ellipses to overly long strings """