Merge a376ab99f8 into 64766459e3

[core/windows] Improve shell quoting and tests (#9802)
Authored by: Grub4K
2024-04-28 02:53:28 +05:30 · 2024-04-27 10:37:26 +02:00 · 2024-04-22 20:36:01 +00:00 · 2024-04-01 20:29:15 +05:30 · 2024-04-01 20:26:45 +05:30 · 2024-04-01 20:17:23 +05:30
11 changed files with 65 additions and 39 deletions
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -53,7 +53,7 @@ jobs:
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install test requirements
-      run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
+      run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
    - name: Run tests
      continue-on-error: False
      run: |
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -15,7 +15,7 @@ from devscripts.utils import get_filename_args, read_file, write_file
 NO_ATTR = object()
 STATIC_CLASS_PROPERTIES = [
    'IE_NAME', '_ENABLED', '_VALID_URL',  # Used for URL matching
-    '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY',  # Used for --extractor-descriptions
+    '_REPORTED_BROKEN', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY',  # Used for --extractor-descriptions
    'age_limit',  # Used for --age-limit (evaluated)
    '_RETURN_TYPE',  # Accessed in CLI only with instance (evaluated)
 ]
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -304,10 +304,10 @@
 - **CrowdBunker**
 - **CrowdBunkerChannel**
 - **Crtvg**
- - **crunchyroll**: [*crunchyroll*](## "netrc machine")
- - **crunchyroll:artist**: [*crunchyroll*](## "netrc machine")
- - **crunchyroll:music**: [*crunchyroll*](## "netrc machine")
- - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine")
+ - **crunchyroll**: [*crunchyroll*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9453))
+ - **crunchyroll:artist**: [*crunchyroll*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9453))
+ - **crunchyroll:music**: [*crunchyroll*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9453))
+ - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9453))
 - **CSpan**: C-SPAN
 - **CSpanCongress**
 - **CtsNews**: 華視新聞
@@ -553,7 +553,7 @@
 - **hgtv.com:show**
 - **HGTVDe**
 - **HGTVUsa**
- - **HiDive**: [*hidive*](## "netrc machine")
+ - **HiDive**: [*hidive*](## "netrc machine") ([**Currently broken**](https://github.com/yt-dlp/yt-dlp/issues/9385))
 - **HistoricFilms**
 - **history:player**
 - **history:topic**: History.com Topic
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -98,7 +98,7 @@ def generator(test_case, tname):
            self.skipTest(reason)

        if not ie.working():
-            print_skipping('IE marked as not _WORKING')
+            print_skipping('IE is _REPORTED_BROKEN')

        for tc in test_cases:
            if tc.get('expected_exception'):
@@ -117,7 +117,7 @@ def generator(test_case, tname):

        for other_ie in other_ies:
            if not other_ie.working():
-                print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
+                print_skipping(f'test depends on {other_ie.ie_key()}IE, is _REPORTED_BROKEN')

        params = get_params(test_case.get('params', {}))
        params['outtmpl'] = tname + '_' + params['outtmpl']
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -40,8 +40,8 @@ class BaseTestSubtitles(unittest.TestCase):
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)
        if not self.IE.working():
-            print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
-            self.skipTest('IE marked as not _WORKING')
+            print(f'Skipping: {self.IE.ie_key()} is _REPORTED_BROKEN')
+            self.skipTest('IE is _REPORTED_BROKEN')

    def getInfoDict(self):
        info_dict = self.DL.extract_info(self.url, download=False)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -2059,7 +2059,22 @@ Line 1
        assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')

    @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
-    def test_Popen_windows_escaping(self):
+    def test_windows_escaping(self):
+        tests = [
+            'test"&',
+            '%CMDCMDLINE:~-1%&',
+            'a\nb',
+            '"',
+            '\\',
+            '!',
+            '^!',
+            'a \\ b',
+            'a \\" b',
+            'a \\ b\\',
+            # We replace \r with \n
+            ('a\r\ra', 'a\n\na'),
+        ]
+
        def run_shell(args):
            stdout, stderr, error = Popen.run(
                args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -2067,15 +2082,18 @@ Line 1
            assert not error
            return stdout

-        # Test escaping
-        assert run_shell(['echo', 'test"&']) == '"test""&"\n'
-        assert run_shell(['echo', '%CMDCMDLINE:~-1%&']) == '"%CMDCMDLINE:~-1%&"\n'
-        assert run_shell(['echo', 'a\nb']) == '"a"\n"b"\n'
-        assert run_shell(['echo', '"']) == '""""\n'
-        assert run_shell(['echo', '\\']) == '\\\n'
-        # Test if delayed expansion is disabled
-        assert run_shell(['echo', '^!']) == '"^!"\n'
-        assert run_shell('echo "^!"') == '"^!"\n'
+        for argument in tests:
+            if isinstance(argument, str):
+                expected = argument
+            else:
+                argument, expected = argument
+
+            args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
+            assert run_shell(args) == expected
+
+            escaped = shell_quote(argument, shell=True)
+            args = f'{sys.executable} -c "import sys; print(end=sys.argv[1])" {escaped} end'
+            assert run_shell(args) == expected


 if __name__ == '__main__':
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1582,8 +1582,12 @@ class YoutubeDL:
                continue

            if not ie.working():
-                self.report_warning('The program functionality for this site has been marked as broken, '
-                                    'and will probably not work.')
+                self.report_warning(join_nonempty(
+                    f'[{ie.IE_NAME}] The program\'s functionality for this site has been marked as '
+                    f'{self._format_err("BROKEN", self.Styles.ERROR)}, and will probably not work.',
+                    format_field(ie._REPORTED_BROKEN, None, f'See  {self._format_err("%s", self.Styles.EMPHASIS)}'
+                                 '  for more information. Do NOT open a new issue for this.'),
+                    delim='\n         '))

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -546,8 +546,9 @@ class InfoExtractor:
    The _ENABLED attribute should be set to False for IEs that
    are disabled by default and must be explicitly enabled.

-    The _WORKING attribute should be set to False for broken IEs
+    For broken extractors, the _REPORTED_BROKEN attribute can be set to the issue URL
    in order to warn the users and skip the tests.
+    [Deprecated] If there is no open issue, set _WORKING = False instead.
    """

    _ready = False
@@ -613,10 +614,13 @@ class InfoExtractor:
        except (IndexError, AttributeError):
            return None

+    @classproperty(cache=True)
+    def _REPORTED_BROKEN(cls):
+        return not cls._WORKING and ''
+
    @classmethod
    def working(cls):
-        """Getter method for _WORKING."""
-        return cls._WORKING
+        return cls._REPORTED_BROKEN is False

    @classmethod
    def supports_login(cls):
@@ -3674,7 +3678,12 @@ class InfoExtractor:
                _COUNTS = ('', '5', '10', 'all')
                desc += f' (e.g. "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")'
        if not cls.working():
-            desc += ' (**Currently broken**)' if markdown else ' (Currently broken)'
+            msg = 'Currently broken'
+            if markdown:
+                msg = f'**{msg}**'
+                if cls._REPORTED_BROKEN:
+                    msg = f'[{msg}]({cls._REPORTED_BROKEN})'
+            desc += f' ({msg})'

        # Escape emojis. Ref: https://github.com/github/markup/issues/1153
        name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -21,6 +21,7 @@ from ..utils import (


 class CrunchyrollBaseIE(InfoExtractor):
+    _REPORTED_BROKEN = 'https://github.com/yt-dlp/yt-dlp/issues/9453'
    _BASE_URL = 'https://www.crunchyroll.com'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
--- a/yt_dlp/extractor/hidive.py
+++ b/yt_dlp/extractor/hidive.py
@@ -9,6 +9,7 @@ from ..utils import (


 class HiDiveIE(InfoExtractor):
+    _REPORTED_BROKEN = 'https://github.com/yt-dlp/yt-dlp/issues/9385'
    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
    # so disabling geo bypass completely
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -1638,16 +1638,14 @@ def get_filesystem_encoding():
    return encoding if encoding is not None else 'utf-8'


-_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
 _CMD_QUOTE_TRANS = str.maketrans({
    # Keep quotes balanced by replacing them with `""` instead of `\\"`
    '"': '""',
-    # Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
    # `=` should be unique since variables containing `=` cannot be set using cmd
    '\n': '%=%',
-    # While we are only required to escape backslashes immediately before quotes,
-    # we instead escape all of 'em anyways to be consistent
-    '\\': '\\\\',
+    '\r': '%=%',
    # Use zero length variable replacement so `%` doesn't get expanded
    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
    '%': '%%cd:~,%',
@@ -1656,19 +1654,14 @@ _CMD_QUOTE_TRANS = str.maketrans({

 def shell_quote(args, *, shell=False):
    args = list(variadic(args))
-    if any(isinstance(item, bytes) for item in args):
-        deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
-        encoding = get_filesystem_encoding()
-        for index, item in enumerate(args):
-            if isinstance(item, bytes):
-                args[index] = item.decode(encoding)

    if compat_os_name != 'nt':
        return shlex.join(args)

    trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
    return ' '.join(
-        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
+        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
+        else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
        for s in args)
Author	SHA1	Message	Date
pukkandan	27b923f880	Merge `a376ab99f8` into `64766459e3`	2024-04-28 02:53:28 +05:30
Simon Sawicki	64766459e3	[core/windows] Improve shell quoting and tests (#9802) Authored by: Grub4K	2024-04-27 10:37:26 +02:00
bashonly	89f535e265	[ci] Fix `curl-cffi` installation (Bugfix for `02483bea1c`) Authored by: bashonly	2024-04-22 20:36:01 +00:00
pukkandan	a376ab99f8	fstrings need f	2024-04-01 20:29:15 +05:30
pukkandan	68b74d52ab	fstrings Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>	2024-04-01 20:26:45 +05:30
pukkandan	c414c3d406	Rename	2024-04-01 20:17:23 +05:30
pukkandan	93e83fa261	lint	2024-04-01 07:51:11 +05:30
pukkandan	bce376140f	`make supportedsites`	2024-04-01 07:43:02 +05:30
pukkandan	93efacd098	[ie] Add `_BROKEN_ISSUE`	2024-04-01 07:42:08 +05:30