Merge 6cdde78019 into 64766459e3

[core/windows] Improve shell quoting and tests (#9802 )
Authored by: Grub4K
2024-04-28 02:53:29 +05:30 · 2024-04-27 10:37:26 +02:00 · 2024-04-22 20:36:01 +00:00 · 2024-04-21 22:41:40 +00:00 · 2024-04-01 22:25:54 +03:00
5 changed files with 110 additions and 89 deletions
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@ -53,7 +53,7 @@ jobs:
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install test requirements
-      run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
+      run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
    - name: Run tests
      continue-on-error: False
      run: |
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -2059,7 +2059,22 @@ Line 1
        assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')

    @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
-    def test_Popen_windows_escaping(self):
+    def test_windows_escaping(self):
+        tests = [
+            'test"&',
+            '%CMDCMDLINE:~-1%&',
+            'a\nb',
+            '"',
+            '\\',
+            '!',
+            '^!',
+            'a \\ b',
+            'a \\" b',
+            'a \\ b\\',
+            # We replace \r with \n
+            ('a\r\ra', 'a\n\na'),
+        ]
+
        def run_shell(args):
            stdout, stderr, error = Popen.run(
                args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@ -2067,15 +2082,18 @@ Line 1
            assert not error
            return stdout

-        # Test escaping
-        assert run_shell(['echo', 'test"&']) == '"test""&"\n'
-        assert run_shell(['echo', '%CMDCMDLINE:~-1%&']) == '"%CMDCMDLINE:~-1%&"\n'
-        assert run_shell(['echo', 'a\nb']) == '"a"\n"b"\n'
-        assert run_shell(['echo', '"']) == '""""\n'
-        assert run_shell(['echo', '\\']) == '\\\n'
-        # Test if delayed expansion is disabled
-        assert run_shell(['echo', '^!']) == '"^!"\n'
-        assert run_shell('echo "^!"') == '"^!"\n'
+        for argument in tests:
+            if isinstance(argument, str):
+                expected = argument
+            else:
+                argument, expected = argument
+
+            args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
+            assert run_shell(args) == expected
+
+            escaped = shell_quote(argument, shell=True)
+            args = f'{sys.executable} -c "import sys; print(end=sys.argv[1])" {escaped} end'
+            assert run_shell(args) == expected


 if __name__ == '__main__':
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@ -2236,10 +2236,10 @@ class InfoExtractor:
                    # contains EXT-X-STREAM-INF tag which references AUDIO
                    # rendition group but does not have CODECS and despite
                    # referencing an audio group it represents a complete
-                    # (with audio and video) format. So, for such cases we will
-                    # ignore references to rendition groups and treat them
-                    # as complete formats.
-                    if audio_group_id and codecs and f.get('vcodec') != 'none':
+                    # (with audio and video) format. But we can't know that
+                    # at this point so assume it's video only.
+                    # It can be resolved at later stage.
+                    if audio_group_id and f.get('vcodec') != 'none':
                        audio_group = groups.get(audio_group_id)
                        if audio_group and audio_group[0].get('URI'):
                            # TODO: update acodec for audio only formats with
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@ -24,11 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
    _BASE_URL = 'https://www.crunchyroll.com'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
+    _REFRESH_TOKEN = None
    _AUTH_HEADERS = None
+    _AUTH_EXPIRY = None
    _API_ENDPOINT = None
-    _BASIC_AUTH = None
+    _BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
+        't-kdgp2h8c3jub8fn0fq',
+        'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
+    )).encode()).decode()
    _IS_PREMIUM = None
-    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
    _LOCALE_LOOKUP = {
        'ar': 'ar-SA',
        'de': 'de-DE',
@ -43,69 +47,74 @@ class CrunchyrollBaseIE(InfoExtractor):
        'hi': 'hi-IN',
    }

-    @property
-    def is_logged_in(self):
-        return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
+    def _set_auth_info(self, response):
+        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
+        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
+        CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
+
+    def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
+        try:  # TODO: Add impersonation support here
+            return self._download_json(
+                f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
+                headers=headers, data=urlencode_postdata(data))
+        except ExtractorError as error:
+            if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
+                raise
+            raise ExtractorError(
+                'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
+                'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
+                'and your browser\'s User-Agent (with --user-agent)', expected=True)

    def _perform_login(self, username, password):
-        if self.is_logged_in:
+        if not CrunchyrollBaseIE._REFRESH_TOKEN:
+            CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
            return

-        upsell_response = self._download_json(
-            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
-            query={
-                'sess_id': 1,
-                'device_id': 'whatvalueshouldbeforweb',
-                'device_type': 'com.crunchyroll.static',
-                'access_token': 'giKq5eY27ny3cqz',
-                'referer': f'{self._BASE_URL}/welcome/login'
-            })
-        if upsell_response['code'] != 'ok':
-            raise ExtractorError('Could not get session id')
-        session_id = upsell_response['data']['session_id']
-
-        login_response = self._download_json(
-            f'{self._API_BASE}/login.1.json', None, 'Logging in',
-            data=urlencode_postdata({
-                'account': username,
-                'password': password,
-                'session_id': session_id
-            }))
-        if login_response['code'] != 'ok':
-            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
-        if not self.is_logged_in:
-            raise ExtractorError('Login succeeded but did not set etp_rt cookie')
-
-    def _update_auth(self):
-        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
-            return
-
-        if not CrunchyrollBaseIE._BASIC_AUTH:
-            cx_api_param = self._CLIENT_ID[self.is_logged_in]
-            self.write_debug(f'Using cxApiParam={cx_api_param}')
-            CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
-
-        auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
-        if self.is_logged_in:
-            grant_type = 'etp_rt_cookie'
-        else:
-            grant_type = 'client_id'
-            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
        try:
-            auth_response = self._download_json(
-                f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
-                headers=auth_headers, data=f'grant_type={grant_type}'.encode())
+            login_response = self._request_token(
+                headers={'Authorization': self._BASIC_AUTH}, data={
+                    'username': username,
+                    'password': password,
+                    'grant_type': 'password',
+                    'scope': 'offline_access',
+                }, note='Logging in', errnote='Failed to log in')
        except ExtractorError as error:
-            if isinstance(error.cause, HTTPError) and error.cause.status == 403:
-                raise ExtractorError(
-                    'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
-                    'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
-                    'and your browser\'s User-Agent (with --user-agent)', expected=True)
+            if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+                raise ExtractorError('Invalid username and/or password', expected=True)
            raise

-        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
-        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
-        CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
+        CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
+        self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
+        self._set_auth_info(login_response)
+
+    def _update_auth(self):
+        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
+            return
+
+        auth_headers = {'Authorization': self._BASIC_AUTH}
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
+            data = {
+                'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
+                'grant_type': 'refresh_token',
+                'scope': 'offline_access',
+            }
+        else:
+            data = {'grant_type': 'client_id'}
+            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
+        try:
+            auth_response = self._request_token(auth_headers, data)
+        except ExtractorError as error:
+            username, password = self._get_login_info()
+            if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
+                raise
+            self.to_screen('Refresh token has expired. Re-logging in')
+            CrunchyrollBaseIE._REFRESH_TOKEN = None
+            self.cache.store(self._NETRC_MACHINE, username, None)
+            self._perform_login(username, password)
+            return
+
+        self._set_auth_info(auth_response)

    def _locale_from_language(self, language):
        config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
@ -168,7 +177,8 @@ class CrunchyrollBaseIE(InfoExtractor):
        self._update_auth()
        stream_response = self._download_json(
            f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
-            display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
+            display_id, note='Downloading stream info', errnote='Failed to download stream info',
+            headers=CrunchyrollBaseIE._AUTH_HEADERS)

        available_formats = {'': ('', '', stream_response['url'])}
        for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
@ -383,9 +393,9 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):

        if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
            message = f'This {object_type} is for premium members only'
-            if self.is_logged_in:
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
+            self.raise_login_required(message, method='password')

        result['formats'], result['subtitles'] = self._extract_stream(internal_id)

@ -575,9 +585,9 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):

        if not self._IS_PREMIUM and response.get('isPremiumOnly'):
            message = f'This {response.get("type") or "media"} is for premium members only'
-            if self.is_logged_in:
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
+            self.raise_login_required(message, method='password')

        result = self._transform_music_response(response)
        result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@ -1638,16 +1638,14 @@ def get_filesystem_encoding():
    return encoding if encoding is not None else 'utf-8'


-_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
 _CMD_QUOTE_TRANS = str.maketrans({
    # Keep quotes balanced by replacing them with `""` instead of `\\"`
    '"': '""',
-    # Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
    # `=` should be unique since variables containing `=` cannot be set using cmd
    '\n': '%=%',
-    # While we are only required to escape backslashes immediately before quotes,
-    # we instead escape all of 'em anyways to be consistent
-    '\\': '\\\\',
+    '\r': '%=%',
    # Use zero length variable replacement so `%` doesn't get expanded
    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
    '%': '%%cd:~,%',
@ -1656,19 +1654,14 @@ _CMD_QUOTE_TRANS = str.maketrans({

 def shell_quote(args, *, shell=False):
    args = list(variadic(args))
-    if any(isinstance(item, bytes) for item in args):
-        deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
-        encoding = get_filesystem_encoding()
-        for index, item in enumerate(args):
-            if isinstance(item, bytes):
-                args[index] = item.decode(encoding)

    if compat_os_name != 'nt':
        return shlex.join(args)

    trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
    return ' '.join(
-        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
+        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
+        else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
        for s in args)
Author	SHA1	Message	Date
Dāvis	c4af8cc403	Merge `6cdde78019` into `64766459e3`	2024-04-28 02:53:29 +05:30
Simon Sawicki	64766459e3	[core/windows] Improve shell quoting and tests (#9802 ) Authored by: Grub4K	2024-04-27 10:37:26 +02:00
bashonly	89f535e265	[ci] Fix `curl-cffi` installation (Bugfix for `02483bea1c`) Authored by: bashonly	2024-04-22 20:36:01 +00:00
bashonly	ff38a011d5	[ie/crunchyroll] Fix auth and remove cookies support (#9749 ) Closes #9745 Authored by: bashonly	2024-04-21 22:41:40 +00:00
Dāvis Mosāns	6cdde78019	Assume M3U streams with audio rendition group to be video only There can be streams that contain reference to seperate audio stream and they don't contain audio itself.	2024-04-01 22:25:54 +03:00