diff --git a/test/test_networking.py b/test/test_networking.py
index dbe28359be..f0938ab91c 100644
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -930,10 +930,10 @@ class TestRequestHandlerValidation:
         run_validation(handler, False, Request('http://', proxies={'http': None}))
         run_validation(handler, False, Request('http://'), proxies={'http': None})
 
-    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1'])
+    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
     @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
-    def test_missing_proxy_scheme(self, handler, proxy_url):
-        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': 'example.com'}))
+    def test_invalid_proxy_url(self, handler, proxy_url):
+        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
 
     @pytest.mark.parametrize('handler,extensions,fail', [
         (handler_tests[0], extensions, fail)
@@ -1126,9 +1126,11 @@ class TestYoutubeDLNetworking:
         ('http', '__noproxy__', None),
         ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
         ('https', 'example.com', 'http://example.com'),
+        ('https', '//example.com', 'http://example.com'),
         ('https', 'socks5://example.com', 'socks5h://example.com'),
         ('http', 'socks://example.com', 'socks4://example.com'),
         ('http', 'socks4://example.com', 'socks4://example.com'),
+        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
     ])
     def test_clean_proxy(self, proxy_key, proxy_url, expected):
         # proxies should be cleaned in urlopen()
diff --git a/yt_dlp/networking/common.py b/yt_dlp/networking/common.py
index ab26a06282..3164df49b4 100644
--- a/yt_dlp/networking/common.py
+++ b/yt_dlp/networking/common.py
@@ -262,9 +262,13 @@ class RequestHandler(abc.ABC):
                 # Skip proxy scheme checks
                 continue
 
-            # Scheme-less proxies are not supported
-            if urllib.request._parse_proxy(proxy_url)[0] is None:
-                raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
+            try:
+                if urllib.request._parse_proxy(proxy_url)[0] is None:
+                    # Scheme-less proxies are not supported
+                    raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
+            except ValueError as e:
+                # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
+                raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}')
 
             scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
             if scheme not in self._SUPPORTED_PROXY_SCHEMES:
diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py
index ac355ddc85..e6515ec8ee 100644
--- a/yt_dlp/utils/networking.py
+++ b/yt_dlp/utils/networking.py
@@ -98,7 +98,13 @@ def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
             continue
         if proxy_url is not None:
             # Ensure proxies without a scheme are http.
-            proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
+            try:
+                proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
+            except ValueError:
+                # Ignore invalid proxy URLs. Sometimes these may be introduced through environment
+                # variables unrelated to proxy settings - e.g. Colab `COLAB_LANGUAGE_SERVER_PROXY`.
+                # If the proxy is going to be used, the Request Handler proxy validation will handle it.
+                continue
             if proxy_scheme is None:
                 proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')
 
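
Note (not part of the patch): a minimal sketch of the behavior the new try/except blocks guard against, assuming CPython's private urllib.request._parse_proxy helper. A scheme-less host parses with a None scheme (so clean_proxies upgrades it to http://), while a path-only value such as '/a/b/c' raises ValueError, which clean_proxies now skips and the request handler proxy validation reports as UnsupportedRequest.

    import urllib.request

    # Scheme-less proxy: parses fine, scheme is None
    print(urllib.request._parse_proxy('example.com'))  # -> (None, None, None, 'example.com')

    # Path-only value (e.g. leaked from an unrelated *_PROXY environment variable): raises ValueError
    try:
        urllib.request._parse_proxy('/a/b/c')
    except ValueError as e:
        print(f'rejected: {e}')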