2023-07-15 06:25:23 -04:00
|
|
|
import collections
|
2023-07-15 05:00:08 -04:00
|
|
|
import random
|
2023-07-15 06:25:23 -04:00
|
|
|
import urllib.parse
|
|
|
|
import urllib.request
|
|
|
|
|
|
|
|
from ._utils import remove_start
|
2023-07-15 05:00:08 -04:00
|
|
|
|
|
|
|
|
|
|
|
def random_user_agent():
    """Return a Chrome-on-Windows User-Agent string with a randomly picked Chrome version."""
    ua_template = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Candidate Chrome versions, one row per major release.
    chrome_versions = (
        '90.0.4430.212', '90.0.4430.24', '90.0.4430.70', '90.0.4430.72', '90.0.4430.85', '90.0.4430.93',
        '91.0.4472.101', '91.0.4472.106', '91.0.4472.114', '91.0.4472.124', '91.0.4472.164', '91.0.4472.19', '91.0.4472.77',
        '92.0.4515.107', '92.0.4515.115', '92.0.4515.131', '92.0.4515.159', '92.0.4515.43',
        '93.0.4556.0', '93.0.4577.15', '93.0.4577.63', '93.0.4577.82',
        '94.0.4606.41', '94.0.4606.54', '94.0.4606.61', '94.0.4606.71', '94.0.4606.81', '94.0.4606.85',
        '95.0.4638.17', '95.0.4638.50', '95.0.4638.54', '95.0.4638.69', '95.0.4638.74',
        '96.0.4664.18', '96.0.4664.45', '96.0.4664.55', '96.0.4664.93',
        '97.0.4692.20',
    )
    picked_version = random.choice(chrome_versions)
    return ua_template % picked_version
|
|
|
|
|
|
|
|
|
2023-07-15 06:25:23 -04:00
|
|
|
class HTTPHeaderDict(collections.UserDict, dict):
    """
    Store and access keys case-insensitively.
    The constructor can take multiple dicts, in which keys in the latter are prioritised.

    Keys are normalised with str.title() on every access, so 'user-agent',
    'USER-AGENT' and 'User-Agent' all address the same entry.
    Values are always stored as str; bytes values are decoded as latin-1.
    """
    # NOTE(review): `dict` is presumably listed as a base so that isinstance(x, dict)
    # checks accept this type - confirm against callers.

    def __init__(self, *args, **kwargs):
        """Merge every non-None mapping in *args in order, then **kwargs on top."""
        super().__init__()
        for dct in args:
            if dct is not None:
                self.update(dct)
        self.update(kwargs)

    def __setitem__(self, key, value):
        """Store str(value) under the title-cased key."""
        if isinstance(value, bytes):
            # str(b'x') would store the repr "b'x'"; decode to the actual header text instead.
            value = value.decode('latin-1')
        super().__setitem__(key.title(), str(value))

    def __getitem__(self, key):
        """Return the value stored under the title-cased key (KeyError if absent)."""
        return super().__getitem__(key.title())

    def __delitem__(self, key):
        """Delete the entry stored under the title-cased key (KeyError if absent)."""
        super().__delitem__(key.title())

    def __contains__(self, key):
        """Case-insensitive membership test; non-str keys are looked up unchanged."""
        return super().__contains__(key.title() if isinstance(key, str) else key)
|
|
|
|
|
|
|
|
|
|
|
|
# Default request headers. The User-Agent is picked once, when this module is imported.
std_headers = HTTPHeaderDict(
    # Value computed once per process
    {'User-Agent': random_user_agent()},
    # Static defaults
    {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
        'Sec-Fetch-Mode': 'navigate',
    },
)
|
|
|
|
|
|
|
|
|
|
|
|
def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
    """
    Normalise the proxy mapping in place.

    - A truthy 'Ytdl-Request-Proxy' header is removed from `headers` and replaces
      the whole mapping with a single 'all' entry.
    - '__noproxy__' values become None.
    - The 'no' entry and None values are left untouched.
    - URLs without a scheme get an 'http://' prefix.
    - 'socks5' and 'socks' schemes are rewritten to 'socks5h' and 'socks4'.
    - URLs that cannot be parsed are left as they are.
    """
    scheme_aliases = {
        'socks5': 'socks5h',  # compat: socks5 was treated as socks5h
        'socks': 'socks4',  # compat: non-standard
    }

    override = headers.pop('Ytdl-Request-Proxy', None)
    if override:
        # XXX: compat: Ytdl-Request-Proxy takes preference over everything, including NO_PROXY
        proxies.clear()
        proxies['all'] = override

    # Only existing keys are reassigned below, so the dict size is stable during iteration.
    for name, url in proxies.items():
        if url == '__noproxy__':
            proxies[name] = None
            continue
        if name == 'no' or url is None:  # 'no' is a special case
            continue

        try:
            scheme = urllib.request._parse_proxy(url)[0]
        except ValueError:
            # Ignore invalid proxy URLs. Sometimes these may be introduced through environment
            # variables unrelated to proxy settings - e.g. Colab `COLAB_LANGUAGE_SERVER_PROXY`.
            # If the proxy is going to be used, the Request Handler proxy validation will handle it.
            continue

        if scheme is None:
            # Ensure proxies without a scheme are http.
            proxies[name] = 'http://' + remove_start(url, '//')
        elif scheme in scheme_aliases:
            parsed = urllib.parse.urlparse(url)
            proxies[name] = urllib.parse.urlunparse(parsed._replace(scheme=scheme_aliases[scheme]))
|
2023-07-15 05:00:08 -04:00
|
|
|
|
|
|
|
|
2023-07-15 06:25:23 -04:00
|
|
|
def clean_headers(headers: HTTPHeaderDict):
    """
    Translate the compat 'Youtubedl-No-Compression' header in place.

    When present, the header is removed and 'Accept-Encoding' is set to 'identity'.
    Otherwise `headers` is left unchanged.
    """
    compat_header = 'Youtubedl-No-Compression'
    if compat_header not in headers:
        return
    del headers[compat_header]
    headers['Accept-Encoding'] = 'identity'
|