mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-06-10 02:40:52 +00:00
Merge branch 'yt-dlp:master' into rls/arm-ubuntu-bump
This commit is contained in:
commit
1804055dd2
10
.github/banner.svg
vendored
10
.github/banner.svg
vendored
File diff suppressed because one or more lines are too long
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 15 KiB |
12
.github/workflows/build.yml
vendored
12
.github/workflows/build.yml
vendored
|
@ -125,11 +125,12 @@ jobs:
|
|||
sudo apt -y install zip pandoc man sed
|
||||
cat > ./requirements.txt << EOF
|
||||
python=3.10.*
|
||||
pyinstaller
|
||||
brotli-python
|
||||
EOF
|
||||
python devscripts/install_deps.py --print \
|
||||
--exclude brotli --exclude brotlicffi \
|
||||
--include secretstorage --include pyinstaller >> ./requirements.txt
|
||||
--include secretstorage >> ./requirements.txt
|
||||
mamba create -n build --file ./requirements.txt
|
||||
|
||||
- name: Prepare
|
||||
|
@ -249,7 +250,7 @@ jobs:
|
|||
run: |
|
||||
brew install coreutils
|
||||
python3 devscripts/install_deps.py --user -o --include build
|
||||
python3 devscripts/install_deps.py --print --include pyinstaller_macos > requirements.txt
|
||||
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
|
||||
# We need to ignore wheels otherwise we break universal2 builds
|
||||
python3 -m pip install -U --user --no-binary :all: -r requirements.txt
|
||||
# We need to fuse our own universal2 wheels for curl_cffi
|
||||
|
@ -321,7 +322,7 @@ jobs:
|
|||
run: |
|
||||
brew install coreutils
|
||||
python3 devscripts/install_deps.py --user -o --include build
|
||||
python3 devscripts/install_deps.py --user --include pyinstaller_macos --include curl_cffi
|
||||
python3 devscripts/install_deps.py --user --include pyinstaller --include curl_cffi
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
@ -468,8 +469,9 @@ jobs:
|
|||
- name: Make SHA2-SUMS files
|
||||
run: |
|
||||
cd ./artifact/
|
||||
sha256sum * > ../SHA2-256SUMS
|
||||
sha512sum * > ../SHA2-512SUMS
|
||||
# make sure SHA sums are also printed to stdout
|
||||
sha256sum * | tee ../SHA2-256SUMS
|
||||
sha512sum * | tee ../SHA2-512SUMS
|
||||
|
||||
- name: Make Update spec
|
||||
run: |
|
||||
|
|
27
README.md
27
README.md
|
@ -17,7 +17,7 @@
|
|||
</div>
|
||||
<!-- MANPAGE: END EXCLUDED SECTION -->
|
||||
|
||||
yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). The main focus of this project is adding new features and patches while also keeping up to date with the original project
|
||||
yt-dlp is a feature-rich command-line audio/video downloader with support for [thousands of sites](supportedsites.md). The project is a fork of [youtube-dl](https://github.com/ytdl-org/youtube-dl) based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc).
|
||||
|
||||
<!-- MANPAGE: MOVE "USAGE AND OPTIONS" SECTION HERE -->
|
||||
|
||||
|
@ -1472,9 +1472,9 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
|
|||
- `width`: Width of the video, if known
|
||||
- `height`: Height of the video, if known
|
||||
- `aspect_ratio`: Aspect ratio of the video, if known
|
||||
- `tbr`: Average bitrate of audio and video in KBit/s
|
||||
- `abr`: Average audio bitrate in KBit/s
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `tbr`: Average bitrate of audio and video in [kbps](## "1000 bits/sec")
|
||||
- `abr`: Average audio bitrate in [kbps](## "1000 bits/sec")
|
||||
- `vbr`: Average video bitrate in [kbps](## "1000 bits/sec")
|
||||
- `asr`: Audio sampling rate in Hertz
|
||||
- `fps`: Frame rate
|
||||
- `audio_channels`: The number of audio channels
|
||||
|
@ -1499,7 +1499,7 @@ Any string comparison may be prefixed with negation `!` in order to produce an o
|
|||
|
||||
**Note**: None of the aforementioned meta fields are guaranteed to be present, since this depends solely on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
|
||||
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
|
||||
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
|
||||
|
||||
Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
|
||||
|
||||
|
@ -1531,10 +1531,10 @@ The available fields are:
|
|||
- `fps`: Framerate of video
|
||||
- `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
|
||||
- `channels`: The number of audio channels
|
||||
- `tbr`: Total average bitrate in KBit/s
|
||||
- `vbr`: Average video bitrate in KBit/s
|
||||
- `abr`: Average audio bitrate in KBit/s
|
||||
- `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
|
||||
- `tbr`: Total average bitrate in [kbps](## "1000 bits/sec")
|
||||
- `vbr`: Average video bitrate in [kbps](## "1000 bits/sec")
|
||||
- `abr`: Average audio bitrate in [kbps](## "1000 bits/sec")
|
||||
- `br`: Average bitrate in [kbps](## "1000 bits/sec"), `tbr`/`vbr`/`abr`
|
||||
- `asr`: Audio sample rate in Hz
|
||||
|
||||
**Deprecation warning**: Many of these fields have (currently undocumented) aliases that may be removed in a future version. It is recommended to use only the documented field names.
|
||||
|
@ -1805,9 +1805,12 @@ The following extractors use this feature:
|
|||
* `max_comments`: Maximum number of comments to extract - default is `120`
|
||||
|
||||
#### tiktok
|
||||
* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`
|
||||
* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
|
||||
* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221`
|
||||
* `api_hostname`: Hostname to use for mobile API calls, e.g. `api22-normal-c-alisg.tiktokv.com`
|
||||
* `app_name`: Default app name to use with mobile API calls, e.g. `trill`
|
||||
* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
|
||||
* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
|
||||
* `aid`: Default app ID to use with API calls, e.g. `1180`
|
||||
* `app_info`: One or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
|
||||
|
||||
#### rokfinchannel
|
||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||
|
|
|
@ -28,7 +28,7 @@ def main():
|
|||
}],
|
||||
version_info={
|
||||
'version': VERSION,
|
||||
'description': 'A youtube-dl fork with additional features and patches',
|
||||
'description': 'A feature-rich command-line audio/video downloader',
|
||||
'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
|
||||
'product_name': 'yt-dlp',
|
||||
'product_version': VERSION,
|
||||
|
|
|
@ -126,5 +126,9 @@
|
|||
"when": "4ce57d3b873c2887814cbec03d029533e82f7db5",
|
||||
"short": "[ie] Support multi-period MPD streams (#6654)",
|
||||
"authors": ["alard", "pukkandan"]
|
||||
},
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "22e4dfacb61f62dfbb3eb41b31c7b69ba1059b80"
|
||||
}
|
||||
]
|
||||
|
|
|
@ -10,6 +10,8 @@ import argparse
|
|||
import re
|
||||
import subprocess
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from devscripts.tomlparse import parse_toml
|
||||
from devscripts.utils import read_file
|
||||
|
||||
|
@ -17,17 +19,23 @@ from devscripts.utils import read_file
|
|||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
|
||||
parser.add_argument(
|
||||
'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
|
||||
'input', nargs='?', metavar='TOMLFILE', default=Path(__file__).parent.parent / 'pyproject.toml',
|
||||
help='input file (default: %(default)s)')
|
||||
parser.add_argument(
|
||||
'-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
|
||||
'-e', '--exclude', metavar='DEPENDENCY', action='append',
|
||||
help='exclude a dependency')
|
||||
parser.add_argument(
|
||||
'-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
|
||||
'-i', '--include', metavar='GROUP', action='append',
|
||||
help='include an optional dependency group')
|
||||
parser.add_argument(
|
||||
'-o', '--only-optional', action='store_true', help='Only install optional dependencies')
|
||||
'-o', '--only-optional', action='store_true',
|
||||
help='only install optional dependencies')
|
||||
parser.add_argument(
|
||||
'-p', '--print', action='store_true', help='Only print a requirements.txt to stdout')
|
||||
'-p', '--print', action='store_true',
|
||||
help='only print requirements to stdout')
|
||||
parser.add_argument(
|
||||
'-u', '--user', action='store_true', help='Install with pip as --user')
|
||||
'-u', '--user', action='store_true',
|
||||
help='install with pip as --user')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
@ -37,24 +45,16 @@ def main():
|
|||
optional_groups = project_table['optional-dependencies']
|
||||
excludes = args.exclude or []
|
||||
|
||||
deps = []
|
||||
targets = []
|
||||
if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group
|
||||
deps.extend(project_table['dependencies'])
|
||||
targets.extend(project_table['dependencies'])
|
||||
if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group
|
||||
deps.extend(optional_groups['default'])
|
||||
|
||||
def name(dependency):
|
||||
return re.match(r'[\w-]+', dependency)[0].lower()
|
||||
|
||||
target_map = {name(dep): dep for dep in deps}
|
||||
targets.extend(optional_groups['default'])
|
||||
|
||||
for include in filter(None, map(optional_groups.get, args.include or [])):
|
||||
target_map.update(zip(map(name, include), include))
|
||||
targets.extend(include)
|
||||
|
||||
for exclude in map(name, excludes):
|
||||
target_map.pop(exclude, None)
|
||||
|
||||
targets = list(target_map.values())
|
||||
targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes]
|
||||
|
||||
if args.print:
|
||||
for target in targets:
|
||||
|
|
|
@ -24,7 +24,7 @@ PREFIX = r'''%yt-dlp(1)
|
|||
|
||||
# NAME
|
||||
|
||||
yt\-dlp \- A youtube-dl fork with additional features and patches
|
||||
yt\-dlp \- A feature\-rich command\-line audio/video downloader
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime
|
||||
import datetime as dt
|
||||
import json
|
||||
import re
|
||||
|
||||
|
@ -115,9 +115,9 @@ def parse_value(data: str, index: int):
|
|||
for func in [
|
||||
int,
|
||||
float,
|
||||
datetime.time.fromisoformat,
|
||||
datetime.date.fromisoformat,
|
||||
datetime.datetime.fromisoformat,
|
||||
dt.time.fromisoformat,
|
||||
dt.date.fromisoformat,
|
||||
dt.datetime.fromisoformat,
|
||||
{'true': True, 'false': False}.get,
|
||||
]:
|
||||
try:
|
||||
|
@ -179,7 +179,7 @@ def main():
|
|||
data = file.read()
|
||||
|
||||
def default(obj):
|
||||
if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
|
||||
if isinstance(obj, (dt.date, dt.time, dt.datetime)):
|
||||
return obj.isoformat()
|
||||
|
||||
print(json.dumps(parse_toml(data), default=default))
|
||||
|
|
|
@ -9,15 +9,15 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|||
|
||||
import argparse
|
||||
import contextlib
|
||||
import datetime as dt
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from devscripts.utils import read_version, run_process, write_file
|
||||
|
||||
|
||||
def get_new_version(version, revision):
|
||||
if not version:
|
||||
version = datetime.now(timezone.utc).strftime('%Y.%m.%d')
|
||||
version = dt.datetime.now(dt.timezone.utc).strftime('%Y.%m.%d')
|
||||
|
||||
if revision:
|
||||
assert revision.isdecimal(), 'Revision must be a number'
|
||||
|
|
|
@ -10,7 +10,7 @@ maintainers = [
|
|||
{name = "bashonly", email = "bashonly@protonmail.com"},
|
||||
{name = "coletdjnz", email = "coletdjnz@protonmail.com"},
|
||||
]
|
||||
description = "A youtube-dl fork with additional features and patches"
|
||||
description = "A feature-rich command-line audio/video downloader"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
keywords = [
|
||||
|
@ -69,8 +69,10 @@ dev = [
|
|||
"isort",
|
||||
"pytest",
|
||||
]
|
||||
pyinstaller = ["pyinstaller>=6.3"]
|
||||
pyinstaller_macos = ["pyinstaller==5.13.2"] # needed for curl_cffi builds
|
||||
pyinstaller = [
|
||||
"pyinstaller>=6.3; sys_platform!='darwin'",
|
||||
"pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi
|
||||
]
|
||||
py2exe = ["py2exe>=0.12"]
|
||||
|
||||
[project.urls]
|
||||
|
|
|
@ -183,7 +183,7 @@ class TestFormatSelection(unittest.TestCase):
|
|||
]
|
||||
|
||||
info_dict = _make_result(formats)
|
||||
ydl = YDL({'format': 'best'})
|
||||
ydl = YDL({'format': 'best', 'format_sort': ['abr', 'ext']})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
|
@ -195,7 +195,7 @@ class TestFormatSelection(unittest.TestCase):
|
|||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
self.assertEqual(downloaded['format_id'], 'mp3-64')
|
||||
|
||||
ydl = YDL({'prefer_free_formats': True})
|
||||
ydl = YDL({'prefer_free_formats': True, 'format_sort': ['abr', 'ext']})
|
||||
ydl.sort_formats(info_dict)
|
||||
ydl.process_ie_result(copy.deepcopy(info_dict))
|
||||
downloaded = ydl.downloaded_info_dicts[0]
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import datetime as dt
|
||||
import unittest
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from yt_dlp import cookies
|
||||
from yt_dlp.cookies import (
|
||||
|
@ -138,7 +138,7 @@ class TestCookies(unittest.TestCase):
|
|||
self.assertEqual(cookie.name, 'foo')
|
||||
self.assertEqual(cookie.value, 'test%20%3Bcookie')
|
||||
self.assertFalse(cookie.secure)
|
||||
expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc)
|
||||
expected_expiration = dt.datetime(2021, 6, 18, 21, 39, 19, tzinfo=dt.timezone.utc)
|
||||
self.assertEqual(cookie.expires, int(expected_expiration.timestamp()))
|
||||
|
||||
def test_pbkdf2_sha1(self):
|
||||
|
|
442
test/test_traversal.py
Normal file
442
test/test_traversal.py
Normal file
|
@ -0,0 +1,442 @@
|
|||
import http.cookies
|
||||
import re
|
||||
import xml.etree.ElementTree
|
||||
|
||||
import pytest
|
||||
|
||||
from yt_dlp.utils import dict_get, int_or_none, str_or_none
|
||||
from yt_dlp.utils.traversal import traverse_obj
|
||||
|
||||
_TEST_DATA = {
|
||||
100: 100,
|
||||
1.2: 1.2,
|
||||
'str': 'str',
|
||||
'None': None,
|
||||
'...': ...,
|
||||
'urls': [
|
||||
{'index': 0, 'url': 'https://www.example.com/0'},
|
||||
{'index': 1, 'url': 'https://www.example.com/1'},
|
||||
],
|
||||
'data': (
|
||||
{'index': 2},
|
||||
{'index': 3},
|
||||
),
|
||||
'dict': {},
|
||||
}
|
||||
|
||||
|
||||
class TestTraversal:
|
||||
def test_dict_get(self):
|
||||
FALSE_VALUES = {
|
||||
'none': None,
|
||||
'false': False,
|
||||
'zero': 0,
|
||||
'empty_string': '',
|
||||
'empty_list': [],
|
||||
}
|
||||
d = {**FALSE_VALUES, 'a': 42}
|
||||
assert dict_get(d, 'a') == 42
|
||||
assert dict_get(d, 'b') is None
|
||||
assert dict_get(d, 'b', 42) == 42
|
||||
assert dict_get(d, ('a',)) == 42
|
||||
assert dict_get(d, ('b', 'a')) == 42
|
||||
assert dict_get(d, ('b', 'c', 'a', 'd')) == 42
|
||||
assert dict_get(d, ('b', 'c')) is None
|
||||
assert dict_get(d, ('b', 'c'), 42) == 42
|
||||
for key, false_value in FALSE_VALUES.items():
|
||||
assert dict_get(d, ('b', 'c', key)) is None
|
||||
assert dict_get(d, ('b', 'c', key), skip_false_values=False) == false_value
|
||||
|
||||
def test_traversal_base(self):
|
||||
assert traverse_obj(_TEST_DATA, ('str',)) == 'str', \
|
||||
'allow tuple path'
|
||||
assert traverse_obj(_TEST_DATA, ['str']) == 'str', \
|
||||
'allow list path'
|
||||
assert traverse_obj(_TEST_DATA, (value for value in ("str",))) == 'str', \
|
||||
'allow iterable path'
|
||||
assert traverse_obj(_TEST_DATA, 'str') == 'str', \
|
||||
'single items should be treated as a path'
|
||||
assert traverse_obj(_TEST_DATA, 100) == 100, \
|
||||
'allow int path'
|
||||
assert traverse_obj(_TEST_DATA, 1.2) == 1.2, \
|
||||
'allow float path'
|
||||
assert traverse_obj(_TEST_DATA, None) == _TEST_DATA, \
|
||||
'`None` should not perform any modification'
|
||||
|
||||
def test_traversal_ellipsis(self):
|
||||
assert traverse_obj(_TEST_DATA, ...) == [x for x in _TEST_DATA.values() if x not in (None, {})], \
|
||||
'`...` should give all non discarded values'
|
||||
assert traverse_obj(_TEST_DATA, ('urls', 0, ...)) == list(_TEST_DATA['urls'][0].values()), \
|
||||
'`...` selection for dicts should select all values'
|
||||
assert traverse_obj(_TEST_DATA, (..., ..., 'url')) == ['https://www.example.com/0', 'https://www.example.com/1'], \
|
||||
'nested `...` queries should work'
|
||||
assert traverse_obj(_TEST_DATA, (..., ..., 'index')) == list(range(4)), \
|
||||
'`...` query result should be flattened'
|
||||
assert traverse_obj(iter(range(4)), ...) == list(range(4)), \
|
||||
'`...` should accept iterables'
|
||||
|
||||
def test_traversal_function(self):
|
||||
filter_func = lambda x, y: x == 'urls' and isinstance(y, list)
|
||||
assert traverse_obj(_TEST_DATA, filter_func) == [_TEST_DATA['urls']], \
|
||||
'function as query key should perform a filter based on (key, value)'
|
||||
assert traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)) == ['str'], \
|
||||
'exceptions in the query function should be catched'
|
||||
assert traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0) == [0, 2], \
|
||||
'function key should accept iterables'
|
||||
# Wrong function signature should raise (debug mode)
|
||||
with pytest.raises(Exception):
|
||||
traverse_obj(_TEST_DATA, lambda a: ...)
|
||||
with pytest.raises(Exception):
|
||||
traverse_obj(_TEST_DATA, lambda a, b, c: ...)
|
||||
|
||||
def test_traversal_set(self):
|
||||
# transformation/type, like `expected_type`
|
||||
assert traverse_obj(_TEST_DATA, (..., {str.upper}, )) == ['STR'], \
|
||||
'Function in set should be a transformation'
|
||||
assert traverse_obj(_TEST_DATA, (..., {str})) == ['str'], \
|
||||
'Type in set should be a type filter'
|
||||
assert traverse_obj(_TEST_DATA, (..., {str, int})) == [100, 'str'], \
|
||||
'Multiple types in set should be a type filter'
|
||||
assert traverse_obj(_TEST_DATA, {dict}) == _TEST_DATA, \
|
||||
'A single set should be wrapped into a path'
|
||||
assert traverse_obj(_TEST_DATA, (..., {str.upper})) == ['STR'], \
|
||||
'Transformation function should not raise'
|
||||
expected = [x for x in map(str_or_none, _TEST_DATA.values()) if x is not None]
|
||||
assert traverse_obj(_TEST_DATA, (..., {str_or_none})) == expected, \
|
||||
'Function in set should be a transformation'
|
||||
assert traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})) == 'const', \
|
||||
'Function in set should always be called'
|
||||
# Empty sets, and sets with more than one item that are not made up solely of types, should raise
|
||||
with pytest.raises(Exception):
|
||||
traverse_obj(_TEST_DATA, set())
|
||||
with pytest.raises(Exception):
|
||||
traverse_obj(_TEST_DATA, {str.upper, str})
|
||||
|
||||
def test_traversal_slice(self):
|
||||
_SLICE_DATA = [0, 1, 2, 3, 4]
|
||||
|
||||
assert traverse_obj(_TEST_DATA, ('dict', slice(1))) is None, \
|
||||
'slice on a dictionary should not throw'
|
||||
assert traverse_obj(_SLICE_DATA, slice(1)) == _SLICE_DATA[:1], \
|
||||
'slice key should apply slice to sequence'
|
||||
assert traverse_obj(_SLICE_DATA, slice(1, 2)) == _SLICE_DATA[1:2], \
|
||||
'slice key should apply slice to sequence'
|
||||
assert traverse_obj(_SLICE_DATA, slice(1, 4, 2)) == _SLICE_DATA[1:4:2], \
|
||||
'slice key should apply slice to sequence'
|
||||
|
||||
def test_traversal_alternatives(self):
|
||||
assert traverse_obj(_TEST_DATA, 'fail', 'str') == 'str', \
|
||||
'multiple `paths` should be treated as alternative paths'
|
||||
assert traverse_obj(_TEST_DATA, 'str', 100) == 'str', \
|
||||
'alternatives should exit early'
|
||||
assert traverse_obj(_TEST_DATA, 'fail', 'fail') is None, \
|
||||
'alternatives should return `default` if exhausted'
|
||||
assert traverse_obj(_TEST_DATA, (..., 'fail'), 100) == 100, \
|
||||
'alternatives should track their own branching return'
|
||||
assert traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)) == list(_TEST_DATA['data']), \
|
||||
'alternatives on empty objects should search further'
|
||||
|
||||
def test_traversal_branching_nesting(self):
|
||||
assert traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')) == ['https://www.example.com/0'], \
|
||||
'tuple as key should be treated as branches'
|
||||
assert traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')) == ['https://www.example.com/0'], \
|
||||
'list as key should be treated as branches'
|
||||
assert traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))) == ['https://www.example.com/0'], \
|
||||
'double nesting in path should be treated as paths'
|
||||
assert traverse_obj(['0', [1, 2]], [(0, 1), 0]) == [1], \
|
||||
'do not fail early on branching'
|
||||
expected = ['https://www.example.com/0', 'https://www.example.com/1']
|
||||
assert traverse_obj(_TEST_DATA, ('urls', ((0, ('fail', 'url')), (1, 'url')))) == expected, \
|
||||
'tripple nesting in path should be treated as branches'
|
||||
assert traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))) == expected, \
|
||||
'ellipsis as branch path start gets flattened'
|
||||
|
||||
def test_traversal_dict(self):
|
||||
assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}) == {0: 100, 1: 1.2}, \
|
||||
'dict key should result in a dict with the same keys'
|
||||
expected = {0: 'https://www.example.com/0'}
|
||||
assert traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}) == expected, \
|
||||
'dict key should allow paths'
|
||||
expected = {0: ['https://www.example.com/0']}
|
||||
assert traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}) == expected, \
|
||||
'tuple in dict path should be treated as branches'
|
||||
assert traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}) == expected, \
|
||||
'double nesting in dict path should be treated as paths'
|
||||
expected = {0: ['https://www.example.com/1', 'https://www.example.com/0']}
|
||||
assert traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}) == expected, \
|
||||
'tripple nesting in dict path should be treated as branches'
|
||||
assert traverse_obj(_TEST_DATA, {0: 'fail'}) == {}, \
|
||||
'remove `None` values when top level dict key fails'
|
||||
assert traverse_obj(_TEST_DATA, {0: 'fail'}, default=...) == {0: ...}, \
|
||||
'use `default` if key fails and `default`'
|
||||
assert traverse_obj(_TEST_DATA, {0: 'dict'}) == {}, \
|
||||
'remove empty values when dict key'
|
||||
assert traverse_obj(_TEST_DATA, {0: 'dict'}, default=...) == {0: ...}, \
|
||||
'use `default` when dict key and `default`'
|
||||
assert traverse_obj(_TEST_DATA, {0: {0: 'fail'}}) == {}, \
|
||||
'remove empty values when nested dict key fails'
|
||||
assert traverse_obj(None, {0: 'fail'}) == {}, \
|
||||
'default to dict if pruned'
|
||||
assert traverse_obj(None, {0: 'fail'}, default=...) == {0: ...}, \
|
||||
'default to dict if pruned and default is given'
|
||||
assert traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...) == {0: {0: ...}}, \
|
||||
'use nested `default` when nested dict key fails and `default`'
|
||||
assert traverse_obj(_TEST_DATA, {0: ('dict', ...)}) == {}, \
|
||||
'remove key if branch in dict key not successful'
|
||||
|
||||
def test_traversal_default(self):
|
||||
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
||||
|
||||
assert traverse_obj(_DEFAULT_DATA, 'fail') is None, \
|
||||
'default value should be `None`'
|
||||
assert traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...) == ..., \
|
||||
'chained fails should result in default'
|
||||
assert traverse_obj(_DEFAULT_DATA, 'None', 'int') == 0, \
|
||||
'should not short cirquit on `None`'
|
||||
assert traverse_obj(_DEFAULT_DATA, 'fail', default=1) == 1, \
|
||||
'invalid dict key should result in `default`'
|
||||
assert traverse_obj(_DEFAULT_DATA, 'None', default=1) == 1, \
|
||||
'`None` is a deliberate sentinel and should become `default`'
|
||||
assert traverse_obj(_DEFAULT_DATA, ('list', 10)) is None, \
|
||||
'`IndexError` should result in `default`'
|
||||
assert traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1) == 1, \
|
||||
'if branched but not successful return `default` if defined, not `[]`'
|
||||
assert traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None) is None, \
|
||||
'if branched but not successful return `default` even if `default` is `None`'
|
||||
assert traverse_obj(_DEFAULT_DATA, (..., 'fail')) == [], \
|
||||
'if branched but not successful return `[]`, not `default`'
|
||||
assert traverse_obj(_DEFAULT_DATA, ('list', ...)) == [], \
|
||||
'if branched but object is empty return `[]`, not `default`'
|
||||
assert traverse_obj(None, ...) == [], \
|
||||
'if branched but object is `None` return `[]`, not `default`'
|
||||
assert traverse_obj({0: None}, (0, ...)) == [], \
|
||||
'if branched but state is `None` return `[]`, not `default`'
|
||||
|
||||
@pytest.mark.parametrize('path', [
|
||||
('fail', ...),
|
||||
(..., 'fail'),
|
||||
100 * ('fail',) + (...,),
|
||||
(...,) + 100 * ('fail',),
|
||||
])
|
||||
def test_traversal_branching(self, path):
|
||||
assert traverse_obj({}, path) == [], \
|
||||
'if branched but state is `None`, return `[]` (not `default`)'
|
||||
assert traverse_obj({}, 'fail', path) == [], \
|
||||
'if branching in last alternative and previous did not match, return `[]` (not `default`)'
|
||||
assert traverse_obj({0: 'x'}, 0, path) == 'x', \
|
||||
'if branching in last alternative and previous did match, return single value'
|
||||
assert traverse_obj({0: 'x'}, path, 0) == 'x', \
|
||||
'if branching in first alternative and non-branching path does match, return single value'
|
||||
assert traverse_obj({}, path, 'fail') is None, \
|
||||
'if branching in first alternative and non-branching path does not match, return `default`'
|
||||
|
||||
def test_traversal_expected_type(self):
|
||||
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
||||
|
||||
assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str) == 'str', \
|
||||
'accept matching `expected_type` type'
|
||||
assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int) is None, \
|
||||
'reject non matching `expected_type` type'
|
||||
assert traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)) == '0', \
|
||||
'transform type using type function'
|
||||
assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0) is None, \
|
||||
'wrap expected_type fuction in try_call'
|
||||
assert traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str) == ['str'], \
|
||||
'eliminate items that expected_type fails on'
|
||||
assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int) == {0: 100}, \
|
||||
'type as expected_type should filter dict values'
|
||||
assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none) == {0: '100', 1: '1.2'}, \
|
||||
'function as expected_type should transform dict values'
|
||||
assert traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int) == 1, \
|
||||
'expected_type should not filter non final dict values'
|
||||
assert traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int) == {0: {0: 100}}, \
|
||||
'expected_type should transform deep dict values'
|
||||
assert traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)) == [{0: ...}, {0: ...}], \
|
||||
'expected_type should transform branched dict values'
|
||||
assert traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int) == [4], \
|
||||
'expected_type regression for type matching in tuple branching'
|
||||
assert traverse_obj(_TEST_DATA, ['data', ...], expected_type=int) == [], \
|
||||
'expected_type regression for type matching in dict result'
|
||||
|
||||
def test_traversal_get_all(self):
|
||||
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
||||
|
||||
assert traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False) == 0, \
|
||||
'if not `get_all`, return only first matching value'
|
||||
assert traverse_obj(_GET_ALL_DATA, ..., get_all=False) == [0, 1, 2], \
|
||||
'do not overflatten if not `get_all`'
|
||||
|
||||
def test_traversal_casesense(self):
|
||||
_CASESENSE_DATA = {
|
||||
'KeY': 'value0',
|
||||
0: {
|
||||
'KeY': 'value1',
|
||||
0: {'KeY': 'value2'},
|
||||
},
|
||||
}
|
||||
|
||||
assert traverse_obj(_CASESENSE_DATA, 'key') is None, \
|
||||
'dict keys should be case sensitive unless `casesense`'
|
||||
assert traverse_obj(_CASESENSE_DATA, 'keY', casesense=False) == 'value0', \
|
||||
'allow non matching key case if `casesense`'
|
||||
assert traverse_obj(_CASESENSE_DATA, [0, ('keY',)], casesense=False) == ['value1'], \
|
||||
'allow non matching key case in branch if `casesense`'
|
||||
assert traverse_obj(_CASESENSE_DATA, [0, ([0, 'keY'],)], casesense=False) == ['value2'], \
|
||||
'allow non matching key case in branch path if `casesense`'
|
||||
|
||||
def test_traversal_traverse_string(self):
|
||||
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
|
||||
|
||||
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)) is None, \
|
||||
'do not traverse into string if not `traverse_string`'
|
||||
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), traverse_string=True) == 's', \
|
||||
'traverse into string if `traverse_string`'
|
||||
assert traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), traverse_string=True) == '.', \
|
||||
'traverse into converted data if `traverse_string`'
|
||||
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...), traverse_string=True) == 'str', \
|
||||
'`...` should result in string (same value) if `traverse_string`'
|
||||
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), traverse_string=True) == 'sr', \
|
||||
'`slice` should result in string if `traverse_string`'
|
||||
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), traverse_string=True) == 'str', \
|
||||
'function should result in string if `traverse_string`'
|
||||
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), traverse_string=True) == ['s', 'r'], \
|
||||
'branching should result in list if `traverse_string`'
|
||||
assert traverse_obj({}, (0, ...), traverse_string=True) == [], \
|
||||
'branching should result in list if `traverse_string`'
|
||||
assert traverse_obj({}, (0, lambda x, y: True), traverse_string=True) == [], \
|
||||
'branching should result in list if `traverse_string`'
|
||||
assert traverse_obj({}, (0, slice(1)), traverse_string=True) == [], \
|
||||
'branching should result in list if `traverse_string`'
|
||||
|
||||
def test_traversal_re(self):
    """Check `traverse_obj` when the traversed object is a `re.Match`.

    Covers `...`, filter functions, str keys (group names, with and
    without `casesense`) and int keys (group numbers), including the
    failing lookups that must fall back to the default (`None`).
    """
    # Group 1 is '12', group 2 (named 'group') is '3' and the optional
    # group 3 does not take part in the match, so `groups()` has a `None`
    mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
    assert traverse_obj(mobj, ...) == [x for x in mobj.groups() if x is not None], \
        '`...` on a `re.Match` should give its `groups()`'
    assert traverse_obj(mobj, lambda k, _: k in (0, 2)) == ['0123', '3'], \
        'function on a `re.Match` should give groupno, value starting at 0'
    assert traverse_obj(mobj, 'group') == '3', \
        'str key on a `re.Match` should give group with that name'
    # An int key selects by group number, so the message must not say "name"
    assert traverse_obj(mobj, 2) == '3', \
        'int key on a `re.Match` should give group with that number'
    assert traverse_obj(mobj, 'gRoUp', casesense=False) == '3', \
        'str key on a `re.Match` should respect casesense'
    assert traverse_obj(mobj, 'fail') is None, \
        'failing str key on a `re.Match` should return `default`'
    assert traverse_obj(mobj, 'gRoUpS', casesense=False) is None, \
        'failing str key on a `re.Match` should return `default`'
    assert traverse_obj(mobj, 8) is None, \
        'failing int key on a `re.Match` should return `default`'
    assert traverse_obj(mobj, lambda k, _: k in (0, 'group')) == ['0123', '3'], \
        'function on a `re.Match` should give group name as well'
|
||||
|
||||
def test_traversal_xml_etree(self):
    """Check `traverse_obj` when the traversed object is an `xml.etree.ElementTree.Element`."""
    root = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>''')
    children = list(root)
    second_country = [root[1]]

    # Element-level keys: empty str, tag name, `...`, filter functions and int index
    assert traverse_obj(root, '') == root, \
        'empty str key should return the element itself'
    assert traverse_obj(root, 'country') == children, \
        'str key should lead all children with that tag name'
    assert traverse_obj(root, ...) == children, \
        '`...` as key should return all children'
    assert traverse_obj(root, lambda _, x: x[0].text == '4') == second_country, \
        'function as key should get element as value'
    assert traverse_obj(root, lambda i, _: i == 1) == second_country, \
        'function as key should get index as key'
    assert traverse_obj(root, 0) == root[0], \
        'int key should return the nth child'

    # (path, expected result, failure message) for xpath-like str keys
    xpath_cases = (
        ('.//neighbor/@name',
         ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
         '`@<attribute>` at end of path should give that attribute'),
        ('//neighbor/@fail',
         [None, None, None, None, None],
         '`@<nonexistant>` at end of path should give `None`'),
        (('//neighbor/@', 2),
         {'name': 'Malaysia', 'direction': 'N'},
         '`@` should give the full attribute dict'),
        ('//year/text()',
         ['2008', '2011', '2011'],
         '`text()` at end of path should give the inner text'),
        ('//*[@direction]/@direction',
         ['E', 'W', 'N', 'W', 'E'],
         'full Python xpath features should be supported'),
        ((0, '@name'),
         'Liechtenstein',
         'special transformations should act on current element'),
        (('country', 0, ..., 'text()', {int_or_none}),
         [1, 2008, 141100],
         'special transformations should act on current element'),
    )
    for path, expected, message in xpath_cases:
        assert traverse_obj(root, path) == expected, message
|
||||
|
||||
def test_traversal_unbranching(self):
    """Check how the `all` and `any` path keys collapse the results of a (branching) path."""
    pair = (100, 1.2)
    sparse = ('dict', 'None', 100)
    wide = ('dict', 'None', 100, 1.2)

    # (path, expected result, failure message), checked in order
    collapse_cases = (
        ([pair, all], [100, 1.2], '`all` should give all results as list'),
        ([pair, any], 100, '`any` should give the first result'),
        ([100, all], [100], '`all` should give list if non branching'),
        ([100, any], 100, '`any` should give single item if non branching'),
        ([sparse, all], [100], '`all` should filter `None` and empty dict'),
        ([sparse, any], 100, '`any` should filter `None` and empty dict'),
    )
    for path, expected, message in collapse_cases:
        assert traverse_obj(_TEST_DATA, path) == expected, message

    # Inside a dict key every value path is collapsed on its own
    per_key_result = {'all': [100, 1.2], 'any': 100}
    assert traverse_obj(_TEST_DATA, [{
        'all': [wide, all],
        'any': [wide, any],
    }]) == per_key_result, \
        '`all`/`any` should apply to each dict path separately'
    assert traverse_obj(_TEST_DATA, [{
        'all': [wide, all],
        'any': [wide, any],
    }], get_all=False) == per_key_result, \
        '`all`/`any` should apply to dict regardless of `get_all`'

    # A `{float}` filter directly after `all`/`any` sees one collapsed value
    assert traverse_obj(_TEST_DATA, [wide, all, {float}]) is None, \
        '`all` should reset branching status'
    assert traverse_obj(_TEST_DATA, [wide, any, {float}]) is None, \
        '`any` should reset branching status'

    # Branching again with `...` after the collapse is still possible
    assert traverse_obj(_TEST_DATA, [wide, all, ..., {float}]) == [1.2], \
        '`all` should allow further branching'
    assert traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, ..., 'index']) == [0, 1], \
        '`any` should allow further branching'
|
||||
|
||||
def test_traversal_morsel(self):
    """Check `traverse_obj` when the traversed object is an `http.cookies.Morsel`."""
    attribute_names = (
        'expires', 'path', 'comment', 'domain', 'max-age',
        'secure', 'httponly', 'version', 'samesite',
    )
    # Pair each attribute with a distinct one-letter value: 'a' .. 'i'
    attributes = dict(zip(attribute_names, 'abcdefghi'))

    morsel = http.cookies.Morsel()
    morsel.set('item_key', 'item_value', 'coded_value')
    morsel.update(attributes)

    # Expected values: the attributes first, then the cookie's own key and value
    expected = {**attributes, 'key': 'item_key', 'value': 'item_value'}
    expected_values = list(expected.values())

    for name, wanted in expected.items():
        assert traverse_obj(morsel, name) == wanted, \
            'Morsel should provide access to all values'
    assert traverse_obj(morsel, ...) == expected_values, \
        '`...` should yield all values'
    assert traverse_obj(morsel, lambda k, v: True) == expected_values, \
        'function key should yield all values'
    assert traverse_obj(morsel, [(None,), any]) == morsel, \
        'Morsel should not be implicitly changed to dict on usage'
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import unittest
|
||||
import warnings
|
||||
|
@ -45,7 +44,6 @@ from yt_dlp.utils import (
|
|||
determine_ext,
|
||||
determine_file_encoding,
|
||||
dfxp2srt,
|
||||
dict_get,
|
||||
encode_base_n,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
|
@ -106,13 +104,11 @@ from yt_dlp.utils import (
|
|||
sanitize_url,
|
||||
shell_quote,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
strip_jsonp,
|
||||
strip_or_none,
|
||||
subtitles_filename,
|
||||
timeconvert,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
|
@ -755,28 +751,6 @@ class TestUtil(unittest.TestCase):
|
|||
self.assertRaises(
|
||||
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
|
||||
|
||||
def test_dict_get(self):
|
||||
FALSE_VALUES = {
|
||||
'none': None,
|
||||
'false': False,
|
||||
'zero': 0,
|
||||
'empty_string': '',
|
||||
'empty_list': [],
|
||||
}
|
||||
d = FALSE_VALUES.copy()
|
||||
d['a'] = 42
|
||||
self.assertEqual(dict_get(d, 'a'), 42)
|
||||
self.assertEqual(dict_get(d, 'b'), None)
|
||||
self.assertEqual(dict_get(d, 'b', 42), 42)
|
||||
self.assertEqual(dict_get(d, ('a', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', )), None)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
|
||||
for key, false_value in FALSE_VALUES.items():
|
||||
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
|
||||
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
|
||||
|
||||
def test_merge_dicts(self):
|
||||
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
|
||||
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
|
||||
|
@ -2039,359 +2013,6 @@ Line 1
|
|||
warnings.simplefilter('ignore')
|
||||
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
|
||||
|
||||
def test_traverse_obj(self):
|
||||
_TEST_DATA = {
|
||||
100: 100,
|
||||
1.2: 1.2,
|
||||
'str': 'str',
|
||||
'None': None,
|
||||
'...': ...,
|
||||
'urls': [
|
||||
{'index': 0, 'url': 'https://www.example.com/0'},
|
||||
{'index': 1, 'url': 'https://www.example.com/1'},
|
||||
],
|
||||
'data': (
|
||||
{'index': 2},
|
||||
{'index': 3},
|
||||
),
|
||||
'dict': {},
|
||||
}
|
||||
|
||||
# Test base functionality
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
||||
msg='allow tuple path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
|
||||
msg='allow list path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
|
||||
msg='allow iterable path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
|
||||
msg='single items should be treated as a path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
|
||||
|
||||
# Test Ellipsis behavior
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ...),
|
||||
(item for item in _TEST_DATA.values() if item not in (None, {})),
|
||||
msg='`...` should give all non discarded values')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, ...)), _TEST_DATA['urls'][0].values(),
|
||||
msg='`...` selection for dicts should select all values')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., ..., 'url')),
|
||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||
msg='nested `...` queries should work')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
|
||||
msg='`...` query result should be flattened')
|
||||
self.assertEqual(traverse_obj(iter(range(4)), ...), list(range(4)),
|
||||
msg='`...` should accept iterables')
|
||||
|
||||
# Test function as key
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
||||
[_TEST_DATA['urls']],
|
||||
msg='function as query key should perform a filter based on (key, value)')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
|
||||
msg='exceptions in the query function should be catched')
|
||||
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
|
||||
msg='function key should accept iterables')
|
||||
if __debug__:
|
||||
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||
traverse_obj(_TEST_DATA, lambda a: ...)
|
||||
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||
traverse_obj(_TEST_DATA, lambda a, b, c: ...)
|
||||
|
||||
# Test set as key (transformation/type, like `expected_type`)
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper}, )), ['STR'],
|
||||
msg='Function in set should be a transformation')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str})), ['str'],
|
||||
msg='Type in set should be a type filter')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {dict}), _TEST_DATA,
|
||||
msg='A single set should be wrapped into a path')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper})), ['STR'],
|
||||
msg='Transformation function should not raise')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str_or_none})),
|
||||
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
|
||||
msg='Function in set should be a transformation')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})), 'const',
|
||||
msg='Function in set should always be called')
|
||||
if __debug__:
|
||||
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||
traverse_obj(_TEST_DATA, set())
|
||||
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||
traverse_obj(_TEST_DATA, {str.upper, str})
|
||||
|
||||
# Test `slice` as a key
|
||||
_SLICE_DATA = [0, 1, 2, 3, 4]
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
|
||||
msg='slice on a dictionary should not throw')
|
||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
|
||||
msg='slice key should apply slice to sequence')
|
||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
|
||||
msg='slice key should apply slice to sequence')
|
||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
|
||||
msg='slice key should apply slice to sequence')
|
||||
|
||||
# Test alternative paths
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
||||
msg='multiple `paths` should be treated as alternative paths')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
|
||||
msg='alternatives should exit early')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
|
||||
msg='alternatives should return `default` if exhausted')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., 'fail'), 100), 100,
|
||||
msg='alternatives should track their own branching return')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)), list(_TEST_DATA['data']),
|
||||
msg='alternatives on empty objects should search further')
|
||||
|
||||
# Test branch and path nesting
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
|
||||
msg='tuple as key should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
|
||||
msg='list as key should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
|
||||
msg='double nesting in path should be treated as paths')
|
||||
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
|
||||
msg='do not fail early on branching')
|
||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
|
||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||
msg='tripple nesting in path should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))),
|
||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
||||
msg='ellipsis as branch path start gets flattened')
|
||||
|
||||
# Test dictionary as key
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
|
||||
msg='dict key should result in a dict with the same keys')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
|
||||
{0: 'https://www.example.com/0'},
|
||||
msg='dict key should allow paths')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
|
||||
{0: ['https://www.example.com/0']},
|
||||
msg='tuple in dict path should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
|
||||
{0: ['https://www.example.com/0']},
|
||||
msg='double nesting in dict path should be treated as paths')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
|
||||
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
||||
msg='tripple nesting in dict path should be treated as branches')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
||||
msg='remove `None` values when top level dict key fails')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=...), {0: ...},
|
||||
msg='use `default` if key fails and `default`')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
|
||||
msg='remove empty values when dict key')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=...), {0: ...},
|
||||
msg='use `default` when dict key and `default`')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
|
||||
msg='remove empty values when nested dict key fails')
|
||||
self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
|
||||
msg='default to dict if pruned')
|
||||
self.assertEqual(traverse_obj(None, {0: 'fail'}, default=...), {0: ...},
|
||||
msg='default to dict if pruned and default is given')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...), {0: {0: ...}},
|
||||
msg='use nested `default` when nested dict key fails and `default`')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', ...)}), {},
|
||||
msg='remove key if branch in dict key not successful')
|
||||
|
||||
# Testing default parameter behavior
|
||||
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
|
||||
msg='default value should be `None`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...), ...,
|
||||
msg='chained fails should result in default')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
|
||||
msg='should not short cirquit on `None`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
|
||||
msg='invalid dict key should result in `default`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
|
||||
msg='`None` is a deliberate sentinel and should become `default`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
|
||||
msg='`IndexError` should result in `default`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1,
|
||||
msg='if branched but not successful return `default` if defined, not `[]`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None), None,
|
||||
msg='if branched but not successful return `default` even if `default` is `None`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail')), [],
|
||||
msg='if branched but not successful return `[]`, not `default`')
|
||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', ...)), [],
|
||||
msg='if branched but object is empty return `[]`, not `default`')
|
||||
self.assertEqual(traverse_obj(None, ...), [],
|
||||
msg='if branched but object is `None` return `[]`, not `default`')
|
||||
self.assertEqual(traverse_obj({0: None}, (0, ...)), [],
|
||||
msg='if branched but state is `None` return `[]`, not `default`')
|
||||
|
||||
branching_paths = [
|
||||
('fail', ...),
|
||||
(..., 'fail'),
|
||||
100 * ('fail',) + (...,),
|
||||
(...,) + 100 * ('fail',),
|
||||
]
|
||||
for branching_path in branching_paths:
|
||||
self.assertEqual(traverse_obj({}, branching_path), [],
|
||||
msg='if branched but state is `None`, return `[]` (not `default`)')
|
||||
self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
|
||||
msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
|
||||
self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
|
||||
msg='if branching in last alternative and previous did match, return single value')
|
||||
self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
|
||||
msg='if branching in first alternative and non-branching path does match, return single value')
|
||||
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
|
||||
msg='if branching in first alternative and non-branching path does not match, return `default`')
|
||||
|
||||
# Testing expected_type behavior
|
||||
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
|
||||
'str', msg='accept matching `expected_type` type')
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
|
||||
None, msg='reject non matching `expected_type` type')
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
|
||||
'0', msg='transform type using type function')
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
|
||||
None, msg='wrap expected_type fuction in try_call')
|
||||
self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str),
|
||||
['str'], msg='eliminate items that expected_type fails on')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
|
||||
{0: 100}, msg='type as expected_type should filter dict values')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
|
||||
{0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int),
|
||||
1, msg='expected_type should not filter non final dict values')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
|
||||
{0: {0: 100}}, msg='expected_type should transform deep dict values')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)),
|
||||
[{0: ...}, {0: ...}], msg='expected_type should transform branched dict values')
|
||||
self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
|
||||
[4], msg='expected_type regression for type matching in tuple branching')
|
||||
self.assertEqual(traverse_obj(_TEST_DATA, ['data', ...], expected_type=int),
|
||||
[], msg='expected_type regression for type matching in dict result')
|
||||
|
||||
# Test get_all behavior
|
||||
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
||||
self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False), 0,
|
||||
msg='if not `get_all`, return only first matching value')
|
||||
self.assertEqual(traverse_obj(_GET_ALL_DATA, ..., get_all=False), [0, 1, 2],
|
||||
msg='do not overflatten if not `get_all`')
|
||||
|
||||
# Test casesense behavior
|
||||
_CASESENSE_DATA = {
|
||||
'KeY': 'value0',
|
||||
0: {
|
||||
'KeY': 'value1',
|
||||
0: {'KeY': 'value2'},
|
||||
},
|
||||
}
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
|
||||
msg='dict keys should be case sensitive unless `casesense`')
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
|
||||
casesense=False), 'value0',
|
||||
msg='allow non matching key case if `casesense`')
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
|
||||
casesense=False), ['value1'],
|
||||
msg='allow non matching key case in branch if `casesense`')
|
||||
self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
|
||||
casesense=False), ['value2'],
|
||||
msg='allow non matching key case in branch path if `casesense`')
|
||||
|
||||
# Test traverse_string behavior
|
||||
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
|
||||
msg='do not traverse into string if not `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
|
||||
traverse_string=True), 's',
|
||||
msg='traverse into string if `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
|
||||
traverse_string=True), '.',
|
||||
msg='traverse into converted data if `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...),
|
||||
traverse_string=True), 'str',
|
||||
msg='`...` should result in string (same value) if `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
|
||||
traverse_string=True), 'sr',
|
||||
msg='`slice` should result in string if `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
|
||||
traverse_string=True), 'str',
|
||||
msg='function should result in string if `traverse_string`')
|
||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
||||
traverse_string=True), ['s', 'r'],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
self.assertEqual(traverse_obj({}, (0, ...), traverse_string=True), [],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), traverse_string=True), [],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
|
||||
msg='branching should result in list if `traverse_string`')
|
||||
|
||||
# Test re.Match as input obj
|
||||
mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
|
||||
self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
|
||||
msg='`...` on a `re.Match` should give its `groups()`')
|
||||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
|
||||
msg='function on a `re.Match` should give groupno, value starting at 0')
|
||||
self.assertEqual(traverse_obj(mobj, 'group'), '3',
|
||||
msg='str key on a `re.Match` should give group with that name')
|
||||
self.assertEqual(traverse_obj(mobj, 2), '3',
|
||||
msg='int key on a `re.Match` should give group with that name')
|
||||
self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
|
||||
msg='str key on a `re.Match` should respect casesense')
|
||||
self.assertEqual(traverse_obj(mobj, 'fail'), None,
|
||||
msg='failing str key on a `re.Match` should return `default`')
|
||||
self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
|
||||
msg='failing str key on a `re.Match` should return `default`')
|
||||
self.assertEqual(traverse_obj(mobj, 8), None,
|
||||
msg='failing int key on a `re.Match` should return `default`')
|
||||
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
|
||||
msg='function on a `re.Match` should give group name as well')
|
||||
|
||||
# Test xml.etree.ElementTree.Element as input obj
|
||||
etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
|
||||
<data>
|
||||
<country name="Liechtenstein">
|
||||
<rank>1</rank>
|
||||
<year>2008</year>
|
||||
<gdppc>141100</gdppc>
|
||||
<neighbor name="Austria" direction="E"/>
|
||||
<neighbor name="Switzerland" direction="W"/>
|
||||
</country>
|
||||
<country name="Singapore">
|
||||
<rank>4</rank>
|
||||
<year>2011</year>
|
||||
<gdppc>59900</gdppc>
|
||||
<neighbor name="Malaysia" direction="N"/>
|
||||
</country>
|
||||
<country name="Panama">
|
||||
<rank>68</rank>
|
||||
<year>2011</year>
|
||||
<gdppc>13600</gdppc>
|
||||
<neighbor name="Costa Rica" direction="W"/>
|
||||
<neighbor name="Colombia" direction="E"/>
|
||||
</country>
|
||||
</data>''')
|
||||
self.assertEqual(traverse_obj(etree, ''), etree,
|
||||
msg='empty str key should return the element itself')
|
||||
self.assertEqual(traverse_obj(etree, 'country'), list(etree),
|
||||
msg='str key should lead all children with that tag name')
|
||||
self.assertEqual(traverse_obj(etree, ...), list(etree),
|
||||
msg='`...` as key should return all children')
|
||||
self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
|
||||
msg='function as key should get element as value')
|
||||
self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
|
||||
msg='function as key should get index as key')
|
||||
self.assertEqual(traverse_obj(etree, 0), etree[0],
|
||||
msg='int key should return the nth child')
|
||||
self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
|
||||
['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
|
||||
msg='`@<attribute>` at end of path should give that attribute')
|
||||
self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
|
||||
msg='`@<nonexistant>` at end of path should give `None`')
|
||||
self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
|
||||
msg='`@` should give the full attribute dict')
|
||||
self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
|
||||
msg='`text()` at end of path should give the inner text')
|
||||
self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
|
||||
msg='full Python xpath features should be supported')
|
||||
self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
|
||||
msg='special transformations should act on current element')
|
||||
self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100],
|
||||
msg='special transformations should act on current element')
|
||||
|
||||
def test_http_header_dict(self):
|
||||
headers = HTTPHeaderDict()
|
||||
headers['ytdl-test'] = b'0'
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import collections
|
||||
import contextlib
|
||||
import copy
|
||||
import datetime
|
||||
import datetime as dt
|
||||
import errno
|
||||
import fileinput
|
||||
import http.cookiejar
|
||||
|
@ -146,6 +146,7 @@ from .utils import (
|
|||
subtitles_filename,
|
||||
supports_terminal_sequences,
|
||||
system_identifier,
|
||||
filesize_from_tbr,
|
||||
timetuple_from_msec,
|
||||
to_high_limit_path,
|
||||
traverse_obj,
|
||||
|
@ -2628,7 +2629,7 @@ class YoutubeDL:
|
|||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||
# see http://bugs.python.org/issue1646728)
|
||||
with contextlib.suppress(ValueError, OverflowError, OSError):
|
||||
upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
|
||||
upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
|
||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||
|
||||
if not info_dict.get('release_year'):
|
||||
|
@ -2782,7 +2783,7 @@ class YoutubeDL:
|
|||
|
||||
get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
|
||||
if not get_from_start:
|
||||
info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
|
||||
info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
|
||||
if info_dict.get('is_live') and formats:
|
||||
formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
|
||||
if get_from_start and not formats:
|
||||
|
@ -2813,6 +2814,9 @@ class YoutubeDL:
|
|||
format['url'] = sanitize_url(format['url'])
|
||||
if format.get('ext') is None:
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
|
||||
if format.get('acodec') is None:
|
||||
format['acodec'] = format['ext']
|
||||
if format.get('protocol') is None:
|
||||
format['protocol'] = determine_protocol(format)
|
||||
if format.get('resolution') is None:
|
||||
|
@ -2823,9 +2827,8 @@ class YoutubeDL:
|
|||
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
|
||||
# For fragmented formats, "tbr" is often max bitrate and not average
|
||||
if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
|
||||
and info_dict.get('duration') and format.get('tbr')
|
||||
and not format.get('filesize') and not format.get('filesize_approx')):
|
||||
format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
|
||||
format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration'))
|
||||
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
|
||||
|
||||
# Safeguard against old/insecure infojson when using --load-info-json
|
||||
|
@ -3875,8 +3878,8 @@ class YoutubeDL:
|
|||
delim, (
|
||||
format_field(f, 'filesize', ' \t%s', func=format_bytes)
|
||||
or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
|
||||
or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
|
||||
None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
|
||||
or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
|
||||
self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)),
|
||||
format_field(f, 'tbr', '\t%dk', func=round),
|
||||
shorten_protocol_name(f.get('protocol', '')),
|
||||
delim,
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import datetime as dt
|
||||
import glob
|
||||
import http.cookiejar
|
||||
import http.cookies
|
||||
|
@ -15,7 +16,6 @@ import sys
|
|||
import tempfile
|
||||
import time
|
||||
import urllib.request
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum, auto
|
||||
from hashlib import pbkdf2_hmac
|
||||
|
||||
|
@ -594,7 +594,7 @@ class DataParser:
|
|||
|
||||
|
||||
def _mac_absolute_time_to_posix(timestamp):
|
||||
return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
|
||||
return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
|
||||
|
||||
|
||||
def _parse_safari_cookies_header(data, logger):
|
||||
|
|
|
@ -491,7 +491,7 @@ class FFmpegFD(ExternalFD):
|
|||
if not self.params.get('verbose'):
|
||||
args += ['-hide_banner']
|
||||
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
|
||||
|
||||
# These exist only for compatibility. Extractors should use
|
||||
# info_dict['downloader_options']['ffmpeg_args'] instead
|
||||
|
@ -615,6 +615,8 @@ class FFmpegFD(ExternalFD):
|
|||
else:
|
||||
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
||||
|
||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...))
|
||||
|
||||
args += self._configuration_args(('_o1', '_o', ''))
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
|
|
|
@ -590,6 +590,7 @@ from .facebook import (
|
|||
FacebookReelIE,
|
||||
FacebookAdsIE,
|
||||
)
|
||||
from .fathom import FathomIE
|
||||
from .fancode import (
|
||||
FancodeVodIE,
|
||||
FancodeLiveIE
|
||||
|
@ -989,6 +990,10 @@ from .lnkgo import (
|
|||
LnkGoIE,
|
||||
LnkIE,
|
||||
)
|
||||
from .loom import (
|
||||
LoomIE,
|
||||
LoomFolderIE,
|
||||
)
|
||||
from .lovehomeporn import LoveHomePornIE
|
||||
from .lrt import (
|
||||
LRTVODIE,
|
||||
|
@ -1750,6 +1755,7 @@ from .shahid import (
|
|||
ShahidIE,
|
||||
ShahidShowIE,
|
||||
)
|
||||
from .sharepoint import SharePointIE
|
||||
from .sharevideos import ShareVideosEmbedIE
|
||||
from .sibnet import SibnetEmbedIE
|
||||
from .shemaroome import ShemarooMeIE
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import functools
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@ -349,7 +349,7 @@ class ARDBetaMediathekIE(InfoExtractor):
|
|||
r'(?P<title>.*)',
|
||||
]
|
||||
|
||||
return traverse_obj(patterns, (..., {partial(re.match, string=title)}, {
|
||||
return traverse_obj(patterns, (..., {functools.partial(re.match, string=title)}, {
|
||||
'season_number': ('season_number', {int_or_none}),
|
||||
'episode_number': ('episode_number', {int_or_none}),
|
||||
'episode': ((
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import datetime
|
||||
import datetime as dt
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@ -71,9 +71,9 @@ class ATVAtIE(InfoExtractor):
|
|||
content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
|
||||
for id, content in enumerate(contentResource)]
|
||||
|
||||
time_of_request = datetime.datetime.now()
|
||||
not_before = time_of_request - datetime.timedelta(minutes=5)
|
||||
expire = time_of_request + datetime.timedelta(minutes=5)
|
||||
time_of_request = dt.datetime.now()
|
||||
not_before = time_of_request - dt.timedelta(minutes=5)
|
||||
expire = time_of_request + dt.timedelta(minutes=5)
|
||||
payload = {
|
||||
'content_ids': {
|
||||
content_id: content_ids,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import datetime
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import hmac
|
||||
|
||||
|
@ -12,7 +12,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
|||
|
||||
def _aws_execute_api(self, aws_dict, video_id, query=None):
|
||||
query = query or {}
|
||||
amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
|
||||
amz_date = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
|
||||
date = amz_date[:8]
|
||||
headers = {
|
||||
'Accept': 'application/json',
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from functools import partial
|
||||
import functools
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@ -50,7 +50,7 @@ class BibelTVBaseIE(InfoExtractor):
|
|||
**traverse_obj(data, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {partial(int_or_none, scale=1000)}),
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('schedulingStart', {parse_iso8601}),
|
||||
'season_number': 'seasonNumber',
|
||||
'episode_number': 'episodeNumber',
|
||||
|
|
|
@ -3,6 +3,7 @@ import urllib.parse
|
|||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
|
@ -11,8 +12,8 @@ from ..utils.traversal import traverse_obj
|
|||
|
||||
|
||||
class BoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)/file/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||
'info_dict': {
|
||||
|
@ -25,14 +26,36 @@ class BoxIE(InfoExtractor):
|
|||
'uploader_id': '235196876',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://utexas.app.box.com/s/2x6vanv85fdl8j2eqlcxmv0gp1wvps6e',
|
||||
'info_dict': {
|
||||
'id': '787379022466',
|
||||
'ext': 'mp4',
|
||||
'title': 'Webinar recording: Take the Leap!.mp4',
|
||||
'uploader': 'Patricia Mosele',
|
||||
'timestamp': 1615824864,
|
||||
'upload_date': '20210315',
|
||||
'uploader_id': '239068974',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
shared_name, file_id = self._match_valid_url(url).groups()
|
||||
webpage = self._download_webpage(url, file_id)
|
||||
request_token = self._parse_json(self._search_regex(
|
||||
r'Box\.config\s*=\s*({.+?});', webpage,
|
||||
'Box config'), file_id)['requestToken']
|
||||
webpage = self._download_webpage(url, file_id or shared_name)
|
||||
|
||||
if not file_id:
|
||||
post_stream_data = self._search_json(
|
||||
r'Box\.postStreamData\s*=', webpage, 'Box post-stream data', shared_name)
|
||||
shared_item = traverse_obj(
|
||||
post_stream_data, ('/app-api/enduserapp/shared-item', {dict})) or {}
|
||||
if shared_item.get('itemType') != 'file':
|
||||
raise ExtractorError('The requested resource is not a file', expected=True)
|
||||
|
||||
file_id = str(shared_item['itemID'])
|
||||
|
||||
request_token = self._search_json(
|
||||
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
|
||||
access_token = self._download_json(
|
||||
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
'Downloading token JSON metadata',
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import functools
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
|
@ -115,9 +115,9 @@ class BundestagIE(InfoExtractor):
|
|||
note='Downloading metadata overlay', fatal=False,
|
||||
), {
|
||||
'title': (
|
||||
{partial(get_element_text_and_html_by_tag, 'h3')}, 0,
|
||||
{partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
|
||||
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0,
|
||||
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
|
||||
}))
|
||||
|
||||
return result
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import base64
|
||||
import codecs
|
||||
import datetime
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
|
@ -134,7 +134,7 @@ class CDAIE(InfoExtractor):
|
|||
self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
|
||||
|
||||
cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
|
||||
if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
|
||||
if cached_bearer.get('valid_until', 0) > dt.datetime.now().timestamp() + 5:
|
||||
self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
|
||||
return
|
||||
|
||||
|
@ -154,7 +154,7 @@ class CDAIE(InfoExtractor):
|
|||
})
|
||||
self.cache.store(self._BEARER_CACHE, username, {
|
||||
'token': token_res['access_token'],
|
||||
'valid_until': token_res['expires_in'] + datetime.datetime.now().timestamp(),
|
||||
'valid_until': token_res['expires_in'] + dt.datetime.now().timestamp(),
|
||||
})
|
||||
self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ from ..networking.exceptions import (
|
|||
IncompleteRead,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
IDENTITY,
|
||||
JSON_LD_RE,
|
||||
|
@ -170,12 +171,12 @@ class InfoExtractor:
|
|||
Automatically calculated from width and height
|
||||
* dynamic_range The dynamic range of the video. One of:
|
||||
"SDR" (None), "HDR10", "HDR10+", "HDR12", "HLG", "DV"
|
||||
* tbr Average bitrate of audio and video in KBit/s
|
||||
* abr Average audio bitrate in KBit/s
|
||||
* tbr Average bitrate of audio and video in kbps (1000 bits/sec)
|
||||
* abr Average audio bitrate in kbps (1000 bits/sec)
|
||||
* acodec Name of the audio codec in use
|
||||
* asr Audio sampling rate in Hertz
|
||||
* audio_channels Number of audio channels
|
||||
* vbr Average video bitrate in KBit/s
|
||||
* vbr Average video bitrate in kbps (1000 bits/sec)
|
||||
* fps Frame rate
|
||||
* vcodec Name of the video codec in use
|
||||
* container Name of the container format
|
||||
|
@ -246,7 +247,8 @@ class InfoExtractor:
|
|||
* downloader_options A dictionary of downloader options
|
||||
(For internal use only)
|
||||
* http_chunk_size Chunk size for HTTP downloads
|
||||
* ffmpeg_args Extra arguments for ffmpeg downloader
|
||||
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
|
||||
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
|
||||
* is_dash_periods Whether the format is a result of merging
|
||||
multiple DASH periods.
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
|
@ -817,7 +819,7 @@ class InfoExtractor:
|
|||
else:
|
||||
return err.status in variadic(expected_status)
|
||||
|
||||
def _create_request(self, url_or_request, data=None, headers=None, query=None):
|
||||
def _create_request(self, url_or_request, data=None, headers=None, query=None, extensions=None):
|
||||
if isinstance(url_or_request, urllib.request.Request):
|
||||
self._downloader.deprecation_warning(
|
||||
'Passing a urllib.request.Request to _create_request() is deprecated. '
|
||||
|
@ -826,10 +828,11 @@ class InfoExtractor:
|
|||
elif not isinstance(url_or_request, Request):
|
||||
url_or_request = Request(url_or_request)
|
||||
|
||||
url_or_request.update(data=data, headers=headers, query=query)
|
||||
url_or_request.update(data=data, headers=headers, query=query, extensions=extensions)
|
||||
return url_or_request
|
||||
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
|
||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None,
|
||||
headers=None, query=None, expected_status=None, impersonate=None, require_impersonation=False):
|
||||
"""
|
||||
Return the response handle.
|
||||
|
||||
|
@ -860,8 +863,31 @@ class InfoExtractor:
|
|||
headers = (headers or {}).copy()
|
||||
headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
|
||||
|
||||
extensions = {}
|
||||
|
||||
if impersonate in (True, ''):
|
||||
impersonate = ImpersonateTarget()
|
||||
requested_targets = [
|
||||
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
|
||||
for t in variadic(impersonate)
|
||||
] if impersonate else []
|
||||
|
||||
available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
|
||||
if available_target:
|
||||
extensions['impersonate'] = available_target
|
||||
elif requested_targets:
|
||||
message = 'The extractor is attempting impersonation, but '
|
||||
message += (
|
||||
'no impersonate target is available' if not str(impersonate)
|
||||
else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
|
||||
info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation '
|
||||
'for information on installing the required dependencies')
|
||||
if require_impersonation:
|
||||
raise ExtractorError(f'{message}; {info_msg}', expected=True)
|
||||
self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True)
|
||||
|
||||
try:
|
||||
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
|
||||
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))
|
||||
except network_exceptions as err:
|
||||
if isinstance(err, HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
|
@ -880,13 +906,14 @@ class InfoExtractor:
|
|||
return False
|
||||
|
||||
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
|
||||
encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
encoding=None, data=None, headers={}, query={}, expected_status=None,
|
||||
impersonate=None, require_impersonation=False):
|
||||
"""
|
||||
Return a tuple (page content as string, URL handle).
|
||||
|
||||
Arguments:
|
||||
url_or_request -- plain text URL as a string or
|
||||
a urllib.request.Request object
|
||||
a yt_dlp.networking.Request object
|
||||
video_id -- Video/playlist/item identifier (string)
|
||||
|
||||
Keyword arguments:
|
||||
|
@ -911,13 +938,22 @@ class InfoExtractor:
|
|||
returning True if it should be accepted
|
||||
Note that this argument does not affect success status codes (2xx)
|
||||
which are always accepted.
|
||||
impersonate -- the impersonate target. Can be any of the following entities:
|
||||
- an instance of yt_dlp.networking.impersonate.ImpersonateTarget
|
||||
- a string in the format of CLIENT[:OS]
|
||||
- a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
|
||||
- a boolean value; True means any impersonate target is sufficient
|
||||
require_impersonation -- flag to toggle whether the request should raise an error
|
||||
if impersonation is not possible (bool, default: False)
|
||||
"""
|
||||
|
||||
# Strip hashes from the URL (#1038)
|
||||
if isinstance(url_or_request, str):
|
||||
url_or_request = url_or_request.partition('#')[0]
|
||||
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
|
||||
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data,
|
||||
headers=headers, query=query, expected_status=expected_status,
|
||||
impersonate=impersonate, require_impersonation=require_impersonation)
|
||||
if urlh is False:
|
||||
assert not fatal
|
||||
return False
|
||||
|
@ -1046,17 +1082,20 @@ class InfoExtractor:
|
|||
return getattr(ie, parser)(content, *args, **kwargs)
|
||||
|
||||
def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
|
||||
impersonate=None, require_impersonation=False):
|
||||
res = self._download_webpage_handle(
|
||||
url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
|
||||
data=data, headers=headers, query=query, expected_status=expected_status)
|
||||
data=data, headers=headers, query=query, expected_status=expected_status,
|
||||
impersonate=impersonate, require_impersonation=require_impersonation)
|
||||
if res is False:
|
||||
return res
|
||||
content, urlh = res
|
||||
return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh
|
||||
|
||||
def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
|
||||
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
|
||||
impersonate=None, require_impersonation=False):
|
||||
if self.get_param('load_pages'):
|
||||
url_or_request = self._create_request(url_or_request, data, headers, query)
|
||||
filename = self._request_dump_filename(url_or_request.url, video_id)
|
||||
|
@ -1079,6 +1118,8 @@ class InfoExtractor:
|
|||
'headers': headers,
|
||||
'query': query,
|
||||
'expected_status': expected_status,
|
||||
'impersonate': impersonate,
|
||||
'require_impersonation': require_impersonation,
|
||||
}
|
||||
if parser is None:
|
||||
kwargs.pop('transform_source')
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import json
|
||||
from socket import timeout
|
||||
import socket
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@ -56,7 +56,7 @@ class DTubeIE(InfoExtractor):
|
|||
try:
|
||||
self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
|
||||
self._downloader._opener.open(video_url, timeout=5).close()
|
||||
except timeout:
|
||||
except socket.timeout:
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, format_id))
|
||||
continue
|
||||
|
|
54
yt_dlp/extractor/fathom.py
Normal file
54
yt_dlp/extractor/fathom.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_id,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FathomIE(InfoExtractor):
    """Extractor for recordings published under fathom.video "share" URLs."""

    _VALID_URL = r'https?://(?:www\.)?fathom\.video/share/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://fathom.video/share/G9mkjkspnohVVZ_L5nrsoPycyWcB8y7s',
        'md5': '0decd5343b8f30ae268625e79a02b60f',
        'info_dict': {
            'id': '47200596',
            'ext': 'mp4',
            'title': 'eCom Inucbator - Coaching Session',
            'duration': 8125.380507,
            'timestamp': 1699048914,
            'upload_date': '20231103',
        },
    }, {
        'url': 'https://fathom.video/share/mEws3bybftHL2QLymxYEDeE21vtLxGVm',
        'md5': '4f5cb382126c22d1aba8a939f9c49690',
        'info_dict': {
            'id': '46812957',
            'ext': 'mp4',
            'title': 'Jon, Lawrence, Neman chat about practice',
            'duration': 3571.517847,
            'timestamp': 1698933600,
            'upload_date': '20231102',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a share page.

        The metadata is read from the JSON stored in the "data-page"
        attribute of the HTML fetched via get_element_html_by_id('app', ...);
        the "props" member of that JSON supplies the call id, the m3u8 URL
        and the remaining fields.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        app_html = get_element_html_by_id('app', webpage)
        props = traverse_obj(
            app_html, ({extract_attributes}, 'data-page', {json.loads}, 'props'))

        # The "call" object is mandatory: both the id and the stream URL come from it
        call = props['call']
        video_id = str(call['id'])

        formats = self._extract_m3u8_formats(call['video_url'], video_id, 'mp4')
        metadata = traverse_obj(props, {
            'title': ('head', 'title', {str}),
            'duration': ('duration', {float_or_none}),
            'timestamp': ('call', 'started_at', {parse_iso8601}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }
|
|
@ -1,6 +1,6 @@
|
|||
import base64
|
||||
import binascii
|
||||
import datetime
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
|
@ -422,7 +422,7 @@ class AwsIdp:
|
|||
months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
|
||||
days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
|
||||
|
||||
time_now = datetime.datetime.now(datetime.timezone.utc)
|
||||
time_now = dt.datetime.now(dt.timezone.utc)
|
||||
format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
|
||||
time_string = time_now.strftime(format_string)
|
||||
return time_string
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import datetime
|
||||
import datetime as dt
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -50,8 +50,8 @@ class JoqrAgIE(InfoExtractor):
|
|||
|
||||
def _extract_start_timestamp(self, video_id, is_live):
|
||||
def extract_start_time_from(date_str):
|
||||
dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
|
||||
date = dt.strftime('%Y%m%d')
|
||||
dt_ = datetime_from_str(date_str) + dt.timedelta(hours=9)
|
||||
date = dt_.strftime('%Y%m%d')
|
||||
start_time = self._search_regex(
|
||||
r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
|
||||
self._download_webpage(
|
||||
|
@ -60,7 +60,7 @@ class JoqrAgIE(InfoExtractor):
|
|||
errnote=f'Failed to download program list of {date}') or '',
|
||||
'start time', default=None)
|
||||
if start_time:
|
||||
return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
|
||||
return unified_timestamp(f'{dt_.strftime("%Y/%m/%d")} {start_time} +09:00')
|
||||
return None
|
||||
|
||||
start_timestamp = extract_start_time_from('today')
|
||||
|
@ -87,7 +87,7 @@ class JoqrAgIE(InfoExtractor):
|
|||
msg = 'This stream is not currently live'
|
||||
if release_timestamp:
|
||||
msg += (' and will start at '
|
||||
+ datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
|
||||
+ dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
|
||||
self.raise_no_formats(msg, expected=True)
|
||||
else:
|
||||
m3u8_path = self._search_regex(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import datetime
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
|
@ -185,7 +185,7 @@ class LeIE(InfoExtractor):
|
|||
|
||||
publish_time = parse_iso8601(self._html_search_regex(
|
||||
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
|
||||
delimiter=' ', timezone=datetime.timedelta(hours=8))
|
||||
delimiter=' ', timezone=dt.timedelta(hours=8))
|
||||
description = self._html_search_meta('description', page, fatal=False)
|
||||
|
||||
return {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from itertools import zip_longest
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
@ -156,7 +156,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):
|
|||
|
||||
def json2srt(self, transcript_lines, duration=None):
|
||||
srt_data = ''
|
||||
for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])):
|
||||
for line, (line_dict, next_dict) in enumerate(itertools.zip_longest(transcript_lines, transcript_lines[1:])):
|
||||
start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption']
|
||||
end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
|
||||
srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time),
|
||||
|
|
461
yt_dlp/extractor/loom.py
Normal file
461
yt_dlp/extractor/loom.py
Normal file
|
@ -0,0 +1,461 @@
|
|||
import json
|
||||
import textwrap
|
||||
import urllib.parse
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
get_first,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
update_url,
|
||||
url_or_none,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LoomIE(InfoExtractor):
|
||||
IE_NAME = 'loom'
|
||||
_VALID_URL = r'https?://(?:www\.)?loom\.com/(?:share|embed)/(?P<id>[\da-f]{32})'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
# m3u8 raw-url, mp4 transcoded-url, cdn url == raw-url, json subs only
|
||||
'url': 'https://www.loom.com/share/43d05f362f734614a2e81b4694a3a523',
|
||||
'md5': 'bfc2d7e9c2e0eb4813212230794b6f42',
|
||||
'info_dict': {
|
||||
'id': '43d05f362f734614a2e81b4694a3a523',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Ruler for Windows - 28 March 2022',
|
||||
'uploader': 'wILLIAM PIP',
|
||||
'upload_date': '20220328',
|
||||
'timestamp': 1648454238,
|
||||
'duration': 27,
|
||||
},
|
||||
}, {
|
||||
# webm raw-url, mp4 transcoded-url, cdn url == transcoded-url, no subs
|
||||
'url': 'https://www.loom.com/share/c43a642f815f4378b6f80a889bb73d8d',
|
||||
'md5': '70f529317be8cf880fcc2c649a531900',
|
||||
'info_dict': {
|
||||
'id': 'c43a642f815f4378b6f80a889bb73d8d',
|
||||
'ext': 'webm',
|
||||
'title': 'Lilah Nielsen Intro Video',
|
||||
'uploader': 'Lilah Nielsen',
|
||||
'upload_date': '20200826',
|
||||
'timestamp': 1598480716,
|
||||
'duration': 20,
|
||||
},
|
||||
}, {
|
||||
# m3u8 raw-url, mp4 transcoded-url, cdn url == raw-url, vtt sub and json subs
|
||||
'url': 'https://www.loom.com/share/9458bcbf79784162aa62ffb8dd66201b',
|
||||
'md5': '51737ec002969dd28344db4d60b9cbbb',
|
||||
'info_dict': {
|
||||
'id': '9458bcbf79784162aa62ffb8dd66201b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Sharing screen with gpt-4',
|
||||
'description': 'Sharing screen with GPT 4 vision model and asking questions to guide through blender.',
|
||||
'uploader': 'Suneel Matham',
|
||||
'chapters': 'count:3',
|
||||
'upload_date': '20231109',
|
||||
'timestamp': 1699518978,
|
||||
'duration': 93,
|
||||
},
|
||||
}, {
|
||||
# mpd raw-url, mp4 transcoded-url, cdn url == raw-url, no subs
|
||||
'url': 'https://www.loom.com/share/24351eb8b317420289b158e4b7e96ff2',
|
||||
'info_dict': {
|
||||
'id': '24351eb8b317420289b158e4b7e96ff2',
|
||||
'ext': 'webm',
|
||||
'title': 'OMFG clown',
|
||||
'description': 'md5:285c5ee9d62aa087b7e3271b08796815',
|
||||
'uploader': 'MrPumkin B',
|
||||
'upload_date': '20210924',
|
||||
'timestamp': 1632519618,
|
||||
'duration': 210,
|
||||
},
|
||||
'params': {'skip_download': 'dash'},
|
||||
}, {
|
||||
# password-protected
|
||||
'url': 'https://www.loom.com/share/50e26e8aeb7940189dff5630f95ce1f4',
|
||||
'md5': '5cc7655e7d55d281d203f8ffd14771f7',
|
||||
'info_dict': {
|
||||
'id': '50e26e8aeb7940189dff5630f95ce1f4',
|
||||
'ext': 'mp4',
|
||||
'title': 'iOS Mobile Upload',
|
||||
'uploader': 'Simon Curran',
|
||||
'upload_date': '20200520',
|
||||
'timestamp': 1590000123,
|
||||
'duration': 35,
|
||||
},
|
||||
'params': {'videopassword': 'seniorinfants2'},
|
||||
}, {
|
||||
# embed, transcoded-url endpoint sends empty JSON response
|
||||
'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'md5': '8488817242a0db1cb2ad0ea522553cf6',
|
||||
'info_dict': {
|
||||
'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
|
||||
'ext': 'mp4',
|
||||
'title': 'CF Reset User\'s Password',
|
||||
'uploader': 'Aimee Heintz',
|
||||
'upload_date': '20220707',
|
||||
'timestamp': 1657216459,
|
||||
'duration': 181,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse JSON'],
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.loom.com/community/e1229802a8694a09909e8ba0fbb6d073-pg',
|
||||
'md5': 'ec838cd01b576cf0386f32e1ae424609',
|
||||
'info_dict': {
|
||||
'id': 'e1229802a8694a09909e8ba0fbb6d073',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rexie Jane Cimafranca - Founder\'s Presentation',
|
||||
'uploader': 'Rexie Cimafranca',
|
||||
'upload_date': '20230213',
|
||||
'duration': 247,
|
||||
'timestamp': 1676274030,
|
||||
},
|
||||
}]
|
||||
|
||||
_GRAPHQL_VARIABLES = {
|
||||
'GetVideoSource': {
|
||||
'acceptableMimes': ['DASH', 'M3U8', 'MP4'],
|
||||
},
|
||||
}
|
||||
_GRAPHQL_QUERIES = {
|
||||
'GetVideoSSR': textwrap.dedent('''\
|
||||
query GetVideoSSR($videoId: ID!, $password: String) {
|
||||
getVideo(id: $videoId, password: $password) {
|
||||
__typename
|
||||
... on PrivateVideo {
|
||||
id
|
||||
status
|
||||
message
|
||||
__typename
|
||||
}
|
||||
... on VideoPasswordMissingOrIncorrect {
|
||||
id
|
||||
message
|
||||
__typename
|
||||
}
|
||||
... on RegularUserVideo {
|
||||
id
|
||||
__typename
|
||||
createdAt
|
||||
description
|
||||
download_enabled
|
||||
folder_id
|
||||
is_protected
|
||||
needs_password
|
||||
owner {
|
||||
display_name
|
||||
__typename
|
||||
}
|
||||
privacy
|
||||
s3_id
|
||||
name
|
||||
video_properties {
|
||||
avgBitRate
|
||||
client
|
||||
camera_enabled
|
||||
client_version
|
||||
duration
|
||||
durationMs
|
||||
format
|
||||
height
|
||||
microphone_enabled
|
||||
os
|
||||
os_version
|
||||
recordingClient
|
||||
recording_type
|
||||
recording_version
|
||||
screen_type
|
||||
tab_audio
|
||||
trim_duration
|
||||
width
|
||||
__typename
|
||||
}
|
||||
playable_duration
|
||||
source_duration
|
||||
visibility
|
||||
}
|
||||
}
|
||||
}\n'''),
|
||||
'GetVideoSource': textwrap.dedent('''\
|
||||
query GetVideoSource($videoId: ID!, $password: String, $acceptableMimes: [CloudfrontVideoAcceptableMime]) {
|
||||
getVideo(id: $videoId, password: $password) {
|
||||
... on RegularUserVideo {
|
||||
id
|
||||
nullableRawCdnUrl(acceptableMimes: $acceptableMimes, password: $password) {
|
||||
url
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
}\n'''),
|
||||
'FetchVideoTranscript': textwrap.dedent('''\
|
||||
query FetchVideoTranscript($videoId: ID!, $password: String) {
|
||||
fetchVideoTranscript(videoId: $videoId, password: $password) {
|
||||
... on VideoTranscriptDetails {
|
||||
id
|
||||
video_id
|
||||
source_url
|
||||
captions_source_url
|
||||
__typename
|
||||
}
|
||||
... on GenericError {
|
||||
message
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
}\n'''),
|
||||
'FetchChapters': textwrap.dedent('''\
|
||||
query FetchChapters($videoId: ID!, $password: String) {
|
||||
fetchVideoChapters(videoId: $videoId, password: $password) {
|
||||
... on VideoChapters {
|
||||
video_id
|
||||
content
|
||||
__typename
|
||||
}
|
||||
... on EmptyChaptersPayload {
|
||||
content
|
||||
__typename
|
||||
}
|
||||
... on InvalidRequestWarning {
|
||||
message
|
||||
__typename
|
||||
}
|
||||
... on Error {
|
||||
message
|
||||
__typename
|
||||
}
|
||||
__typename
|
||||
}
|
||||
}\n'''),
|
||||
}
|
||||
_APOLLO_GRAPHQL_VERSION = '0a1856c'
|
||||
|
||||
def _call_graphql_api(self, operations, video_id, note=None, errnote=None):
    """Send one or more named GraphQL operations to Loom in a single batched POST.

    `operations` is passed through `variadic`, so it may be one operation name or
    several. Each name must be a key of `_GRAPHQL_QUERIES`; per-operation extra
    variables are looked up in `_GRAPHQL_VARIABLES`. Returns the result of
    `_download_json` for the request.
    """
    video_password = self.get_param('videopassword')
    client_version = self._APOLLO_GRAPHQL_VERSION

    batch = []
    for operation_name in variadic(operations):
        variables = {
            'videoId': video_id,
            'password': video_password,
        }
        # Operation-specific variables are merged last, so they win on key clashes
        variables.update(self._GRAPHQL_VARIABLES.get(operation_name, {}))
        batch.append({
            'operationName': operation_name,
            'variables': variables,
            'query': self._GRAPHQL_QUERIES[operation_name],
        })

    request_headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
        'x-loom-request-source': f'loom_web_{client_version}',
        'apollographql-client-name': 'web',
        'apollographql-client-version': client_version,
    }
    return self._download_json(
        'https://www.loom.com/graphql', video_id,
        note or 'Downloading GraphQL JSON',
        errnote or 'Failed to download GraphQL JSON',
        headers=request_headers,
        data=json.dumps(batch, separators=(',', ':')).encode())
|
||||
|
||||
def _call_url_api(self, endpoint, video_id):
    """Request a media URL from the campaigns session API for `video_id`.

    POSTs a small JSON body (fresh random `anonID`, the configured video password)
    to the given `endpoint` with `fatal=False`, then returns the response's `url`
    field filtered through `url_or_none`.
    """
    api_url = f'https://www.loom.com/api/campaigns/sessions/{video_id}/{endpoint}'
    payload = {
        'anonID': str(uuid.uuid4()),
        'deviceID': None,
        'force_original': False,  # HTTP error 401 if True
        'password': self.get_param('videopassword'),
    }
    response = self._download_json(
        api_url, video_id,
        f'Downloading {endpoint} JSON', f'Failed to download {endpoint} JSON',
        fatal=False,
        headers={'Accept': 'application/json', 'Content-Type': 'application/json'},
        data=json.dumps(payload, separators=(',', ':')).encode())

    return traverse_obj(response, ('url', {url_or_none}))
|
||||
|
||||
def _extract_formats(self, video_id, metadata, gql_data):
    """Build the format list for a Loom video.

    Sources are queried in this order: the `raw-url` endpoint, the `transcoded-url`
    endpoint, and finally the CDN URL found in `gql_data` (only used when it is not
    a duplicate of one of the endpoint URLs, ignoring query strings).
    """
    # Width/height/acodec hints; only merged into direct (non-manifest) formats below
    http_properties = traverse_obj(metadata, ('video_properties', {
        'width': ('width', {int_or_none}),
        'height': ('height', {int_or_none}),
        'acodec': ('microphone_enabled', {lambda x: 'none' if x is False else None}),
    }))

    def iter_formats(format_url, format_id, quality):
        # Yields format dicts for a single source URL; yields nothing for a falsy URL
        if not format_url:
            return

        ext = determine_ext(format_url)
        # Query string of the source URL, reused for the fields set below
        query = urllib.parse.urlparse(format_url).query

        if ext == 'm3u8':
            # Extract pre-merged HLS formats to avoid buggy parsing of metadata in split playlists
            merged_url = format_url.replace('-split.m3u8', '.m3u8')
            hls_formats = self._extract_m3u8_formats(
                merged_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
            for hls_format in hls_formats:
                yield {
                    **hls_format,
                    'url': update_url(hls_format['url'], query=query),
                    'extra_param_to_segment_url': query,
                }
            return

        if ext == 'mpd':
            dash_formats = self._extract_mpd_formats(
                format_url, video_id, mpd_id=f'dash-{format_id}', fatal=False)
            for dash_format in dash_formats:
                yield {
                    **dash_format,
                    'extra_param_to_segment_url': query,
                    'quality': quality,
                }
            return

        yield {
            'url': format_url,
            'ext': ext,
            'format_id': f'http-{format_id}',
            'quality': quality,
            **http_properties,
        }

    formats = []
    endpoint_urls = []
    endpoint_sources = (
        ('raw-url', 'raw', 1),  # original quality
        ('transcoded-url', 'transcoded', -1),  # transcoded quality
    )
    for endpoint, format_id, quality in endpoint_sources:
        endpoint_url = self._call_url_api(endpoint, video_id)
        formats.extend(iter_formats(endpoint_url, format_id, quality=quality))
        endpoint_urls.append(endpoint_url)

    cdn_url = get_first(gql_data, ('data', 'getVideo', 'nullableRawCdnUrl', 'url', {url_or_none}))
    # cdn_url is usually a dupe, but the raw-url/transcoded-url endpoints could return errors
    known_urls = [update_url(known, query=None) for known in endpoint_urls if known]
    if cdn_url and update_url(cdn_url, query=None) not in known_urls:
        formats.extend(iter_formats(cdn_url, 'cdn', quality=0))  # could be original or transcoded

    return formats
|
||||
|
||||
def _real_extract(self, url):
    """Extract a single Loom video: metadata, chapters, formats and subtitles.

    Raises an expected ExtractorError when the metadata response reports
    `VideoPasswordMissingOrIncorrect`, with a message that depends on whether a
    video password was supplied.
    """
    video_id = self._match_id(url)
    ssr_response = self._call_graphql_api(
        'GetVideoSSR', video_id, 'Downloading GraphQL metadata JSON')
    metadata = get_first(ssr_response, ('data', 'getVideo', {dict})) or {}

    if metadata.get('__typename') == 'VideoPasswordMissingOrIncorrect':
        if self.get_param('videopassword'):
            raise ExtractorError('Invalid video password', expected=True)
        raise ExtractorError(
            'This video is password-protected, use the --video-password option', expected=True)

    gql_data = self._call_graphql_api(['FetchChapters', 'FetchVideoTranscript', 'GetVideoSource'], video_id)
    duration = traverse_obj(metadata, ('video_properties', 'duration', {int_or_none}))

    chapters_text = get_first(gql_data, ('data', 'fetchVideoChapters', 'content', {str}))
    chapters = self._extract_chapters_from_description(chapters_text, duration) or None

    formats = self._extract_formats(video_id, metadata, gql_data)

    # Both transcript URL fields are collected as subtitle entries under 'en'
    transcript_tracks = traverse_obj(gql_data, (
        ..., 'data', 'fetchVideoTranscript',
        ('source_url', 'captions_source_url'), {
            'url': {url_or_none},
        })) or None

    metadata_fields = traverse_obj(metadata, {
        'title': ('name', {str}),
        'description': ('description', {str}),
        'uploader': ('owner', 'display_name', {str}),
        'timestamp': ('createdAt', {parse_iso8601}),
    })

    return {
        'id': video_id,
        'duration': duration,
        'chapters': chapters,
        'formats': formats,
        'subtitles': filter_dict({'en': transcript_tracks}),
        **metadata_fields,
    }
|
||||
|
||||
|
||||
class LoomFolderIE(InfoExtractor):
    """Playlist extractor for shared Loom folders; subfolders are walked recursively."""

    IE_NAME = 'loom:folder'
    _VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P<id>[\da-f]{32})'
    _TESTS = [{
        # 2 subfolders, no videos in root
        'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c',
        'playlist_mincount': 16,
        'info_dict': {
            'id': '997db4db046f43e5912f10dc5f817b5c',
            'title': 'Blending Lessons',
        },
    }, {
        # only videos, no subfolders
        'url': 'https://www.loom.com/share/folder/9a8a87f6b6f546d9a400c8e7575ff7f2',
        'playlist_mincount': 12,
        'info_dict': {
            'id': '9a8a87f6b6f546d9a400c8e7575ff7f2',
            'title': 'List A- a, i, o',
        },
    }, {
        # videos in root and empty subfolder
        'url': 'https://www.loom.com/share/folder/886e534218c24fd292e97e9563078cc4',
        'playlist_mincount': 21,
        'info_dict': {
            'id': '886e534218c24fd292e97e9563078cc4',
            'title': 'Medicare Agent Training videos',
        },
    }, {
        # videos in root and videos in subfolders
        'url': 'https://www.loom.com/share/folder/b72c4ecdf04745da9403926d80a40c38',
        'playlist_mincount': 21,
        'info_dict': {
            'id': 'b72c4ecdf04745da9403926d80a40c38',
            'title': 'Quick Altos Q & A Tutorials',
        },
    }, {
        # recursive folder extraction
        'url': 'https://www.loom.com/share/folder/8b458a94e0e4449b8df9ea7a68fafc4e',
        'playlist_count': 23,
        'info_dict': {
            'id': '8b458a94e0e4449b8df9ea7a68fafc4e',
            'title': 'Sezer Texting Guide',
        },
    }, {
        # more than 50 videos in 1 folder
        'url': 'https://www.loom.com/share/folder/e056a91d290d47ca9b00c9d1df56c463',
        'playlist_mincount': 61,
        'info_dict': {
            'id': 'e056a91d290d47ca9b00c9d1df56c463',
            'title': 'User Videos',
        },
    }, {
        # many subfolders
        'url': 'https://www.loom.com/share/folder/c2dde8cc67454f0e99031677279d8954',
        'playlist_mincount': 75,
        'info_dict': {
            'id': 'c2dde8cc67454f0e99031677279d8954',
            'title': 'Honors 1',
        },
    }, {
        'url': 'https://www.loom.com/share/folder/bae17109a68146c7803454f2893c8cf8/Edpuzzle',
        'only_matching': True,
    }]

    def _extract_folder_data(self, folder_id):
        """Download the JSON listing for one folder from the v1 folders endpoint."""
        folder_api_url = f'https://www.loom.com/v1/folders/{folder_id}'
        return self._download_json(
            folder_api_url, folder_id, 'Downloading folder info JSON',
            query={'limit': '10000'})

    def _extract_folder_entries(self, folder_id, initial_folder_data=None):
        """Yield url_result entries for every video in a folder, then in its subfolders.

        `initial_folder_data` lets the caller reuse an already-downloaded listing;
        when it is falsy the listing is fetched here.
        """
        if initial_folder_data:
            listing = initial_folder_data
        else:
            listing = self._extract_folder_data(folder_id)

        videos = traverse_obj(listing, ('videos', lambda _, v: v['id']))
        for video in videos:
            video_id = video['id']
            video_url = f'https://www.loom.com/share/{video_id}'
            yield self.url_result(video_url, LoomIE, video_id, video.get('name'))

        # Recurse into subfolders (entries whose id equals this folder's id are skipped)
        subfolder_ids = traverse_obj(listing, (
            'folders', lambda _, v: v['id'] != folder_id, 'id', {str}))
        for subfolder_id in subfolder_ids:
            yield from self._extract_folder_entries(subfolder_id)

    def _real_extract(self, url):
        """Return a playlist for the folder URL, titled with the stripped folder name."""
        folder_id = self._match_id(url)
        root_listing = self._extract_folder_data(folder_id)

        entries = self._extract_folder_entries(folder_id, root_listing)
        folder_title = traverse_obj(root_listing, ('folder', 'name', {str.strip}))
        return self.playlist_result(entries, folder_id, folder_title)
|
|
@ -1,4 +1,3 @@
|
|||
from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
traverse_obj,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from base64 import b64decode
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@ -81,7 +81,7 @@ class MicrosoftStreamIE(InfoExtractor):
|
|||
'url': thumbnail_url,
|
||||
}
|
||||
thumb_name = url_basename(thumbnail_url)
|
||||
thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
|
||||
thumb_name = str(base64.b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
|
||||
thumb.update(parse_resolution(thumb_name))
|
||||
thumbnails.append(thumb)
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import datetime
|
||||
import datetime as dt
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
|
@ -151,7 +151,7 @@ class MotherlessIE(InfoExtractor):
|
|||
'd': 'days',
|
||||
}
|
||||
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||
upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||
upload_date = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||
|
||||
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
|
||||
uploader_id = self._html_search_regex(
|
||||
|
|
|
@ -4,8 +4,8 @@ import hmac
|
|||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
import time
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@ -388,7 +388,7 @@ class NaverNowIE(NaverBaseIE):
|
|||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
qs = parse_qs(urlparse(url).query)
|
||||
qs = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
|
||||
|
||||
if not self._yes_playlist(show_id, qs.get('shareHightlight')):
|
||||
return self._extract_highlight(show_id, qs['shareHightlight'][0])
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import hashlib
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
from hashlib import md5
|
||||
from random import randint
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_encrypt, pkcs7_padding
|
||||
|
@ -34,7 +34,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
|||
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
|
||||
|
||||
message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1')
|
||||
msg_digest = md5(message).hexdigest()
|
||||
msg_digest = hashlib.md5(message).hexdigest()
|
||||
|
||||
data = pkcs7_padding(list(str.encode(
|
||||
f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}')))
|
||||
|
@ -53,7 +53,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
|||
'__csrf': '',
|
||||
'os': 'pc',
|
||||
'channel': 'undefined',
|
||||
'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}',
|
||||
'requestId': f'{int(time.time() * 1000)}_{random.randint(0, 1000):04}',
|
||||
**traverse_obj(self._get_cookies(self._API_BASE), {
|
||||
'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
|
||||
})
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
import datetime
|
||||
import datetime as dt
|
||||
import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
from urllib.parse import urlparse
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..networking import Request
|
||||
|
@ -820,12 +819,12 @@ class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
|
|||
'playlist_mincount': 1610,
|
||||
}]
|
||||
|
||||
_START_DATE = datetime.date(2007, 1, 1)
|
||||
_START_DATE = dt.date(2007, 1, 1)
|
||||
_RESULTS_PER_PAGE = 32
|
||||
_MAX_PAGES = 50
|
||||
|
||||
def _entries(self, url, item_id, start_date=None, end_date=None):
|
||||
start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()
|
||||
start_date, end_date = start_date or self._START_DATE, end_date or dt.datetime.now().date()
|
||||
|
||||
# If the last page has a full page of videos, we need to break down the query interval further
|
||||
last_page_len = len(list(self._get_entries_for_date(
|
||||
|
@ -957,7 +956,7 @@ class NiconicoLiveIE(InfoExtractor):
|
|||
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
|
||||
})
|
||||
|
||||
hostname = remove_start(urlparse(urlh.url).hostname, 'sp.')
|
||||
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
|
||||
latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
|
||||
if latency not in self._KNOWN_LATENCY:
|
||||
latency = 'high'
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import calendar
|
||||
import json
|
||||
import datetime as dt
|
||||
import functools
|
||||
from datetime import datetime, timezone
|
||||
from random import random
|
||||
import json
|
||||
import random
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
|
@ -243,7 +243,7 @@ class PanoptoIE(PanoptoBaseIE):
|
|||
invocation_id = delivery_info.get('InvocationId')
|
||||
stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
|
||||
if invocation_id and stream_id and duration:
|
||||
timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/'
|
||||
timestamp_str = f'/Date({calendar.timegm(dt.datetime.now(dt.timezone.utc).timetuple())}000)/'
|
||||
data = {
|
||||
'streamRequests': [
|
||||
{
|
||||
|
@ -415,7 +415,7 @@ class PanoptoIE(PanoptoBaseIE):
|
|||
'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), expected_type=lambda x: x or None),
|
||||
'timestamp': session_start_time - 11640000000 if session_start_time else None,
|
||||
'duration': delivery.get('Duration'),
|
||||
'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}',
|
||||
'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random.random()}',
|
||||
'average_rating': delivery.get('AverageRating'),
|
||||
'chapters': self._extract_chapters(timestamps),
|
||||
'uploader': delivery.get('OwnerDisplayName') or None,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from uuid import uuid4
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
|
@ -51,7 +51,7 @@ class PolsatGoIE(InfoExtractor):
|
|||
}
|
||||
|
||||
def _call_api(self, endpoint, media_id, method, params):
|
||||
rand_uuid = str(uuid4())
|
||||
rand_uuid = str(uuid.uuid4())
|
||||
res = self._download_json(
|
||||
f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id,
|
||||
note=f'Downloading {method} JSON metadata',
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import datetime as dt
|
||||
import json
|
||||
from urllib.parse import unquote
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import functools
|
||||
|
@ -114,7 +115,7 @@ class Pr0grammIE(InfoExtractor):
|
|||
cookies = self._get_cookies(self.BASE_URL)
|
||||
if 'me' not in cookies:
|
||||
self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
|
||||
if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
|
||||
if traverse_obj(cookies, ('me', {lambda x: x.value}, {urllib.parse.unquote}, {json.loads}, 'verified')):
|
||||
flags |= 0b00110
|
||||
|
||||
return flags
|
||||
|
@ -196,6 +197,7 @@ class Pr0grammIE(InfoExtractor):
|
|||
'like_count': ('up', {int}),
|
||||
'dislike_count': ('down', {int}),
|
||||
'timestamp': ('created', {int}),
|
||||
'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
|
||||
'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
|
||||
}),
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import hashlib
|
||||
import re
|
||||
|
||||
from hashlib import sha1
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
|
@ -42,7 +42,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
|||
'Downloading protocols JSON',
|
||||
headers=self.geo_verification_headers(), query={
|
||||
'access_id': self._ACCESS_ID,
|
||||
'client_token': sha1((raw_ct).encode()).hexdigest(),
|
||||
'client_token': hashlib.sha1((raw_ct).encode()).hexdigest(),
|
||||
'video_id': clip_id,
|
||||
}, fatal=False, expected_status=(403,)) or {}
|
||||
error = protocols.get('error') or {}
|
||||
|
@ -53,7 +53,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
|||
urls = (self._download_json(
|
||||
self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
|
||||
'access_id': self._ACCESS_ID,
|
||||
'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
|
||||
'client_token': hashlib.sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
|
||||
'protocols': self._SUPPORTED_PROTOCOLS,
|
||||
'server_token': server_token,
|
||||
'video_id': clip_id,
|
||||
|
@ -77,7 +77,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
|||
if not formats:
|
||||
source_ids = [compat_str(source['id']) for source in video['sources']]
|
||||
|
||||
client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
|
||||
client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
|
||||
|
||||
sources = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
|
||||
|
@ -96,7 +96,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
|
|||
return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
|
||||
|
||||
for source_id in source_ids:
|
||||
client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
|
||||
client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
|
||||
urls = self._download_json(
|
||||
'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
|
||||
clip_id, 'Downloading urls JSON', fatal=False, query={
|
||||
|
|
|
@ -1,18 +1,14 @@
|
|||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
import itertools
|
||||
from urllib.parse import urlencode
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, traverse_obj, unescapeHTML
|
||||
|
||||
|
||||
class RadioKapitalBaseIE(InfoExtractor):
|
||||
def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}):
|
||||
return self._download_json(
|
||||
f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}',
|
||||
f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urllib.parse.urlencode(qs)}',
|
||||
video_id, note=note)
|
||||
|
||||
def _parse_episode(self, data):
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import datetime as dt
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
from datetime import datetime
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..utils import (
|
||||
|
@ -156,7 +156,7 @@ class RokfinIE(InfoExtractor):
|
|||
self.raise_login_required('This video is only available to premium users', True, method='cookies')
|
||||
elif scheduled:
|
||||
self.raise_no_formats(
|
||||
f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
|
||||
f'Stream is offline; scheduled for {dt.datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
|
||||
video_id=video_id, expected=True)
|
||||
|
||||
uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import datetime
|
||||
import datetime as dt
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .redge import RedCDNLivxIE
|
||||
|
@ -13,16 +13,16 @@ from ..utils.traversal import traverse_obj
|
|||
|
||||
|
||||
def is_dst(date):
    """Return whether a naive datetime lies in the summer-time window of its year.

    The window runs from 02:00 on the last Sunday of March to 03:00 on the last
    Sunday of October, both ends inclusive.
    """
    def last_sunday_at(month, hour):
        # March and October both have 31 days; step back from the 31st to the Sunday
        # on or before it (isoweekday() is 7 for Sunday, so `% 7` gives 0 days back).
        month_end = dt.datetime(date.year, month, 31)
        sunday = month_end - dt.timedelta(days=month_end.isoweekday() % 7)
        return sunday.replace(hour=hour)

    window_start = last_sunday_at(3, 2)
    window_end = last_sunday_at(10, 3)
    return window_start <= date <= window_end
|
||||
|
||||
|
||||
def rfc3339_to_atende(date):
    """Convert an ISO-format date string to an integer millisecond offset.

    The string is parsed with `datetime.fromisoformat`, shifted forward one hour
    when `is_dst` reports summer time, and expressed in milliseconds relative to
    Unix time 978307200 (2001-01-01T00:00:00Z).
    """
    moment = dt.datetime.fromisoformat(date)
    if is_dst(moment):
        moment = moment + dt.timedelta(hours=1)

    seconds_since_reference = moment.timestamp() - 978307200
    return int(seconds_since_reference * 1000)
|
||||
|
||||
|
||||
|
|
112
yt_dlp/extractor/sharepoint.py
Normal file
112
yt_dlp/extractor/sharepoint.py
Normal file
|
@ -0,0 +1,112 @@
|
|||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import determine_ext, int_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SharePointIE(InfoExtractor):
    """Extractor for videos shared via SharePoint `:v:` links or `stream.aspx` pages."""

    _BASE_URL_RE = r'https?://[\w-]+\.sharepoint\.com/'
    _VALID_URL = [
        rf'{_BASE_URL_RE}:v:/[a-z]/(?:[^/?#]+/)*(?P<id>[^/?#]{{46}})/?(?:$|[?#])',
        rf'{_BASE_URL_RE}(?!:v:)(?:[^/?#]+/)*stream\.aspx\?(?:[^#]+&)?id=(?P<id>[^&#]+)',
    ]
    _TESTS = [{
        'url': 'https://lut-my.sharepoint.com/:v:/g/personal/juha_eerola_student_lab_fi/EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw?e=ZpQOOw',
        'md5': '2950821d0d4937a0a76373782093b435',
        'info_dict': {
            'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
            'display_id': 'EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw',
            'ext': 'mp4',
            'title': 'CmvpJST',
            'duration': 54.567,
            'thumbnail': r're:https://.+/thumbnail',
            'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
        },
    }, {
        'url': 'https://greaternyace.sharepoint.com/:v:/s/acementornydrive/ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg?e=PQUfVb',
        'md5': 'c496a01644223273bff12e93e501afd1',
        'info_dict': {
            'id': '01QI4AVTZ3ESFZPAD42VCKB5CZKAGLFVYB',
            'display_id': 'ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg',
            'ext': 'mp4',
            'title': '930103681233985536',
            'duration': 3797.326,
            'thumbnail': r're:https://.+/thumbnail',
        },
    }, {
        'url': 'https://lut-my.sharepoint.com/personal/juha_eerola_student_lab_fi/_layouts/15/stream.aspx?id=%2Fpersonal%2Fjuha_eerola_student_lab_fi%2FDocuments%2FM-DL%2FCmvpJST.mp4&ga=1&referrer=StreamWebApp.Web&referrerScenario=AddressBarCopied.view',
        'info_dict': {
            'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
            'display_id': '/personal/juha_eerola_student_lab_fi/Documents/M-DL/CmvpJST.mp4',
            'ext': 'mp4',
            'title': 'CmvpJST',
            'duration': 54.567,
            'thumbnail': r're:https://.+/thumbnail',
            'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
        },
        'skip': 'Session cookies needed',
    }, {
        'url': 'https://izoobasisschool.sharepoint.com/:v:/g/Eaqleq8COVBIvIPvod0U27oBypC6aWOkk8ptuDpmJ6arHw',
        'only_matching': True,
    }, {
        'url': 'https://uskudaredutr-my.sharepoint.com/:v:/g/personal/songul_turkaydin_uskudar_edu_tr/EbTf-VRUIbtGuIN73tx1MuwBCHBOmNcWNqSLw61Fd2_o0g?e=n5Vkof',
        'only_matching': True,
    }, {
        'url': 'https://epam-my.sharepoint.com/:v:/p/dzmitry_tamashevich/Ec4ZOs-rATZHjFYZWVxjczEB649FCoYFKDV_x3RxZiWAGA?e=4hswgA',
        'only_matching': True,
    }, {
        'url': 'https://microsoft.sharepoint.com/:v:/t/MicrosoftSPARKRecordings-MSFTInternal/EWCyeqByVWBAt8wDvNZdV-UB0BvU5YVbKm0UHgdrUlI6dg?e=QbPck6',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract formats and metadata from the `g_fileInfo` JSON embedded in the page.

        Calls `raise_login_required` when the request ended up on
        login.microsoftonline.com instead of the SharePoint page.
        """
        display_id = urllib.parse.unquote(self._match_id(url))
        webpage, urlh = self._download_webpage_handle(url, display_id)

        final_host = urllib.parse.urlparse(urlh.url).hostname
        if final_host == 'login.microsoftonline.com':
            self.raise_login_required(
                'Session cookies are required for this URL and can be passed '
                'with the --cookies option. The --cookies-from-browser option will not work', method=None)

        video_data = self._search_json(r'g_fileInfo\s*=', webpage, 'player config', display_id)
        video_id = video_data['VroomItemId']

        # Derive the manifest URL from the transform URL: swap the last path segment
        # for `videomanifest` and extend the existing query with the manifest params.
        transform_url = urllib.parse.urlparse(video_data['.transformUrl'])
        manifest_path = urllib.parse.urljoin(f'{transform_url.path}/', '../videomanifest')
        manifest_query = {
            **urllib.parse.parse_qs(transform_url.query),
            'cTag': video_data['.ctag'],
            'action': 'Access',
            'part': 'index',
        }
        base_media_url = urllib.parse.urlunparse(transform_url._replace(
            path=manifest_path,
            query=urllib.parse.urlencode(manifest_query, doseq=True)))

        # Web player adds more params to the format URLs but we still get all formats without them
        formats = self._extract_mpd_formats(
            base_media_url, video_id, mpd_id='dash', query={'format': 'dash'}, fatal=False)
        for hls_type in ('hls', 'hls-vnext'):
            hls_formats = self._extract_m3u8_formats(
                base_media_url, video_id, 'mp4', m3u8_id=hls_type,
                query={'format': hls_type}, fatal=False, quality=-2)
            formats.extend(hls_formats)

        # Direct download of the uploaded file, ranked above the manifest formats
        source_url = traverse_obj(video_data, ('downloadUrl', {url_or_none}))
        if source_url:
            source_name = video_data.get('extension') or video_data.get('name')
            formats.append({
                'url': source_url,
                'ext': determine_ext(source_name),
                'quality': 1,
                'format_id': 'source',
                'filesize': int_or_none(video_data.get('size')),
                'vcodec': 'none' if video_data.get('isAudio') is True else None,
            })

        # The duration value is divided by 10,000,000 before being reported
        duration = traverse_obj(video_data, (
            'MediaServiceFastMetadata', {json.loads}, 'media', 'duration', {lambda x: x / 10000000}))

        return {
            'id': video_id,
            'formats': formats,
            'title': video_data.get('title') or video_data.get('displayName'),
            'display_id': display_id,
            'uploader_id': video_data.get('authorId'),
            'duration': duration,
            'thumbnail': url_or_none(video_data.get('thumbnailUrl')),
        }
|
|
@ -1,4 +1,4 @@
|
|||
import datetime
|
||||
import datetime as dt
|
||||
import itertools
|
||||
import json
|
||||
import math
|
||||
|
@ -94,7 +94,7 @@ class SonyLIVIE(InfoExtractor):
|
|||
'mobileNumber': username,
|
||||
'channelPartnerID': 'MSMIND',
|
||||
'country': 'IN',
|
||||
'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
|
||||
'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
|
||||
'otpSize': 6,
|
||||
'loginType': 'REGISTERORSIGNIN',
|
||||
'isMobileMandatory': True,
|
||||
|
@ -111,7 +111,7 @@ class SonyLIVIE(InfoExtractor):
|
|||
'otp': self._get_tfa_info('OTP'),
|
||||
'dmaId': 'IN',
|
||||
'ageConfirmation': True,
|
||||
'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
|
||||
'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
|
||||
'isMobileMandatory': True,
|
||||
}).encode())
|
||||
if otp_verify_json['resultCode'] == 'KO':
|
||||
|
|
|
@ -1,30 +1,27 @@
|
|||
import itertools
|
||||
import re
|
||||
import json
|
||||
# import random
|
||||
import re
|
||||
|
||||
from .common import (
|
||||
InfoExtractor,
|
||||
SearchInfoExtractor
|
||||
)
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
error_to_compat_str,
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
error_to_compat_str,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
mimetype2ext,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_get,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SoundcloudEmbedIE(InfoExtractor):
|
||||
|
@ -54,7 +51,6 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
_API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
|
||||
_API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
|
||||
_API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
|
||||
_access_token = None
|
||||
_HEADERS = {}
|
||||
|
||||
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
|
||||
|
@ -112,21 +108,31 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
def _initialize_pre_login(self):
|
||||
self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if username != 'oauth':
|
||||
self.report_warning(
|
||||
'Login using username and password is not currently supported. '
|
||||
'Use "--username oauth --password <oauth_token>" to login using an oauth token')
|
||||
self._access_token = password
|
||||
query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
|
||||
payload = {'session': {'access_token': self._access_token}}
|
||||
token_verification = Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
|
||||
response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
|
||||
if response is not False:
|
||||
self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
|
||||
def _verify_oauth_token(self, token):
|
||||
if self._request_webpage(
|
||||
self._API_VERIFY_AUTH_TOKEN % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
|
||||
None, note='Verifying login token...', fatal=False,
|
||||
data=json.dumps({'session': {'access_token': token}}).encode()):
|
||||
self._HEADERS['Authorization'] = f'OAuth {token}'
|
||||
self.report_login()
|
||||
else:
|
||||
self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
|
||||
self.report_warning('Provided authorization token is invalid. Continuing as guest')
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._HEADERS:
|
||||
return
|
||||
if token := try_call(lambda: self._get_cookies(self._BASE_URL)['oauth_token'].value):
|
||||
self._verify_oauth_token(token)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if username != 'oauth':
|
||||
raise ExtractorError(
|
||||
'Login using username and password is not currently supported. '
|
||||
'Use "--username oauth --password <oauth_token>" to login using an oauth token, '
|
||||
f'or else {self._login_hint(method="cookies")}', expected=True)
|
||||
if self._HEADERS:
|
||||
return
|
||||
self._verify_oauth_token(password)
|
||||
|
||||
r'''
|
||||
def genDevId():
|
||||
|
@ -147,14 +153,17 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
'user_agent': self._USER_AGENT
|
||||
}
|
||||
|
||||
query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
|
||||
login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
|
||||
response = self._download_json(login, None)
|
||||
self._access_token = response.get('session').get('access_token')
|
||||
if not self._access_token:
|
||||
self.report_warning('Unable to get access token, login may has failed')
|
||||
else:
|
||||
self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
|
||||
response = self._download_json(
|
||||
self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
|
||||
None, note='Verifying login token...', fatal=False,
|
||||
data=json.dumps(payload).encode())
|
||||
|
||||
if token := traverse_obj(response, ('session', 'access_token', {str})):
|
||||
self._HEADERS['Authorization'] = f'OAuth {token}'
|
||||
self.report_login()
|
||||
return
|
||||
|
||||
raise ExtractorError('Unable to get access token, login may have failed', expected=True)
|
||||
'''
|
||||
|
||||
# signature generation
|
||||
|
@ -217,6 +226,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||
'url': format_url,
|
||||
'quality': 10,
|
||||
'format_note': 'Original',
|
||||
})
|
||||
|
||||
def invalid_url(url):
|
||||
|
@ -233,9 +243,13 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
format_id_list.append(protocol)
|
||||
ext = f.get('ext')
|
||||
if ext == 'aac':
|
||||
f['abr'] = '256'
|
||||
f.update({
|
||||
'abr': 256,
|
||||
'quality': 5,
|
||||
'format_note': 'Premium',
|
||||
})
|
||||
for k in ('ext', 'abr'):
|
||||
v = f.get(k)
|
||||
v = str_or_none(f.get(k))
|
||||
if v:
|
||||
format_id_list.append(v)
|
||||
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
|
||||
|
@ -256,16 +270,25 @@ class SoundcloudBaseIE(InfoExtractor):
|
|||
formats.append(f)
|
||||
|
||||
# New API
|
||||
transcodings = try_get(
|
||||
info, lambda x: x['media']['transcodings'], list) or []
|
||||
for t in transcodings:
|
||||
if not isinstance(t, dict):
|
||||
continue
|
||||
format_url = url_or_none(t.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
stream = None if extract_flat else self._download_json(
|
||||
format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
|
||||
for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']))):
|
||||
if extract_flat:
|
||||
break
|
||||
format_url = t['url']
|
||||
stream = None
|
||||
|
||||
for retry in self.RetryManager(fatal=False):
|
||||
try:
|
||||
stream = self._download_json(format_url, track_id, query=query, headers=self._HEADERS)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 429:
|
||||
self.report_warning(
|
||||
'You have reached the API rate limit, which is ~600 requests per '
|
||||
'10 minutes. Use the --extractor-retries and --retry-sleep options '
|
||||
'to configure an appropriate retry count and wait time', only_once=True)
|
||||
retry.error = e.cause
|
||||
else:
|
||||
self.report_warning(e.msg)
|
||||
|
||||
if not isinstance(stream, dict):
|
||||
continue
|
||||
stream_url = url_or_none(stream.get('url'))
|
||||
|
|
|
@ -1,8 +1,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import json
|
||||
from functools import partial
|
||||
from textwrap import dedent
|
||||
import textwrap
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, format_field, int_or_none, parse_iso8601
|
||||
|
@ -10,7 +9,7 @@ from ..utils.traversal import traverse_obj
|
|||
|
||||
|
||||
def _fmt_url(url):
|
||||
return partial(format_field, template=url, default=None)
|
||||
return functools.partial(format_field, template=url, default=None)
|
||||
|
||||
|
||||
class TelewebionIE(InfoExtractor):
|
||||
|
@ -88,7 +87,7 @@ class TelewebionIE(InfoExtractor):
|
|||
if not video_id.startswith('0x'):
|
||||
video_id = hex(int(video_id))
|
||||
|
||||
episode_data = self._call_graphql_api('getEpisodeDetail', video_id, dedent('''
|
||||
episode_data = self._call_graphql_api('getEpisodeDetail', video_id, textwrap.dedent('''
|
||||
queryEpisode(filter: {EpisodeID: $EpisodeId}, first: 1) {
|
||||
title
|
||||
program {
|
||||
|
@ -127,7 +126,7 @@ class TelewebionIE(InfoExtractor):
|
|||
'formats': (
|
||||
'channel', 'descriptor', {str},
|
||||
{_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')},
|
||||
{partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
|
||||
{functools.partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
|
||||
}))
|
||||
info_dict['id'] = video_id
|
||||
return info_dict
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import base64
|
||||
import datetime as dt
|
||||
import functools
|
||||
import itertools
|
||||
from datetime import datetime
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
|
@ -70,7 +70,7 @@ class TenPlayIE(InfoExtractor):
|
|||
username, password = self._get_login_info()
|
||||
if username is None or password is None:
|
||||
self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
|
||||
_timestamp = datetime.now().strftime('%Y%m%d000000')
|
||||
_timestamp = dt.datetime.now().strftime('%Y%m%d000000')
|
||||
_auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
|
||||
data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
|
||||
'X-Network-Ten-Auth': _auth_header,
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import json
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from .zype import ZypeIE
|
||||
from ..networking import HEADRequest
|
||||
|
@ -8,6 +9,7 @@ from ..utils import (
|
|||
ExtractorError,
|
||||
filter_dict,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
try_call,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
@ -17,23 +19,43 @@ class ThisOldHouseIE(InfoExtractor):
|
|||
_NETRC_MACHINE = 'thisoldhouse'
|
||||
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
# Unresolved Brightcove URL embed (formerly Zype), free
|
||||
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
|
||||
'info_dict': {
|
||||
'id': '5dcdddf673c3f956ef5db202',
|
||||
'id': '6325298523112',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Build a Storage Bench',
|
||||
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
|
||||
'timestamp': 1442548800,
|
||||
'upload_date': '20150918',
|
||||
'duration': 674,
|
||||
'view_count': int,
|
||||
'average_rating': 0,
|
||||
'thumbnail': r're:^https?://.*\.jpg\?\d+$',
|
||||
'display_id': 'how-to-build-a-storage-bench',
|
||||
'timestamp': 1681793639,
|
||||
'upload_date': '20230418',
|
||||
'duration': 674.54,
|
||||
'tags': 'count:11',
|
||||
'uploader_id': '6314471934001',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# Brightcove embed, authwalled
|
||||
'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
|
||||
'info_dict': {
|
||||
'id': '6349675446112',
|
||||
'ext': 'mp4',
|
||||
'title': 'E17 | Glen Ridge Generational | Multi-Generational',
|
||||
'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
|
||||
'timestamp': 1711382202,
|
||||
'upload_date': '20240325',
|
||||
'duration': 1422.229,
|
||||
'tags': 'count:13',
|
||||
'uploader_id': '6314471934001',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
},
|
||||
'expected_warnings': ['Login with password is not supported for this website'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Requires subscription',
|
||||
}, {
|
||||
# Page no longer has video
|
||||
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
|
||||
|
@ -98,7 +120,15 @@ class ThisOldHouseIE(InfoExtractor):
|
|||
|
||||
video_url, video_id = self._search_regex(
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
|
||||
webpage, 'video url', group=(1, 2))
|
||||
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
|
||||
webpage, 'zype url', group=(1, 2), default=(None, None))
|
||||
if video_url:
|
||||
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
|
||||
return self.url_result(video_url, ZypeIE, video_id)
|
||||
|
||||
return self.url_result(video_url, ZypeIE, video_id)
|
||||
video_url, video_id = self._search_regex([
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))',
|
||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)thisoldhouse\.com/videos/brightcove/(\d+))'],
|
||||
webpage, 'iframe url', group=(1, 2))
|
||||
if not parse_qs(video_url).get('videoId'):
|
||||
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url
|
||||
return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id)
|
||||
|
|
|
@ -4,6 +4,7 @@ import random
|
|||
import re
|
||||
import string
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlparse
|
||||
|
@ -30,19 +31,65 @@ from ..utils import (
|
|||
|
||||
|
||||
class TikTokBaseIE(InfoExtractor):
|
||||
_APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')]
|
||||
_WORKING_APP_VERSION = None
|
||||
_APP_NAME = 'trill'
|
||||
_AID = 1180
|
||||
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
|
||||
_WEBPAGE_HOST = 'https://www.tiktok.com/'
|
||||
QUALITIES = ('360p', '540p', '720p', '1080p')
|
||||
|
||||
_APP_INFO_DEFAULTS = {
|
||||
# unique "install id"
|
||||
'iid': None,
|
||||
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
|
||||
'app_name': 'musical_ly',
|
||||
'app_version': '34.1.2',
|
||||
'manifest_app_version': '2023401020',
|
||||
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
||||
'aid': '0',
|
||||
}
|
||||
_KNOWN_APP_INFO = [
|
||||
'7351144126450059040',
|
||||
'7351149742343391009',
|
||||
'7351153174894626592',
|
||||
]
|
||||
_APP_INFO_POOL = None
|
||||
_APP_INFO = None
|
||||
_APP_USER_AGENT = None
|
||||
|
||||
@property
|
||||
def _API_HOSTNAME(self):
|
||||
return self._configuration_arg(
|
||||
'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0]
|
||||
|
||||
def _get_next_app_info(self):
|
||||
if self._APP_INFO_POOL is None:
|
||||
defaults = {
|
||||
key: self._configuration_arg(key, [default], ie_key=TikTokIE)[0]
|
||||
for key, default in self._APP_INFO_DEFAULTS.items()
|
||||
if key != 'iid'
|
||||
}
|
||||
app_info_list = (
|
||||
self._configuration_arg('app_info', ie_key=TikTokIE)
|
||||
or random.sample(self._KNOWN_APP_INFO, len(self._KNOWN_APP_INFO)))
|
||||
self._APP_INFO_POOL = [
|
||||
{**defaults, **dict(
|
||||
(k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
|
||||
)} for app_info in app_info_list
|
||||
]
|
||||
|
||||
if not self._APP_INFO_POOL:
|
||||
return False
|
||||
|
||||
self._APP_INFO = self._APP_INFO_POOL.pop(0)
|
||||
|
||||
app_name = self._APP_INFO['app_name']
|
||||
version = self._APP_INFO['manifest_app_version']
|
||||
if app_name == 'musical_ly':
|
||||
package = f'com.zhiliaoapp.musically/{version}'
|
||||
else: # trill, aweme
|
||||
package = f'com.ss.android.ugc.{app_name}/{version}'
|
||||
self._APP_USER_AGENT = f'{package} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)'
|
||||
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _create_url(user_id, video_id):
|
||||
return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
|
||||
|
@ -58,7 +105,7 @@ class TikTokBaseIE(InfoExtractor):
|
|||
'universal data', display_id, end_pattern=r'</script>', default={}),
|
||||
('__DEFAULT_SCOPE__', {dict})) or {}
|
||||
|
||||
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
|
||||
def _call_api_impl(self, ep, query, video_id, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
|
||||
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||
|
@ -67,80 +114,84 @@ class TikTokBaseIE(InfoExtractor):
|
|||
return self._download_json(
|
||||
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
|
||||
fatal=fatal, note=note, errnote=errnote, headers={
|
||||
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)',
|
||||
'User-Agent': self._APP_USER_AGENT,
|
||||
'Accept': 'application/json',
|
||||
}, query=query)
|
||||
|
||||
def _build_api_query(self, query, app_version, manifest_app_version):
|
||||
def _build_api_query(self, query):
|
||||
return {
|
||||
**query,
|
||||
'version_name': app_version,
|
||||
'version_code': manifest_app_version,
|
||||
'build_number': app_version,
|
||||
'manifest_version_code': manifest_app_version,
|
||||
'update_version_code': manifest_app_version,
|
||||
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
||||
'uuid': ''.join(random.choices(string.digits, k=16)),
|
||||
'_rticket': int(time.time() * 1000),
|
||||
'ts': int(time.time()),
|
||||
'device_brand': 'Google',
|
||||
'device_type': 'Pixel 7',
|
||||
'device_platform': 'android',
|
||||
'os': 'android',
|
||||
'ssmix': 'a',
|
||||
'_rticket': int(time.time() * 1000),
|
||||
'cdid': str(uuid.uuid4()),
|
||||
'channel': 'googleplay',
|
||||
'aid': self._APP_INFO['aid'],
|
||||
'app_name': self._APP_INFO['app_name'],
|
||||
'version_code': ''.join((f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.'))),
|
||||
'version_name': self._APP_INFO['app_version'],
|
||||
'manifest_version_code': self._APP_INFO['manifest_app_version'],
|
||||
'update_version_code': self._APP_INFO['manifest_app_version'],
|
||||
'ab_version': self._APP_INFO['app_version'],
|
||||
'resolution': '1080*2400',
|
||||
'dpi': 420,
|
||||
'os_version': '13',
|
||||
'os_api': '29',
|
||||
'carrier_region': 'US',
|
||||
'sys_region': 'US',
|
||||
'region': 'US',
|
||||
'app_name': self._APP_NAME,
|
||||
'app_language': 'en',
|
||||
'device_type': 'Pixel 7',
|
||||
'device_brand': 'Google',
|
||||
'language': 'en',
|
||||
'timezone_name': 'America/New_York',
|
||||
'timezone_offset': '-14400',
|
||||
'channel': 'googleplay',
|
||||
'os_api': '29',
|
||||
'os_version': '13',
|
||||
'ac': 'wifi',
|
||||
'mcc_mnc': '310260',
|
||||
'is_my_cn': 0,
|
||||
'aid': self._AID,
|
||||
'ssmix': 'a',
|
||||
'as': 'a1qwert123',
|
||||
'cp': 'cbfhckdckkde1',
|
||||
'is_pad': '0',
|
||||
'current_region': 'US',
|
||||
'app_type': 'normal',
|
||||
'sys_region': 'US',
|
||||
'last_install_time': int(time.time()) - random.randint(86400, 1123200),
|
||||
'timezone_name': 'America/New_York',
|
||||
'residence': 'US',
|
||||
'app_language': 'en',
|
||||
'timezone_offset': '-14400',
|
||||
'host_abi': 'armeabi-v7a',
|
||||
'locale': 'en',
|
||||
'ac2': 'wifi5g',
|
||||
'uoo': '1',
|
||||
'op_region': 'US',
|
||||
'build_number': self._APP_INFO['app_version'],
|
||||
'region': 'US',
|
||||
'ts': int(time.time()),
|
||||
'iid': self._APP_INFO['iid'],
|
||||
'device_id': random.randint(7250000000000000000, 7351147085025500000),
|
||||
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
||||
}
|
||||
|
||||
def _call_api(self, ep, query, video_id, fatal=True,
|
||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||
if not self._WORKING_APP_VERSION:
|
||||
app_version = self._configuration_arg('app_version', [''], ie_key=TikTokIE.ie_key())[0]
|
||||
manifest_app_version = self._configuration_arg('manifest_app_version', [''], ie_key=TikTokIE.ie_key())[0]
|
||||
if app_version and manifest_app_version:
|
||||
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
|
||||
self.write_debug('Imported app version combo from extractor arguments')
|
||||
elif app_version or manifest_app_version:
|
||||
self.report_warning('Only one of the two required version params are passed as extractor arguments', only_once=True)
|
||||
if not self._APP_INFO and not self._get_next_app_info():
|
||||
message = 'No working app info is available'
|
||||
if fatal:
|
||||
raise ExtractorError(message, expected=True)
|
||||
else:
|
||||
self.report_warning(message)
|
||||
return
|
||||
|
||||
if self._WORKING_APP_VERSION:
|
||||
app_version, manifest_app_version = self._WORKING_APP_VERSION
|
||||
real_query = self._build_api_query(query, app_version, manifest_app_version)
|
||||
return self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
|
||||
|
||||
for count, (app_version, manifest_app_version) in enumerate(self._APP_VERSIONS, start=1):
|
||||
real_query = self._build_api_query(query, app_version, manifest_app_version)
|
||||
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
|
||||
for count in itertools.count(1):
|
||||
self.write_debug(str(self._APP_INFO))
|
||||
real_query = self._build_api_query(query)
|
||||
try:
|
||||
res = self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
|
||||
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
|
||||
return res
|
||||
return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||
if count == len(self._APP_VERSIONS):
|
||||
message = str(e.cause or e.msg)
|
||||
if not self._get_next_app_info():
|
||||
if fatal:
|
||||
raise e
|
||||
raise
|
||||
else:
|
||||
self.report_warning(str(e.cause or e.msg))
|
||||
self.report_warning(message)
|
||||
return
|
||||
self.report_warning('%s. Retrying... (attempt %s of %s)' % (str(e.cause or e.msg), count, len(self._APP_VERSIONS)))
|
||||
self.report_warning(f'{message}. Retrying... (attempt {count} of {max_tries})')
|
||||
continue
|
||||
raise e
|
||||
raise
|
||||
|
||||
def _extract_aweme_app(self, aweme_id):
|
||||
feed_list = self._call_api(
|
||||
|
@ -223,6 +274,7 @@ class TikTokBaseIE(InfoExtractor):
|
|||
|
||||
def extract_addr(addr, add_meta={}):
|
||||
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
||||
is_bytevc2 = parsed_meta.get('vcodec') == 'bytevc2'
|
||||
if res:
|
||||
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
|
||||
known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
|
||||
|
@ -235,8 +287,11 @@ class TikTokBaseIE(InfoExtractor):
|
|||
'acodec': 'aac',
|
||||
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
|
||||
**add_meta, **parsed_meta,
|
||||
# bytevc2 is bytedance's proprietary (unplayable) video codec
|
||||
'preference': -100 if is_bytevc2 else -1,
|
||||
'format_note': join_nonempty(
|
||||
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' '),
|
||||
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None,
|
||||
'(UNPLAYABLE)' if is_bytevc2 else None, delim=' '),
|
||||
**audio_meta(url),
|
||||
} for url in addr.get('url_list') or []]
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import base64
|
||||
import re
|
||||
import urllib.parse
|
||||
from base64 import b64decode
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
|
@ -371,7 +371,7 @@ class WistiaChannelIE(WistiaBaseIE):
|
|||
webpage = self._download_webpage(f'https://fast.wistia.net/embed/channel/{channel_id}', channel_id)
|
||||
data = self._parse_json(
|
||||
self._search_regex(r'wchanneljsonp-%s\'\]\s*=[^\"]*\"([A-Za-z0-9=/]*)' % channel_id, webpage, 'jsonp', channel_id),
|
||||
channel_id, transform_source=lambda x: urllib.parse.unquote_plus(b64decode(x).decode('utf-8')))
|
||||
channel_id, transform_source=lambda x: urllib.parse.unquote_plus(base64.b64decode(x).decode('utf-8')))
|
||||
|
||||
# XXX: can there be more than one series?
|
||||
series = traverse_obj(data, ('series', 0), default={})
|
||||
|
|
|
@ -15,35 +15,35 @@ class XVideosIE(InfoExtractor):
|
|||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[^/]+\.)?xvideos2?\.com/video|
|
||||
(?:www\.)?xvideos\.es/video|
|
||||
(?:[^/]+\.)?xvideos2?\.com/video\.?|
|
||||
(?:www\.)?xvideos\.es/video\.?|
|
||||
(?:www|flashservice)\.xvideos\.com/embedframe/|
|
||||
static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
|
||||
)
|
||||
(?P<id>[0-9]+)
|
||||
(?P<id>[0-9a-z]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.xvideos.com/video4588838/motorcycle_guy_cucks_influencer_steals_his_gf',
|
||||
'md5': '14cea69fcb84db54293b1e971466c2e1',
|
||||
'url': 'http://xvideos.com/video.ucuvbkfda4e/a_beautiful_red-haired_stranger_was_refused_but_still_came_to_my_room_for_sex',
|
||||
'md5': '396255a900a6bddb3e98985f0b86c3fd',
|
||||
'info_dict': {
|
||||
'id': '4588838',
|
||||
'id': 'ucuvbkfda4e',
|
||||
'ext': 'mp4',
|
||||
'title': 'Motorcycle Guy Cucks Influencer, Steals his GF',
|
||||
'duration': 108,
|
||||
'title': 'A Beautiful Red-Haired Stranger Was Refused, But Still Came To My Room For Sex',
|
||||
'duration': 1238,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
|
||||
'thumbnail': r're:^https://cdn\d+-pic.xvideos-cdn.com/.+\.jpg',
|
||||
}
|
||||
}, {
|
||||
# Broken HLS formats
|
||||
'url': 'https://www.xvideos.com/video65982001/what_s_her_name',
|
||||
'md5': 'b82d7d7ef7d65a84b1fa6965f81f95a5',
|
||||
'md5': '56742808292c8fa1418e4538c262c58b',
|
||||
'info_dict': {
|
||||
'id': '65982001',
|
||||
'ext': 'mp4',
|
||||
'title': 'what\'s her name?',
|
||||
'duration': 120,
|
||||
'age_limit': 18,
|
||||
'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
|
||||
'thumbnail': r're:^https://cdn\d+-pic.xvideos-cdn.com/.+\.jpg',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://flashservice.xvideos.com/embedframe/4588838',
|
||||
|
@ -90,6 +90,18 @@ class XVideosIE(InfoExtractor):
|
|||
}, {
|
||||
'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl',
|
||||
'only_matching': True
|
||||
}, {
|
||||
'url': 'https://flashservice.xvideos.com/embedframe/ucuvbkfda4e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.xvideos.com/embedframe/ucuvbkfda4e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=ucuvbkfda4e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://xvideos.es/video.ucuvbkfda4e/a_beautiful_red-haired_stranger_was_refused_but_still_came_to_my_room_for_sex',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
|
|
@ -2,7 +2,7 @@ import base64
|
|||
import calendar
|
||||
import collections
|
||||
import copy
|
||||
import datetime
|
||||
import datetime as dt
|
||||
import enum
|
||||
import hashlib
|
||||
import itertools
|
||||
|
@ -33,6 +33,7 @@ from ..utils import (
|
|||
clean_html,
|
||||
datetime_from_str,
|
||||
dict_get,
|
||||
filesize_from_tbr,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
format_field,
|
||||
|
@ -55,6 +56,7 @@ from ..utils import (
|
|||
str_to_int,
|
||||
strftime_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
|
@ -922,10 +924,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||
def _parse_time_text(self, text):
|
||||
if not text:
|
||||
return
|
||||
dt = self.extract_relative_time(text)
|
||||
dt_ = self.extract_relative_time(text)
|
||||
timestamp = None
|
||||
if isinstance(dt, datetime.datetime):
|
||||
timestamp = calendar.timegm(dt.timetuple())
|
||||
if isinstance(dt_, dt.datetime):
|
||||
timestamp = calendar.timegm(dt_.timetuple())
|
||||
|
||||
if timestamp is None:
|
||||
timestamp = (
|
||||
|
@ -3602,8 +3604,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
yt_query = {
|
||||
'videoId': video_id,
|
||||
}
|
||||
if _split_innertube_client(client)[0] == 'android':
|
||||
yt_query['params'] = 'CgIQBg=='
|
||||
if _split_innertube_client(client)[0] in ('android', 'android_embedscreen'):
|
||||
yt_query['params'] = 'CgIIAQ=='
|
||||
|
||||
pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
|
||||
if pp_arg:
|
||||
|
@ -3839,11 +3841,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
10 if audio_track.get('audioIsDefault') and 10
|
||||
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
|
||||
else -1)
|
||||
format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
|
||||
# Some formats may have much smaller duration than others (possibly damaged during encoding)
|
||||
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
|
||||
# Make sure to avoid false positives with small duration differences.
|
||||
# E.g. __2ABJjxzNo, ySuUZEjARPY
|
||||
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
|
||||
is_damaged = try_call(lambda: format_duration < duration // 2)
|
||||
if is_damaged:
|
||||
self.report_warning(
|
||||
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
|
||||
|
@ -3873,6 +3876,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
|
||||
'has_drm': bool(fmt.get('drmFamilies')),
|
||||
'tbr': tbr,
|
||||
'filesize_approx': filesize_from_tbr(tbr, format_duration),
|
||||
'url': fmt_url,
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
|
||||
|
@ -4564,7 +4568,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
|
||||
if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
|
||||
# Newly uploaded videos' HLS formats are potentially problematic and need to be checked
|
||||
upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc)
|
||||
upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
|
||||
if upload_datetime >= datetime_from_str('today-2days'):
|
||||
for fmt in info['formats']:
|
||||
if fmt.get('protocol') == 'm3u8_native':
|
||||
|
@ -6965,7 +6969,7 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
|
|||
IE_DESC = 'YouTube search'
|
||||
IE_NAME = 'youtube:search'
|
||||
_SEARCH_KEY = 'ytsearch'
|
||||
_SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
|
||||
_SEARCH_PARAMS = 'EgIQAfABAQ==' # Videos only
|
||||
_TESTS = [{
|
||||
'url': 'ytsearch5:youtube-dl test video',
|
||||
'playlist_count': 5,
|
||||
|
@ -6973,6 +6977,14 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
|
|||
'id': 'youtube-dl test video',
|
||||
'title': 'youtube-dl test video',
|
||||
}
|
||||
}, {
|
||||
'note': 'Suicide/self-harm search warning',
|
||||
'url': 'ytsearch1:i hate myself and i wanna die',
|
||||
'playlist_count': 1,
|
||||
'info_dict': {
|
||||
'id': 'i hate myself and i wanna die',
|
||||
'title': 'i hate myself and i wanna die',
|
||||
}
|
||||
}]
|
||||
|
||||
|
||||
|
@ -6980,7 +6992,7 @@ class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
|
|||
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
|
||||
_SEARCH_KEY = 'ytsearchdate'
|
||||
IE_DESC = 'YouTube search, newest videos first'
|
||||
_SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
|
||||
_SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date
|
||||
_TESTS = [{
|
||||
'url': 'ytsearchdate5:youtube-dl test video',
|
||||
'playlist_count': 5,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import re
|
||||
from uuid import uuid4
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
|
@ -53,7 +53,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
|
|||
self._request_webpage(
|
||||
'%s/zapi/v3/session/hello' % self._host_url(), None,
|
||||
'Opening session', data=urlencode_postdata({
|
||||
'uuid': compat_str(uuid4()),
|
||||
'uuid': compat_str(uuid.uuid4()),
|
||||
'lang': 'en',
|
||||
'app_version': '1.8.2',
|
||||
'format': 'json',
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import io
|
||||
import logging
|
||||
import ssl
|
||||
|
@ -38,27 +39,40 @@ if websockets_version < (12, 0):
|
|||
import websockets.sync.client
|
||||
from websockets.uri import parse_uri
|
||||
|
||||
# In websockets Connection, recv_exc and recv_events_exc are defined
|
||||
# after the recv events handler thread is started [1].
|
||||
# On our CI using PyPy, in some cases a race condition may occur
|
||||
# where the recv events handler thread tries to use these attributes before they are defined [2].
|
||||
# 1: https://github.com/python-websockets/websockets/blame/de768cf65e7e2b1a3b67854fb9e08816a5ff7050/src/websockets/sync/connection.py#L93
|
||||
# 2: "AttributeError: 'ClientConnection' object has no attribute 'recv_events_exc'. Did you mean: 'recv_events'?"
|
||||
import websockets.sync.connection # isort: split
|
||||
with contextlib.suppress(Exception):
|
||||
# > 12.0
|
||||
websockets.sync.connection.Connection.recv_exc = None
|
||||
# 12.0
|
||||
websockets.sync.connection.Connection.recv_events_exc = None
|
||||
|
||||
|
||||
class WebsocketsResponseAdapter(WebSocketResponse):
|
||||
|
||||
def __init__(self, wsw: websockets.sync.client.ClientConnection, url):
|
||||
def __init__(self, ws: websockets.sync.client.ClientConnection, url):
|
||||
super().__init__(
|
||||
fp=io.BytesIO(wsw.response.body or b''),
|
||||
fp=io.BytesIO(ws.response.body or b''),
|
||||
url=url,
|
||||
headers=wsw.response.headers,
|
||||
status=wsw.response.status_code,
|
||||
reason=wsw.response.reason_phrase,
|
||||
headers=ws.response.headers,
|
||||
status=ws.response.status_code,
|
||||
reason=ws.response.reason_phrase,
|
||||
)
|
||||
self.wsw = wsw
|
||||
self._ws = ws
|
||||
|
||||
def close(self):
|
||||
self.wsw.close()
|
||||
self._ws.close()
|
||||
super().close()
|
||||
|
||||
def send(self, message):
|
||||
# https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send
|
||||
try:
|
||||
return self.wsw.send(message)
|
||||
return self._ws.send(message)
|
||||
except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
|
||||
raise TransportError(cause=e) from e
|
||||
except SocksProxyError as e:
|
||||
|
@ -69,7 +83,7 @@ class WebsocketsResponseAdapter(WebSocketResponse):
|
|||
def recv(self):
|
||||
# https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv
|
||||
try:
|
||||
return self.wsw.recv()
|
||||
return self._ws.recv()
|
||||
except SocksProxyError as e:
|
||||
raise ProxyError(cause=e) from e
|
||||
except (websockets.exceptions.WebSocketException, RuntimeError, TimeoutError) as e:
|
||||
|
|
|
@ -463,9 +463,10 @@ class Request:
|
|||
else:
|
||||
raise TypeError('headers must be a mapping')
|
||||
|
||||
def update(self, url=None, data=None, headers=None, query=None):
|
||||
def update(self, url=None, data=None, headers=None, query=None, extensions=None):
|
||||
self.data = data if data is not None else self.data
|
||||
self.headers.update(headers or {})
|
||||
self.extensions.update(extensions or {})
|
||||
self.url = update_url_query(url or self.url, query or {})
|
||||
|
||||
def copy(self):
|
||||
|
|
|
@ -5,7 +5,7 @@ import codecs
|
|||
import collections
|
||||
import collections.abc
|
||||
import contextlib
|
||||
import datetime
|
||||
import datetime as dt
|
||||
import email.header
|
||||
import email.utils
|
||||
import errno
|
||||
|
@ -1150,14 +1150,14 @@ def extract_timezone(date_str):
|
|||
timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
|
||||
if timezone is not None:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
timezone = datetime.timedelta(hours=timezone or 0)
|
||||
timezone = dt.timedelta(hours=timezone or 0)
|
||||
else:
|
||||
date_str = date_str[:-len(m.group('tz'))]
|
||||
if not m.group('sign'):
|
||||
timezone = datetime.timedelta()
|
||||
timezone = dt.timedelta()
|
||||
else:
|
||||
sign = 1 if m.group('sign') == '+' else -1
|
||||
timezone = datetime.timedelta(
|
||||
timezone = dt.timedelta(
|
||||
hours=sign * int(m.group('hours')),
|
||||
minutes=sign * int(m.group('minutes')))
|
||||
return timezone, date_str
|
||||
|
@ -1176,8 +1176,8 @@ def parse_iso8601(date_str, delimiter='T', timezone=None):
|
|||
|
||||
with contextlib.suppress(ValueError):
|
||||
date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
|
||||
dt = datetime.datetime.strptime(date_str, date_format) - timezone
|
||||
return calendar.timegm(dt.timetuple())
|
||||
dt_ = dt.datetime.strptime(date_str, date_format) - timezone
|
||||
return calendar.timegm(dt_.timetuple())
|
||||
|
||||
|
||||
def date_formats(day_first=True):
|
||||
|
@ -1198,12 +1198,12 @@ def unified_strdate(date_str, day_first=True):
|
|||
|
||||
for expression in date_formats(day_first):
|
||||
with contextlib.suppress(ValueError):
|
||||
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||
upload_date = dt.datetime.strptime(date_str, expression).strftime('%Y%m%d')
|
||||
if upload_date is None:
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
with contextlib.suppress(ValueError):
|
||||
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
upload_date = dt.datetime(*timetuple[:6]).strftime('%Y%m%d')
|
||||
if upload_date is not None:
|
||||
return str(upload_date)
|
||||
|
||||
|
@ -1233,8 +1233,8 @@ def unified_timestamp(date_str, day_first=True):
|
|||
|
||||
for expression in date_formats(day_first):
|
||||
with contextlib.suppress(ValueError):
|
||||
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
|
||||
return calendar.timegm(dt.timetuple())
|
||||
dt_ = dt.datetime.strptime(date_str, expression) - timezone + dt.timedelta(hours=pm_delta)
|
||||
return calendar.timegm(dt_.timetuple())
|
||||
|
||||
timetuple = email.utils.parsedate_tz(date_str)
|
||||
if timetuple:
|
||||
|
@ -1272,11 +1272,11 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
|
|||
if precision == 'auto':
|
||||
auto_precision = True
|
||||
precision = 'microsecond'
|
||||
today = datetime_round(datetime.datetime.now(datetime.timezone.utc), precision)
|
||||
today = datetime_round(dt.datetime.now(dt.timezone.utc), precision)
|
||||
if date_str in ('now', 'today'):
|
||||
return today
|
||||
if date_str == 'yesterday':
|
||||
return today - datetime.timedelta(days=1)
|
||||
return today - dt.timedelta(days=1)
|
||||
match = re.match(
|
||||
r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
|
||||
date_str)
|
||||
|
@ -1291,13 +1291,13 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
|
|||
if unit == 'week':
|
||||
unit = 'day'
|
||||
time *= 7
|
||||
delta = datetime.timedelta(**{unit + 's': time})
|
||||
delta = dt.timedelta(**{unit + 's': time})
|
||||
new_date = start_time + delta
|
||||
if auto_precision:
|
||||
return datetime_round(new_date, unit)
|
||||
return new_date
|
||||
|
||||
return datetime_round(datetime.datetime.strptime(date_str, format), precision)
|
||||
return datetime_round(dt.datetime.strptime(date_str, format), precision)
|
||||
|
||||
|
||||
def date_from_str(date_str, format='%Y%m%d', strict=False):
|
||||
|
@ -1312,21 +1312,21 @@ def date_from_str(date_str, format='%Y%m%d', strict=False):
|
|||
return datetime_from_str(date_str, precision='microsecond', format=format).date()
|
||||
|
||||
|
||||
def datetime_add_months(dt, months):
|
||||
def datetime_add_months(dt_, months):
|
||||
"""Increment/Decrement a datetime object by months."""
|
||||
month = dt.month + months - 1
|
||||
year = dt.year + month // 12
|
||||
month = dt_.month + months - 1
|
||||
year = dt_.year + month // 12
|
||||
month = month % 12 + 1
|
||||
day = min(dt.day, calendar.monthrange(year, month)[1])
|
||||
return dt.replace(year, month, day)
|
||||
day = min(dt_.day, calendar.monthrange(year, month)[1])
|
||||
return dt_.replace(year, month, day)
|
||||
|
||||
|
||||
def datetime_round(dt, precision='day'):
|
||||
def datetime_round(dt_, precision='day'):
|
||||
"""
|
||||
Round a datetime object's time to a specific precision
|
||||
"""
|
||||
if precision == 'microsecond':
|
||||
return dt
|
||||
return dt_
|
||||
|
||||
unit_seconds = {
|
||||
'day': 86400,
|
||||
|
@ -1335,8 +1335,8 @@ def datetime_round(dt, precision='day'):
|
|||
'second': 1,
|
||||
}
|
||||
roundto = lambda x, n: ((x + n / 2) // n) * n
|
||||
timestamp = roundto(calendar.timegm(dt.timetuple()), unit_seconds[precision])
|
||||
return datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
|
||||
timestamp = roundto(calendar.timegm(dt_.timetuple()), unit_seconds[precision])
|
||||
return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc)
|
||||
|
||||
|
||||
def hyphenate_date(date_str):
|
||||
|
@ -1357,11 +1357,11 @@ class DateRange:
|
|||
if start is not None:
|
||||
self.start = date_from_str(start, strict=True)
|
||||
else:
|
||||
self.start = datetime.datetime.min.date()
|
||||
self.start = dt.datetime.min.date()
|
||||
if end is not None:
|
||||
self.end = date_from_str(end, strict=True)
|
||||
else:
|
||||
self.end = datetime.datetime.max.date()
|
||||
self.end = dt.datetime.max.date()
|
||||
if self.start > self.end:
|
||||
raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
|
||||
|
||||
|
@ -1372,7 +1372,7 @@ class DateRange:
|
|||
|
||||
def __contains__(self, date):
|
||||
"""Check if the date is in the range"""
|
||||
if not isinstance(date, datetime.date):
|
||||
if not isinstance(date, dt.date):
|
||||
date = date_from_str(date)
|
||||
return self.start <= date <= self.end
|
||||
|
||||
|
@ -1996,12 +1996,12 @@ def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
|
|||
if isinstance(timestamp, (int, float)): # unix timestamp
|
||||
# Using naive datetime here can break timestamp() in Windows
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
|
||||
# Also, datetime.datetime.fromtimestamp breaks for negative timestamps
|
||||
# Also, dt.datetime.fromtimestamp breaks for negative timestamps
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
|
||||
datetime_object = (datetime.datetime.fromtimestamp(0, datetime.timezone.utc)
|
||||
+ datetime.timedelta(seconds=timestamp))
|
||||
datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc)
|
||||
+ dt.timedelta(seconds=timestamp))
|
||||
elif isinstance(timestamp, str): # assume YYYYMMDD
|
||||
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
|
||||
datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d')
|
||||
date_format = re.sub( # Support %s on windows
|
||||
r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
|
||||
return datetime_object.strftime(date_format)
|
||||
|
@ -4490,10 +4490,10 @@ def write_xattr(path, key, value):
|
|||
|
||||
|
||||
def random_birthday(year_field, month_field, day_field):
|
||||
start_date = datetime.date(1950, 1, 1)
|
||||
end_date = datetime.date(1995, 12, 31)
|
||||
start_date = dt.date(1950, 1, 1)
|
||||
end_date = dt.date(1995, 12, 31)
|
||||
offset = random.randint(0, (end_date - start_date).days)
|
||||
random_date = start_date + datetime.timedelta(offset)
|
||||
random_date = start_date + dt.timedelta(offset)
|
||||
return {
|
||||
year_field: str(random_date.year),
|
||||
month_field: str(random_date.month),
|
||||
|
@ -4672,7 +4672,7 @@ def time_seconds(**kwargs):
|
|||
"""
|
||||
Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)
|
||||
"""
|
||||
return time.time() + datetime.timedelta(**kwargs).total_seconds()
|
||||
return time.time() + dt.timedelta(**kwargs).total_seconds()
|
||||
|
||||
|
||||
# create a JSON Web Signature (jws) with HS256 algorithm
|
||||
|
@ -5415,6 +5415,17 @@ class FormatSorter:
|
|||
return tuple(self._calculate_field_preference(format, field) for field in self._order)
|
||||
|
||||
|
||||
def filesize_from_tbr(tbr, duration):
    """
    Estimate the size of a stream from its total bitrate and its duration.

    @param tbr:      Total bitrate in kbps (1000 bits/sec)
    @param duration: Duration in seconds
    @returns         Filesize in bytes (truncated to an int),
                     or None if either argument is None
    """
    # Without both inputs there is nothing to estimate
    if duration is None or tbr is None:
        return None

    # 1 kbps == 1000 bits/sec == 125 bytes/sec
    bytes_per_second_per_kbps = 1000 / 8
    return int(duration * tbr * bytes_per_second_per_kbps)
|
||||
|
||||
|
||||
# XXX: Temporary
|
||||
class _YDLLogger:
|
||||
def __init__(self, ydl=None):
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import collections.abc
|
||||
import contextlib
|
||||
import http.cookies
|
||||
import inspect
|
||||
import itertools
|
||||
import re
|
||||
|
@ -28,7 +29,8 @@ def traverse_obj(
|
|||
|
||||
Each of the provided `paths` is tested and the first producing a valid result will be returned.
|
||||
The next path will also be tested if the path branched but no results could be found.
|
||||
Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
|
||||
Supported values for traversal are `Mapping`, `Iterable`, `re.Match`,
|
||||
`xml.etree.ElementTree` (xpath) and `http.cookies.Morsel`.
|
||||
Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
|
||||
|
||||
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
|
||||
|
@ -36,8 +38,8 @@ def traverse_obj(
|
|||
The keys in the path can be one of:
|
||||
- `None`: Return the current object.
|
||||
- `set`: Requires the only item in the set to be a type or function,
|
||||
like `{type}`/`{func}`. If a `type`, returns only values
|
||||
of this type. If a function, returns `func(obj)`.
|
||||
like `{type}`/`{type, type, ...}`/`{func}`. If a `type`, returns only
|
||||
values of this type. If a function, returns `func(obj)`.
|
||||
- `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
|
||||
- `slice`: Branch out and return all values in `obj[key]`.
|
||||
- `Ellipsis`: Branch out and return a list of all values.
|
||||
|
@ -48,8 +50,10 @@ def traverse_obj(
|
|||
For `Iterable`s, `key` is the index of the value.
|
||||
For `re.Match`es, `key` is the group number (0 = full match)
|
||||
as well as additionally any group names, if given.
|
||||
- `dict` Transform the current object and return a matching dict.
|
||||
- `dict`: Transform the current object and return a matching dict.
|
||||
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
|
||||
- `any`-builtin: Take the first matching object and return it, resetting branching.
|
||||
- `all`-builtin: Take all matching objects and return them as a list, resetting branching.
|
||||
|
||||
`tuple`, `list`, and `dict` all support nested paths and branches.
|
||||
|
||||
|
@ -102,10 +106,10 @@ def traverse_obj(
|
|||
result = obj
|
||||
|
||||
elif isinstance(key, set):
|
||||
assert len(key) == 1, 'Set should only be used to wrap a single item'
|
||||
item = next(iter(key))
|
||||
if isinstance(item, type):
|
||||
if isinstance(obj, item):
|
||||
if len(key) > 1 or isinstance(item, type):
|
||||
assert all(isinstance(item, type) for item in key)
|
||||
if isinstance(obj, tuple(key)):
|
||||
result = obj
|
||||
else:
|
||||
result = try_call(item, args=(obj,))
|
||||
|
@ -117,6 +121,8 @@ def traverse_obj(
|
|||
|
||||
elif key is ...:
|
||||
branching = True
|
||||
if isinstance(obj, http.cookies.Morsel):
|
||||
obj = dict(obj, key=obj.key, value=obj.value)
|
||||
if isinstance(obj, collections.abc.Mapping):
|
||||
result = obj.values()
|
||||
elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
|
||||
|
@ -131,6 +137,8 @@ def traverse_obj(
|
|||
|
||||
elif callable(key):
|
||||
branching = True
|
||||
if isinstance(obj, http.cookies.Morsel):
|
||||
obj = dict(obj, key=obj.key, value=obj.value)
|
||||
if isinstance(obj, collections.abc.Mapping):
|
||||
iter_obj = obj.items()
|
||||
elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
|
||||
|
@ -157,6 +165,8 @@ def traverse_obj(
|
|||
} or None
|
||||
|
||||
elif isinstance(obj, collections.abc.Mapping):
|
||||
if isinstance(obj, http.cookies.Morsel):
|
||||
obj = dict(obj, key=obj.key, value=obj.value)
|
||||
result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else
|
||||
next((v for k, v in obj.items() if casefold(k) == key), None))
|
||||
|
||||
|
@ -179,7 +189,7 @@ def traverse_obj(
|
|||
|
||||
elif isinstance(obj, xml.etree.ElementTree.Element) and isinstance(key, str):
|
||||
xpath, _, special = key.rpartition('/')
|
||||
if not special.startswith('@') and special != 'text()':
|
||||
if not special.startswith('@') and not special.endswith('()'):
|
||||
xpath = key
|
||||
special = None
|
||||
|
||||
|
@ -198,7 +208,7 @@ def traverse_obj(
|
|||
return try_call(element.attrib.get, args=(special[1:],))
|
||||
if special == 'text()':
|
||||
return element.text
|
||||
assert False, f'apply_specials is missing case for {special!r}'
|
||||
raise SyntaxError(f'apply_specials is missing case for {special!r}')
|
||||
|
||||
if xpath:
|
||||
result = list(map(apply_specials, obj.iterfind(xpath)))
|
||||
|
@ -228,6 +238,15 @@ def traverse_obj(
|
|||
if not casesense and isinstance(key, str):
|
||||
key = key.casefold()
|
||||
|
||||
if key in (any, all):
|
||||
has_branched = False
|
||||
filtered_objs = (obj for obj in objs if obj not in (None, {}))
|
||||
if key is any:
|
||||
objs = (next(filtered_objs, None),)
|
||||
else:
|
||||
objs = (list(filtered_objs),)
|
||||
continue
|
||||
|
||||
if __debug__ and callable(key):
|
||||
# Verify function signature
|
||||
inspect.signature(key).bind(None, None)
|
||||
|
|
Loading…
Reference in New Issue
Block a user