Merge branch 'yt-dlp:master' into rls/arm-ubuntu-bump

commit 11f2cb1197
Author: bashonly
Date: 2024-05-04 11:17:04 -05:00
GPG key ID: 783F096F253D15B0 (no known key found for this signature in database)
51 changed files with 1696 additions and 771 deletions

========================================

@@ -12,6 +12,9 @@ on:
       unix:
         default: true
         type: boolean
+      linux_static:
+        default: true
+        type: boolean
       linux_arm:
         default: true
         type: boolean
@@ -27,9 +30,6 @@ on:
       windows32:
         default: true
         type: boolean
-      meta_files:
-        default: true
-        type: boolean
       origin:
         required: false
         default: ''
@@ -52,7 +52,11 @@ on:
         default: stable
         type: string
       unix:
-        description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip
+        description: yt-dlp, yt-dlp.tar.gz
+        default: true
+        type: boolean
+      linux_static:
+        description: yt-dlp_linux
         default: true
         type: boolean
       linux_arm:
@@ -75,10 +79,6 @@ on:
         description: yt-dlp_x86.exe
         default: true
         type: boolean
-      meta_files:
-        description: SHA2-256SUMS, SHA2-512SUMS, _update_spec
-        default: true
-        type: boolean
       origin:
         description: Origin
         required: false
@@ -112,27 +112,9 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
-     - uses: conda-incubator/setup-miniconda@v3
-       with:
-         miniforge-variant: Mambaforge
-         use-mamba: true
-         channels: conda-forge
-         auto-update-conda: true
-         activate-environment: ""
-         auto-activate-base: false
      - name: Install Requirements
        run: |
          sudo apt -y install zip pandoc man sed
-         cat > ./requirements.txt << EOF
-         python=3.10.*
-         pyinstaller
-         brotli-python
-         EOF
-         python devscripts/install_deps.py --print \
-           --exclude brotli --exclude brotlicffi \
-           --include secretstorage >> ./requirements.txt
-         mamba create -n build --file ./requirements.txt
      - name: Prepare
        run: |
          python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
@@ -141,30 +123,15 @@ jobs:
      - name: Build Unix platform-independent binary
        run: |
          make all tar
-     - name: Build Unix standalone binary
-       shell: bash -l {0}
-       run: |
-         unset LD_LIBRARY_PATH  # Harmful; set by setup-python
-         conda activate build
-         python -m bundle.pyinstaller --onedir
-         (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
-         python -m bundle.pyinstaller
-         mv ./dist/yt-dlp_linux ./yt-dlp_linux
-         mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
-         binaries=("yt-dlp" "yt-dlp_linux")
-         for binary in "${binaries[@]}"; do
-           chmod +x ./${binary}
-           cp ./${binary} ./${binary}_downgraded
-           version="$(./${binary} --version)"
-           ./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
-           downgraded_version="$(./${binary}_downgraded --version)"
-           [[ "$version" != "$downgraded_version" ]]
-         done
+         chmod +x ./yt-dlp
+         cp ./yt-dlp ./yt-dlp_downgraded
+         version="$(./yt-dlp --version)"
+         ./yt-dlp_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+         downgraded_version="$(./yt-dlp_downgraded --version)"
+         [[ "$version" != "$downgraded_version" ]]
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
@@ -172,8 +139,39 @@ jobs:
          path: |
            yt-dlp
            yt-dlp.tar.gz
-           yt-dlp_linux
-           yt-dlp_linux.zip
+         compression-level: 0
+
+  linux_static:
+    needs: process
+    if: inputs.linux_static
+    runs-on: ubuntu-latest
+    steps:
+     - uses: actions/checkout@v4
+     - name: Build static executable
+       env:
+         channel: ${{ inputs.channel }}
+         origin: ${{ needs.process.outputs.origin }}
+         version: ${{ inputs.version }}
+       run: |
+         mkdir ~/build
+         cd bundle/docker
+         docker compose up --build static
+         sudo chown "${USER}:docker" ~/build/yt-dlp_linux
+     - name: Verify --update-to
+       if: vars.UPDATE_TO_VERIFICATION
+       run: |
+         chmod +x ~/build/yt-dlp_linux
+         cp ~/build/yt-dlp_linux ~/build/yt-dlp_linux_downgraded
+         version="$(~/build/yt-dlp_linux --version)"
+         ~/build/yt-dlp_linux_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+         downgraded_version="$(~/build/yt-dlp_linux_downgraded --version)"
+         [[ "$version" != "$downgraded_version" ]]
+     - name: Upload artifacts
+       uses: actions/upload-artifact@v4
+       with:
+         name: build-bin-${{ github.job }}
+         path: |
+           ~/build/yt-dlp_linux
          compression-level: 0

  linux_arm:
@@ -256,7 +254,7 @@ jobs:
          # We need to fuse our own universal2 wheels for curl_cffi
          python3 -m pip install -U --user delocate
          mkdir curl_cffi_whls curl_cffi_universal2
-         python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
+         python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
          for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
            python3 -m pip download \
              --only-binary=:all: \
@@ -302,7 +300,7 @@ jobs:
  macos_legacy:
    needs: process
    if: inputs.macos_legacy
-   runs-on: macos-latest
+   runs-on: macos-12
    steps:
      - uses: actions/checkout@v4
@@ -322,7 +320,7 @@ jobs:
        run: |
          brew install coreutils
          python3 devscripts/install_deps.py --user -o --include build
-         python3 devscripts/install_deps.py --user --include pyinstaller --include curl_cffi
+         python3 devscripts/install_deps.py --user --include pyinstaller
      - name: Prepare
        run: |
@@ -364,7 +362,7 @@ jobs:
      - name: Install Requirements
        run: |  # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
          python devscripts/install_deps.py -o --include build
-         python devscripts/install_deps.py --include py2exe --include curl_cffi
+         python devscripts/install_deps.py --include py2exe --include curl-cffi
          python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
      - name: Prepare
@@ -449,10 +447,11 @@ jobs:
          compression-level: 0

  meta_files:
-    if: inputs.meta_files && always() && !cancelled()
+    if: always() && !cancelled()
    needs:
      - process
      - unix
+      - linux_static
      - linux_arm
      - macos
      - macos_legacy
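Both the unix and linux_static jobs verify updatability the same way: copy the freshly built binary, force only the copy to downgrade to an old release with --update-to, then assert the two binaries now report different versions. A rough standalone sketch of that check in Python (illustrative helper and paths, not part of the repository):

    import shutil
    import subprocess

    def verify_update_to(binary='./yt-dlp', old_tag='yt-dlp/yt-dlp@2023.03.04'):
        # Copy the fresh binary and downgrade only the copy
        downgraded = binary + '_downgraded'
        shutil.copy(binary, downgraded)
        version = subprocess.run(
            [binary, '--version'], capture_output=True, text=True, check=True).stdout.strip()
        subprocess.run([downgraded, '-v', '--update-to', old_tag], check=True)
        old_version = subprocess.run(
            [downgraded, '--version'], capture_output=True, text=True, check=True).stdout.strip()
        # The downgraded copy must report a different (older) version
        assert version != old_version, 'self-update did not change the binary'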

========================================

@@ -53,7 +53,7 @@ jobs:
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install test requirements
-       run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
+       run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
      - name: Run tests
        continue-on-error: False
        run: |

========================================

@@ -27,6 +27,8 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
+       with:
+         python-version: '3.8'
      - name: Install flake8
        run: python3 ./devscripts/install_deps.py -o --include dev
      - name: Make lazy extractors

========================================

@@ -600,3 +600,13 @@ xpadev-net
 Xpl0itU
 YoshichikaAAA
 zhijinwuu
+alb
+hruzgar
+kasper93
+leoheitmannruiz
+luiso1979
+nipotan
+Offert4324
+sta1us
+Tomoka1
+trwstin

========================================

@@ -4,6 +4,101 @@
 # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
 -->

+### 2024.04.09
+
+#### Important changes
+- Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)
+    - The shell escape function now properly escapes `%`, `\` and `\n`.
+    - `utils.Popen` has been patched accordingly.
+
+#### Core changes
+- [Add new option `--progress-delta`](https://github.com/yt-dlp/yt-dlp/commit/9590cc6b4768e190183d7d071a6c78170889116a) ([#9082](https://github.com/yt-dlp/yt-dlp/issues/9082)) by [Grub4K](https://github.com/Grub4K)
+- [Add new options `--impersonate` and `--list-impersonate-targets`](https://github.com/yt-dlp/yt-dlp/commit/0b81d4d252bd065ccd352722987ea34fe17f9244) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- [Add option `--no-break-on-existing`](https://github.com/yt-dlp/yt-dlp/commit/16be117729150b2784f3b17755c886cb0cf73374) ([#9610](https://github.com/yt-dlp/yt-dlp/issues/9610)) by [bashonly](https://github.com/bashonly)
+- [Fix `filesize_approx` calculation](https://github.com/yt-dlp/yt-dlp/commit/86e3b82261e8ebc6c6707c09544c9dfb8907c0fd) ([#9560](https://github.com/yt-dlp/yt-dlp/issues/9560)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+- [Infer `acodec` for single-codec containers](https://github.com/yt-dlp/yt-dlp/commit/86a972033e05fea80e5fe7f2aff6723dbe2f3952) by [pukkandan](https://github.com/pukkandan)
+- [Prevent RCE when using `--exec` with `%q` (CVE-2024-22423)](https://github.com/yt-dlp/yt-dlp/commit/ff07792676f404ffff6ee61b5638c9dc1a33a37a) by [Grub4K](https://github.com/Grub4K)
+- **cookies**: [Add `--cookies-from-browser` support for Firefox Flatpak](https://github.com/yt-dlp/yt-dlp/commit/2ab2651a4a7be18939e2b4cb21be79fe477c797a) ([#9619](https://github.com/yt-dlp/yt-dlp/issues/9619)) by [un-def](https://github.com/un-def)
+- **utils**
+    - `traverse_obj`
+        - [Allow unbranching using `all` and `any`](https://github.com/yt-dlp/yt-dlp/commit/3699eeb67cad333272b14a42dd3843d93fda1a2e) ([#9571](https://github.com/yt-dlp/yt-dlp/issues/9571)) by [Grub4K](https://github.com/Grub4K)
+        - [Convenience improvements](https://github.com/yt-dlp/yt-dlp/commit/32abfb00bdbd119ca675fdc6d1719331f0a2741a) ([#9577](https://github.com/yt-dlp/yt-dlp/issues/9577)) by [Grub4K](https://github.com/Grub4K)
+
+#### Extractor changes
+- [Add extractor impersonate API](https://github.com/yt-dlp/yt-dlp/commit/50c29352312f5662acf9a64b0012766f5c40af61) ([#9474](https://github.com/yt-dlp/yt-dlp/issues/9474)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- **afreecatv**
+    - [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/9415f1a5ef88482ebafe3083e8bcb778ac512df7) ([#9566](https://github.com/yt-dlp/yt-dlp/issues/9566)) by [bashonly](https://github.com/bashonly), [Tomoka1](https://github.com/Tomoka1)
+    - live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9073ae6458f4c6a832aa832c67174c61852869be) ([#9348](https://github.com/yt-dlp/yt-dlp/issues/9348)) by [hui1601](https://github.com/hui1601)
+- **asobistage**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0284f1fee202302a78888420f933deae19d9f4e1) ([#8735](https://github.com/yt-dlp/yt-dlp/issues/8735)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **box**: [Support URLs without file IDs](https://github.com/yt-dlp/yt-dlp/commit/07f5b2f7570fd9ac85aed17f4c0118f6eac77beb) ([#9504](https://github.com/yt-dlp/yt-dlp/issues/9504)) by [shreyasminocha](https://github.com/shreyasminocha)
+- **cbc.ca**: player: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/b49d5ffc53a72d8245ba319ff07bdc5b8c6a4f0c) ([#9561](https://github.com/yt-dlp/yt-dlp/issues/9561)) by [trainman261](https://github.com/trainman261)
+- **crunchyroll**
+    - [Extract `vo_adaptive_hls` formats by default](https://github.com/yt-dlp/yt-dlp/commit/be77923ffe842f667971019460f6005f3cad01eb) ([#9447](https://github.com/yt-dlp/yt-dlp/issues/9447)) by [bashonly](https://github.com/bashonly)
    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/954e57e405f79188450eb30103a9308732cd318f) ([#9615](https://github.com/yt-dlp/yt-dlp/issues/9615)) by [bytedream](https://github.com/bytedream)
+- **dropbox**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/a48cc86d6f6b20427553620c2ddb990ede6a4b41) ([#9627](https://github.com/yt-dlp/yt-dlp/issues/9627)) by [bashonly](https://github.com/bashonly)
+- **fathom**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/bc2b8c0596fd6b75af24822c4f0f1da6783d71f7) ([#9495](https://github.com/yt-dlp/yt-dlp/issues/9495)) by [src-tinkerer](https://github.com/src-tinkerer)
+- **gofile**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0da66980d3193cad3dae0120cddddbfcabddf7a1) ([#9446](https://github.com/yt-dlp/yt-dlp/issues/9446)) by [jazz1611](https://github.com/jazz1611)
+- **imgur**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/86d2f4d24849af0d1f3af7c0e2ac43bf8a058f74) ([#9471](https://github.com/yt-dlp/yt-dlp/issues/9471)) by [trwstin](https://github.com/trwstin)
+- **jiosaavn**
+    - [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/0ae16ceb1846cc4e609b70ce7c5d8e7458efceb2) ([#9612](https://github.com/yt-dlp/yt-dlp/issues/9612)) by [bashonly](https://github.com/bashonly)
+    - [Fix format extensions](https://github.com/yt-dlp/yt-dlp/commit/443e206ec41e64ca2aef61d8ef91640fb69b3113) ([#9609](https://github.com/yt-dlp/yt-dlp/issues/9609)) by [bashonly](https://github.com/bashonly)
+    - [Support playlists](https://github.com/yt-dlp/yt-dlp/commit/2e94602f241f6e41bdc48576c61089435529339b) ([#9622](https://github.com/yt-dlp/yt-dlp/issues/9622)) by [bashonly](https://github.com/bashonly)
+- **joqrag**: [Fix live status detection](https://github.com/yt-dlp/yt-dlp/commit/f2fd449b46c4058222e1744f7a35caa20b2d003d) ([#9624](https://github.com/yt-dlp/yt-dlp/issues/9624)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **kick**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/c8a61a910096c77ce08dad5e1b2fbda5eb964156) ([#9611](https://github.com/yt-dlp/yt-dlp/issues/9611)) by [bashonly](https://github.com/bashonly)
+- **loom**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/f859ed3ba1e8b129ae6a467592c65687e73fbca1) ([#8686](https://github.com/yt-dlp/yt-dlp/issues/8686)) by [bashonly](https://github.com/bashonly), [hruzgar](https://github.com/hruzgar)
+- **medici**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4cd9e251b9abada107b10830de997bf4d79ca369) ([#9518](https://github.com/yt-dlp/yt-dlp/issues/9518)) by [Offert4324](https://github.com/Offert4324)
+- **mixch**
+    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4c3b7a0769706f7f0ea24adf1f219d5ae82d2b07) ([#9608](https://github.com/yt-dlp/yt-dlp/issues/9608)) by [bashonly](https://github.com/bashonly), [nipotan](https://github.com/nipotan)
+    - archive: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c59de48e2bb4c681b03b93b584a05f52609ce4a0) ([#8761](https://github.com/yt-dlp/yt-dlp/issues/8761)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **nhk**: [Fix NHK World extractors](https://github.com/yt-dlp/yt-dlp/commit/4af9d5c2f6aa81403ae2a8a5ae3cc824730f0b86) ([#9623](https://github.com/yt-dlp/yt-dlp/issues/9623)) by [bashonly](https://github.com/bashonly)
+- **patreon**: [Do not extract dead embed URLs](https://github.com/yt-dlp/yt-dlp/commit/36b240f9a72af57eb2c9d927ebb7fd1c917ebf18) ([#9613](https://github.com/yt-dlp/yt-dlp/issues/9613)) by [johnvictorfs](https://github.com/johnvictorfs)
+- **radio1be**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/36baaa10e06715ccba06b78885b2042c4844c826) ([#9122](https://github.com/yt-dlp/yt-dlp/issues/9122)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **sharepoint**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ff349ff94aae0b2b148bd3670f7c91d39c2f1d8e) ([#6531](https://github.com/yt-dlp/yt-dlp/issues/6531)) by [bashonly](https://github.com/bashonly), [C0D3D3V](https://github.com/C0D3D3V)
+- **sonylivseries**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/f2868b26e917354203f82a370ad2396646edb813) ([#9423](https://github.com/yt-dlp/yt-dlp/issues/9423)) by [bashonly](https://github.com/bashonly)
+- **soundcloud**
+    - [Adjust format sorting](https://github.com/yt-dlp/yt-dlp/commit/a2d0840739cddd585d24e0ce4796394fc8a4fa2e) ([#9584](https://github.com/yt-dlp/yt-dlp/issues/9584)) by [bashonly](https://github.com/bashonly)
+    - [Support cookies](https://github.com/yt-dlp/yt-dlp/commit/97362712a1f2b04e735bdf54f749ad99165a62fe) ([#9586](https://github.com/yt-dlp/yt-dlp/issues/9586)) by [bashonly](https://github.com/bashonly)
+    - [Support retries for API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/246571ae1d867df8bf31a056bdf3bbbfd398366a) ([#9585](https://github.com/yt-dlp/yt-dlp/issues/9585)) by [bashonly](https://github.com/bashonly)
+- **thisoldhouse**: [Support Brightcove embeds](https://github.com/yt-dlp/yt-dlp/commit/0df63cce69026d2f4c0cbb4dd36163e83eac93dc) ([#9576](https://github.com/yt-dlp/yt-dlp/issues/9576)) by [bashonly](https://github.com/bashonly)
+- **tiktok**
+    - [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/cb61e20c266facabb7a30f9ce53bd79dfc158475) ([#9548](https://github.com/yt-dlp/yt-dlp/issues/9548)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+    - [Prefer non-bytevc2 formats](https://github.com/yt-dlp/yt-dlp/commit/63f685f341f35f6f02b0368d1ba53bdb5b520410) ([#9575](https://github.com/yt-dlp/yt-dlp/issues/9575)) by [bashonly](https://github.com/bashonly)
+    - [Restore `carrier_region` API parameter](https://github.com/yt-dlp/yt-dlp/commit/fc53ec13ff1ee926a3e533a68cfca8acc887b661) ([#9637](https://github.com/yt-dlp/yt-dlp/issues/9637)) by [bashonly](https://github.com/bashonly)
+    - [Update API hostname](https://github.com/yt-dlp/yt-dlp/commit/8c05b3ebae23c5b444857549a85b84004c01a536) ([#9444](https://github.com/yt-dlp/yt-dlp/issues/9444)) by [bashonly](https://github.com/bashonly)
+- **twitch**: [Extract AV1 and HEVC formats](https://github.com/yt-dlp/yt-dlp/commit/02f93ff51b3ff9436d60c4993562b366eaae8851) ([#9158](https://github.com/yt-dlp/yt-dlp/issues/9158)) by [kasper93](https://github.com/kasper93)
+- **vkplay**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/b15b0c1d2106437ec61a5c436c543e8760eac160) ([#9636](https://github.com/yt-dlp/yt-dlp/issues/9636)) by [bashonly](https://github.com/bashonly)
+- **xvideos**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/aa7e9ae4f48276bd5d0173966c77db9484f65a0a) ([#9502](https://github.com/yt-dlp/yt-dlp/issues/9502)) by [sta1us](https://github.com/sta1us)
+- **youtube**
+    - [Calculate more accurate `filesize`](https://github.com/yt-dlp/yt-dlp/commit/a25a424323267e3f6f9f63c0b62df499bd7b8d46) by [pukkandan](https://github.com/pukkandan)
+    - [Update `android` params](https://github.com/yt-dlp/yt-dlp/commit/e7b17fce14775bd2448695c8eb7379b8d31d3537) by [pukkandan](https://github.com/pukkandan)
+    - search: [Fix params for uncensored results](https://github.com/yt-dlp/yt-dlp/commit/17d248a58781e2588d18a5ebe00c441d10011fcd) ([#9456](https://github.com/yt-dlp/yt-dlp/issues/9456)) by [alb](https://github.com/alb), [pukkandan](https://github.com/pukkandan)
+
+#### Downloader changes
+- **ffmpeg**: [Accept output args from info dict](https://github.com/yt-dlp/yt-dlp/commit/9c42b7eef547e826e9fcc7beb6706a2523949d05) ([#9278](https://github.com/yt-dlp/yt-dlp/issues/9278)) by [bashonly](https://github.com/bashonly)
+
+#### Networking changes
+- [Respect `SSLKEYLOGFILE` environment variable](https://github.com/yt-dlp/yt-dlp/commit/79a451e5763eda8b10d00684d5d3378f3255ee01) ([#9543](https://github.com/yt-dlp/yt-dlp/issues/9543)) by [luiso1979](https://github.com/luiso1979)
+- **Request Handler**
+    - curlcffi: [Add support for `curl_cffi`](https://github.com/yt-dlp/yt-dlp/commit/52f5be1f1e0dc45bb397ab950f564721976a39bf) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+    - websockets: [Workaround race condition causing issues on PyPy](https://github.com/yt-dlp/yt-dlp/commit/e5d4f11104ce7ea1717a90eea82c0f7d230ea5d5) ([#9514](https://github.com/yt-dlp/yt-dlp/issues/9514)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Misc. changes
+- **build**
+    - [Do not include `curl_cffi` in `macos_legacy`](https://github.com/yt-dlp/yt-dlp/commit/b19ae095fdddd43c2a2c67d10fbe0d9a645bb98f) ([#9653](https://github.com/yt-dlp/yt-dlp/issues/9653)) by [bashonly](https://github.com/bashonly)
+    - [Optional dependencies cleanup](https://github.com/yt-dlp/yt-dlp/commit/58dd0f8d1eee6bc9fdc57f1923bed772fa3c946d) ([#9550](https://github.com/yt-dlp/yt-dlp/issues/9550)) by [bashonly](https://github.com/bashonly)
+    - [Print SHA sums to GHA logs](https://github.com/yt-dlp/yt-dlp/commit/e8032503b9517465b0e86d776fc1e60d8795d673) ([#9582](https://github.com/yt-dlp/yt-dlp/issues/9582)) by [bashonly](https://github.com/bashonly)
+    - [Update changelog for tarball and sdist](https://github.com/yt-dlp/yt-dlp/commit/17b96974a334688f76b57d350e07cae8cda46877) ([#9425](https://github.com/yt-dlp/yt-dlp/issues/9425)) by [bashonly](https://github.com/bashonly)
+- **cleanup**
+    - [Standardize `import datetime as dt`](https://github.com/yt-dlp/yt-dlp/commit/c305a25c1b16bcf7a5ec499c3b786ed1e2c748da) ([#8978](https://github.com/yt-dlp/yt-dlp/issues/8978)) by [pukkandan](https://github.com/pukkandan)
+    - ie: [No `from` stdlib imports in extractors](https://github.com/yt-dlp/yt-dlp/commit/e3a3ed8a981d9395c4859b6ef56cd02bc3148db2) by [pukkandan](https://github.com/pukkandan)
+    - Miscellaneous: [216f6a3](https://github.com/yt-dlp/yt-dlp/commit/216f6a3cb57824e6a3c859649ce058c199b1b247) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+- **docs**
+    - [Update yt-dlp tagline](https://github.com/yt-dlp/yt-dlp/commit/388c979ac63a8774339fac2516fe1cc852b4276e) ([#9481](https://github.com/yt-dlp/yt-dlp/issues/9481)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+    - [Various manpage fixes](https://github.com/yt-dlp/yt-dlp/commit/df0e138fc02ae2764a44f2f59fc93c756c4d3ee2) by [leoheitmannruiz](https://github.com/leoheitmannruiz)
+- **test**
+    - [Workaround websocket server hanging](https://github.com/yt-dlp/yt-dlp/commit/f849d77ab54788446b995d256e1ee0894c4fb927) ([#9467](https://github.com/yt-dlp/yt-dlp/issues/9467)) by [coletdjnz](https://github.com/coletdjnz)
+    - `traversal`: [Separate traversal tests](https://github.com/yt-dlp/yt-dlp/commit/979ce2e786f2ee3fc783b6dc1ef4188d8805c923) ([#9574](https://github.com/yt-dlp/yt-dlp/issues/9574)) by [Grub4K](https://github.com/Grub4K)
+
 ### 2024.03.10

 #### Core changes

========================================

@@ -10,9 +10,12 @@ tar: yt-dlp.tar.gz
 # intended use: when building a source distribution,
 # make pypi-files && python3 -m build -sn .
 pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
             completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*

-.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
+.PHONY: all clean clean-all clean-test clean-dist clean-cache \
+        completions completion-bash completion-fish completion-zsh \
+        doc issuetemplates supportedsites ot offlinetest codetest test \
+        tar pypi-files lazy-extractors install uninstall

 clean-test:
 	rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \

========================================

@@ -158,6 +158,7 @@ When using `--update`/`-U`, a release binary will only update to its current cha
 You may also use `--update-to <repository>` (`<owner>/<repository>`) to update to a channel on a completely different repository. Be careful with what repository you are updating to though, there is no verification done for binaries from different repositories.

 Example usage:
+* `yt-dlp --update-to master` switch to the `master` channel and update to its latest release
 * `yt-dlp --update-to stable@2023.07.06` upgrade/downgrade to release to `stable` channel tag `2023.07.06`
 * `yt-dlp --update-to 2023.10.07` upgrade/downgrade to tag `2023.10.07` if it exists on the current channel
@@ -201,8 +202,8 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
 The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.

 * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
-    * Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
-    * Only included in `yt-dlp.exe`, `yt-dlp_macos` and `yt-dlp_macos_legacy` builds
+    * Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
+    * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds

 ### Metadata
@@ -481,6 +482,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
    --max-downloads NUMBER          Abort after downloading NUMBER files
    --break-on-existing             Stop the download process when encountering
                                    a file that is in the archive
+   --no-break-on-existing          Do not stop the download process when
+                                   encountering a file that is in the archive
+                                   (default)
    --break-per-input               Alters --max-downloads, --break-on-existing,
                                    --break-match-filter, and autonumber to
                                    reset per input URL
@@ -754,6 +758,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
                                    accessible under "progress" key. E.g.
                                    --console-title --progress-template
                                    "download-title:%(info.id)s-%(progress.eta)s"
+   --progress-delta SECONDS        Time between progress output (default: 0)
    -v, --verbose                   Print various debugging information
    --dump-pages                    Print downloaded pages encoded using base64
                                    to debug problems (very verbose)
@@ -1781,8 +1786,7 @@ The following extractors use this feature:
 * `version`: The video version to extract - `uncut` or `simulcast`

 #### crunchyrollbeta (Crunchyroll)
-* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
-* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
+* `hardsub`: One or more hardsub versions to extract (in order of preference), or `all` (default: `None` = no hardsubs will be extracted), e.g. `crunchyrollbeta:hardsub=en-US,de-DE`

 #### vikichannel
 * `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
@@ -1833,6 +1837,9 @@ The following extractors use this feature:
 #### jiosaavn
 * `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`

+#### afreecatvlive
+* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
+
 **Note**: These options may be changed/removed in the future without concern for backward compatibility

 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
@@ -1890,6 +1897,7 @@ Plugins can be installed using various methods and locations.
 `.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.
 * e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins/<type>/myplugin.py`

 Run yt-dlp with `--verbose` to check if the plugin has been loaded.

========================================
bundle/docker/compose.yml (new file, 10 lines)

@@ -0,0 +1,10 @@
+services:
+  static:
+    build: static
+    environment:
+      channel: ${channel}
+      origin: ${origin}
+      version: ${version}
+    volumes:
+      - ~/build:/build
+      - ../..:/yt-dlp

========================================

@@ -0,0 +1,21 @@
+FROM alpine:3.19 as base
+
+RUN apk --update add --no-cache \
+    build-base \
+    python3 \
+    pipx \
+    ;
+
+RUN pipx install pyinstaller
+# Requires above step to prepare the shared venv
+RUN ~/.local/share/pipx/shared/bin/python -m pip install -U wheel
+
+RUN apk --update add --no-cache \
+    scons \
+    patchelf \
+    binutils \
+    ;
+RUN pipx install staticx
+
+WORKDIR /yt-dlp
+COPY entrypoint.sh /entrypoint.sh
+ENTRYPOINT /entrypoint.sh

========================================

@@ -0,0 +1,13 @@
+#!/bin/ash
+set -e
+
+source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
+python -m devscripts.install_deps --include secretstorage
+python -m devscripts.make_lazy_extractors
+python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
+python -m bundle.pyinstaller
+deactivate
+
+source ~/.local/share/pipx/venvs/staticx/bin/activate
+staticx /yt-dlp/dist/yt-dlp_linux /build/yt-dlp_linux
+deactivate
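Taken together, compose.yml, the Dockerfile and entrypoint.sh build yt-dlp with PyInstaller inside Alpine and then run staticx over the result, so the final binary carries its own dependencies and runs on any distro. The CI job above drives this through docker compose; an equivalent local invocation might look like the following sketch (assumes Docker is installed; the three variable values are made-up examples that CI normally supplies from workflow inputs):

    import os
    import subprocess

    env = {
        **os.environ,
        'channel': 'stable',        # assumed example values; the workflow
        'origin': 'yt-dlp/yt-dlp',  # passes these in from its inputs
        'version': '2024.04.09',
    }
    # The compose service mounts ~/build as /build and the repo root as /yt-dlp
    os.makedirs(os.path.expanduser('~/build'), exist_ok=True)
    subprocess.run(['docker', 'compose', 'up', '--build', 'static'],
                   cwd='bundle/docker', env=env, check=True)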

========================================

@@ -127,8 +127,25 @@
     "short": "[ie] Support multi-period MPD streams (#6654)",
     "authors": ["alard", "pukkandan"]
   },
+  {
+    "action": "change",
+    "when": "aa7e9ae4f48276bd5d0173966c77db9484f65a0a",
+    "short": "[ie/xvideos] Support new URL format (#9502)",
+    "authors": ["sta1us"]
+  },
   {
     "action": "remove",
     "when": "22e4dfacb61f62dfbb3eb41b31c7b69ba1059b80"
+  },
+  {
+    "action": "change",
+    "when": "e3a3ed8a981d9395c4859b6ef56cd02bc3148db2",
+    "short": "[cleanup:ie] No `from` stdlib imports in extractors",
+    "authors": ["pukkandan"]
+  },
+  {
+    "action": "add",
+    "when": "9590cc6b4768e190183d7d071a6c78170889116a",
+    "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n    - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n    - `utils.Popen` has been patched accordingly."
   }
 ]
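These entries patch the auto-generated changelog: "change" rewrites a commit's message and authors, "remove" drops a commit, and "add" injects an entry with no matching commit. A simplified sketch of how such overrides could be applied (hypothetical helper; the real logic lives in the devscripts changelog generator):

    import json

    def apply_overrides(commits, overrides):
        # commits: {commit_hash: {'short': str, 'authors': list}}
        for override in overrides:
            commit_hash, action = override['when'], override['action']
            if action == 'remove':
                commits.pop(commit_hash, None)
            elif action == 'change' and commit_hash in commits:
                commits[commit_hash].update(
                    {key: override[key] for key in ('short', 'authors') if key in override})
            elif action == 'add':
                commits[commit_hash] = {
                    'short': override['short'],
                    'authors': override.get('authors', []),
                }
        return commits

    with open('devscripts/changelog_override.json') as f:
        overrides = json.load(f)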

========================================

@@ -43,6 +43,27 @@ def filter_excluded_sections(readme):
         '', readme)


+def _convert_code_blocks(readme):
+    current_code_block = None
+
+    for line in readme.splitlines(True):
+        if current_code_block:
+            if line == current_code_block:
+                current_code_block = None
+                yield '\n'
+            else:
+                yield f'    {line}'
+        elif line.startswith('```'):
+            current_code_block = line.count('`') * '`' + '\n'
+            yield '\n'
+        else:
+            yield line
+
+
+def convert_code_blocks(readme):
+    return ''.join(_convert_code_blocks(readme))
+
+
 def move_sections(readme):
     MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
     sections = re.findall(r'(?m)^%s$' % (
@@ -65,8 +86,10 @@ def move_sections(readme):

 def filter_options(readme):
     section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
+    section_new = section.replace('*', R'\*')
+
     options = '# OPTIONS\n'
-    for line in section.split('\n')[1:]:
+    for line in section_new.split('\n')[1:]:
         mobj = re.fullmatch(r'''(?x)
             \s{4}(?P<opt>-(?:,\s|[^\s])+)
             (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
@@ -86,7 +109,7 @@ def filter_options(readme):
     return readme.replace(section, options, 1)


-TRANSFORM = compose_functions(filter_excluded_sections, move_sections, filter_options)
+TRANSFORM = compose_functions(filter_excluded_sections, convert_code_blocks, move_sections, filter_options)


 def main():
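The new converter rewrites markdown fenced code blocks into blank-line-delimited, indented blocks, which is the form pandoc renders correctly in the manpage, and filter_options now escapes literal `*` so option descriptions are not italicized. A quick illustration using the functions above:

    readme = 'Example:\n```bash\nyt-dlp -F URL\n```\nDone.\n'
    print(convert_code_blocks(readme), end='')
    # Output:
    # Example:
    # <blank line>
    #     yt-dlp -F URL
    # <blank line>
    # Done.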

========================================

@@ -53,7 +53,7 @@ dependencies = [

 [project.optional-dependencies]
 default = []
-curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
+curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
 secretstorage = [
     "cffi",
     "secretstorage",

========================================

@@ -47,7 +47,7 @@
 - **aenetworks:show**
 - **AeonCo**
 - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
-- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com
+- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams
 - **afreecatv:user**
 - **AirTV**
 - **AitubeKZVideo**
@@ -105,6 +105,7 @@
 - **ArteTVPlaylist**
 - **asobichannel**: ASOBI CHANNEL
 - **asobichannel:tag**: ASOBI CHANNEL
+- **AsobiStage**: ASOBISTAGE (アソビステージ)
 - **AtresPlayer**: [*atresplayer*](## "netrc machine")
 - **AtScaleConfEvent**
 - **ATVAt**
@@ -436,6 +437,7 @@
 - **FacebookPluginsVideo**
 - **fancode:live**: [*fancode*](## "netrc machine") (**Currently broken**)
 - **fancode:vod**: [*fancode*](## "netrc machine") (**Currently broken**)
+- **Fathom**
 - **faz.net**
 - **fc2**: [*fc2*](## "netrc machine")
 - **fc2:embed**
@@ -633,8 +635,9 @@
 - **Jamendo**
 - **JamendoAlbum**
 - **JeuxVideo**: (**Currently broken**)
-- **JioSaavnAlbum**
-- **JioSaavnSong**
+- **jiosaavn:album**
+- **jiosaavn:playlist**
+- **jiosaavn:song**
 - **Joj**
 - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
 - **Jove**
@@ -716,6 +719,8 @@
 - **Lnk**
 - **LnkGo**
 - **loc**: Library of Congress
+- **loom**
+- **loom:folder**
 - **LoveHomePorn**
 - **LRTStream**
 - **LRTVOD**
@@ -1136,6 +1141,7 @@
 - **Radiko**
 - **RadikoRadio**
 - **radio.de**: (**Currently broken**)
+- **Radio1Be**
 - **radiocanada**
 - **radiocanada:audiovideo**
 - **RadioComercial**
@@ -1288,6 +1294,7 @@
 - **SeznamZpravyArticle**
 - **Shahid**: [*shahid*](## "netrc machine")
 - **ShahidShow**
+- **SharePoint**
 - **ShareVideosEmbed**
 - **ShemarooMe**
 - **ShowRoomLive**

========================================

@@ -1906,6 +1906,15 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
             expected_status=TEAPOT_RESPONSE_STATUS)
         self.assertEqual(content, TEAPOT_RESPONSE_BODY)

+    def test_search_nextjs_data(self):
+        data = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
+        self.assertEqual(self.ie._search_nextjs_data(data, None), {'props': {}})
+        self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
+        self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
+        self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
+        with self.assertRaises(DeprecationWarning):
+            self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
+

 if __name__ == '__main__':
     unittest.main()
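For context, `_search_nextjs_data` extracts the JSON blob that Next.js embeds in a `<script id="__NEXT_DATA__">` tag. A rough sketch of the contract the new test pins down (simplified; the real method lives in yt_dlp/extractor/common.py, and it raises DeprecationWarning for the legacy `default='{}'` call style rather than honoring it):

    import json
    import re

    NO_DEFAULT = object()

    def search_nextjs_data(webpage, video_id, *, fatal=True, default=NO_DEFAULT):
        if default is not NO_DEFAULT:
            fatal = False  # an explicit default implies non-fatal
        mobj = re.search(
            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>(?P<json>[^<]+)</script>',
            webpage)
        if not mobj:
            if fatal:
                raise ValueError(f'{video_id}: unable to extract Next.js data')
            return {} if default is NO_DEFAULT else default
        return json.loads(mobj.group('json'))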

========================================

@@ -26,27 +26,6 @@ _TEST_DATA = {


 class TestTraversal:
-    def test_dict_get(self):
-        FALSE_VALUES = {
-            'none': None,
-            'false': False,
-            'zero': 0,
-            'empty_string': '',
-            'empty_list': [],
-        }
-        d = {**FALSE_VALUES, 'a': 42}
-        assert dict_get(d, 'a') == 42
-        assert dict_get(d, 'b') is None
-        assert dict_get(d, 'b', 42) == 42
-        assert dict_get(d, ('a',)) == 42
-        assert dict_get(d, ('b', 'a')) == 42
-        assert dict_get(d, ('b', 'c', 'a', 'd')) == 42
-        assert dict_get(d, ('b', 'c')) is None
-        assert dict_get(d, ('b', 'c'), 42) == 42
-        for key, false_value in FALSE_VALUES.items():
-            assert dict_get(d, ('b', 'c', key)) is None
-            assert dict_get(d, ('b', 'c', key), skip_false_values=False) == false_value
-
     def test_traversal_base(self):
         assert traverse_obj(_TEST_DATA, ('str',)) == 'str', \
             'allow tuple path'

@@ -440,3 +419,26 @@ class TestTraversal:
             'function key should yield all values'
         assert traverse_obj(morsel, [(None,), any]) == morsel, \
             'Morsel should not be implicitly changed to dict on usage'
+
+
+class TestDictGet:
+    def test_dict_get(self):
+        FALSE_VALUES = {
+            'none': None,
+            'false': False,
+            'zero': 0,
+            'empty_string': '',
+            'empty_list': [],
+        }
+        d = {**FALSE_VALUES, 'a': 42}
+        assert dict_get(d, 'a') == 42
+        assert dict_get(d, 'b') is None
+        assert dict_get(d, 'b', 42) == 42
+        assert dict_get(d, ('a',)) == 42
+        assert dict_get(d, ('b', 'a')) == 42
+        assert dict_get(d, ('b', 'c', 'a', 'd')) == 42
+        assert dict_get(d, ('b', 'c')) is None
+        assert dict_get(d, ('b', 'c'), 42) == 42
+        for key, false_value in FALSE_VALUES.items():
+            assert dict_get(d, ('b', 'c', key)) is None
+            assert dict_get(d, ('b', 'c', key), skip_false_values=False) == false_value
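The move is purely organizational, but the relocated test documents dict_get's contract nicely: try keys in order, skipping falsy (but non-None) values unless skip_false_values=False. A minimal reimplementation consistent with these assertions (for reference only; the real helper lives in yt_dlp/utils):

    def dict_get(d, key_or_keys, default=None, skip_false_values=True):
        # A tuple/list of keys acts as an ordered fallback chain
        if isinstance(key_or_keys, (list, tuple)):
            for key in key_or_keys:
                value = d.get(key)
                if value is not None and (value or not skip_false_values):
                    return value
            return default
        return d.get(key_or_keys, default)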

========================================

@@ -2059,7 +2059,22 @@ Line 1
         assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')

     @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
-    def test_Popen_windows_escaping(self):
+    def test_windows_escaping(self):
+        tests = [
+            'test"&',
+            '%CMDCMDLINE:~-1%&',
+            'a\nb',
+            '"',
+            '\\',
+            '!',
+            '^!',
+            'a \\ b',
+            'a \\" b',
+            'a \\ b\\',
+            # We replace \r with \n
+            ('a\r\ra', 'a\n\na'),
+        ]
+
         def run_shell(args):
             stdout, stderr, error = Popen.run(
                 args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -2067,11 +2082,15 @@ Line 1
             assert not error
             return stdout

-        # Test escaping
-        assert run_shell(['echo', 'test"&']) == '"test""&"\n'
-        # Test if delayed expansion is disabled
-        assert run_shell(['echo', '^!']) == '"^!"\n'
-        assert run_shell('echo "^!"') == '"^!"\n'
+        for argument in tests:
+            if isinstance(argument, str):
+                expected = argument
+            else:
+                argument, expected = argument
+
+            args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
+            assert run_shell(args) == expected
+            assert run_shell(shell_quote(args, shell=True)) == expected


 if __name__ == '__main__':
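Instead of asserting on cmd.exe echo quirks, the test now round-trips every tricky argument through a real child process, both as a raw argv list and after shell_quote(args, shell=True). Usage mirrors the test (Windows only, where yt-dlp applies its own cmd.exe escaping):

    import subprocess
    import sys

    from yt_dlp.utils import Popen, shell_quote

    args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', 'a \\" b', 'end']
    # shell_quote must produce a command line the shell parses back to the same argv
    stdout, _, returncode = Popen.run(
        shell_quote(args, shell=True), text=True, shell=True, stdout=subprocess.PIPE)
    assert returncode == 0 and stdout == 'a \\" b'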

========================================

@@ -25,7 +25,7 @@ import unicodedata

 from .cache import Cache
 from .compat import functools, urllib  # isort: split
-from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
+from .compat import compat_os_name, urllib_req_to_req
 from .cookies import LenientSimpleCookie, load_cookies
 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
 from .downloader.rtmp import rtmpdump_version
@@ -102,7 +102,6 @@ from .utils import (
     UserNotLive,
     YoutubeDLError,
     age_restricted,
-    args_to_str,
     bug_reports_message,
     date_from_str,
     deprecation_warning,
@@ -141,6 +140,7 @@ from .utils import (
     sanitize_filename,
     sanitize_path,
     sanitize_url,
+    shell_quote,
     str_or_none,
     strftime_or_none,
     subtitles_filename,
@@ -481,7 +481,7 @@ class YoutubeDL:
                        nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
                        max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
                        continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
-                       external_downloader_args, concurrent_fragment_downloads.
+                       external_downloader_args, concurrent_fragment_downloads, progress_delta.

     The following options are used by the post processors:
     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
@@ -823,7 +823,7 @@ class YoutubeDL:
             self.report_warning(
                 'Long argument string detected. '
                 'Use -- to separate parameters and URLs, like this:\n%s' %
-                args_to_str(correct_argv))
+                shell_quote(correct_argv))

     def add_info_extractor(self, ie):
         """Add an InfoExtractor object to the end of the list."""
@@ -1355,7 +1355,7 @@ class YoutubeDL:
                 value, fmt = escapeHTML(str(value)), str_fmt
             elif fmt[-1] == 'q':  # quoted
                 value = map(str, variadic(value) if '#' in flags else [value])
-                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
+                value, fmt = shell_quote(value, shell=True), str_fmt
             elif fmt[-1] == 'B':  # bytes
                 value = f'%{str_fmt}'.encode() % str(value).encode()
                 value, fmt = value.decode('utf-8', 'ignore'), 's'
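The `%q` conversion is the piece CVE-2024-22423 is about: it must quote a template field so the platform shell treats it as one inert argument, even when the value contains cmd.exe metacharacters such as `%`, `"` or `&`. With the fix it routes through shell_quote(value, shell=True). A quick illustration (the title value is made up; exact quoting depends on platform):

    from yt_dlp import YoutubeDL

    ydl = YoutubeDL()
    info = {'id': 'test', 'title': 'pwned" & calc.exe'}
    # %(title)q expands to a single, safely quoted shell argument
    print(ydl.evaluate_outtmpl('%(title)q', info))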

========================================

@@ -836,6 +836,7 @@ def parse_options(argv=None):
         'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
         'progress_with_newline': opts.progress_with_newline,
         'progress_template': opts.progress_template,
+        'progress_delta': opts.progress_delta,
         'playliststart': opts.playliststart,
         'playlistend': opts.playlistend,
         'playlistreverse': opts.playlist_reverse,

========================================

@@ -27,12 +27,9 @@ def compat_etree_fromstring(text):
 compat_os_name = os._name if os.name == 'java' else os.name


-if compat_os_name == 'nt':
-    def compat_shlex_quote(s):
-        import re
-        return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
-else:
-    from shlex import quote as compat_shlex_quote  # noqa: F401
+def compat_shlex_quote(s):
+    from ..utils import shell_quote
+    return shell_quote(s)


 def compat_ord(c):

========================================

@@ -194,7 +194,11 @@ def _firefox_browser_dirs():
         yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')

     else:
-        yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
+        yield from map(os.path.expanduser, (
+            '~/.mozilla/firefox',
+            '~/snap/firefox/common/.mozilla/firefox',
+            '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
+        ))


 def _firefox_cookie_dbs(roots):
========================================

@@ -4,6 +4,7 @@ import functools
 import os
 import random
 import re
+import threading
 import time

 from ..minicurses import (
@@ -63,6 +64,7 @@ class FileDownloader:
     min_filesize:       Skip files smaller than this size
     max_filesize:       Skip files larger than this size
     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+    progress_delta:     The minimum time between progress output, in seconds
     external_downloader_args:  A dictionary of downloader keys (in lower case)
                         and a list of additional command-line arguments for the
                         executable. Use 'default' as the name for arguments to be
@@ -88,6 +90,9 @@ class FileDownloader:
         self.params = params
         self._prepare_multiline_status()
         self.add_progress_hook(self.report_progress)
+        if self.params.get('progress_delta'):
+            self._progress_delta_lock = threading.Lock()
+            self._progress_delta_time = time.monotonic()

     def _set_ydl(self, ydl):
         self.ydl = ydl
@@ -366,6 +371,12 @@ class FileDownloader:
         if s['status'] != 'downloading':
             return

+        if update_delta := self.params.get('progress_delta'):
+            with self._progress_delta_lock:
+                if time.monotonic() < self._progress_delta_time:
+                    return
+                self._progress_delta_time += update_delta
+
         s.update({
             '_eta_str': self.format_eta(s.get('eta')).strip(),
             '_speed_str': self.format_speed(s.get('speed')),
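Note that the deadline is advanced by a fixed delta rather than reset to "now", so reports settle into an even cadence no matter how often progress hooks fire, and the lock keeps concurrent fragment threads from double-reporting. The same pattern in isolation (standalone sketch; names are illustrative):

    import threading
    import time

    class ProgressThrottle:
        """Let at most one event through per `delta` seconds."""

        def __init__(self, delta):
            self._delta = delta
            self._lock = threading.Lock()
            self._next_time = time.monotonic()

        def should_report(self):
            with self._lock:
                if time.monotonic() < self._next_time:
                    return False
                # Advance by a fixed step to keep a steady cadence
                self._next_time += self._delta
                return True

    throttle = ProgressThrottle(5.0)
    print(throttle.should_report())  # True
    print(throttle.should_report())  # False until ~5s have elapsed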

========================================

@@ -150,6 +150,7 @@ from .arte import (
 )
 from .arnes import ArnesIE
 from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
+from .asobistage import AsobiStageIE
 from .atresplayer import AtresPlayerIE
 from .atscaleconf import AtScaleConfEventIE
 from .atvat import ATVAtIE
@@ -875,6 +876,7 @@ from .jeuxvideo import JeuxVideoIE
 from .jiosaavn import (
     JioSaavnSongIE,
     JioSaavnAlbumIE,
+    JioSaavnPlaylistIE,
 )
 from .jove import JoveIE
 from .joj import JojIE
@@ -2289,6 +2291,7 @@ from .vrt import (
     VrtNUIE,
     KetnetIE,
     DagelijkseKostIE,
+    Radio1BeIE,
 )
 from .vtm import VTMIE
 from .medialaan import MedialaanIE

========================================

@ -1,25 +1,65 @@
import functools import functools
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
date_from_str, UserNotLive,
determine_ext, determine_ext,
filter_dict,
int_or_none, int_or_none,
qualities, orderedSet,
traverse_obj,
unified_strdate,
unified_timestamp, unified_timestamp,
update_url_query,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
xpath_text, urljoin,
) )
from ..utils.traversal import traverse_obj
class AfreecaTVIE(InfoExtractor): class AfreecaTVBaseIE(InfoExtractor):
_NETRC_MACHINE = 'afreecatv'
def _perform_login(self, username, password):
login_form = {
'szWork': 'login',
'szType': 'json',
'szUid': username,
'szPassword': password,
'isSaveId': 'false',
'szScriptVar': 'oLoginRet',
'szAction': '',
}
response = self._download_json(
'https://login.afreecatv.com/app/LoginAction.php', None,
'Logging in', data=urlencode_postdata(login_form))
_ERRORS = {
-4: 'Your account has been suspended due to a violation of our terms and policies.',
-5: 'https://member.afreecatv.com/app/user_delete_progress.php',
-6: 'https://login.afreecatv.com/membership/changeMember.php',
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-9: 'https://member.afreecatv.com/app/pop_login_block.php',
-11: 'https://login.afreecatv.com/afreeca/second_login.php',
-12: 'https://member.afreecatv.com/app/user_security.php',
0: 'The username does not exist or you have entered the wrong password.',
-1: 'The username does not exist or you have entered the wrong password.',
-3: 'You have entered your username/password incorrectly.',
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-32008: 'You have failed to log in. Please contact our Help Center.',
}
result = int_or_none(response.get('RESULT'))
if result != 1:
error = _ERRORS.get(result, 'You have failed to log in.')
raise ExtractorError(
'Unable to login: %s said: %s' % (self.IE_NAME, error),
expected=True)
class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv' IE_NAME = 'afreecatv'
IE_DESC = 'afreecatv.com' IE_DESC = 'afreecatv.com'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -34,7 +74,6 @@ class AfreecaTVIE(InfoExtractor):
) )
(?P<id>\d+) (?P<id>\d+)
''' '''
_NETRC_MACHINE = 'afreecatv'
_TESTS = [{ _TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@ -87,6 +126,7 @@ class AfreecaTVIE(InfoExtractor):
'uploader': '♥이슬이', 'uploader': '♥이슬이',
'uploader_id': 'dasl8121', 'uploader_id': 'dasl8121',
'upload_date': '20170411', 'upload_date': '20170411',
'timestamp': 1491929865,
'duration': 213, 'duration': 213,
}, },
'params': { 'params': {
@ -120,219 +160,102 @@ class AfreecaTVIE(InfoExtractor):
'uploader_id': 'rlantnghks', 'uploader_id': 'rlantnghks',
'uploader': '페이즈으', 'uploader': '페이즈으',
'duration': 10840, 'duration': 10840,
'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r', 'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
'upload_date': '20230108', 'upload_date': '20230108',
'timestamp': 1673218805,
'title': '젠지 페이즈', 'title': '젠지 페이즈',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
# adult content
'url': 'https://vod.afreecatv.com/player/70395877',
'only_matching': True,
}, {
# subscribers only
'url': 'https://vod.afreecatv.com/player/104647403',
'only_matching': True,
}, {
# private
'url': 'https://vod.afreecatv.com/player/81669846',
'only_matching': True,
}] }]
-    @staticmethod
-    def parse_video_key(key):
-        video_key = {}
-        m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
-        if m:
-            video_key['upload_date'] = m.group('upload_date')
-            video_key['part'] = int(m.group('part'))
-        return video_key
-
-    def _perform_login(self, username, password):
-        login_form = {
-            'szWork': 'login',
-            'szType': 'json',
-            'szUid': username,
-            'szPassword': password,
-            'isSaveId': 'false',
-            'szScriptVar': 'oLoginRet',
-            'szAction': '',
-        }
-        response = self._download_json(
-            'https://login.afreecatv.com/app/LoginAction.php', None,
-            'Logging in', data=urlencode_postdata(login_form))
-
-        _ERRORS = {
-            -4: 'Your account has been suspended due to a violation of our terms and policies.',
-            -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
-            -6: 'https://login.afreecatv.com/membership/changeMember.php',
-            -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-            -9: 'https://member.afreecatv.com/app/pop_login_block.php',
-            -11: 'https://login.afreecatv.com/afreeca/second_login.php',
-            -12: 'https://member.afreecatv.com/app/user_security.php',
-            0: 'The username does not exist or you have entered the wrong password.',
-            -1: 'The username does not exist or you have entered the wrong password.',
-            -3: 'You have entered your username/password incorrectly.',
-            -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
-            -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-            -32008: 'You have failed to log in. Please contact our Help Center.',
-        }
-
-        result = int_or_none(response.get('RESULT'))
-        if result != 1:
-            error = _ERRORS.get(result, 'You have failed to log in.')
-            raise ExtractorError(
-                'Unable to login: %s said: %s' % (self.IE_NAME, error),
-                expected=True)
    def _real_extract(self, url):
        video_id = self._match_id(url)
-
-        partial_view = False
-        adult_view = False
-        for _ in range(2):
-            data = self._download_json(
-                'https://api.m.afreecatv.com/station/video/a/view',
-                video_id, headers={'Referer': url}, data=urlencode_postdata({
-                    'nTitleNo': video_id,
-                    'nApiLevel': 10,
-                }))['data']
-            if traverse_obj(data, ('code', {int})) == -6221:
-                raise ExtractorError('The VOD does not exist', expected=True)
-            query = {
-                'nTitleNo': video_id,
-                'nStationNo': data['station_no'],
-                'nBbsNo': data['bbs_no'],
-            }
-            if partial_view:
-                query['partialView'] = 'SKIP_ADULT'
-            if adult_view:
-                query['adultView'] = 'ADULT_VIEW'
-            video_xml = self._download_xml(
-                'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
-                video_id, 'Downloading video info XML%s'
-                % (' (skipping adult)' if partial_view else ''),
-                video_id, headers={
-                    'Referer': url,
-                }, query=query)
-
-            flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
-            if flag and flag == 'SUCCEED':
-                break
-            if flag == 'PARTIAL_ADULT':
-                self.report_warning(
-                    'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
-                    'Only content suitable for all ages will be downloaded. '
-                    'Provide account credentials if you wish to download restricted content.')
-                partial_view = True
-                continue
-            elif flag == 'ADULT':
-                if not adult_view:
-                    adult_view = True
-                    continue
-                error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
-            else:
-                error = flag
-            raise ExtractorError(
-                '%s said: %s' % (self.IE_NAME, error), expected=True)
-        else:
-            raise ExtractorError('Unable to download video info')
-
-        video_element = video_xml.findall('./track/video')[-1]
-        if video_element is None or video_element.text is None:
-            raise ExtractorError(
-                'Video %s does not exist' % video_id, expected=True)
-
-        video_url = video_element.text.strip()
-
-        title = xpath_text(video_xml, './track/title', 'title', fatal=True)
-        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
-        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
-        duration = int_or_none(xpath_text(
-            video_xml, './track/duration', 'duration'))
-        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
-
-        common_entry = {
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'thumbnail': thumbnail,
-        }
-
-        info = common_entry.copy()
-        info.update({
-            'id': video_id,
-            'title': title,
-            'duration': duration,
-        })
-
-        if not video_url:
-            entries = []
-            file_elements = video_element.findall('./file')
-            one = len(file_elements) == 1
-            for file_num, file_element in enumerate(file_elements, start=1):
-                file_url = url_or_none(file_element.text)
-                if not file_url:
-                    continue
-                key = file_element.get('key', '')
-                upload_date = unified_strdate(self._search_regex(
-                    r'^(\d{8})_', key, 'upload date', default=None))
-                if upload_date is not None:
-                    # sometimes the upload date isn't included in the file name
-                    # instead, another random ID is, which may parse as a valid
-                    # date but be wildly out of a reasonable range
-                    parsed_date = date_from_str(upload_date)
-                    if parsed_date.year < 2000 or parsed_date.year >= 2100:
-                        upload_date = None
-                file_duration = int_or_none(file_element.get('duration'))
-                format_id = key if key else '%s_%s' % (video_id, file_num)
-                if determine_ext(file_url) == 'm3u8':
-                    formats = self._extract_m3u8_formats(
-                        file_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                        m3u8_id='hls',
-                        note='Downloading part %d m3u8 information' % file_num)
-                else:
-                    formats = [{
-                        'url': file_url,
-                        'format_id': 'http',
-                    }]
-                if not formats and not self.get_param('ignore_no_formats'):
-                    continue
-                file_info = common_entry.copy()
-                file_info.update({
-                    'id': format_id,
-                    'title': title if one else '%s (part %d)' % (title, file_num),
-                    'upload_date': upload_date,
-                    'duration': file_duration,
-                    'formats': formats,
-                })
-                entries.append(file_info)
-
-            entries_info = info.copy()
-            entries_info.update({
-                '_type': 'multi_video',
-                'entries': entries,
-            })
-            return entries_info
-
-        info = {
-            'id': video_id,
-            'title': title,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'duration': duration,
-            'thumbnail': thumbnail,
-        }
-
-        if determine_ext(video_url) == 'm3u8':
-            info['formats'] = self._extract_m3u8_formats(
-                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                m3u8_id='hls')
-        else:
-            app, playpath = video_url.split('mp4:')
-            info.update({
-                'url': app,
-                'ext': 'flv',
-                'play_path': 'mp4:' + playpath,
-                'rtmp_live': True,  # downloading won't end without this
-            })
-
-        return info
+        data = self._download_json(
+            'https://api.m.afreecatv.com/station/video/a/view', video_id,
+            headers={'Referer': url}, data=urlencode_postdata({
+                'nTitleNo': video_id,
+                'nApiLevel': 10,
+            }))['data']
+
+        error_code = traverse_obj(data, ('code', {int}))
+        if error_code == -6221:
+            raise ExtractorError('The VOD does not exist', expected=True)
+        elif error_code == -6205:
+            raise ExtractorError('This VOD is private', expected=True)
+
+        common_info = traverse_obj(data, {
+            'title': ('title', {str}),
+            'uploader': ('writer_nick', {str}),
+            'uploader_id': ('bj_id', {str}),
+            'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
+            'thumbnail': ('thumb', {url_or_none}),
+        })
+
+        entries = []
+        for file_num, file_element in enumerate(
+                traverse_obj(data, ('files', lambda _, v: url_or_none(v['file']))), start=1):
+            file_url = file_element['file']
+            if determine_ext(file_url) == 'm3u8':
+                formats = self._extract_m3u8_formats(
+                    file_url, video_id, 'mp4', m3u8_id='hls',
+                    note=f'Downloading part {file_num} m3u8 information')
+            else:
+                formats = [{
+                    'url': file_url,
+                    'format_id': 'http',
+                }]
+
+            entries.append({
+                **common_info,
+                'id': file_element.get('file_info_key') or f'{video_id}_{file_num}',
+                'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
+                'formats': formats,
+                **traverse_obj(file_element, {
+                    'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
+                    'timestamp': ('file_start', {unified_timestamp}),
+                }),
+            })
+
+        if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
+            if not entries:
+                self.raise_login_required(
+                    'Only users older than 19 are able to watch this video', method='password')
+            self.report_warning(
+                'In accordance with local laws and regulations, underage users are '
+                'restricted from watching adult content. Only content suitable for all '
+                f'ages will be downloaded. {self._login_hint("password")}')
+
+        if not entries and traverse_obj(data, ('sub_upload_type', {str})):
+            self.raise_login_required('This VOD is for subscribers only', method='password')
+
+        if len(entries) == 1:
+            return {
+                **entries[0],
+                'title': common_info.get('title'),
+            }
+
+        common_info['timestamp'] = traverse_obj(entries, (..., 'timestamp'), get_all=False)
+
+        return self.playlist_result(entries, video_id, multi_video=True, **common_info)
-class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
+class AfreecaTVLiveIE(AfreecaTVBaseIE):
    IE_NAME = 'afreecatv:live'
+    IE_DESC = 'afreecatv.com livestreams'
    _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
    _TESTS = [{
        'url': 'https://play.afreecatv.com/pyh3646/237852185',
@@ -347,77 +270,97 @@ class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
        },
        'skip': 'Livestream has ended',
    }, {
-        'url': 'http://play.afreeca.com/pyh3646/237852185',
+        'url': 'https://play.afreecatv.com/pyh3646/237852185',
        'only_matching': True,
    }, {
-        'url': 'http://play.afreeca.com/pyh3646',
+        'url': 'https://play.afreecatv.com/pyh3646',
        'only_matching': True,
    }]

    _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
+    _WORKING_CDNS = [
+        'gcp_cdn',  # live-global-cdn-v02.afreecatv.com
+        'gs_cdn_pc_app',  # pc-app.stream.afreecatv.com
+        'gs_cdn_mobile_web',  # mobile-web.stream.afreecatv.com
+        'gs_cdn_pc_web',  # pc-web.stream.afreecatv.com
+    ]
+    _BAD_CDNS = [
+        'gs_cdn',  # chromecast.afreeca.gscdn.com (cannot resolve)
+        'gs_cdn_chromecast',  # chromecast.stream.afreecatv.com (HTTP Error 400)
+        'azure_cdn',  # live-global-cdn-v01.afreecatv.com (cannot resolve)
+        'aws_cf',  # live-global-cdn-v03.afreecatv.com (cannot resolve)
+        'kt_cdn',  # kt.stream.afreecatv.com (HTTP Error 400)
+    ]

-    _QUALITIES = ('sd', 'hd', 'hd2k', 'original')
+    def _extract_formats(self, channel_info, broadcast_no, aid):
+        stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
+
+        # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
+        default_cdn_ids = orderedSet([
+            *traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
+            *self._WORKING_CDNS,
+        ])
+        cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
+
+        for attempt, cdn_id in enumerate(cdn_ids, start=1):
+            m3u8_url = traverse_obj(self._download_json(
+                urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
+                f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
+                fatal=False, query={
+                    'return_type': cdn_id,
+                    'broad_key': f'{broadcast_no}-common-master-hls',
+                }), ('view_url', {url_or_none}))
+            try:
+                return self._extract_m3u8_formats(
+                    m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
+                    headers={'Referer': 'https://play.afreecatv.com/'})
+            except ExtractorError as e:
+                if attempt == len(cdn_ids):
+                    raise
+                self.report_warning(
+                    f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')
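The CDN fallback order above can be overridden per invocation through _configuration_arg('cdn', ...). Assuming yt-dlp's usual convention that the extractor-args key is the lowercased IE key (afreecatvlive here), a sample invocation would be:

yt-dlp --extractor-args "afreecatvlive:cdn=gs_cdn_pc_app" <livestream URL>

Several CDN IDs may be supplied comma-separated and are tried in order, exactly like the default list; the IDs are the keys documented in _WORKING_CDNS above.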
    def _real_extract(self, url):
        broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
-        password = self.get_param('videopassword')
-
-        info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
-                                   data=urlencode_postdata({'bid': broadcaster_id})) or {}
-        channel_info = info.get('CHANNEL') or {}
+        channel_info = traverse_obj(self._download_json(
+            self._LIVE_API_URL, broadcaster_id, data=urlencode_postdata({'bid': broadcaster_id})),
+            ('CHANNEL', {dict})) or {}
        broadcaster_id = channel_info.get('BJID') or broadcaster_id
        broadcast_no = channel_info.get('BNO') or broadcast_no
-        password_protected = channel_info.get('BPWD')
        if not broadcast_no:
-            raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
-        if password_protected == 'Y' and password is None:
+            raise UserNotLive(video_id=broadcaster_id)
+
+        password = self.get_param('videopassword')
+        if channel_info.get('BPWD') == 'Y' and password is None:
            raise ExtractorError(
                'This livestream is protected by a password, use the --video-password option',
                expected=True)

-        formats = []
-        quality_key = qualities(self._QUALITIES)
-        for quality_str in self._QUALITIES:
-            params = {
-                'bno': broadcast_no,
-                'stream_type': 'common',
-                'type': 'aid',
-                'quality': quality_str,
-            }
-            if password is not None:
-                params['pwd'] = password
-            aid_response = self._download_json(
-                self._LIVE_API_URL, broadcast_no, fatal=False,
-                data=urlencode_postdata(params),
-                note=f'Downloading access token for {quality_str} stream',
-                errnote=f'Unable to download access token for {quality_str} stream')
-            aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
-            if not aid:
-                continue
-
-            stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
-            stream_info = self._download_json(
-                f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
-                query={
-                    'return_type': channel_info.get('CDN', 'gcp_cdn'),
-                    'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
-                },
-                note=f'Downloading metadata for {quality_str} stream',
-                errnote=f'Unable to download metadata for {quality_str} stream') or {}
-
-            if stream_info.get('view_url'):
-                formats.append({
-                    'format_id': quality_str,
-                    'url': update_url_query(stream_info['view_url'], {'aid': aid}),
-                    'ext': 'mp4',
-                    'protocol': 'm3u8',
-                    'quality': quality_key(quality_str),
-                })
-
-        station_info = self._download_json(
+        token_info = traverse_obj(self._download_json(
+            self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
+            'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
+                'bno': broadcast_no,
+                'stream_type': 'common',
+                'type': 'aid',
+                'quality': 'master',
+                'pwd': password,
+            }))), ('CHANNEL', {dict})) or {}
+        aid = token_info.get('AID')
+        if not aid:
+            result = token_info.get('RESULT')
+            if result == 0:
+                raise ExtractorError('This livestream has ended', expected=True)
+            elif result == -6:
+                self.raise_login_required('This livestream is for subscribers only', method='password')
+            raise ExtractorError('Unable to extract access token')
+
+        formats = self._extract_formats(channel_info, broadcast_no, aid)
+
+        station_info = traverse_obj(self._download_json(
            'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
-            query={'szBjId': broadcaster_id}, fatal=False,
-            note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}
+            'Downloading channel metadata', 'Unable to download channel metadata',
+            query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}

        return {
            'id': broadcast_no,
@@ -427,6 +370,7 @@ class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
            'timestamp': unified_timestamp(station_info.get('broad_start')),
            'formats': formats,
            'is_live': True,
+            'http_headers': {'Referer': url},
        }
View File
@@ -0,0 +1,154 @@
import functools
from .common import InfoExtractor
from ..utils import str_or_none, url_or_none
from ..utils.traversal import traverse_obj
class AsobiStageIE(InfoExtractor):
IE_DESC = 'ASOBISTAGE (アソビステージ)'
_VALID_URL = r'https?://asobistage\.asobistore\.jp/event/(?P<id>(?P<event>\w+)/(?P<type>archive|player)/(?P<slug>\w+))(?:[?#]|$)'
_TESTS = [{
'url': 'https://asobistage.asobistore.jp/event/315passionhour_2022summer/archive/frame',
'info_dict': {
'id': '315passionhour_2022summer/archive/frame',
'title': '315プロダクションプレゼンツ 315パッションアワー!!!',
'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
},
'playlist_count': 1,
'playlist': [{
'info_dict': {
'id': 'edff52f2',
'ext': 'mp4',
'title': '315passion_FRAME_only',
'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
},
}],
}, {
'url': 'https://asobistage.asobistore.jp/event/idolmaster_idolworld2023_goods/archive/live',
'info_dict': {
'id': 'idolmaster_idolworld2023_goods/archive/live',
'title': 'md5:378510b6e830129d505885908bd6c576',
'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
},
'playlist_count': 1,
'playlist': [{
'info_dict': {
'id': '3aef7110',
'ext': 'mp4',
'title': 'asobistore_station_1020_serverREC',
'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
},
}],
}, {
'url': 'https://asobistage.asobistore.jp/event/sidem_fclive_bpct/archive/premium_hc',
'playlist_count': 4,
'info_dict': {
'id': 'sidem_fclive_bpct/archive/premium_hc',
'title': '315 Production presents FNTASTIC COMBINATION LIVE BRAINPOWER!!/CONNECTIME!!!!',
'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
},
}, {
'url': 'https://asobistage.asobistore.jp/event/ijigenfes_utagassen/player/day1',
'only_matching': True,
}]
_API_HOST = 'https://asobistage-api.asobistore.jp'
_HEADERS = {}
_is_logged_in = False
@functools.cached_property
def _owned_tickets(self):
owned_tickets = set()
if not self._is_logged_in:
return owned_tickets
for path, name in [
('api/v1/purchase_history/list', 'ticket purchase history'),
('api/v1/serialcode/list', 'redemption history'),
]:
response = self._download_json(
f'{self._API_HOST}/{path}', None, f'Downloading {name}',
f'Unable to download {name}', expected_status=400)
if traverse_obj(response, ('payload', 'error_message'), 'error') == 'notlogin':
self._is_logged_in = False
break
owned_tickets.update(
traverse_obj(response, ('payload', 'value', ..., 'digital_product_id', {str_or_none})))
return owned_tickets
def _get_available_channel_id(self, channel):
channel_id = traverse_obj(channel, ('chennel_vspf_id', {str}))
if not channel_id:
return None
# if rights_type_id == 6, then 'No conditions (no login required - non-members are OK)'
if traverse_obj(channel, ('viewrights', lambda _, v: v['rights_type_id'] == 6)):
return channel_id
available_tickets = traverse_obj(channel, (
'viewrights', ..., ('tickets', 'serialcodes'), ..., 'digital_product_id', {str_or_none}))
if not self._owned_tickets.intersection(available_tickets):
self.report_warning(
f'You are not a ticketholder for "{channel.get("channel_name") or channel_id}"')
return None
return channel_id
def _real_initialize(self):
if self._get_cookies(self._API_HOST):
self._is_logged_in = True
token = self._download_json(
f'{self._API_HOST}/api/v1/vspf/token', None, 'Getting token', 'Unable to get token')
self._HEADERS['Authorization'] = f'Bearer {token}'
def _real_extract(self, url):
video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
webpage = self._download_webpage(url, video_id)
event_data = traverse_obj(
self._search_nextjs_data(webpage, video_id, default={}),
('props', 'pageProps', 'eventCMSData', {
'title': ('event_name', {str}),
'thumbnail': ('event_thumbnail_image', {url_or_none}),
}))
available_channels = traverse_obj(self._download_json(
f'https://asobistage.asobistore.jp/cdn/v101/events/{event}/{video_type}.json',
video_id, 'Getting channel list', 'Unable to get channel list'), (
video_type, lambda _, v: v['broadcast_slug'] == slug,
'channels', lambda _, v: v['chennel_vspf_id'] != '00000'))
entries = []
for channel_id in traverse_obj(available_channels, (..., {self._get_available_channel_id})):
if video_type == 'archives':
channel_json = self._download_json(
f'https://survapi.channel.or.jp/proxy/v1/contents/{channel_id}/get_by_cuid', channel_id,
'Getting archive channel info', 'Unable to get archive channel info', fatal=False,
headers=self._HEADERS)
channel_data = traverse_obj(channel_json, ('ex_content', {
'm3u8_url': 'streaming_url',
'title': 'title',
'thumbnail': ('thumbnail', 'url'),
}))
else: # video_type == 'broadcasts'
channel_json = self._download_json(
f'https://survapi.channel.or.jp/ex/events/{channel_id}', channel_id,
'Getting live channel info', 'Unable to get live channel info', fatal=False,
headers=self._HEADERS, query={'embed': 'channel'})
channel_data = traverse_obj(channel_json, ('data', {
'm3u8_url': ('Channel', 'Custom_live_url'),
'title': 'Name',
'thumbnail': 'Poster_url',
}))
entries.append({
'id': channel_id,
'title': channel_data.get('title'),
'formats': self._extract_m3u8_formats(channel_data.get('m3u8_url'), channel_id, fatal=False),
'is_live': video_type == 'broadcasts',
'thumbnail': url_or_none(channel_data.get('thumbnail')),
})
if not self._is_logged_in and not entries:
self.raise_login_required()
return self.playlist_result(entries, video_id, **event_data)
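The access logic above reduces to: collect the user's owned ticket IDs once (two API calls, cached), then admit a channel when that set intersects the channel's accepted digital_product_id values. A reduced, self-contained sketch of the pattern; the class and sample data are illustrative, not part of the extractor:

import functools

class TicketGate:
    # Minimal sketch: fetch-once entitlements via functools.cached_property,
    # then gate channels by set intersection, as _owned_tickets does above
    def __init__(self, purchased):
        self._purchased = purchased

    @functools.cached_property
    def owned_tickets(self):
        # in the extractor this triggers the purchase/serial-code API calls;
        # cached_property guarantees they run at most once per instance
        return set(self._purchased)

    def can_view(self, channel_ticket_ids):
        return bool(self.owned_tickets.intersection(channel_ticket_ids))

gate = TicketGate(['123', '456'])
assert gate.can_view({'456', '789'})
assert not gate.can_view({'000'})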
View File
@@ -151,7 +151,7 @@ class CBCIE(InfoExtractor):

class CBCPlayerIE(InfoExtractor):
    IE_NAME = 'cbc.ca:player'
-    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
+    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
    _TESTS = [{
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'md5': '64d25f841ddf4ddb28a235338af32e2c',
@@ -165,9 +165,52 @@ class CBCPlayerIE(InfoExtractor):
            'uploader': 'CBCC-NEW',
        },
        'skip': 'Geo-restricted to Canada and no longer available',
+    }, {
+        'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2657631896',
+        'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
+        'info_dict': {
+            'id': '2657631896',
+            'ext': 'mp3',
+            'title': 'CBC Montreal is organizing its first ever community hackathon!',
+            'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
+            'timestamp': 1425704400,
+            'upload_date': '20150307',
+            'uploader': 'CBCC-NEW',
+            'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
+            'chapters': [],
+            'duration': 494.811,
+            'categories': ['AudioMobile/All in a Weekend Montreal'],
+            'tags': 'count:8',
+            'location': 'Quebec',
+            'series': 'All in a Weekend Montreal',
+            'season': 'Season 2015',
+            'season_number': 2015,
+            'media_type': 'Excerpt',
+        },
+    }, {
+        'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
+        'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
+        'info_dict': {
+            'id': '2164402062',
+            'ext': 'mp4',
+            'title': 'Cancer survivor four times over',
+            'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
+            'timestamp': 1320410746,
+            'upload_date': '20111104',
+            'uploader': 'CBCC-NEW',
+            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
+            'chapters': [],
+            'duration': 186.867,
+            'series': 'CBC News: Windsor at 6:00',
+            'categories': ['News/Canada/Windsor'],
+            'location': 'Windsor',
+            'tags': ['cancer'],
+            'creators': ['Allison Johnson'],
+            'media_type': 'Excerpt',
+        },
    }, {
        # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
-        'url': 'http://www.cbc.ca/player/play/2657631896',
+        'url': 'https://www.cbc.ca/player/play/1.2985700',
        'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
        'info_dict': {
            'id': '2657631896',
@@ -189,7 +232,7 @@ class CBCPlayerIE(InfoExtractor):
            'media_type': 'Excerpt',
        },
    }, {
-        'url': 'http://www.cbc.ca/player/play/2164402062',
+        'url': 'https://www.cbc.ca/player/play/1.1711287',
        'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
        'info_dict': {
            'id': '2164402062',
@@ -206,38 +249,53 @@ class CBCPlayerIE(InfoExtractor):
            'categories': ['News/Canada/Windsor'],
            'location': 'Windsor',
            'tags': ['cancer'],
-            'creator': 'Allison Johnson',
+            'creators': ['Allison Johnson'],
            'media_type': 'Excerpt',
        },
    }, {
        # Has subtitles
        # These broadcasts expire after ~1 month, can find new test URL here:
        # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
-        'url': 'http://www.cbc.ca/player/play/2284799043667',
-        'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
+        'url': 'https://www.cbc.ca/player/play/1.7159484',
+        'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
        'info_dict': {
-            'id': '2284799043667',
+            'id': '2324213316001',
            'ext': 'mp4',
-            'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
-            'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
-            'timestamp': 1700272800,
-            'duration': 2718.833,
+            'title': 'The National | School boards sue social media giants',
+            'description': 'md5:4b4db69322fa32186c3ce426da07402c',
+            'timestamp': 1711681200,
+            'duration': 2743.400,
            'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
-            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
+            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
            'uploader': 'CBCC-NEW',
            'chapters': 'count:5',
-            'upload_date': '20231118',
+            'upload_date': '20240329',
            'categories': 'count:4',
            'series': 'The National - Full Show',
            'tags': 'count:1',
-            'creator': 'News',
+            'creators': ['News'],
            'location': 'Canada',
            'media_type': 'Full Program',
        },
+    }, {
+        'url': 'cbcplayer:1.7159484',
+        'only_matching': True,
+    }, {
+        'url': 'cbcplayer:2164402062',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.cbc.ca/player/play/2657631896',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
+        if '.' in video_id:
+            webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
+            video_id = self._search_json(
+                r'window\.__INITIAL_STATE__\s*=', webpage,
+                'initial state', video_id)['video']['currentClip']['mediaId']
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
View File
@@ -1738,12 +1738,16 @@ class InfoExtractor:
            traverse_json_ld(json_ld)
        return filter_dict(info)

-    def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', fatal=fatal, **kw),
-            video_id, transform_source=transform_source, fatal=fatal)
+    def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+        if default == '{}':
+            self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
+            default = {}
+        if default is not NO_DEFAULT:
+            fatal = False
+        return self._search_json(
+            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)

    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
View File
@@ -1,4 +1,5 @@
import base64
+import uuid

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
@@ -7,12 +8,11 @@ from ..utils import (
    float_or_none,
    format_field,
    int_or_none,
-    join_nonempty,
+    jwt_decode_hs256,
    parse_age_limit,
    parse_count,
    parse_iso8601,
    qualities,
-    remove_start,
    time_seconds,
    traverse_obj,
    url_or_none,
@@ -24,10 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
    _BASE_URL = 'https://www.crunchyroll.com'
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
+    _REFRESH_TOKEN = None
    _AUTH_HEADERS = None
+    _AUTH_EXPIRY = None
    _API_ENDPOINT = None
-    _BASIC_AUTH = None
-    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
+    _BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
+        't-kdgp2h8c3jub8fn0fq',
+        'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
+    )).encode()).decode()
+    _IS_PREMIUM = None
    _LOCALE_LOOKUP = {
        'ar': 'ar-SA',
        'de': 'de-DE',
@@ -42,63 +47,74 @@ class CrunchyrollBaseIE(InfoExtractor):
        'hi': 'hi-IN',
    }

-    @property
-    def is_logged_in(self):
-        return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
+    def _set_auth_info(self, response):
+        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
+        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
+        CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
+
+    def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
+        try:  # TODO: Add impersonation support here
+            return self._download_json(
+                f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
+                headers=headers, data=urlencode_postdata(data))
+        except ExtractorError as error:
+            if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
+                raise
+            raise ExtractorError(
+                'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
+                'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
+                'and your browser\'s User-Agent (with --user-agent)', expected=True)

    def _perform_login(self, username, password):
-        if self.is_logged_in:
+        if not CrunchyrollBaseIE._REFRESH_TOKEN:
+            CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
            return

-        upsell_response = self._download_json(
-            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
-            query={
-                'sess_id': 1,
-                'device_id': 'whatvalueshouldbeforweb',
-                'device_type': 'com.crunchyroll.static',
-                'access_token': 'giKq5eY27ny3cqz',
-                'referer': f'{self._BASE_URL}/welcome/login'
-            })
-        if upsell_response['code'] != 'ok':
-            raise ExtractorError('Could not get session id')
-        session_id = upsell_response['data']['session_id']
-
-        login_response = self._download_json(
-            f'{self._API_BASE}/login.1.json', None, 'Logging in',
-            data=urlencode_postdata({
-                'account': username,
-                'password': password,
-                'session_id': session_id
-            }))
-        if login_response['code'] != 'ok':
-            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
-        if not self.is_logged_in:
-            raise ExtractorError('Login succeeded but did not set etp_rt cookie')
-
-    def _update_auth(self):
-        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
-            return
-
-        if not CrunchyrollBaseIE._BASIC_AUTH:
-            cx_api_param = self._CLIENT_ID[self.is_logged_in]
-            self.write_debug(f'Using cxApiParam={cx_api_param}')
-            CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
-
-        grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
        try:
-            auth_response = self._download_json(
-                f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
-                headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
+            login_response = self._request_token(
+                headers={'Authorization': self._BASIC_AUTH}, data={
+                    'username': username,
+                    'password': password,
+                    'grant_type': 'password',
+                    'scope': 'offline_access',
+                }, note='Logging in', errnote='Failed to log in')
        except ExtractorError as error:
-            if isinstance(error.cause, HTTPError) and error.cause.status == 403:
-                raise ExtractorError(
-                    'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
-                    'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
-                    'and your browser\'s User-Agent (with --user-agent)', expected=True)
+            if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+                raise ExtractorError('Invalid username and/or password', expected=True)
            raise

-        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
-        CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
+        CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
+        self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
+        self._set_auth_info(login_response)
+
+    def _update_auth(self):
+        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
+            return
+
+        auth_headers = {'Authorization': self._BASIC_AUTH}
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
+            data = {
+                'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
+                'grant_type': 'refresh_token',
+                'scope': 'offline_access',
+            }
+        else:
+            data = {'grant_type': 'client_id'}
+            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
+        try:
+            auth_response = self._request_token(auth_headers, data)
+        except ExtractorError as error:
+            username, password = self._get_login_info()
+            if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
+                raise
+            self.to_screen('Refresh token has expired. Re-logging in')
+            CrunchyrollBaseIE._REFRESH_TOKEN = None
+            self.cache.store(self._NETRC_MACHINE, username, None)
+            self._perform_login(username, password)
+            return
+
+        self._set_auth_info(auth_response)
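Premium status is read straight out of the access token: jwt_decode_hs256 decodes the JWT payload without verifying its signature, and the extractor looks for 'cr_premium' among the benefits claim. A self-contained sketch of the same idea; the helper below is a stand-in for the utility, and the claim layout is as assumed above:

import base64
import json

def jwt_payload(token):
    # Decode the middle (payload) segment of a JWT without signature
    # verification, mirroring yt-dlp's jwt_decode_hs256 helper
    payload = token.split('.')[1]
    payload += '=' * (-len(payload) % 4)  # restore stripped base64 padding
    return json.loads(base64.urlsafe_b64decode(payload))

def has_premium(auth_response):
    # auth_response is assumed to be the parsed JSON from /auth/v1/token
    return 'cr_premium' in jwt_payload(auth_response['access_token']).get('benefits', [])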
    def _locale_from_language(self, language):
        config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
@@ -135,62 +151,73 @@ class CrunchyrollBaseIE(InfoExtractor):
            raise ExtractorError(f'Unexpected response when downloading {note} JSON')
        return result

-    def _extract_formats(self, stream_response, display_id=None):
-        requested_formats = self._configuration_arg('format') or ['vo_adaptive_hls']
-        available_formats = {}
-        for stream_type, streams in traverse_obj(
-                stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
-            if stream_type not in requested_formats:
-                continue
-            for stream in traverse_obj(streams, lambda _, v: v['url']):
-                hardsub_lang = stream.get('hardsub_locale') or ''
-                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
-                available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
-
-        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
-        if '' in available_formats and 'all' not in requested_hardsubs:
-            full_format_langs = set(requested_hardsubs)
-            self.to_screen(
-                'To get all formats of a hardsub language, use '
-                '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
-                'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
-                only_once=True)
-        else:
-            full_format_langs = set(map(str.lower, available_formats))
-
-        audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
-        hardsub_preference = qualities(requested_hardsubs[::-1])
-        formats = []
-        for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
-            if stream_type.endswith('hls'):
-                if hardsub_lang.lower() in full_format_langs:
-                    adaptive_formats = self._extract_m3u8_formats(
-                        stream_url, display_id, 'mp4', m3u8_id=format_id,
-                        fatal=False, note=f'Downloading {format_id} HLS manifest')
-                else:
-                    adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
-            elif stream_type.endswith('dash'):
-                adaptive_formats = self._extract_mpd_formats(
-                    stream_url, display_id, mpd_id=format_id,
-                    fatal=False, note=f'Downloading {format_id} MPD manifest')
-            else:
-                self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
-                continue
-            for f in adaptive_formats:
-                if f.get('acodec') != 'none':
-                    f['language'] = audio_locale
-                f['quality'] = hardsub_preference(hardsub_lang.lower())
-            formats.extend(adaptive_formats)
-
-        return formats
-
-    def _extract_subtitles(self, data):
-        subtitles = {}
-
-        for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)):
-            subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
-
-        return subtitles
+    def _extract_chapters(self, internal_id):
+        # if no skip events are available, a 403 xml error is returned
+        skip_events = self._download_json(
+            f'https://static.crunchyroll.com/skip-events/production/{internal_id}.json',
+            internal_id, note='Downloading chapter info', fatal=False, errnote=False)
+        if not skip_events:
+            return None
+
+        chapters = []
+        for event in ('recap', 'intro', 'credits', 'preview'):
+            start = traverse_obj(skip_events, (event, 'start', {float_or_none}))
+            end = traverse_obj(skip_events, (event, 'end', {float_or_none}))
+            # some chapters have no start and/or ending time, they will just be ignored
+            if start is None or end is None:
+                continue
+            chapters.append({'title': event.capitalize(), 'start_time': start, 'end_time': end})
+
+        return chapters
+
+    def _extract_stream(self, identifier, display_id=None):
+        if not display_id:
+            display_id = identifier
+
+        self._update_auth()
+        stream_response = self._download_json(
+            f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
+            display_id, note='Downloading stream info', errnote='Failed to download stream info',
+            headers=CrunchyrollBaseIE._AUTH_HEADERS)
+
+        available_formats = {'': ('', '', stream_response['url'])}
+        for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
+            available_formats[hardsub_lang] = (f'hardsub-{hardsub_lang}', hardsub_lang, stream['url'])
+
+        requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
+        hardsub_langs = [lang for lang in available_formats if lang]
+        if hardsub_langs and 'all' not in requested_hardsubs:
+            full_format_langs = set(requested_hardsubs)
+            self.to_screen(f'Available hardsub languages: {", ".join(hardsub_langs)}')
+            self.to_screen(
+                'To extract formats of a hardsub language, use '
+                '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
+                'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
+                only_once=True)
+        else:
+            full_format_langs = set(map(str.lower, available_formats))
+
+        audio_locale = traverse_obj(stream_response, ('audioLocale', {str}))
+        hardsub_preference = qualities(requested_hardsubs[::-1])
+        formats, subtitles = [], {}
+        for format_id, hardsub_lang, stream_url in available_formats.values():
+            if hardsub_lang.lower() in full_format_langs:
+                adaptive_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
+                    stream_url, display_id, mpd_id=format_id, headers=CrunchyrollBaseIE._AUTH_HEADERS,
+                    fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
+                self._merge_subtitles(dash_subs, target=subtitles)
+            else:
+                continue  # XXX: Update this if/when meta mpd formats are working
+            for f in adaptive_formats:
+                if f.get('acodec') != 'none':
+                    f['language'] = audio_locale
+                f['quality'] = hardsub_preference(hardsub_lang.lower())
+            formats.extend(adaptive_formats)
+
+        for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
+            subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
+
+        return formats, subtitles
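As the on-screen hint in _extract_stream indicates, hardsubbed variants are opt-in. A typical invocation, matching the extractor_args used in the updated test below (the watch URL is a placeholder):

yt-dlp --extractor-args "crunchyrollbeta:hardsub=de-DE" "https://www.crunchyroll.com/watch/<id>"

Passing hardsub=all requests every available hardsub language instead.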
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
@@ -245,7 +272,11 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
            'like_count': int,
            'dislike_count': int,
        },
-        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
+        'params': {
+            'skip_download': 'm3u8',
+            'extractor_args': {'crunchyrollbeta': {'hardsub': ['de-DE']}},
+            'format': 'bv[format_id~=hardsub]',
+        },
    }, {
        # Premium only
        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
@@ -306,6 +337,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
        },
        'params': {'skip_download': 'm3u8'},
+        'skip': 'no longer exists',
    }, {
        'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
        'info_dict': {
@@ -359,31 +391,16 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
        else:
            raise ExtractorError(f'Unknown object type {object_type}')

-        # There might be multiple audio languages for one object (`<object>_metadata.versions`),
-        # so we need to get the id from `streams_link` instead or we dont know which language to choose
-        streams_link = response.get('streams_link')
-        if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
+        if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
            message = f'This {object_type} is for premium members only'
-            if self.is_logged_in:
-                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
-
-        # We need go from unsigned to signed api to avoid getting soft banned
-        stream_response = self._call_cms_api_signed(remove_start(
-            streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
-        result['formats'] = self._extract_formats(stream_response, internal_id)
-        result['subtitles'] = self._extract_subtitles(stream_response)
-
-        # if no intro chapter is available, a 403 without usable data is returned
-        intro_chapter = self._download_json(
-            f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
-            internal_id, note='Downloading chapter info', fatal=False, errnote=False)
-        if isinstance(intro_chapter, dict):
-            result['chapters'] = [{
-                'title': 'Intro',
-                'start_time': float_or_none(intro_chapter.get('startTime')),
-                'end_time': float_or_none(intro_chapter.get('endTime')),
-            }]
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
+                self.raise_no_formats(message, expected=True, video_id=internal_id)
+            else:
+                self.raise_login_required(message, method='password', metadata_available=True)
+        else:
+            result['formats'], result['subtitles'] = self._extract_stream(internal_id)
+
+        result['chapters'] = self._extract_chapters(internal_id)

        def calculate_count(item):
            return parse_count(''.join((item['displayed'], item.get('unit') or '')))
@@ -512,7 +529,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
            'display_id': 'egaono-hana',
            'title': 'Egaono Hana',
            'track': 'Egaono Hana',
-            'artist': 'Goose house',
+            'artists': ['Goose house'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'genres': ['J-Pop'],
        },
@@ -525,11 +542,12 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
            'display_id': 'crossing-field',
            'title': 'Crossing Field',
            'track': 'Crossing Field',
-            'artist': 'LiSA',
+            'artists': ['LiSA'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'genres': ['Anime'],
        },
        'params': {'skip_download': 'm3u8'},
+        'skip': 'no longer exists',
    }, {
        'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
        'info_dict': {
@@ -538,7 +556,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
            'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
            'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
            'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
-            'artist': 'LiSA',
+            'artists': ['LiSA'],
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'description': 'md5:747444e7e6300907b7a43f0a0503072e',
            'genres': ['J-Pop'],
@@ -566,16 +584,16 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
        if not response:
            raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

-        streams_link = response.get('streams_link')
-        if not streams_link and response.get('isPremiumOnly'):
-            message = f'This {response.get("type") or "media"} is for premium members only'
-            if self.is_logged_in:
-                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
-
        result = self._transform_music_response(response)

-        stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
-        result['formats'] = self._extract_formats(stream_response, internal_id)
+        if not self._IS_PREMIUM and response.get('isPremiumOnly'):
+            message = f'This {response.get("type") or "media"} is for premium members only'
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
+                self.raise_no_formats(message, expected=True, video_id=internal_id)
+            else:
+                self.raise_login_required(message, method='password', metadata_available=True)
+        else:
+            result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)

        return result
@@ -587,7 +605,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
            'display_id': 'slug',
            'title': 'title',
            'track': 'title',
-            'artist': ('artist', 'name'),
+            'artists': ('artist', 'name', all),
            'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
            'thumbnails': ('images', ..., ..., {
                'url': ('source', {url_or_none}),
@@ -611,7 +629,7 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
        'info_dict': {
            'id': 'MA179CB50D',
            'title': 'LiSA',
-            'genres': ['J-Pop', 'Anime', 'Rock'],
+            'genres': ['Anime', 'J-Pop', 'Rock'],
            'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
        },
        'playlist_mincount': 83,
View File
@@ -65,12 +65,14 @@ class DropboxIE(InfoExtractor):
        formats, subtitles, has_anonymous_download = [], {}, False
        for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
            decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
+            if not has_anonymous_download:
+                has_anonymous_download = self._search_regex(
+                    r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
            transcode_url = self._search_regex(
                r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
            if not transcode_url:
                continue
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
-            has_anonymous_download = self._search_regex(r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
            break

        # downloads enabled we can get the original file
View File
@@ -560,7 +560,7 @@ class FacebookIE(InfoExtractor):
            js_data, lambda x: x['jsmods']['instances'], list) or [])

        def extract_dash_manifest(video, formats):
-            dash_manifest = video.get('dash_manifest')
+            dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
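traverse_obj with multiple alternative paths returns the first one that yields a value of the expected type, which is what lets the manifest live under either 'dash_manifest' or 'playlist'. A standalone illustration (the dict contents are invented):

from yt_dlp.utils import traverse_obj

video = {'playlist': '<MPD ...>'}  # no 'dash_manifest' key in this payload
# 'dash_manifest' is tried first, then 'playlist'; non-str values are rejected
manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
assert manifest == '<MPD ...>'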
View File
@@ -2104,22 +2104,6 @@ class GenericIE(InfoExtractor):
                'age_limit': 0,
            },
        },
-        {
-            'note': 'JW Player embed with unicode-escape sequences in URL',
-            'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
-            'info_dict': {
-                'id': 'm',
-                'ext': 'mp4',
-                'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
-                'description': 'Mahler\'s ',
-                'uploader': 'www.medici.tv',
-                'age_limit': 0,
-                'thumbnail': r're:^https?://.+\.jpg',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
        {
            'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
            'md5': 'e2f0a4c329f7986280b7328e24036d60',
View File
@ -1,89 +1,143 @@
import functools
import math
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
InAdvancePagedList,
clean_html,
int_or_none, int_or_none,
js_to_json, make_archive_id,
smuggle_url,
unsmuggle_url,
url_basename,
url_or_none, url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj
class JioSaavnBaseIE(InfoExtractor): class JioSaavnBaseIE(InfoExtractor):
def _extract_initial_data(self, url, audio_id): _API_URL = 'https://www.jiosaavn.com/api.php'
webpage = self._download_webpage(url, audio_id) _VALID_BITRATES = {'16', '32', '64', '128', '320'}
return self._search_json(
r'window\.__INITIAL_DATA__\s*=', webpage,
'init json', audio_id, transform_source=js_to_json)
@functools.cached_property
class JioSaavnSongIE(JioSaavnBaseIE): def requested_bitrates(self):
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)' requested_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
_TESTS = [{ if invalid_bitrates := set(requested_bitrates) - self._VALID_BITRATES:
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
'md5': '3b84396d15ed9e083c3106f1fa589c04',
'info_dict': {
'id': 'OQsEfQFVUXk',
'ext': 'mp4',
'title': 'Leja Re',
'album': 'Leja Re',
'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
'duration': 205,
'view_count': int,
'release_year': 2018,
},
}, {
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
'only_matching': True,
}]
_VALID_BITRATES = ('16', '32', '64', '128', '320')
def _real_extract(self, url):
audio_id = self._match_id(url)
extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
raise ValueError( raise ValueError(
f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. ' f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
+ f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}') + f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
return requested_bitrates
song_data = self._extract_initial_data(url, audio_id)['song']['song'] def _extract_formats(self, song_data):
formats = [] for bitrate in self.requested_bitrates:
for bitrate in extract_bitrates:
media_data = self._download_json( media_data = self._download_json(
'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}', self._API_URL, song_data['id'],
f'Downloading format info for {bitrate}',
fatal=False, data=urlencode_postdata({ fatal=False, data=urlencode_postdata({
'__call': 'song.generateAuthToken', '__call': 'song.generateAuthToken',
'_format': 'json', '_format': 'json',
'bitrate': bitrate, 'bitrate': bitrate,
'url': song_data['encrypted_media_url'], 'url': song_data['encrypted_media_url'],
})) }))
if not media_data.get('auth_url'): if not traverse_obj(media_data, ('auth_url', {url_or_none})):
self.report_warning(f'Unable to extract format info for {bitrate}') self.report_warning(f'Unable to extract format info for {bitrate}')
continue continue
formats.append({ ext = media_data.get('type')
yield {
'url': media_data['auth_url'], 'url': media_data['auth_url'],
'ext': media_data.get('type'), 'ext': 'm4a' if ext == 'mp4' else ext,
'format_id': bitrate, 'format_id': bitrate,
'abr': int(bitrate), 'abr': int(bitrate),
'vcodec': 'none', 'vcodec': 'none',
}
def _extract_song(self, song_data, url=None):
info = traverse_obj(song_data, {
'id': ('id', {str}),
'title': ('song', {clean_html}),
'album': ('album', {clean_html}),
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
'duration': ('duration', {int_or_none}),
'view_count': ('play_count', {int_or_none}),
'release_year': ('year', {int_or_none}),
'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
'webpage_url': ('perma_url', {url_or_none}),
})
if webpage_url := info.get('webpage_url') or url:
info['display_id'] = url_basename(webpage_url)
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
return info
def _call_api(self, type_, token, note='API', params={}):
return self._download_json(
self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
query={
'__call': 'webapi.get',
'_format': 'json',
'_marker': '0',
'ctx': 'web6dot0',
'token': token,
'type': type_,
**params,
}) })
return { def _yield_songs(self, playlist_data):
'id': audio_id, for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
'formats': formats, song_info = self._extract_song(song_data)
**traverse_obj(song_data, { url = smuggle_url(song_info['webpage_url'], {
'title': ('title', 'text'), 'id': song_data['id'],
'album': ('album', 'text'), 'encrypted_media_url': song_data['encrypted_media_url'],
'thumbnail': ('image', 0, {url_or_none}), })
'duration': ('duration', {int_or_none}), yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
'view_count': ('play_count', {int_or_none}),
'release_year': ('year', {int_or_none}),
}), class JioSaavnSongIE(JioSaavnBaseIE):
} IE_NAME = 'jiosaavn:song'
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
'md5': '3b84396d15ed9e083c3106f1fa589c04',
'info_dict': {
'id': 'IcoLuefJ',
'display_id': 'OQsEfQFVUXk',
'ext': 'm4a',
'title': 'Leja Re',
'album': 'Leja Re',
'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
'duration': 205,
'view_count': int,
'release_year': 2018,
'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
'_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
},
}, {
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
'only_matching': True,
}]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url)
song_data = traverse_obj(smuggled_data, ({
'id': ('id', {str}),
'encrypted_media_url': ('encrypted_media_url', {str}),
}))
if 'id' in song_data and 'encrypted_media_url' in song_data:
result = {'id': song_data['id']}
else:
# only extract metadata if this is not a url_transparent result
song_data = self._call_api('song', self._match_id(url))['songs'][0]
result = self._extract_song(song_data, url)
result['formats'] = list(self._extract_formats(song_data))
return result
class JioSaavnAlbumIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:album'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
@@ -95,11 +149,46 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
}]
def _real_extract(self, url):
display_id = self._match_id(url)
album_data = self._call_api('album', display_id)
return self.playlist_result(
self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
class JioSaavnPlaylistIE(JioSaavnBaseIE):
IE_NAME = 'jiosaavn:playlist'
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
'info_dict': {
'id': 'LlJ8ZWT1ibN5084vKHRj2Q__',
'title': 'Mood English',
},
'playlist_mincount': 301,
}, {
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-hindi/DVR,pFUOwyXqIp77B1JF,A__',
'info_dict': {
'id': 'DVR,pFUOwyXqIp77B1JF,A__',
'title': 'Mood Hindi',
},
'playlist_mincount': 801,
}]
_PAGE_SIZE = 50
def _fetch_page(self, token, page):
return self._call_api(
'playlist', token, f'playlist page {page}', {'p': page, 'n': self._PAGE_SIZE})
def _entries(self, token, first_page_data, page):
page_data = first_page_data if not page else self._fetch_page(token, page + 1)
yield from self._yield_songs(page_data)
def _real_extract(self, url):
display_id = self._match_id(url)
playlist_data = self._fetch_page(display_id, 1)
total_pages = math.ceil(int(playlist_data['list_count']) / self._PAGE_SIZE)
return self.playlist_result(InAdvancePagedList(
functools.partial(self._entries, display_id, playlist_data),
total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
View File
@@ -80,7 +80,7 @@ class JoqrAgIE(InfoExtractor):
note='Downloading metadata', errnote='Failed to download metadata')
title = self._extract_metadata('Program_name', metadata)
if not title or title == '放送休止':
formats = []
live_status = 'is_upcoming'
release_timestamp = self._extract_start_timestamp(video_id, False)
View File
@@ -13,7 +13,8 @@ from ..utils import (
class KickBaseIE(InfoExtractor):
def _real_initialize(self):
self._request_webpage(
HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False, impersonate=True)
xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
if not xsrf_token:
self.write_debug('kick.com did not set XSRF-TOKEN cookie')
@@ -25,7 +26,7 @@ class KickBaseIE(InfoExtractor):
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
return self._download_json(
f'https://kick.com/api/v1/{path}', display_id, note=note,
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
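Passing impersonate=True lets the networking layer pick any available browser fingerprint rather than a fixed one (and emits the 'impersonation' warning the test below expects when no impersonating handler is installed). A hedged sketch of pinning a specific target instead, assuming the optional curl_cffi dependency is present:

from yt_dlp.networking.impersonate import ImpersonateTarget

target = ImpersonateTarget.from_str('chrome')  # client[:version[:os[:os_version]]]
# inside an extractor, this would be: self._download_json(..., impersonate=target)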
class KickIE(KickBaseIE):
@@ -82,26 +83,27 @@ class KickIE(KickBaseIE):
class KickVODIE(KickBaseIE):
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
'md5': '3870f94153e40e7121a6e46c068b70cb',
'info_dict': {
'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
'ext': 'mp4',
'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
'channel': 'jaredfps',
'channel_id': '26608',
'uploader': 'JaredFPS',
'uploader_id': '26799',
'upload_date': '20240402',
'timestamp': 1712097108,
'duration': 33859.0,
'thumbnail': r're:^https?://.*\.jpg',
'categories': ['Call of Duty: Warzone'],
},
'params': {
'skip_download': 'm3u8',
},
'expected_warnings': [r'impersonation'],
}]
def _real_extract(self, url):
View File
@@ -1,67 +1,153 @@
import urllib.parse
from .common import InfoExtractor
from ..utils import (
filter_dict,
parse_iso8601,
traverse_obj,
try_call,
url_or_none,
)
class MediciIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?P<sub>www|edu)\.)?medici\.tv/[a-z]{2}/[\w.-]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.medici.tv/en/operas/thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
'md5': 'd483f74e7a7a9eac0dbe152ab189050d',
'info_dict': {
'id': '8032',
'ext': 'mp4',
'title': 'Thomas Adès\'s The Exterminating Angel',
'description': 'md5:708ae6350dadc604225b4a6e32482bab',
'thumbnail': r're:https://.+/.+\.jpg',
'upload_date': '20240304',
'timestamp': 1709561766,
'display_id': 'thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://edu.medici.tv/en/operas/wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
'md5': '4ef3f4079a6e1c617584463a9eb84f99',
'info_dict': {
'id': '7900',
'ext': 'mp4',
'title': 'Wagner\'s Lohengrin',
'description': 'md5:a384a62937866101f86902f21752cd89',
'thumbnail': r're:https://.+/.+\.jpg',
'upload_date': '20231017',
'timestamp': 1697554771,
'display_id': 'wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://www.medici.tv/en/concerts/sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
'md5': '9dd757e53b22b2511e85ea9ea60e4815',
'info_dict': {
'id': '5712',
'ext': 'mp4',
'title': 'Sergey Smbatyan conducts Tigran Mansurian — With Chouchane Siranossian and Mario Brunello',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:9411fe44c874bb10e9af288c65816e41',
'upload_date': '20200323',
'timestamp': 1584975600,
'display_id': 'sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://www.medici.tv/en/ballets/carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
'md5': '40f5e76cb701a97a6d7ba23b62c49990',
'info_dict': {
'id': '7857',
'ext': 'mp4',
'title': 'Carmen by Jiří Bubeníček after Roland Petit, music by Bizet, de Falla, Castelnuovo-Tedesco, and Bonolis',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:0f15a15611ed748020c769873e10a8bb',
'upload_date': '20240223',
'timestamp': 1708707600,
'display_id': 'carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://www.medici.tv/en/documentaries/la-sonnambula-liege-2023-documentaire',
'md5': '87ff198018ce79a34757ab0dd6f21080',
'info_dict': {
'id': '7513',
'ext': 'mp4',
'title': 'La Sonnambula',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:0caf9109a860fd50cd018df062a67f34',
'upload_date': '20231103',
'timestamp': 1699010830,
'display_id': 'la-sonnambula-liege-2023-documentaire',
},
'expected_warnings': [r'preview'],
}, {
'url': 'https://edu.medici.tv/en/masterclasses/yvonne-loriod-olivier-messiaen',
'md5': 'fb5dcec46d76ad20fbdbaabb01da191d',
'info_dict': {
'id': '3024',
'ext': 'mp4',
'title': 'Olivier Messiaen and Yvonne Loriod, pianists and teachers',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:aab948e2f7690214b5c28896c83f1fc1',
'upload_date': '20150223',
'timestamp': 1424706608,
'display_id': 'yvonne-loriod-olivier-messiaen',
},
'skip': 'Requires authentication; preview starts in the middle',
}, {
'url': 'https://www.medici.tv/en/jazz/makaya-mccraven-la-rochelle',
'md5': '4cc279a8b06609782747c8f50beea2b3',
'info_dict': {
'id': '7922',
'ext': 'mp4',
'title': 'NEW: Makaya McCraven in La Rochelle',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:b5a8aaeb6993d8ccb18bde8abb8aa8d2',
'upload_date': '20231228',
'timestamp': 1703754863,
'display_id': 'makaya-mccraven-la-rochelle',
},
'expected_warnings': [r'preview'],
}]
def _real_extract(self, url):
display_id, subdomain = self._match_valid_url(url).group('id', 'sub')
self._request_webpage(url, display_id, 'Requesting CSRF token cookie')
subdomain = 'edu-' if subdomain == 'edu' else ''
origin = f'https://{urllib.parse.urlparse(url).hostname}'
data = self._download_json(
f'https://api.medici.tv/{subdomain}satie/edito/movie-file/{display_id}/', display_id,
headers=filter_dict({
'Authorization': try_call(
lambda: urllib.parse.unquote(self._get_cookies(url)['auth._token.mAuth'].value)),
'Device-Type': 'web',
'Origin': origin,
'Referer': f'{origin}/',
'Accept': 'application/json, text/plain, */*',
}))
if not traverse_obj(data, ('video', 'is_full_video')) and traverse_obj(
data, ('video', 'is_limited_by_user_access')):
self.report_warning(
'The full video is for subscribers only. Only previews will be downloaded. If you '
'have used the --cookies-from-browser option, try using the --cookies option instead')
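The Authorization header above is built defensively: try_call swallows the KeyError raised when the auth cookie is absent, and filter_dict then drops the resulting None entry so no empty header is sent. The same pattern in isolation:

from urllib.parse import unquote
from yt_dlp.utils import filter_dict, try_call

cookies = {}  # imagine _get_cookies() found no 'auth._token.mAuth' cookie
headers = filter_dict({
    'Authorization': try_call(lambda: unquote(cookies['auth._token.mAuth'].value)),
    'Device-Type': 'web',
})
assert headers == {'Device-Type': 'web'}  # no Authorization key at all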
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
data['video']['video_url'], display_id, 'mp4')
return {
'id': str(data['id']),
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('subtitle', {str}),
'thumbnail': ('picture', {url_or_none}),
'timestamp': ('date_publish', {parse_iso8601}),
}),
}
View File
@@ -1,5 +1,7 @@
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError, UserNotLive, int_or_none, url_or_none
from ..utils.traversal import traverse_obj
class MixchIE(InfoExtractor):
@@ -25,25 +27,23 @@ class MixchIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(f'https://mixch.tv/api-web/users/{video_id}/live', video_id)
if not traverse_obj(data, ('liveInfo', {dict})):
raise UserNotLive(video_id=video_id)
return {
'id': video_id,
'uploader_id': video_id,
**traverse_obj(data, {
'title': ('liveInfo', 'title', {str}),
'comment_count': ('liveInfo', 'comments', {int_or_none}),
'view_count': ('liveInfo', 'visitor', {int_or_none}),
'timestamp': ('liveInfo', 'created', {int_or_none}),
'uploader': ('broadcasterInfo', 'name', {str}),
}),
'formats': [{
'format_id': 'hls',
'url': data['liveInfo']['hls'],
'ext': 'mp4',
'protocol': 'm3u8',
}],
@@ -60,22 +60,38 @@ class MixchArchiveIE(InfoExtractor):
'skip': 'paid video, no DRM. expires at Jan 23',
'info_dict': {
'id': '421',
'ext': 'mp4',
'title': '96NEKO SHOW TIME',
}
}, {
'url': 'https://mixch.tv/archive/1213',
'skip': 'paid video, no DRM. expires at Dec 31, 2023',
'info_dict': {
'id': '1213',
'ext': 'mp4',
'title': '【特別トーク番組アーカイブス】Merm4id×燐舞曲 2nd LIVE「VERSUS」',
'release_date': '20231201',
'thumbnail': str,
}
}, {
'url': 'https://mixch.tv/archive/1214',
'only_matching': True,
}] }]
def _real_extract(self, url):
video_id = self._match_id(url)
try:
info_json = self._download_json(
f'https://mixch.tv/api-web/archive/{video_id}', video_id)['archive']
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
self.raise_login_required()
raise
return {
'id': video_id,
'title': traverse_obj(info_json, ('title', {str})),
'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
}
View File
@@ -8,6 +8,7 @@ from ..utils import (
int_or_none,
join_nonempty,
parse_duration,
remove_end,
traverse_obj,
try_call,
unescapeHTML,
@@ -19,8 +20,7 @@ from ..utils import (
class NhkBaseIE(InfoExtractor):
_API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'
def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
return self._download_json(
@@ -83,7 +83,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
is_video = m_type != 'audio'
if is_video:
episode_id = episode_id[:4] + '-' + episode_id[4:]
@@ -138,9 +138,10 @@ class NhkBaseIE(InfoExtractor):
else:
if fetch_episode:
# From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
audio_path = remove_end(episode['audio']['audio'], '.m4a')
info['formats'] = self._extract_m3u8_formats(
f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8',
episode_id, 'm4a', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
for f in info['formats']:
@@ -155,9 +156,11 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE):
_VALID_URL = [
rf'{NhkBaseIE._BASE_URL_REGEX}shows/(?:(?P<type>video)/)?(?P<id>\d{{4}}[\da-z]\d+)/?(?:$|[?#])',
rf'{NhkBaseIE._BASE_URL_REGEX}(?:ondemand|shows)/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[\da-z]+)',
rf'{NhkBaseIE._BASE_URL_REGEX}ondemand/(?P<type>video)/(?P<id>\d{{4}}[\da-z]\d+)', # deprecated
]
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
@@ -167,17 +170,16 @@ class NhkVodIE(NhkBaseIE):
'ext': 'mp4',
'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
'thumbnail': r're:https://.+/.+\.jpg',
'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
'series': 'Japan Railway Journal',
'modified_timestamp': 1707217907,
'timestamp': 1681428600,
'release_timestamp': 1693883728,
'duration': 1679,
'upload_date': '20230413',
'modified_date': '20240206',
'release_date': '20230905',
},
}, {
# video clip
@@ -188,15 +190,15 @@ class NhkVodIE(NhkBaseIE):
'ext': 'mp4',
'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
'thumbnail': r're:https://.+/.+\.jpg',
'series': 'Dining with the Chef',
'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
'duration': 148,
'upload_date': '20190816',
'release_date': '20230902',
'release_timestamp': 1693619292,
'modified_timestamp': 1707217907,
'modified_date': '20240206',
'timestamp': 1565997540,
},
}, {
@@ -208,7 +210,7 @@ class NhkVodIE(NhkBaseIE):
'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
'series': 'Living in Japan',
'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
'thumbnail': r're:https://.+/.+\.jpg',
'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
},
}, {
@@ -245,7 +247,7 @@ class NhkVodIE(NhkBaseIE):
'title': 'おはよう日本7時台 - 10月8日放送',
'series': 'おはよう日本7時台',
'episode': '10月8日放送',
'thumbnail': r're:https://.+/.+\.jpg',
'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
},
'skip': 'expires 2023-10-15',
@@ -255,17 +257,100 @@ class NhkVodIE(NhkBaseIE):
'info_dict': {
'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
'ext': 'mp4',
'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
'description': 'md5:5db620c46a0698451cc59add8816b797',
'thumbnail': r're:https://.+/.+\.jpg',
'release_date': '20230905',
'timestamp': 1690103400,
'duration': 2939,
'release_timestamp': 1693898699,
'upload_date': '20230723',
'modified_timestamp': 1707217907,
'modified_date': '20240206',
'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
'series': 'Barakan Discovers',
},
}, {
# /ondemand/video/ url with alphabetical character in 5th position of id
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
'info_dict': {
'id': 'nw_c_en_9999-a07',
'ext': 'mp4',
'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
'series': 'Mini-Dramas on SDGs',
'modified_date': '20240206',
'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
'timestamp': 1621962360,
'duration': 189,
'release_date': '20230903',
'modified_timestamp': 1707217907,
'upload_date': '20210525',
'thumbnail': r're:https://.+/.+\.jpg',
'release_timestamp': 1693713487,
},
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
'info_dict': {
'id': 'nw_c_en_9999-d17',
'ext': 'mp4',
'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
'description': 'Todays focus: Snow',
'release_timestamp': 1693792402,
'release_date': '20230904',
'upload_date': '20220128',
'timestamp': 1643370960,
'thumbnail': r're:https://.+/.+\.jpg',
'duration': 136,
'series': '',
'modified_date': '20240206',
'modified_timestamp': 1707217907,
},
}, {
# new /shows/ url format
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
'info_dict': {
'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
'ext': 'mp4',
'title': 'Japanology Plus - 20th Anniversary Special Part 1',
'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
'episode': '20th Anniversary Special Part 1',
'series': 'Japanology Plus',
'thumbnail': r're:https://.+/.+\.jpg',
'duration': 1680,
'timestamp': 1711020600,
'upload_date': '20240321',
'release_timestamp': 1711022683,
'release_date': '20240321',
'modified_timestamp': 1711031012,
'modified_date': '20240321',
},
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
'info_dict': {
'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
'ext': 'mp4',
'title': '100 Ideas to Save the World - Working Styles Evolve',
'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
'episode': 'Working Styles Evolve',
'series': '100 Ideas to Save the World',
'thumbnail': r're:https://.+/.+\.jpg',
'duration': 899,
'upload_date': '20230325',
'timestamp': 1679755200,
'release_date': '20230905',
'release_timestamp': 1693880540,
'modified_date': '20240206',
'modified_timestamp': 1707217907,
},
}, {
# new /shows/audio/ url format
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20231001-1/',
'only_matching': True,
}, {
# valid url even if can't be found in wild; support needed for clip entries extraction
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/9999o80/',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -273,18 +358,21 @@ class NhkVodIE(NhkBaseIE):
class NhkVodProgramIE(NhkBaseIE):
_VALID_URL = rf'''(?x)
{NhkBaseIE._BASE_URL_REGEX}(?:shows|tv)/
(?:(?P<type>audio)/programs/)?(?P<id>\w+)/?
(?:\?(?:[^#]+&)?type=(?P<episode_type>clip|(?:radio|tv)Episode))?'''
_TESTS = [{
# video program episodes
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/sumo/',
'info_dict': {
'id': 'sumo',
'title': 'GRAND SUMO Highlights',
'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
},
'playlist_mincount': 1,
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/',
'info_dict': {
'id': 'japanrailway',
'title': 'Japan Railway Journal',
@@ -293,40 +381,68 @@ class NhkVodProgramIE(NhkBaseIE):
'playlist_mincount': 12,
}, {
# video program clips
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/?type=clip',
'info_dict': {
'id': 'japanrailway',
'title': 'Japan Railway Journal',
'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
},
'playlist_mincount': 12,
}, {
# audio program
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/programs/livinginjapan/',
'info_dict': {
'id': 'livinginjapan',
'title': 'Living in Japan',
'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
},
'playlist_mincount': 12,
}, {
# /tv/ program url
'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
'info_dict': {
'id': 'designtalksplus',
'title': 'DESIGN TALKS plus',
'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
},
'playlist_mincount': 20,
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
'only_matching': True,
}]
@classmethod
def suitable(cls, url):
return False if NhkVodIE.suitable(url) else super().suitable(url)
def _extract_meta_from_class_elements(self, class_values, html):
for class_value in class_values:
if value := clean_html(get_element_by_class(class_value, html)):
return value
def _real_extract(self, url):
lang, m_type, program_id, episode_type = self._match_valid_url(url).group('lang', 'type', 'id', 'episode_type')
episodes = self._call_api(
program_id, lang, m_type != 'audio', False, episode_type == 'clip')
def entries():
for episode in episodes:
if episode_path := episode.get('url'):
yield self._extract_episode_info(urljoin(url, episode_path), episode)
html = self._download_webpage(url, program_id)
program_title = self._extract_meta_from_class_elements([
'p-programDetail__title', # /ondemand/program/
'pProgramHero__logoText', # /shows/
'tAudioProgramMain__title', # /shows/audio/programs/
'p-program-name'], html) # /tv/
program_description = self._extract_meta_from_class_elements([
'p-programDetail__text', # /ondemand/program/
'pProgramHero__description', # /shows/
'tAudioProgramMain__info', # /shows/audio/programs/
'p-program-description'], html) # /tv/
return self.playlist_result(entries(), program_id, program_title, program_description)
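_extract_meta_from_class_elements simply returns the first class name that yields non-empty text, which is what lets one extractor serve the four page layouts noted in the comments. A toy run where only the /shows/ class is present:

from yt_dlp.utils import clean_html, get_element_by_class

html = '<h1 class="pProgramHero__logoText">GRAND SUMO Highlights</h1>'
for class_value in ('p-programDetail__title', 'pProgramHero__logoText'):
    if value := clean_html(get_element_by_class(class_value, html)):
        print(value)  # 'GRAND SUMO Highlights' - the first non-empty match wins
        break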
class NhkForSchoolBangumiIE(InfoExtractor):
View File
@@ -1,8 +1,8 @@
import itertools
import urllib.parse
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..networking.exceptions import HTTPError
from ..utils import (
KNOWN_EXTENSIONS,
@@ -14,7 +14,6 @@ from ..utils import (
parse_iso8601,
str_or_none,
traverse_obj,
url_or_none,
urljoin,
)
@@ -92,7 +91,7 @@ class PatreonIE(PatreonBaseIE):
'thumbnail': 're:^https?://.*$',
'upload_date': '20150211',
'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
'uploader_id': '@TraciHinesMusic',
'categories': ['Entertainment'],
'duration': 282,
'view_count': int,
@@ -106,8 +105,10 @@ class PatreonIE(PatreonBaseIE):
'availability': 'public',
'channel_follower_count': int,
'playable_in_embed': True,
'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
'comment_count': int,
'channel_is_verified': True,
'chapters': 'count:4',
},
'params': {
'noplaylist': True,
@@ -176,6 +177,48 @@ class PatreonIE(PatreonBaseIE):
'uploader_url': 'https://www.patreon.com/thenormies',
},
'skip': 'Patron-only content',
}, {
# dead vimeo and embed URLs, need to extract post_file
'url': 'https://www.patreon.com/posts/hunter-x-hunter-34007913',
'info_dict': {
'id': '34007913',
'ext': 'mp4',
'title': 'Hunter x Hunter | Kurapika DESTROYS Uvogin!!!',
'like_count': int,
'uploader': 'YaBoyRoshi',
'timestamp': 1581636833,
'channel_url': 'https://www.patreon.com/yaboyroshi',
'thumbnail': r're:^https?://.*$',
'tags': ['Hunter x Hunter'],
'uploader_id': '14264111',
'comment_count': int,
'channel_follower_count': int,
'description': 'Kurapika is a walking cheat code!',
'upload_date': '20200213',
'channel_id': '2147162',
'uploader_url': 'https://www.patreon.com/yaboyroshi',
},
}, {
# NSFW vimeo embed URL
'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
'info_dict': {
'id': '902250943',
'ext': 'mp4',
'title': '❤️(4K) Spiderman Girl Yeonhwas Gift ❤️(4K) 스파이더맨걸 연화의 선물',
'description': '❤️(4K) Spiderman Girl Yeonhwas Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
'uploader': 'Npickyeonhwa',
'uploader_id': '90574422',
'uploader_url': 'https://www.patreon.com/Yeonhwa726',
'channel_id': '10237902',
'channel_url': 'https://www.patreon.com/Yeonhwa726',
'duration': 70,
'timestamp': 1705150153,
'upload_date': '20240113',
'comment_count': int,
'like_count': int,
'thumbnail': r're:^https?://.+',
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
@@ -245,25 +288,21 @@ class PatreonIE(PatreonBaseIE):
})
# handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
v_url = urllib.parse.unquote(self._html_search_regex(
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
if url_or_none(v_url) and self._request_webpage(
v_url, video_id, 'Checking Vimeo embed URL',
headers={'Referer': 'https://patreon.com/'},
fatal=False, errnote=False):
return self.url_result(
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE, url_transparent=True, **info)
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
return self.url_result(embed_url, **info)
post_file = traverse_obj(attributes, 'post_file')
if post_file:
View File
@@ -361,7 +361,7 @@ class SoundcloudBaseIE(InfoExtractor):
'like_count': extract_count('favoritings') or extract_count('likes'),
'comment_count': extract_count('comment'),
'repost_count': extract_count('reposts'),
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
'formats': formats if not extract_flat else None
}
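The new genres path uses traverse_obj's all key to normalize the scalar genre field into a list, after the inner lambda has turned an empty string into None so it gets filtered out. In isolation:

from yt_dlp.utils import traverse_obj

path = ('genre', {str}, {lambda x: x or None}, all)
assert traverse_obj({'genre': 'Trance'}, path) == ['Trance']
assert traverse_obj({'genre': ''}, path) == []  # empty string is dropped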
@@ -395,10 +395,10 @@ class SoundcloudIE(SoundcloudBaseIE):
_TESTS = [
{
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
'info_dict': {
'id': '62986583',
'ext': 'opus',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music',
@@ -411,6 +411,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
'uploader_url': 'https://soundcloud.com/ethmusic',
'genres': [],
}
},
# geo-restricted
@@ -418,7 +421,7 @@ class SoundcloudIE(SoundcloudBaseIE):
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
'id': '47127627',
'ext': 'opus',
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
@@ -431,6 +434,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
'uploader_url': 'https://soundcloud.com/the-concept-band',
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
'genres': ['Alternative'],
},
},
# private link
@@ -452,6 +458,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
'uploader_url': 'https://soundcloud.com/jaimemf',
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
'genres': ['youtubedl'],
},
},
# private link (alt format)
@@ -473,6 +482,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
'uploader_url': 'https://soundcloud.com/jaimemf',
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
'genres': ['youtubedl'],
},
},
# downloadable song
@@ -482,6 +494,21 @@ class SoundcloudIE(SoundcloudBaseIE):
'info_dict': {
'id': '343609555',
'ext': 'wav',
'title': 'The Following',
'description': '',
'uploader': '80M',
'uploader_id': '312384765',
'uploader_url': 'https://soundcloud.com/the80m',
'upload_date': '20170922',
'timestamp': 1506120436,
'duration': 397.228,
'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
'license': 'all-rights-reserved',
'like_count': int,
'comment_count': int,
'repost_count': int,
'view_count': int,
'genres': ['Dance & EDM'],
},
},
# private link, downloadable format
@@ -503,6 +530,9 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
'uploader_url': 'https://soundcloud.com/oriuplift',
'genres': ['Trance'],
},
},
# no album art, use avatar pic for thumbnail
@@ -525,6 +555,8 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
'uploader_url': 'https://soundcloud.com/garyvee',
'genres': [],
},
'params': {
'skip_download': True,
@@ -532,13 +564,13 @@ class SoundcloudIE(SoundcloudBaseIE):
},
},
{
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
'md5': '8227c3473a4264df6b02ad7e5b7527ac',
'info_dict': {
'id': '583011102',
'ext': 'opus',
'title': 'Mezzo Valzer',
'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
'uploader': 'Giovanni Sarani',
'uploader_id': '3352531',
'timestamp': 1551394171,
'upload_date': '20190228',
@@ -549,6 +581,8 @@ class SoundcloudIE(SoundcloudBaseIE):
'like_count': int,
'comment_count': int,
'repost_count': int,
'genres': ['Piano'],
'uploader_url': 'https://soundcloud.com/giovannisarani',
},
},
{
View File
@@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):
class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
IE_NAME = 'theatercomplextown:vod'
_TESTS = [{
'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
@@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
}, {
'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
'only_matching': True,
}, {
'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
'only_matching': True,
}]
_API_PATH = 'videoEpisodes'
@@ -204,7 +207,7 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
IE_NAME = 'theatercomplextown:ppv'
_TESTS = [{
'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
@@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
}, {
'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
'only_matching': True,
}, {
'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
'only_matching': True,
}]
_API_PATH = 'events'
View File
@@ -41,7 +41,7 @@ class STVPlayerIE(InfoExtractor):
ptype, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, video_id, fatal=False) or ''
props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
player_api_cache = try_get(
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
View File
@@ -155,6 +155,7 @@ class TikTokBaseIE(InfoExtractor):
'locale': 'en',
'ac2': 'wifi5g',
'uoo': '1',
'carrier_region': 'US',
'op_region': 'US',
'build_number': self._APP_INFO['app_version'],
'region': 'US',
@@ -775,7 +776,7 @@ class TikTokIE(TikTokBaseIE):
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
self.write_debug('Found next.js data')
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
View File
@@ -191,17 +191,25 @@ class TwitchBaseIE(InfoExtractor):
}] if thumbnail else None
def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
formats = self._extract_m3u8_formats(
f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
'allow_source': 'true',
'allow_audio_only': 'true',
'allow_spectre': 'true',
'p': random.randint(1000000, 10000000),
'platform': 'web',
'player': 'twitchweb',
'supported_codecs': 'av1,h265,h264',
'playlist_include_framerate': 'true',
'sig': signature,
'token': token,
})
for fmt in formats:
if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
# mpegts does not yet have proper support for av1
fmt['downloader_options'] = {'ffmpeg_args_out': ['-f', 'mp4']}
return formats
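A quick check of that codec gate on hypothetical format dicts; only the AV1 variant picks up the mp4 remux hint:

formats = [
    {'format_id': '1080p', 'vcodec': 'av01.0.08M.08'},
    {'format_id': '720p', 'vcodec': 'avc1.4D402A'},
    {'format_id': 'audio_only', 'vcodec': None},
]
for fmt in formats:
    if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
        fmt['downloader_options'] = {'ffmpeg_args_out': ['-f', 'mp4']}
assert [f['format_id'] for f in formats if 'downloader_options' in f] == ['1080p']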
class TwitchVodIE(TwitchBaseIE):
View File
@@ -707,6 +707,7 @@ class VKWallPostIE(VKBaseIE):
class VKPlayBaseIE(InfoExtractor):
_BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/'
_RESOLUTIONS = {
'tiny': '256x144',
'lowest': '426x240',
@@ -765,7 +766,7 @@ class VKPlayBaseIE(InfoExtractor):
class VKPlayIE(VKPlayBaseIE):
_VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<username>[^/#?]+)/record/(?P<id>[\da-f-]+)'
_TESTS = [{
'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
'info_dict': {
@@ -776,13 +777,16 @@ class VKPlayIE(VKPlayBaseIE):
'uploader_id': '13159830',
'release_timestamp': 1683461378,
'release_date': '20230507',
'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview',
'duration': 10608,
'view_count': int,
'like_count': int,
'categories': ['Atomic Heart'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
'only_matching': True,
}]
def _real_extract(self, url):
@@ -802,7 +806,7 @@ class VKPlayIE(VKPlayBaseIE):
class VKPlayLiveIE(VKPlayBaseIE):
_VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<id>[^/#?]+)/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://vkplay.live/bayda',
'info_dict': {
@@ -813,7 +817,7 @@ class VKPlayLiveIE(VKPlayBaseIE):
'uploader_id': '12279401',
'release_timestamp': 1687209962,
'release_date': '20230619',
'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview',
'view_count': int,
'concurrent_view_count': int,
'like_count': int,
@@ -822,6 +826,9 @@ class VKPlayLiveIE(VKPlayBaseIE):
},
'skip': 'livestream',
'params': {'skip_download': True},
}, {
'url': 'https://live.vkplay.ru/lebwa',
'only_matching': True,
}]
def _real_extract(self, url):
View File
@@ -16,6 +16,7 @@ from ..utils import (
join_nonempty,
jwt_encode_hs256,
make_archive_id,
merge_dicts,
parse_age_limit,
parse_iso8601,
str_or_none,
@@ -425,3 +426,64 @@ class DagelijkseKostIE(VRTBaseIE):
['description', 'twitter:description', 'og:description'], webpage),
'_old_archive_ids': [make_archive_id('Canvas', video_id)],
}
class Radio1BeIE(VRTBaseIE):
_VALID_URL = r'https?://radio1\.be/(?:lees|luister/select)/(?P<id>[\w/-]+)'
_TESTS = [{
'url': 'https://radio1.be/luister/select/de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
'info_dict': {
'id': 'eb6c22e9-544f-44f4-af39-cf8cccd29e22',
'title': 'Komt N-VA volgend jaar op in Wallonië?',
'display_id': 'de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
'description': 'md5:b374ea1c9302f38362df9dea1931468e',
'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+'
},
'playlist_mincount': 1
}, {
'url': 'https://radio1.be/lees/europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza?view=web',
'info_dict': {
'id': '5d47f102-dbdb-4fa0-832b-26c1870311f2',
'title': 'Europese Unie wil "onmiddellijke humanitaire pauze" en "duurzaam staakt-het-vuren" in Gaza',
'description': 'md5:1aad1fae7d39edeffde5d3e67d276b64',
'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+',
'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza'
},
'playlist_mincount': 1
}]
def _extract_video_entries(self, next_js_data, display_id):
video_data = traverse_obj(
next_js_data, ((None, ('paragraphs', ...)), {lambda x: x if x['mediaReference'] else None}))
for data in video_data:
media_reference = data['mediaReference']
formats, subtitles = self._extract_formats_and_subtitles(
self._call_api(media_reference), display_id)
yield {
'id': media_reference,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(data, {
'title': ('title', {str}),
'description': ('body', {clean_html})
}),
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['item']
return self.playlist_result(
self._extract_video_entries(next_js_data, display_id), **merge_dicts(traverse_obj(
next_js_data, ({
'id': ('id', {str}),
'title': ('title', {str}),
'description': (('description', 'content'), {clean_html}),
}), get_all=False), {
'display_id': display_id,
'title': self._html_search_meta(['name', 'og:title', 'twitter:title'], webpage),
'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
}))
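The playlist metadata above leans on merge_dicts keeping the first non-empty value per key, so the API fields win and the HTML meta tags only fill gaps (including empty strings). In isolation:

from yt_dlp.utils import merge_dicts

api_meta = {'title': 'From the API', 'description': ''}
page_meta = {'title': 'og:title fallback', 'description': 'og:description fallback'}
merged = merge_dicts(api_meta, page_meta)
assert merged['title'] == 'From the API'  # earlier dict wins
assert merged['description'] == 'og:description fallback'  # empty string is filled in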
View File
@@ -12,6 +12,7 @@ from ..utils import (
jwt_decode_hs256,
traverse_obj,
try_call,
url_basename,
url_or_none,
urlencode_postdata,
variadic,
@@ -147,7 +148,7 @@ class WrestleUniverseBaseIE(InfoExtractor):
metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
if not metadata:
webpage = self._download_webpage(url, video_id)
nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
metadata = traverse_obj(nextjs_data, (
'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
return metadata
@@ -194,8 +195,7 @@ class WrestleUniverseVODIE(WrestleUniverseBaseIE):
return {
'id': video_id,
'formats': self._get_formats(video_data, ('protocolHls', 'url', {url_or_none}), video_id),
**traverse_obj(metadata, {
'title': ('displayName', {str}),
'description': ('description', {str}),
@@ -259,6 +259,10 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
'params': {
'skip_download': 'm3u8',
},
}, {
'note': 'manifest provides live-a (partial) and live-b (full) streams',
'url': 'https://www.wrestle-universe.com/en/lives/umc99R9XsexXrxr9VjTo9g',
'only_matching': True,
}]
_API_PATH = 'events'
@@ -285,12 +289,16 @@ class WrestleUniversePPVIE(WrestleUniverseBaseIE):
video_data, decrypt = self._call_encrypted_api(
video_id, ':watchArchive', 'watch archive', data={'method': 1})
# 'chromecastUrls' can be only partial videos, avoid
info['formats'] = self._get_formats(video_data, ('hls', (('urls', ...), 'url'), {url_or_none}), video_id)
for f in info['formats']:
# bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
if f.get('tbr'):
f['tbr'] = int(f['tbr'] / 2.5)
# prefer variants with the same basename as the master playlist to avoid partial streams
f['format_id'] = url_basename(f['url']).partition('.')[0]
if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
f['preference'] = -10
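The demotion works purely on playlist basenames: a variant whose stem does not start with the master manifest's stem is assumed to be a partial (live-a) stream. Sketch with hypothetical live-a/live-b names matching the test note above:

from yt_dlp.utils import url_basename

manifest_url = 'https://example.com/hls/live-b.m3u8'  # hypothetical master
fmt_url = 'https://example.com/hls/live-a_1080p.m3u8'  # hypothetical variant
format_id = url_basename(fmt_url).partition('.')[0]  # 'live-a_1080p'
master_stem = url_basename(manifest_url).partition('.')[0]  # 'live-b'
assert not format_id.startswith(master_stem)  # so this variant gets preference -10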
hls_aes_key = traverse_obj(video_data, ('hls', 'key', {decrypt}))
if hls_aes_key:
View File
@@ -2,6 +2,7 @@ from __future__ import annotations
import contextlib
import functools
import os
import socket
import ssl
import sys
@@ -121,6 +122,9 @@ def make_ssl_context(
context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
context.check_hostname = verify
context.verify_mode = ssl.CERT_REQUIRED if verify else ssl.CERT_NONE
# OpenSSL 1.1.1+ Python 3.8+ keylog file
if hasattr(context, 'keylog_filename'):
context.keylog_filename = os.environ.get('SSLKEYLOGFILE') or None
# Some servers may reject requests if ALPN extension is not sent. See:
# https://github.com/python/cpython/issues/85140
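With this hook, setting SSLKEYLOGFILE before running yt-dlp makes its TLS sessions decryptable in Wireshark. The same mechanism with only the standard library:

import os
import ssl
import urllib.request

os.environ.setdefault('SSLKEYLOGFILE', '/tmp/tls-keys.log')  # hypothetical path
ctx = ssl.create_default_context()
if hasattr(ctx, 'keylog_filename'):  # OpenSSL 1.1.1+ and Python 3.8+
    ctx.keylog_filename = os.environ['SSLKEYLOGFILE']
urllib.request.urlopen('https://example.com', context=ctx)  # session secrets land in the keylog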
View File
@@ -691,6 +691,10 @@ def create_parser():
         '--break-on-existing',
         action='store_true', dest='break_on_existing', default=False,
         help='Stop the download process when encountering a file that is in the archive')
+    selection.add_option(
+        '--no-break-on-existing',
+        action='store_false', dest='break_on_existing',
+        help='Do not stop the download process when encountering a file that is in the archive (default)')
     selection.add_option(
         '--break-on-reject',
         action='store_true', dest='break_on_reject', default=False,
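The new option follows the standard paired-flag pattern: both flags share one dest, so whichever appears last on the command line wins. A minimal sketch of that pattern with optparse:

import optparse

parser = optparse.OptionParser()
parser.add_option('--break-on-existing', action='store_true',
                  dest='break_on_existing', default=False)
parser.add_option('--no-break-on-existing', action='store_false',
                  dest='break_on_existing')

# the later flag overrides the earlier one
opts, _ = parser.parse_args(['--break-on-existing', '--no-break-on-existing'])
assert opts.break_on_existing is False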
@@ -1254,6 +1258,10 @@ def create_parser():
             'the progress attributes are accessible under "progress" key. E.g. '
             # TODO: Document the fields inside "progress"
             '--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"'))
+    verbosity.add_option(
+        '--progress-delta',
+        metavar='SECONDS', action='store', dest='progress_delta', type=float, default=0,
+        help='Time between progress output (default: 0)')
     verbosity.add_option(
         '-v', '--verbose',
         action='store_true', dest='verbose', default=False,
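A float progress_delta naturally feeds a time-based throttle: emit at most one progress line per delta seconds. The helper below is an illustrative sketch, not yt-dlp's actual implementation.

import time

class ProgressThrottle:
    # allow at most one progress line every `delta` seconds (0 = no limit)
    def __init__(self, delta=0):
        self.delta = delta
        self._last = float('-inf')

    def should_emit(self):
        now = time.monotonic()
        if now - self._last >= self.delta:
            self._last = now
            return True
        return False

throttle = ProgressThrottle(delta=1.0)
for step in range(10):
    if throttle.should_emit():
        print(f'progress update at step {step}')
    time.sleep(0.25)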
View File
@@ -69,6 +69,10 @@ def _get_variant_and_executable_path():
         # Ref: https://en.wikipedia.org/wiki/Uname#Examples
         if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
             machine = '_x86' if platform.architecture()[0][:2] == '32' else ''
+        # sys.executable returns a /tmp/ path for staticx builds (linux_static)
+        # Ref: https://staticx.readthedocs.io/en/latest/usage.html#run-time-information
+        if static_exe_path := os.getenv('STATICX_PROG_PATH'):
+            path = static_exe_path
         return f'{remove_end(sys.platform, "32")}{machine}_exe', path

     path = os.path.dirname(__file__)
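A short sketch of why the fallback matters: a staticx-packed binary unpacks itself into a temporary directory, so sys.executable points at the throwaway copy, while STATICX_PROG_PATH restores the path the user actually ran. The function name here is illustrative, not yt-dlp's.

import os
import sys

def executable_path():
    # under staticx, sys.executable is the /tmp/... bootstrap copy;
    # STATICX_PROG_PATH points back at the real on-disk binary
    return os.getenv('STATICX_PROG_PATH') or sys.executable

print(executable_path())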
@@ -114,7 +118,7 @@ _NON_UPDATEABLE_REASONS = {
     **{variant: f'Auto-update is not supported for unpackaged {name} executable; Re-download the latest release'
        for variant, name in {'win32_dir': 'Windows', 'darwin_dir': 'MacOS', 'linux_dir': 'Linux'}.items()},
     'source': 'You cannot update when running from source code; Use git to pull the latest changes',
-    'unknown': 'You installed yt-dlp with a package manager or setup.py; Use that to update',
+    'unknown': 'You installed yt-dlp from a manual build or with a package manager; Use that to update',
     'other': 'You are using an unofficial build of yt-dlp; Build the executable again',
 }
View File
@@ -50,7 +50,6 @@ from ..compat import (
     compat_expanduser,
     compat_HTMLParseError,
     compat_os_name,
-    compat_shlex_quote,
 )

 from ..dependencies import xattr
@@ -836,9 +835,11 @@ class Popen(subprocess.Popen):
         if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
             if not isinstance(args, str):
-                args = ' '.join(compat_shlex_quote(a) for a in args)
+                args = shell_quote(args, shell=True)
             shell = False
-            args = f'{self.__comspec()} /Q /S /D /V:OFF /C "{args}"'
+            # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
+            env['='] = '"^\n\n"'
+            args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'

         super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)
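The env['='] assignment pre-seeds the variable that the quoting table in the next hunk expands via %=%. A stripped-down sketch of the mechanism, not yt-dlp's Popen wrapper: cmd.exe has no escape sequence for a literal newline, but it can expand a variable containing one, and a variable literally named '=' cannot be redefined from inside cmd, so the placeholder is collision-free.

import os

env = dict(os.environ)
env['='] = '"^\n\n"'  # what '%=%' expands to inside the child cmd.exe
# subprocess.Popen(..., env=env) would then run
#     cmd /Q /S /D /V:OFF /E:ON /C "<quoted command>"
# with /E:ON enabling the %cd:~,% trick used for '%' below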
@@ -1637,15 +1638,31 @@ def get_filesystem_encoding():
     return encoding if encoding is not None else 'utf-8'

-def shell_quote(args):
-    quoted_args = []
-    encoding = get_filesystem_encoding()
-    for a in args:
-        if isinstance(a, bytes):
-            # We may get a filename encoded with 'encodeFilename'
-            a = a.decode(encoding)
-        quoted_args.append(compat_shlex_quote(a))
-    return ' '.join(quoted_args)
+_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
+_CMD_QUOTE_TRANS = str.maketrans({
+    # Keep quotes balanced by replacing them with `""` instead of `\\"`
+    '"': '""',
+    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
+    # `=` should be unique since variables containing `=` cannot be set using cmd
+    '\n': '%=%',
+    '\r': '%=%',
+    # Use zero length variable replacement so `%` doesn't get expanded
+    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
+    '%': '%%cd:~,%',
+})
+
+
+def shell_quote(args, *, shell=False):
+    args = list(variadic(args))
+    if compat_os_name != 'nt':
+        return shlex.join(args)
+
+    trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
+    return ' '.join(
+        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
+        else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
+        for s in args)
 def smuggle_url(url, data):
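A small usage sketch of the rewritten helper, assuming a yt-dlp checkout or install is importable: on POSIX it defers to shlex.join, while on Windows the translation tables above take over, with the cmd.exe-specific table selected by shell=True.

from yt_dlp.utils import shell_quote

args = ['ffmpeg', '-i', 'in put.mp4', 'out.mp4']
print(shell_quote(args))
# POSIX: ffmpeg -i 'in put.mp4' out.mp4
# On Windows, shell=True additionally applies the cmd.exe escapes above
print(shell_quote(args, shell=True))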
@@ -2849,7 +2866,7 @@ def ytdl_is_updateable():
 def args_to_str(args):
     # Get a short string representation for a subprocess command
-    return ' '.join(compat_shlex_quote(a) for a in args)
+    return shell_quote(args)

 def error_to_str(err):
View File
@@ -1,8 +1,8 @@
 # Autogenerated by devscripts/update-version.py

-__version__ = '2024.03.10'
+__version__ = '2024.04.09'

-RELEASE_GIT_HEAD = '615a84447e8322720be77a0e64298d7f42848693'
+RELEASE_GIT_HEAD = 'ff07792676f404ffff6ee61b5638c9dc1a33a37a'

 VARIANT = None
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
 ORIGIN = 'yt-dlp/yt-dlp'

-_pkg_version = '2024.03.10'
+_pkg_version = '2024.04.09'