1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-07-01 19:08:32 +00:00

Merge branch 'master' into jsi

This commit is contained in:
c-basalt 2024-12-26 20:05:10 -05:00
commit a9de401ccd
173 changed files with 3832 additions and 2527 deletions

View File

@ -63,14 +63,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View File

@ -75,14 +75,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View File

@ -71,14 +71,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View File

@ -56,14 +56,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View File

@ -52,14 +52,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View File

@ -58,14 +58,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View File

@ -411,7 +411,7 @@ jobs:
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -460,7 +460,7 @@ jobs:
run: | run: |
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -504,7 +504,8 @@ jobs:
- windows32 - windows32
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/download-artifact@v4 - name: Download artifacts
uses: actions/download-artifact@v4
with: with:
path: artifact path: artifact
pattern: build-bin-* pattern: build-bin-*

View File

@ -28,3 +28,20 @@ jobs:
actions: write # For cleaning up cache actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing id-token: write # mandatory for trusted publishing
secrets: inherit secrets: inherit
publish_pypi:
needs: [release]
if: vars.MASTER_PYPI_PROJECT != ''
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View File

@ -41,3 +41,20 @@ jobs:
actions: write # For cleaning up cache actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing id-token: write # mandatory for trusted publishing
secrets: inherit secrets: inherit
publish_pypi:
needs: [release]
if: vars.NIGHTLY_PYPI_PROJECT != ''
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View File

@ -2,10 +2,6 @@ name: Release
on: on:
workflow_call: workflow_call:
inputs: inputs:
prerelease:
required: false
default: true
type: boolean
source: source:
required: false required: false
default: '' default: ''
@ -18,6 +14,10 @@ on:
required: false required: false
default: '' default: ''
type: string type: string
prerelease:
required: false
default: true
type: boolean
workflow_dispatch: workflow_dispatch:
inputs: inputs:
source: source:
@ -278,11 +278,20 @@ jobs:
make clean-cache make clean-cache
python -m build --no-isolation . python -m build --no-isolation .
- name: Upload artifacts
if: github.event_name != 'workflow_dispatch'
uses: actions/upload-artifact@v4
with:
name: build-pypi
path: |
dist/*
compression-level: 0
- name: Publish to PyPI - name: Publish to PyPI
if: github.event_name == 'workflow_dispatch'
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:
verbose: true verbose: true
attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)
publish: publish:
needs: [prepare, build] needs: [prepare, build]

View File

@ -688,3 +688,28 @@ KarboniteKream
mikkovedru mikkovedru
pktiuk pktiuk
rubyevadestaxes rubyevadestaxes
avagordon01
CounterPillow
JoseAngelB
KBelmin
kesor
MellowKyler
Wesley107772
a13ssandr0
ChocoLZS
doe1080
hugovdev
jshumphrey
julionc
manavchaudhary1
powergold1
Sakura286
SamDecrock
stratus-ss
subrat-lima
gitninja1234
jkruse
xiaomac
wesson09
Crypto90
MutantPiggieGolem1

View File

@ -4,6 +4,199 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
--> -->
### 2024.12.23
#### Core changes
- [Don't sanitize filename on Unix when `--no-windows-filenames`](https://github.com/yt-dlp/yt-dlp/commit/6fc85f617a5850307fd5b258477070e6ee177796) ([#9591](https://github.com/yt-dlp/yt-dlp/issues/9591)) by [pukkandan](https://github.com/pukkandan)
- **update**
    - [Check 64-bitness when upgrading ARM builds](https://github.com/yt-dlp/yt-dlp/commit/b91c3925c2059970daa801cb131c0c2f4f302e72) ([#11819](https://github.com/yt-dlp/yt-dlp/issues/11819)) by [bashonly](https://github.com/bashonly)
    - [Fix endless update loop for `linux_exe` builds](https://github.com/yt-dlp/yt-dlp/commit/3d3ee458c1fe49dd5ebd7651a092119d23eb7000) ([#11827](https://github.com/yt-dlp/yt-dlp/issues/11827)) by [bashonly](https://github.com/bashonly)
#### Extractor changes
- **soundcloud**: [Various fixes](https://github.com/yt-dlp/yt-dlp/commit/d298693b1b266d198e8eeecb90ea17c4a031268f) ([#11820](https://github.com/yt-dlp/yt-dlp/issues/11820)) by [bashonly](https://github.com/bashonly)
- **youtube**
    - [Add age-gate workaround for some embeddable videos](https://github.com/yt-dlp/yt-dlp/commit/09a6c687126f04e243fcb105a828787efddd1030) ([#11821](https://github.com/yt-dlp/yt-dlp/issues/11821)) by [bashonly](https://github.com/bashonly)
    - [Fix `uploader_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/1a8851b689763e5173b96f70f8a71df0e4a44b66) ([#11818](https://github.com/yt-dlp/yt-dlp/issues/11818)) by [bashonly](https://github.com/bashonly)
    - [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/65cf46cddd873fd229dbb0fc0689bca4c201c6b6) ([#11893](https://github.com/yt-dlp/yt-dlp/issues/11893)) by [bashonly](https://github.com/bashonly)
    - [Skip iOS formats that require PO Token](https://github.com/yt-dlp/yt-dlp/commit/9f42e68a74f3f00b0253fe70763abd57cac4237b) ([#11890](https://github.com/yt-dlp/yt-dlp/issues/11890)) by [coletdjnz](https://github.com/coletdjnz)
### 2024.12.13
#### Extractor changes
- **patreon**: campaign: [Support /c/ URLs](https://github.com/yt-dlp/yt-dlp/commit/bc262bcad4d3683ceadf61a7eb87e233e72adef3) ([#11756](https://github.com/yt-dlp/yt-dlp/issues/11756)) by [bashonly](https://github.com/bashonly)
- **soundcloud**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/f4d3e9e6dc25077b79849a31a2f67f93fdc01e62) ([#11777](https://github.com/yt-dlp/yt-dlp/issues/11777)) by [bashonly](https://github.com/bashonly)
- **youtube**
    - [Fix `release_date` extraction](https://github.com/yt-dlp/yt-dlp/commit/d5e2a379f2adcb28bc48c7d9e90716d7278f89d2) ([#11759](https://github.com/yt-dlp/yt-dlp/issues/11759)) by [MutantPiggieGolem1](https://github.com/MutantPiggieGolem1)
    - [Fix signature function extraction for `2f1832d2`](https://github.com/yt-dlp/yt-dlp/commit/5460cd91891bf613a2065e2fc278d9903c37a127) ([#11801](https://github.com/yt-dlp/yt-dlp/issues/11801)) by [bashonly](https://github.com/bashonly)
    - [Prioritize original language over auto-dubbed audio](https://github.com/yt-dlp/yt-dlp/commit/dc3c4fddcc653989dae71fc563d82a308fc898cc) ([#11803](https://github.com/yt-dlp/yt-dlp/issues/11803)) by [bashonly](https://github.com/bashonly)
    - search_url: [Fix playlist searches](https://github.com/yt-dlp/yt-dlp/commit/f6c73aad5f1a67544bea137ebd9d1e22e0e56567) ([#11782](https://github.com/yt-dlp/yt-dlp/issues/11782)) by [Crypto90](https://github.com/Crypto90)
#### Misc. changes
- **cleanup**: [Make more playlist entries lazy](https://github.com/yt-dlp/yt-dlp/commit/54216696261bc07cacd9a837c501d9e0b7fed09e) ([#11763](https://github.com/yt-dlp/yt-dlp/issues/11763)) by [seproDev](https://github.com/seproDev)
### 2024.12.06
#### Core changes
- **cookies**: [Add `--cookies-from-browser` support for MS Store Firefox](https://github.com/yt-dlp/yt-dlp/commit/354cb4026cf2191e1a130ec2a627b95cabfbc60a) ([#11731](https://github.com/yt-dlp/yt-dlp/issues/11731)) by [wesson09](https://github.com/wesson09)
#### Extractor changes
- **bilibili**: [Fix HD formats extraction](https://github.com/yt-dlp/yt-dlp/commit/fca3eb5f8be08d5fab2e18b45b7281a12e566725) ([#11734](https://github.com/yt-dlp/yt-dlp/issues/11734)) by [grqz](https://github.com/grqz)
- **soundcloud**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2feb28028ee48f2185d2d95076e62accb09b9e2e) ([#11742](https://github.com/yt-dlp/yt-dlp/issues/11742)) by [bashonly](https://github.com/bashonly)
- **youtube**
    - [Fix `n` sig extraction for player `3bb1f723`](https://github.com/yt-dlp/yt-dlp/commit/a95ee6d8803fca9157adecf63732ab58bf87fd88) ([#11750](https://github.com/yt-dlp/yt-dlp/issues/11750)) by [bashonly](https://github.com/bashonly) (With fixes in [4bd2655](https://github.com/yt-dlp/yt-dlp/commit/4bd2655398aed450456197a6767639114a24eac2))
    - [Fix signature function extraction](https://github.com/yt-dlp/yt-dlp/commit/4c85ccd1366c88cf93982f8350f58eed17355981) ([#11751](https://github.com/yt-dlp/yt-dlp/issues/11751)) by [bashonly](https://github.com/bashonly)
    - [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/2e49c789d3eebc39af8910705d65a98bca0e4c4f) ([#11724](https://github.com/yt-dlp/yt-dlp/issues/11724)) by [bashonly](https://github.com/bashonly)
### 2024.12.03
#### Core changes
- [Add `playlist_webpage_url` field](https://github.com/yt-dlp/yt-dlp/commit/7d6c259a03bc4707a319e5e8c6eff0278707874b) ([#11613](https://github.com/yt-dlp/yt-dlp/issues/11613)) by [seproDev](https://github.com/seproDev)
#### Extractor changes
- [Handle fragmented formats in `_remove_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/e0500cbf796323551bbabe5b8ed8c75a511ba47a) ([#11637](https://github.com/yt-dlp/yt-dlp/issues/11637)) by [Grub4K](https://github.com/Grub4K)
- **bilibili**
    - [Always try to extract HD formats](https://github.com/yt-dlp/yt-dlp/commit/dc1687648077c5bf64863b307ecc5ab7e029bd8d) ([#10559](https://github.com/yt-dlp/yt-dlp/issues/10559)) by [grqz](https://github.com/grqz)
    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/239f5f36fe04603bec59c8b975f6a792f10246db) ([#11667](https://github.com/yt-dlp/yt-dlp/issues/11667)) by [grqz](https://github.com/grqz) (With fixes in [f05a1cd](https://github.com/yt-dlp/yt-dlp/commit/f05a1cd1492fc98dc8d80d2081d632a1879913d2) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz))
    - [Fix subtitles and chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/a13a336aa6f906812701abec8101b73b73db8ff7) ([#11708](https://github.com/yt-dlp/yt-dlp/issues/11708)) by [xiaomac](https://github.com/xiaomac)
- **chaturbate**: [Fix support for non-public streams](https://github.com/yt-dlp/yt-dlp/commit/4b5eec0aaa7c02627f27a386591b735b90e681a8) ([#11624](https://github.com/yt-dlp/yt-dlp/issues/11624)) by [jkruse](https://github.com/jkruse)
- **dacast**: [Fix HLS AES formats extraction](https://github.com/yt-dlp/yt-dlp/commit/0a0d80800b9350d1a4c4b18d82cfb77ffbc3c507) ([#11644](https://github.com/yt-dlp/yt-dlp/issues/11644)) by [bashonly](https://github.com/bashonly)
- **dropbox**: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/00dcde728635633eee969ad4d498b9f233c4a94e) ([#11636](https://github.com/yt-dlp/yt-dlp/issues/11636)) by [bashonly](https://github.com/bashonly)
- **duoplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/62cba8a1bedbfc0ddde7267ae57b72bf5f7ea7b1) ([#11588](https://github.com/yt-dlp/yt-dlp/issues/11588)) by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc)
- **facebook**: [Support more groups URLs](https://github.com/yt-dlp/yt-dlp/commit/e0f1ae813b36e783e2348ba2a1566e12f5cd8f6e) ([#11576](https://github.com/yt-dlp/yt-dlp/issues/11576)) by [grqz](https://github.com/grqz)
- **instagram**: [Support `share` URLs](https://github.com/yt-dlp/yt-dlp/commit/360aed810ad85db950df586282d256516c98cd2d) ([#11677](https://github.com/yt-dlp/yt-dlp/issues/11677)) by [grqz](https://github.com/grqz)
- **microsoftembed**: [Make format extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/2bea7936323ca4b6f3b9b1fdd892566223e30efa) ([#11654](https://github.com/yt-dlp/yt-dlp/issues/11654)) by [seproDev](https://github.com/seproDev)
- **mitele**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cd0f934604587ed793e9177f6a127e5dcf99a7dd) ([#11683](https://github.com/yt-dlp/yt-dlp/issues/11683)) by [DarkZeros](https://github.com/DarkZeros)
- **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/16336c51d0848a6868a4fa04e749fa03548b4913) ([#11596](https://github.com/yt-dlp/yt-dlp/issues/11596)) by [gitninja1234](https://github.com/gitninja1234)
- **tiktok**: [Deprioritize animated thumbnails](https://github.com/yt-dlp/yt-dlp/commit/910ecc422930bca14e2abe4986f5f92359e3cea8) ([#11645](https://github.com/yt-dlp/yt-dlp/issues/11645)) by [bashonly](https://github.com/bashonly)
- **vk**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c038a7b187ba24360f14134842a7a2cf897c33b1) ([#11715](https://github.com/yt-dlp/yt-dlp/issues/11715)) by [bashonly](https://github.com/bashonly)
- **youtube**
    - [Adjust player clients for site changes](https://github.com/yt-dlp/yt-dlp/commit/0d146c1e36f467af30e87b7af651bdee67b73500) ([#11663](https://github.com/yt-dlp/yt-dlp/issues/11663)) by [bashonly](https://github.com/bashonly)
    - tab: [Fix playlists tab extraction](https://github.com/yt-dlp/yt-dlp/commit/fe70f20aedf528fdee332131bc9b6710e54e6f10) ([#11615](https://github.com/yt-dlp/yt-dlp/issues/11615)) by [seproDev](https://github.com/seproDev)
#### Networking changes
- **Request Handler**: websockets: [Support websockets 14.0+](https://github.com/yt-dlp/yt-dlp/commit/c7316373c0a886f65a07a51e50ee147bb3294c85) ([#11616](https://github.com/yt-dlp/yt-dlp/issues/11616)) by [coletdjnz](https://github.com/coletdjnz)
#### Misc. changes
- **cleanup**
    - [Bump ruff to 0.8.x](https://github.com/yt-dlp/yt-dlp/commit/d8fb3490863653182864d2a53522f350d67a9ff8) ([#11608](https://github.com/yt-dlp/yt-dlp/issues/11608)) by [seproDev](https://github.com/seproDev)
    - Miscellaneous
        - [ccf0a6b](https://github.com/yt-dlp/yt-dlp/commit/ccf0a6b86b7f68a75463804fe485ec240b8635f0) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612)
        - [2b67ac3](https://github.com/yt-dlp/yt-dlp/commit/2b67ac300ac8b44368fb121637d1743cea8c5b6b) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
### 2024.11.18
#### Important changes
- **Login with OAuth is no longer supported for YouTube**
Due to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)
#### Core changes
- [Catch broken Cryptodome installations](https://github.com/yt-dlp/yt-dlp/commit/b83ca24eb72e1e558b0185bd73975586c0bc0546) ([#11486](https://github.com/yt-dlp/yt-dlp/issues/11486)) by [seproDev](https://github.com/seproDev)
- **utils**
    - [Fix `join_nonempty`, add `**kwargs` to `unpack`](https://github.com/yt-dlp/yt-dlp/commit/39d79c9b9cf23411d935910685c40aa1a2fdb409) ([#11559](https://github.com/yt-dlp/yt-dlp/issues/11559)) by [Grub4K](https://github.com/Grub4K)
    - `subs_list_to_dict`: [Add `lang` default parameter](https://github.com/yt-dlp/yt-dlp/commit/c014fbcddcb4c8f79d914ac5bb526758b540ea33) ([#11508](https://github.com/yt-dlp/yt-dlp/issues/11508)) by [Grub4K](https://github.com/Grub4K)
#### Extractor changes
- [Allow `ext` override for thumbnails](https://github.com/yt-dlp/yt-dlp/commit/eb64ae7d5def6df2aba74fb703e7f168fb299865) ([#11545](https://github.com/yt-dlp/yt-dlp/issues/11545)) by [bashonly](https://github.com/bashonly)
- **adobepass**: [Fix provider requests](https://github.com/yt-dlp/yt-dlp/commit/85fdc66b6e01d19a94b4f39b58e3c0cf23600902) ([#11472](https://github.com/yt-dlp/yt-dlp/issues/11472)) by [bashonly](https://github.com/bashonly)
- **archive.org**: [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/f2a4983df7a64c4e93b56f79dbd16a781bd90206) ([#11527](https://github.com/yt-dlp/yt-dlp/issues/11527)) by [jshumphrey](https://github.com/jshumphrey)
- **bandlab**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6365e92589e4bc17b8fffb0125a716d144ad2137) ([#11535](https://github.com/yt-dlp/yt-dlp/issues/11535)) by [seproDev](https://github.com/seproDev)
- **chaturbate**
    - [Extract from API and support impersonation](https://github.com/yt-dlp/yt-dlp/commit/720b3dc453c342bc2e8df7dbc0acaab4479de46c) ([#11555](https://github.com/yt-dlp/yt-dlp/issues/11555)) by [powergold1](https://github.com/powergold1) (With fixes in [7cecd29](https://github.com/yt-dlp/yt-dlp/commit/7cecd299e4a5ef1f0f044b2fedc26f17e41f15e3) by [seproDev](https://github.com/seproDev))
    - [Support alternate domains](https://github.com/yt-dlp/yt-dlp/commit/a9f85670d03ab993dc589f21a9ffffcad61392d5) ([#10595](https://github.com/yt-dlp/yt-dlp/issues/10595)) by [manavchaudhary1](https://github.com/manavchaudhary1)
- **cloudflarestream**: [Avoid extraction via videodelivery.net](https://github.com/yt-dlp/yt-dlp/commit/2db8c2e7d57a1784b06057c48e3e91023720d195) ([#11478](https://github.com/yt-dlp/yt-dlp/issues/11478)) by [hugovdev](https://github.com/hugovdev)
- **ctvnews**
    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f351440f1dc5b3dfbfc5737b037a869d946056fe) ([#11534](https://github.com/yt-dlp/yt-dlp/issues/11534)) by [bashonly](https://github.com/bashonly), [jshumphrey](https://github.com/jshumphrey)
    - [Fix playlist ID extraction](https://github.com/yt-dlp/yt-dlp/commit/f9d98509a898737c12977b2e2117277bada2c196) ([#8892](https://github.com/yt-dlp/yt-dlp/issues/8892)) by [qbnu](https://github.com/qbnu)
- **digitalconcerthall**: [Support login with access/refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/f7257588bdff5f0b0452635a66b253a783c97357) ([#11571](https://github.com/yt-dlp/yt-dlp/issues/11571)) by [bashonly](https://github.com/bashonly)
- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/bacc31b05a04181b63100c481565256b14813a5e) ([#11513](https://github.com/yt-dlp/yt-dlp/issues/11513)) by [bashonly](https://github.com/bashonly)
- **gamedevtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be3579aaf0c3b71a0a3195e1955415d5e4d6b3d8) ([#11368](https://github.com/yt-dlp/yt-dlp/issues/11368)) by [bashonly](https://github.com/bashonly), [stratus-ss](https://github.com/stratus-ss)
- **goplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b43a8d84b881d769b480ba6e20ec691e9d1b92d) ([#11466](https://github.com/yt-dlp/yt-dlp/issues/11466)) by [bashonly](https://github.com/bashonly), [SamDecrock](https://github.com/SamDecrock)
- **kenh14**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eb15fd5a32d8b35ef515f7a3d1158c03025648ff) ([#3996](https://github.com/yt-dlp/yt-dlp/issues/3996)) by [krichbanana](https://github.com/krichbanana), [pzhlkj6612](https://github.com/pzhlkj6612)
- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e079ffbda66de150c0a9ebef05e89f61bb4d5f76) ([#11071](https://github.com/yt-dlp/yt-dlp/issues/11071)) by [jiru](https://github.com/jiru)
- **mixchmovie**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0ec9bfed4d4a52bfb4f8733da1acf0aeeae21e6b) ([#10897](https://github.com/yt-dlp/yt-dlp/issues/10897)) by [Sakura286](https://github.com/Sakura286)
- **patreon**: [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/1d253b0a27110d174c40faf8fb1c999d099e0cde) ([#11530](https://github.com/yt-dlp/yt-dlp/issues/11530)) by [bashonly](https://github.com/bashonly), [jshumphrey](https://github.com/jshumphrey)
- **pialive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d867f99622ef7fba690b08da56c39d739b822bb7) ([#10811](https://github.com/yt-dlp/yt-dlp/issues/10811)) by [ChocoLZS](https://github.com/ChocoLZS)
- **radioradicale**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/70c55cb08f780eab687e881ef42bb5c6007d290b) ([#5607](https://github.com/yt-dlp/yt-dlp/issues/5607)) by [a13ssandr0](https://github.com/a13ssandr0), [pzhlkj6612](https://github.com/pzhlkj6612)
- **reddit**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7ea2787920cccc6b8ea30791993d114fbd564434) ([#11573](https://github.com/yt-dlp/yt-dlp/issues/11573)) by [bashonly](https://github.com/bashonly)
- **redgifsuser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/d215fba7edb69d4fa665f43663756fd260b1489f) ([#11531](https://github.com/yt-dlp/yt-dlp/issues/11531)) by [jshumphrey](https://github.com/jshumphrey)
- **rutube**: [Rework extractors](https://github.com/yt-dlp/yt-dlp/commit/e398217aae19bb25f91797bfbe8a3243698d7f45) ([#11480](https://github.com/yt-dlp/yt-dlp/issues/11480)) by [seproDev](https://github.com/seproDev)
- **sonylivseries**: [Add `sort_order` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/2009cb27e17014787bf63eaa2ada51293d54f22a) ([#11569](https://github.com/yt-dlp/yt-dlp/issues/11569)) by [bashonly](https://github.com/bashonly)
- **soop**: [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/c699bafc5038b59c9afe8c2e69175fb66424c832) ([#11545](https://github.com/yt-dlp/yt-dlp/issues/11545)) by [bashonly](https://github.com/bashonly)
- **spankbang**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/8388ec256f7753b02488788e3cfa771f6e1db247) ([#11542](https://github.com/yt-dlp/yt-dlp/issues/11542)) by [jshumphrey](https://github.com/jshumphrey)
- **spreaker**
- [Support episode pages and access keys](https://github.com/yt-dlp/yt-dlp/commit/c39016f66df76d14284c705736ca73db8055d8de) ([#11489](https://github.com/yt-dlp/yt-dlp/issues/11489)) by [julionc](https://github.com/julionc)
- [Support podcast and feed pages](https://github.com/yt-dlp/yt-dlp/commit/c6737310619022248f5d0fd13872073cac168453) ([#10968](https://github.com/yt-dlp/yt-dlp/issues/10968)) by [subrat-lima](https://github.com/subrat-lima)
- **youtube**
- [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/637d62a3a9fc723d68632c1af25c30acdadeeb85) ([#11528](https://github.com/yt-dlp/yt-dlp/issues/11528)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- [Remove broken OAuth support](https://github.com/yt-dlp/yt-dlp/commit/52c0ffe40ad6e8404d93296f575007b05b04c686) ([#11558](https://github.com/yt-dlp/yt-dlp/issues/11558)) by [bashonly](https://github.com/bashonly)
- tab: [Fix podcasts tab extraction](https://github.com/yt-dlp/yt-dlp/commit/37cd7660eaff397c551ee18d80507702342b0c2b) ([#11567](https://github.com/yt-dlp/yt-dlp/issues/11567)) by [seproDev](https://github.com/seproDev)
#### Misc. changes
- **build**
- [Bump PyInstaller version pin to `>=6.11.1`](https://github.com/yt-dlp/yt-dlp/commit/f9c8deb4e5887ff5150e911ac0452e645f988044) ([#11507](https://github.com/yt-dlp/yt-dlp/issues/11507)) by [bashonly](https://github.com/bashonly)
- [Enable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/f13df591d4d7ca8e2f31b35c9c91e69ba9e9b013) ([#11420](https://github.com/yt-dlp/yt-dlp/issues/11420)) by [bashonly](https://github.com/bashonly)
- [Pin `websockets` version to >=13.0,<14](https://github.com/yt-dlp/yt-dlp/commit/240a7d43c8a67ffb86d44dc276805aa43c358dcc) ([#11488](https://github.com/yt-dlp/yt-dlp/issues/11488)) by [bashonly](https://github.com/bashonly)
- **cleanup**
- [Deprecate more compat functions](https://github.com/yt-dlp/yt-dlp/commit/f95a92b3d0169a784ee15a138fbe09d82b2754a1) ([#11439](https://github.com/yt-dlp/yt-dlp/issues/11439)) by [seproDev](https://github.com/seproDev)
- [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/10fc719bc7f1eef469389c5219102266ef411f29) ([#11566](https://github.com/yt-dlp/yt-dlp/issues/11566)) by [doe1080](https://github.com/doe1080)
- Miscellaneous: [da252d9](https://github.com/yt-dlp/yt-dlp/commit/da252d9d322af3e2178ac5eae324809502a0a862) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
### 2024.11.04
#### Important changes
- **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group**
If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)
- **The minimum *required* Python version has been raised to 3.9**
Python 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)
#### Core changes
- [Allow thumbnails with `.jpe` extension](https://github.com/yt-dlp/yt-dlp/commit/5bc5fb2835ea59bdf326bd12176d74d2c7348a95) ([#11408](https://github.com/yt-dlp/yt-dlp/issues/11408)) by [bashonly](https://github.com/bashonly)
- [Expand paths in `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/914af9a0cf51c9a3f74aa88d952bee8334c67511) ([#11334](https://github.com/yt-dlp/yt-dlp/issues/11334)) by [bashonly](https://github.com/bashonly)
- [Fix `--netrc` empty string parsing for Python <=3.10](https://github.com/yt-dlp/yt-dlp/commit/88402b714ec124633933737bc156b172a3dec3d6) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Populate format sorting fields before dependent fields](https://github.com/yt-dlp/yt-dlp/commit/5c880ef42e9c2b2fc412f6d69dad37d34fb75a62) ([#11353](https://github.com/yt-dlp/yt-dlp/issues/11353)) by [Grub4K](https://github.com/Grub4K)
- [Prioritize AV1](https://github.com/yt-dlp/yt-dlp/commit/3945677a75e94a1fecc085432d791e1c21220cd3) ([#11153](https://github.com/yt-dlp/yt-dlp/issues/11153)) by [seproDev](https://github.com/seproDev)
- [Remove Python 3.8 support](https://github.com/yt-dlp/yt-dlp/commit/d784464399b600ba9516bbcec6286f11d68974dd) ([#11321](https://github.com/yt-dlp/yt-dlp/issues/11321)) by [bashonly](https://github.com/bashonly)
- **aes**: [Fix GCM pad length calculation](https://github.com/yt-dlp/yt-dlp/commit/beae2db127d3b5017cbcf685da9de7a9ef496541) ([#11438](https://github.com/yt-dlp/yt-dlp/issues/11438)) by [seproDev](https://github.com/seproDev)
- **cookies**: [Support chrome table version 24](https://github.com/yt-dlp/yt-dlp/commit/4613096f2e6eab9dcbac0e98b6cec760bbc99375) ([#11425](https://github.com/yt-dlp/yt-dlp/issues/11425)) by [kesor](https://github.com/kesor), [seproDev](https://github.com/seproDev)
- **utils**
- [Allow partial application for more functions](https://github.com/yt-dlp/yt-dlp/commit/b6dc2c49e8793c6dfa21275e61caf49ec1148b81) ([#11391](https://github.com/yt-dlp/yt-dlp/issues/11391)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [422195e](https://github.com/yt-dlp/yt-dlp/commit/422195ec70a00b0d2002b238cacbae7790c57fdf) by [Grub4K](https://github.com/Grub4K))
- [Fix `find_element` by class](https://github.com/yt-dlp/yt-dlp/commit/f93c16395cea1fe9ffc3c594d3e019c3b214544c) ([#11402](https://github.com/yt-dlp/yt-dlp/issues/11402)) by [bashonly](https://github.com/bashonly)
- [Fix and improve `find_element` and `find_elements`](https://github.com/yt-dlp/yt-dlp/commit/b103aca24d35b72b405c340357dc01a0ed534281) ([#11443](https://github.com/yt-dlp/yt-dlp/issues/11443)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
#### Extractor changes
- [Resolve `language` to ISO639-2 for ISM formats](https://github.com/yt-dlp/yt-dlp/commit/21cdcf03a237a0c4979c941d5a5385cae44c7906) ([#11359](https://github.com/yt-dlp/yt-dlp/issues/11359)) by [bashonly](https://github.com/bashonly)
- **ardmediathek**: [Extract chapters](https://github.com/yt-dlp/yt-dlp/commit/59f8dd8239c31f00b708da53b39b1e2e9409b6e6) ([#11442](https://github.com/yt-dlp/yt-dlp/issues/11442)) by [iw0nderhow](https://github.com/iw0nderhow)
- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/754940e9a558565d6bd3c0c529802569b1d0ae4e) ([#11444](https://github.com/yt-dlp/yt-dlp/issues/11444)) by [seproDev](https://github.com/seproDev)
- **bluesky**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c7a5aaab27e9c3cb367b663a6136ca58866e547) ([#11055](https://github.com/yt-dlp/yt-dlp/issues/11055)) by [MellowKyler](https://github.com/MellowKyler), [seproDev](https://github.com/seproDev)
- **ccma**: [Support new 3cat.cat domain](https://github.com/yt-dlp/yt-dlp/commit/330335386d4f7603d92d6796798375336005275e) ([#11222](https://github.com/yt-dlp/yt-dlp/issues/11222)) by [JoseAngelB](https://github.com/JoseAngelB)
- **chzzk**: video: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9c6534da81e485b2325b3489ee4128943e6d3e4b) ([#11228](https://github.com/yt-dlp/yt-dlp/issues/11228)) by [hui1601](https://github.com/hui1601)
- **cnn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9acf79c91a8c6c55ca972747c6858e784e2da351) ([#10185](https://github.com/yt-dlp/yt-dlp/issues/10185)) by [kylegustavo](https://github.com/kylegustavo), [seproDev](https://github.com/seproDev)
- **dailymotion**
- [Improve embed extraction](https://github.com/yt-dlp/yt-dlp/commit/a403dcf9be20b49cbb3017328f4aaa352fb6d685) ([#10843](https://github.com/yt-dlp/yt-dlp/issues/10843)) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612)
- [Support shortened URLs](https://github.com/yt-dlp/yt-dlp/commit/d1358231371f20fa23020fa9176be3b56119873e) ([#11374](https://github.com/yt-dlp/yt-dlp/issues/11374)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ec9b25043f399de6a591d8370d32bf0e66c117f2) ([#11343](https://github.com/yt-dlp/yt-dlp/issues/11343)) by [kclauhk](https://github.com/kclauhk)
- **generic**: [Do not impersonate by default](https://github.com/yt-dlp/yt-dlp/commit/c29f5a7fae93a08f3cfbb6127b2faa75145b06a0) ([#11336](https://github.com/yt-dlp/yt-dlp/issues/11336)) by [bashonly](https://github.com/bashonly)
- **nfl**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/838f4385de8300a4dd4e7ffbbf0e5b7b85fb52c2) ([#11409](https://github.com/yt-dlp/yt-dlp/issues/11409)) by [bashonly](https://github.com/bashonly)
- **niconicouser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6abef74232c0fc695cd803c18ae446cacb129389) ([#11324](https://github.com/yt-dlp/yt-dlp/issues/11324)) by [Wesley107772](https://github.com/Wesley107772)
- **soundcloud**: [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/f101e5d34c97c608156ad5396714c2a2edca966a) ([#11377](https://github.com/yt-dlp/yt-dlp/issues/11377)) by [seproDev](https://github.com/seproDev)
- **tumblr**: [Support more URLs](https://github.com/yt-dlp/yt-dlp/commit/b03267bf0675eeb8df5baf1daac7cf67840c91a5) ([#6057](https://github.com/yt-dlp/yt-dlp/issues/6057)) by [selfisekai](https://github.com/selfisekai), [seproDev](https://github.com/seproDev)
- **twitter**: [Remove cookies migration workaround](https://github.com/yt-dlp/yt-dlp/commit/76802f461332d444e596437c42374fa237fa5174) ([#11392](https://github.com/yt-dlp/yt-dlp/issues/11392)) by [bashonly](https://github.com/bashonly)
- **vimeo**: [Fix API retries](https://github.com/yt-dlp/yt-dlp/commit/57212a5f97ce367590aaa5c3e9a135eead8f81f7) ([#11351](https://github.com/yt-dlp/yt-dlp/issues/11351)) by [bashonly](https://github.com/bashonly)
- **yle_areena**: [Support live events](https://github.com/yt-dlp/yt-dlp/commit/a6783a3b9905e547f6c1d4df9d7c7999feda8afa) ([#11358](https://github.com/yt-dlp/yt-dlp/issues/11358)) by [bashonly](https://github.com/bashonly), [CounterPillow](https://github.com/CounterPillow)
- **youtube**: [Adjust OAuth refresh token handling](https://github.com/yt-dlp/yt-dlp/commit/d569a8845254d90ce13ad74ae76695e8d6441068) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly)
#### Misc. changes
- **build**
- [Disable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/428ffb75aa3534b275cf54de42693a4d261519da) ([#11418](https://github.com/yt-dlp/yt-dlp/issues/11418)) by [bashonly](https://github.com/bashonly)
- [Move optional dependencies to the `default` group](https://github.com/yt-dlp/yt-dlp/commit/87884f15580910e4e0fe0e1db73508debc657471) ([#11255](https://github.com/yt-dlp/yt-dlp/issues/11255)) by [bashonly](https://github.com/bashonly)
- [Use Ubuntu 20.04 and Python 3.9 for Linux ARM builds](https://github.com/yt-dlp/yt-dlp/commit/dd2e24446954246a2ec4d4a7e95531f52a14b351) ([#8638](https://github.com/yt-dlp/yt-dlp/issues/8638)) by [bashonly](https://github.com/bashonly)
- **cleanup**
- Miscellaneous
- [ea9e35d](https://github.com/yt-dlp/yt-dlp/commit/ea9e35d85fba5eab341cdcaf1eaed69b57f7e465) by [bashonly](https://github.com/bashonly)
- [c998238](https://github.com/yt-dlp/yt-dlp/commit/c998238c2e76c62d1d29962c6e8ebe916cc7913b) by [bashonly](https://github.com/bashonly), [KBelmin](https://github.com/KBelmin)
- [197d0b0](https://github.com/yt-dlp/yt-dlp/commit/197d0b03b6a3c8fe4fa5ace630eeffec629bf72c) by [avagordon01](https://github.com/avagordon01), [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **devscripts**: `make_changelog`: [Parse full commit message for fixes](https://github.com/yt-dlp/yt-dlp/commit/0a3991edae0e10f2ea41ece9fdea5e48f789f1de) ([#11366](https://github.com/yt-dlp/yt-dlp/issues/11366)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
### 2024.10.22 ### 2024.10.22
#### Important changes #### Important changes

View File

@ -342,8 +342,9 @@ ## General Options:
extractor plugins; postprocessor plugins can extractor plugins; postprocessor plugins can
only be loaded from the default plugin only be loaded from the default plugin
directories directories
--flat-playlist Do not extract the videos of a playlist, --flat-playlist Do not extract a playlist's URL result
only list them entries; some entry metadata may be missing
and downloading may be bypassed
--no-flat-playlist Fully extract the videos of a playlist --no-flat-playlist Fully extract the videos of a playlist
(default) (default)
--live-from-start Download livestreams from the start. --live-from-start Download livestreams from the start.
@ -479,7 +480,8 @@ ## Video Selection:
--no-download-archive Do not use archive file (default) --no-download-archive Do not use archive file (default)
--max-downloads NUMBER Abort after downloading NUMBER files --max-downloads NUMBER Abort after downloading NUMBER files
--break-on-existing Stop the download process when encountering --break-on-existing Stop the download process when encountering
a file that is in the archive a file that is in the archive supplied with
the --download-archive option
--no-break-on-existing Do not stop the download process when --no-break-on-existing Do not stop the download process when
encountering a file that is in the archive encountering a file that is in the archive
(default) (default)
@ -611,8 +613,7 @@ ## Filesystem Options:
--no-restrict-filenames Allow Unicode characters, "&" and spaces in --no-restrict-filenames Allow Unicode characters, "&" and spaces in
filenames (default) filenames (default)
--windows-filenames Force filenames to be Windows-compatible --windows-filenames Force filenames to be Windows-compatible
--no-windows-filenames Make filenames Windows-compatible only if --no-windows-filenames Sanitize filenames only minimally
using Windows (default)
--trim-filenames LENGTH Limit the filename length (excluding --trim-filenames LENGTH Limit the filename length (excluding
extension) to the specified number of extension) to the specified number of
characters characters
@ -1292,6 +1293,7 @@ # OUTPUT TEMPLATE
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
- `playlist_channel` (string): Display name of the channel that uploaded the playlist - `playlist_channel` (string): Display name of the channel that uploaded the playlist
- `playlist_channel_id` (string): Identifier of the channel that uploaded the playlist - `playlist_channel_id` (string): Identifier of the channel that uploaded the playlist
- `playlist_webpage_url` (string): URL of the playlist webpage
- `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again
- `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_basename` (string): The basename of the webpage URL
- `webpage_url_domain` (string): The domain of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL
@ -1758,7 +1760,7 @@ # Replace all spaces and "_" in title and uploader with a `-`
# EXTRACTOR ARGUMENTS # EXTRACTOR ARGUMENTS
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"` Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=tv,mweb;formats=incomplete" --extractor-args "funimation:version=uncut"`
Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client` Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`
@ -1767,13 +1769,13 @@ # EXTRACTOR ARGUMENTS
#### youtube #### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator,mediaconnect` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, or `web_creator,mweb` is used when authenticating with cookies. The `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. Not all clients support authentication via cookies. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the number of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * `max_comments`: Limit the number of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used * `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning * `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
@ -1857,7 +1859,7 @@ #### afreecatvlive
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web` * `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
#### soundcloud #### soundcloud
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3` * `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{codec}`, e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known codecs include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
#### orfon (orf:on) #### orfon (orf:on)
* `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"` * `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"`
@ -1865,8 +1867,8 @@ #### orfon (orf:on)
#### bilibili #### bilibili
* `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats * `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats
#### digitalconcerthall #### sonylivseries
* `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats * `sort_order`: Episode sort order for series extraction - one of `asc` (ascending, oldest first) or `desc` (descending, newest first). Default is `asc`
**Note**: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility

View File

@ -234,5 +234,10 @@
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7", "when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
"short": "[ie/vimeo] Fix API retries (#11351)", "short": "[ie/vimeo] Fix API retries (#11351)",
"authors": ["bashonly"] "authors": ["bashonly"]
},
{
"action": "add",
"when": "52c0ffe40ad6e8404d93296f575007b05b04c686",
"short": "[priority] **Login with OAuth is no longer supported for YouTube**\nDue to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)"
} }
] ]

View File

@ -11,13 +11,12 @@
import subprocess import subprocess
from yt_dlp.aes import aes_encrypt, key_expansion from yt_dlp.aes import aes_encrypt, key_expansion
from yt_dlp.utils import intlist_to_bytes
secret_msg = b'Secret message goes here' secret_msg = b'Secret message goes here'
def hex_str(int_list): def hex_str(int_list):
return codecs.encode(intlist_to_bytes(int_list), 'hex') return codecs.encode(bytes(int_list), 'hex')
def openssl_encode(algo, key, iv): def openssl_encode(algo, key, iv):

View File

@ -76,14 +76,14 @@ dev = [
] ]
static-analysis = [ static-analysis = [
"autopep8~=2.0", "autopep8~=2.0",
"ruff~=0.7.0", "ruff~=0.8.0",
] ]
test = [ test = [
"pytest~=8.1", "pytest~=8.1",
"pytest-rerunfailures~=14.0", "pytest-rerunfailures~=14.0",
] ]
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 "pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
] ]
[project.urls] [project.urls]
@ -186,6 +186,7 @@ ignore = [
"E501", # line-too-long "E501", # line-too-long
"E731", # lambda-assignment "E731", # lambda-assignment
"E741", # ambiguous-variable-name "E741", # ambiguous-variable-name
"UP031", # printf-string-formatting
"UP036", # outdated-version-block "UP036", # outdated-version-block
"B006", # mutable-argument-default "B006", # mutable-argument-default
"B008", # function-call-in-default-argument "B008", # function-call-in-default-argument
@ -258,9 +259,6 @@ select = [
"A002", # builtin-argument-shadowing "A002", # builtin-argument-shadowing
"C408", # unnecessary-collection-call "C408", # unnecessary-collection-call
] ]
"yt_dlp/jsinterp/native.py" = [
"UP031", # printf-string-formatting
]
[tool.ruff.lint.isort] [tool.ruff.lint.isort]
known-first-party = [ known-first-party = [
@ -313,6 +311,16 @@ banned-from = [
"yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead." "yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead."
"yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead." "yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead."
"yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead." "yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead."
"yt_dlp.utils.bytes_to_intlist".msg = "Use `list` instead."
"yt_dlp.utils.intlist_to_bytes".msg = "Use `bytes` instead."
"yt_dlp.utils.decodeArgument".msg = "Do not use"
"yt_dlp.utils.decodeFilename".msg = "Do not use"
"yt_dlp.utils.encodeFilename".msg = "Do not use"
"yt_dlp.compat.compat_os_name".msg = "Use `os.name` instead."
"yt_dlp.compat.compat_realpath".msg = "Use `os.path.realpath` instead."
"yt_dlp.compat.functools".msg = "Use `functools` instead."
"yt_dlp.utils.decodeOption".msg = "Do not use"
"yt_dlp.utils.compiled_regex_type".msg = "Use `re.Pattern` instead."
[tool.autopep8] [tool.autopep8]
max_line_length = 120 max_line_length = 120

View File

@ -129,6 +129,8 @@ # Supported sites
- **Bandcamp:album** - **Bandcamp:album**
- **Bandcamp:user** - **Bandcamp:user**
- **Bandcamp:weekly** - **Bandcamp:weekly**
- **Bandlab**
- **BandlabPlaylist**
- **BannedVideo** - **BannedVideo**
- **bbc**: [*bbc*](## "netrc machine") BBC - **bbc**: [*bbc*](## "netrc machine") BBC
- **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer - **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer
@ -190,6 +192,7 @@ # Supported sites
- **blerp** - **blerp**
- **blogger.com** - **blogger.com**
- **Bloomberg** - **Bloomberg**
- **Bluesky**
- **BokeCC** - **BokeCC**
- **BongaCams** - **BongaCams**
- **Boosty** - **Boosty**
@ -247,7 +250,7 @@ # Supported sites
- **cbsnews:livevideo**: CBS News Live Videos - **cbsnews:livevideo**: CBS News Live Videos
- **cbssports**: (**Currently broken**) - **cbssports**: (**Currently broken**)
- **cbssports:embed**: (**Currently broken**) - **cbssports:embed**: (**Currently broken**)
- **CCMA** - **CCMA**: 3Cat, TV3 and Catalunya Ràdio
- **CCTV**: 央视网 - **CCTV**: 央视网
- **CDA**: [*cdapl*](## "netrc machine") - **CDA**: [*cdapl*](## "netrc machine")
- **CDAFolder** - **CDAFolder**
@ -280,8 +283,6 @@ # Supported sites
- **cmt.com**: (**Currently broken**) - **cmt.com**: (**Currently broken**)
- **CNBCVideo** - **CNBCVideo**
- **CNN** - **CNN**
- **CNNArticle**
- **CNNBlogs**
- **CNNIndonesia** - **CNNIndonesia**
- **ComedyCentral** - **ComedyCentral**
- **ComedyCentralTV** - **ComedyCentralTV**
@ -485,6 +486,7 @@ # Supported sites
- **Gab** - **Gab**
- **GabTV** - **GabTV**
- **Gaia**: [*gaia*](## "netrc machine") - **Gaia**: [*gaia*](## "netrc machine")
- **GameDevTVDashboard**: [*gamedevtv*](## "netrc machine")
- **GameJolt** - **GameJolt**
- **GameJoltCommunity** - **GameJoltCommunity**
- **GameJoltGame** - **GameJoltGame**
@ -652,6 +654,8 @@ # Supported sites
- **Karaoketv** - **Karaoketv**
- **Katsomo**: (**Currently broken**) - **Katsomo**: (**Currently broken**)
- **KelbyOne**: (**Currently broken**) - **KelbyOne**: (**Currently broken**)
- **Kenh14Playlist**
- **Kenh14Video**
- **Ketnet** - **Ketnet**
- **khanacademy** - **khanacademy**
- **khanacademy:unit** - **khanacademy:unit**
@ -685,9 +689,9 @@ # Supported sites
- **LastFMPlaylist** - **LastFMPlaylist**
- **LastFMUser** - **LastFMUser**
- **LaXarxaMes**: [*laxarxames*](## "netrc machine") - **LaXarxaMes**: [*laxarxames*](## "netrc machine")
- **lbry** - **lbry**: odysee.com
- **lbry:channel** - **lbry:channel**: odysee.com channels
- **lbry:playlist** - **lbry:playlist**: odysee.com playlists
- **LCI** - **LCI**
- **Lcp** - **Lcp**
- **LcpPlay** - **LcpPlay**
@ -785,10 +789,6 @@ # Supported sites
- **MicrosoftLearnSession** - **MicrosoftLearnSession**
- **MicrosoftMedius** - **MicrosoftMedius**
- **microsoftstream**: Microsoft Stream - **microsoftstream**: Microsoft Stream
- **mildom**: Record ongoing live by specific user in Mildom
- **mildom:clip**: Clip in Mildom
- **mildom:user:vod**: Download all VODs from specific user in Mildom
- **mildom:vod**: VOD in Mildom
- **minds** - **minds**
- **minds:channel** - **minds:channel**
- **minds:group** - **minds:group**
@ -799,6 +799,7 @@ # Supported sites
- **MiTele**: mitele.es - **MiTele**: mitele.es
- **mixch** - **mixch**
- **mixch:archive** - **mixch:archive**
- **mixch:movie**
- **mixcloud** - **mixcloud**
- **mixcloud:playlist** - **mixcloud:playlist**
- **mixcloud:user** - **mixcloud:user**
@ -1061,8 +1062,8 @@ # Supported sites
- **PhilharmonieDeParis**: Philharmonie de Paris - **PhilharmonieDeParis**: Philharmonie de Paris
- **phoenix.de** - **phoenix.de**
- **Photobucket** - **Photobucket**
- **PiaLive**
- **Piapro**: [*piapro*](## "netrc machine") - **Piapro**: [*piapro*](## "netrc machine")
- **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM
- **Picarto** - **Picarto**
- **PicartoVod** - **PicartoVod**
- **Piksel** - **Piksel**
@ -1089,8 +1090,6 @@ # Supported sites
- **PodbayFMChannel** - **PodbayFMChannel**
- **Podchaser** - **Podchaser**
- **podomatic**: (**Currently broken**) - **podomatic**: (**Currently broken**)
- **Pokemon**
- **PokemonWatch**
- **PokerGo**: [*pokergo*](## "netrc machine") - **PokerGo**: [*pokergo*](## "netrc machine")
- **PokerGoCollection**: [*pokergo*](## "netrc machine") - **PokerGoCollection**: [*pokergo*](## "netrc machine")
- **PolsatGo** - **PolsatGo**
@ -1161,6 +1160,7 @@ # Supported sites
- **RadioJavan**: (**Currently broken**) - **RadioJavan**: (**Currently broken**)
- **radiokapital** - **radiokapital**
- **radiokapital:show** - **radiokapital:show**
- **RadioRadicale**
- **RadioZetPodcast** - **RadioZetPodcast**
- **radlive** - **radlive**
- **radlive:channel** - **radlive:channel**
@ -1368,9 +1368,7 @@ # Supported sites
- **spotify**: Spotify episodes (**Currently broken**) - **spotify**: Spotify episodes (**Currently broken**)
- **spotify:show**: Spotify shows (**Currently broken**) - **spotify:show**: Spotify shows (**Currently broken**)
- **Spreaker** - **Spreaker**
- **SpreakerPage**
- **SpreakerShow** - **SpreakerShow**
- **SpreakerShowPage**
- **SpringboardPlatform** - **SpringboardPlatform**
- **Sprout** - **Sprout**
- **SproutVideo** - **SproutVideo**
@ -1446,7 +1444,7 @@ # Supported sites
- **TeleQuebecSquat** - **TeleQuebecSquat**
- **TeleQuebecVideo** - **TeleQuebecVideo**
- **TeleTask**: (**Currently broken**) - **TeleTask**: (**Currently broken**)
- **Telewebion** - **Telewebion**: (**Currently broken**)
- **Tempo** - **Tempo**
- **TennisTV**: [*tennistv*](## "netrc machine") - **TennisTV**: [*tennistv*](## "netrc machine")
- **TenPlay**: [*10play*](## "netrc machine") - **TenPlay**: [*10play*](## "netrc machine")
@ -1571,6 +1569,8 @@ # Supported sites
- **UFCTV**: [*ufctv*](## "netrc machine") - **UFCTV**: [*ufctv*](## "netrc machine")
- **ukcolumn**: (**Currently broken**) - **ukcolumn**: (**Currently broken**)
- **UKTVPlay** - **UKTVPlay**
- **UlizaPlayer**
- **UlizaPortal**: ulizaportal.jp
- **umg:de**: Universal Music Deutschland (**Currently broken**) - **umg:de**: Universal Music Deutschland (**Currently broken**)
- **Unistra** - **Unistra**
- **Unity**: (**Currently broken**) - **Unity**: (**Currently broken**)
@ -1588,8 +1588,6 @@ # Supported sites
- **Varzesh3**: (**Currently broken**) - **Varzesh3**: (**Currently broken**)
- **Vbox7** - **Vbox7**
- **Veo** - **Veo**
- **Veoh**
- **veoh:user**
- **Vesti**: Вести.Ru (**Currently broken**) - **Vesti**: Вести.Ru (**Currently broken**)
- **Vevo** - **Vevo**
- **VevoPlaylist** - **VevoPlaylist**

View File

@ -9,7 +9,6 @@
import yt_dlp.extractor import yt_dlp.extractor
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
if 'pytest' in sys.modules: if 'pytest' in sys.modules:
@ -49,7 +48,7 @@ def report_warning(message, *args, **kwargs):
Print the message to stderr, it will be prefixed with 'WARNING:' Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored If stderr is a tty file the 'WARNING:' will be colored
""" """
if sys.stderr.isatty() and compat_os_name != 'nt': if sys.stderr.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m' _msg_header = '\033[0;33mWARNING:\033[0m'
else: else:
_msg_header = 'WARNING:' _msg_header = 'WARNING:'

View File

@ -15,7 +15,6 @@
from test.helper import FakeYDL, assertRegexpMatches, try_rm from test.helper import FakeYDL, assertRegexpMatches, try_rm
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor from yt_dlp.postprocessor.common import PostProcessor
@ -762,6 +761,13 @@ def test(tmpl, expected, *, info=None, **params):
test('%(width)06d.%%(ext)s', 'NA.%(ext)s') test('%(width)06d.%%(ext)s', 'NA.%(ext)s')
test('%%(width)06d.%(ext)s', '%(width)06d.mp4') test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
# Sanitization options
test('%(title3)s', (None, 'foobartest'))
test('%(title5)s', (None, 'aei_A'), restrictfilenames=True)
test('%(title3)s', (None, 'foo_bar_test'), windowsfilenames=False, restrictfilenames=True)
if sys.platform != 'win32':
test('%(title3)s', (None, 'foobar\\test'), windowsfilenames=False)
# ID sanitization # ID sanitization
test('%(id)s', '_abcd', info={'id': '_abcd'}) test('%(id)s', '_abcd', info={'id': '_abcd'})
test('%(some_id)s', '_abcd', info={'some_id': '_abcd'}) test('%(some_id)s', '_abcd', info={'some_id': '_abcd'})
@ -839,8 +845,8 @@ def expect_same_infodict(out):
test('%(filesize)#D', '1Ki') test('%(filesize)#D', '1Ki')
test('%(height)5.2D', ' 1.08k') test('%(height)5.2D', ' 1.08k')
test('%(title4)#S', 'foo_bar_test') test('%(title4)#S', 'foo_bar_test')
test('%(title4).10S', ('foo bar ', 'foo bar' + ('#' if compat_os_name == 'nt' else ' '))) test('%(title4).10S', ('foo bar ', 'foo bar' + ('#' if os.name == 'nt' else ' ')))
if compat_os_name == 'nt': if os.name == 'nt':
test('%(title4)q', ('"foo ""bar"" test"', None)) test('%(title4)q', ('"foo ""bar"" test"', None))
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
test('%(formats.0.id)#q', ('"id 1"', None)) test('%(formats.0.id)#q', ('"id 1"', None))
@ -903,9 +909,9 @@ def gen():
# Environment variable expansion for prepare_filename # Environment variable expansion for prepare_filename
os.environ['__yt_dlp_var'] = 'expanded' os.environ['__yt_dlp_var'] = 'expanded'
envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' envvar = '%__yt_dlp_var%' if os.name == 'nt' else '$__yt_dlp_var'
test(envvar, (envvar, 'expanded')) test(envvar, (envvar, 'expanded'))
if compat_os_name == 'nt': if os.name == 'nt':
test('%s%', ('%s%', '%s%')) test('%s%', ('%s%', '%s%'))
os.environ['s'] = 'expanded' os.environ['s'] = 'expanded'
test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s

View File

@ -27,7 +27,6 @@
pad_block, pad_block,
) )
from yt_dlp.dependencies import Cryptodome from yt_dlp.dependencies import Cryptodome
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes
# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' # the encrypted data can be generate with 'devscripts/generate_aes_testdata.py'
@ -40,33 +39,33 @@ def setUp(self):
def test_encrypt(self): def test_encrypt(self):
msg = b'message' msg = b'message'
key = list(range(16)) key = list(range(16))
encrypted = aes_encrypt(bytes_to_intlist(msg), key) encrypted = aes_encrypt(list(msg), key)
decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) decrypted = bytes(aes_decrypt(encrypted, key))
self.assertEqual(decrypted, msg) self.assertEqual(decrypted, msg)
def test_cbc_decrypt(self): def test_cbc_decrypt(self):
data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) decrypted = bytes(aes_cbc_decrypt(list(data), self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
if Cryptodome.AES: if Cryptodome.AES:
decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) decrypted = aes_cbc_decrypt_bytes(data, bytes(self.key), bytes(self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_cbc_encrypt(self): def test_cbc_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) encrypted = bytes(aes_cbc_encrypt(data, self.key, self.iv))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd') b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd')
def test_ctr_decrypt(self): def test_ctr_decrypt(self):
data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') data = list(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv)) decrypted = bytes(aes_ctr_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_ctr_encrypt(self): def test_ctr_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv)) encrypted = bytes(aes_ctr_encrypt(data, self.key, self.iv))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
@ -75,19 +74,19 @@ def test_gcm_decrypt(self):
data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd' data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd'
authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e' authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e'
decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( decrypted = bytes(aes_gcm_decrypt_and_verify(
bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) list(data), self.key, list(authentication_tag), self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
if Cryptodome.AES: if Cryptodome.AES:
decrypted = aes_gcm_decrypt_and_verify_bytes( decrypted = aes_gcm_decrypt_and_verify_bytes(
data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) data, bytes(self.key), authentication_tag, bytes(self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_gcm_aligned_decrypt(self): def test_gcm_aligned_decrypt(self):
data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f' data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f'
authentication_tag = b'\x08\xb1\x9d!&\x98\xd0\xeaRq\x90\xe6;\xb5]\xd8' authentication_tag = b'\x08\xb1\x9d!&\x98\xd0\xeaRq\x90\xe6;\xb5]\xd8'
decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( decrypted = bytes(aes_gcm_decrypt_and_verify(
list(data), self.key, list(authentication_tag), self.iv[:12])) list(data), self.key, list(authentication_tag), self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16]) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16])
if Cryptodome.AES: if Cryptodome.AES:
@ -96,38 +95,38 @@ def test_gcm_aligned_decrypt(self):
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16]) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16])
def test_decrypt_text(self): def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode() password = bytes(self.key).decode()
encrypted = base64.b64encode( encrypted = base64.b64encode(
intlist_to_bytes(self.iv[:8]) bytes(self.iv[:8])
+ b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae', + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae',
).decode() ).decode()
decrypted = (aes_decrypt_text(encrypted, password, 16)) decrypted = (aes_decrypt_text(encrypted, password, 16))
self.assertEqual(decrypted, self.secret_msg) self.assertEqual(decrypted, self.secret_msg)
password = intlist_to_bytes(self.key).decode() password = bytes(self.key).decode()
encrypted = base64.b64encode( encrypted = base64.b64encode(
intlist_to_bytes(self.iv[:8]) bytes(self.iv[:8])
+ b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83', + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83',
).decode() ).decode()
decrypted = (aes_decrypt_text(encrypted, password, 32)) decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg) self.assertEqual(decrypted, self.secret_msg)
def test_ecb_encrypt(self): def test_ecb_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) encrypted = bytes(aes_ecb_encrypt(data, self.key))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
def test_ecb_decrypt(self): def test_ecb_decrypt(self):
data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') data = list(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) decrypted = bytes(aes_ecb_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_key_expansion(self): def test_key_expansion(self):
key = '4f6bdaa39e2f8cb07f5e722d9edef314' key = '4f6bdaa39e2f8cb07f5e722d9edef314'
self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [ self.assertEqual(key_expansion(list(bytearray.fromhex(key))), [
0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14, 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14,
0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21, 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21,
0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5, 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5,

View File

@ -12,12 +12,7 @@
from yt_dlp import compat from yt_dlp import compat
from yt_dlp.compat import urllib # isort: split from yt_dlp.compat import urllib # isort: split
from yt_dlp.compat import ( from yt_dlp.compat import compat_etree_fromstring, compat_expanduser
compat_etree_fromstring,
compat_expanduser,
compat_urllib_parse_unquote, # noqa: TID251
compat_urllib_parse_urlencode, # noqa: TID251
)
from yt_dlp.compat.urllib.request import getproxies from yt_dlp.compat.urllib.request import getproxies
@ -43,39 +38,6 @@ def test_compat_expanduser(self):
finally: finally:
os.environ['HOME'] = old_home or '' os.environ['HOME'] = old_home or ''
def test_compat_urllib_parse_unquote(self):
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
self.assertEqual(compat_urllib_parse_unquote(''), '')
self.assertEqual(compat_urllib_parse_unquote('%'), '%')
self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
self.assertEqual(
compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
'''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
self.assertEqual(
compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
def test_compat_urllib_parse_unquote_plus(self):
self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def')
self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def')
def test_compat_urllib_parse_urlencode(self):
self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
def test_compat_etree_fromstring(self): def test_compat_etree_fromstring(self):
xml = ''' xml = '''
<root foo="bar" spam="中文"> <root foo="bar" spam="中文">

View File

@ -15,7 +15,6 @@
from test.helper import http_server_port, try_rm from test.helper import http_server_port, try_rm
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.downloader.http import HttpFD from yt_dlp.downloader.http import HttpFD
from yt_dlp.utils import encodeFilename
from yt_dlp.utils._utils import _YDLLogger as FakeLogger from yt_dlp.utils._utils import _YDLLogger as FakeLogger
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@ -82,12 +81,12 @@ def download(self, params, ep):
ydl = YoutubeDL(params) ydl = YoutubeDL(params)
downloader = HttpFD(ydl, params) downloader = HttpFD(ydl, params)
filename = 'testfile.mp4' filename = 'testfile.mp4'
try_rm(encodeFilename(filename)) try_rm(filename)
self.assertTrue(downloader.real_download(filename, { self.assertTrue(downloader.real_download(filename, {
'url': f'http://127.0.0.1:{self.port}/{ep}', 'url': f'http://127.0.0.1:{self.port}/{ep}',
}), ep) }), ep)
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) self.assertEqual(os.path.getsize(filename), TEST_SIZE, ep)
try_rm(encodeFilename(filename)) try_rm(filename)
def download_all(self, params): def download_all(self, params):
for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'): for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):

View File

@ -216,7 +216,9 @@ def handle(self):
protocol = websockets.ServerProtocol() protocol = websockets.ServerProtocol()
connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0) connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0)
connection.handshake() connection.handshake()
connection.send(json.dumps(self.socks_info)) for message in connection:
if message == 'socks_info':
connection.send(json.dumps(self.socks_info))
connection.close() connection.close()

View File

@ -481,7 +481,7 @@ def test_subs_list_to_dict(self):
'id': 'name', 'id': 'name',
'data': 'content', 'data': 'content',
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict}]) == { }, all, {subs_list_to_dict(lang=None)}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}], 'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}], 'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered' }, 'subs with mandatory items missing should be filtered'
@ -490,7 +490,7 @@ def test_subs_list_to_dict(self):
{'url': 'https://example.com/subs/en', 'name': 'en'}, {'url': 'https://example.com/subs/en', 'name': 'en'},
], [..., { ], [..., {
'id': 'name', 'id': 'name',
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}], 'ext': ['url', {determine_ext(default_ext=None)}],
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == { }, all, {subs_list_to_dict(ext='ext')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}], 'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
@ -507,6 +507,54 @@ def test_subs_list_to_dict(self):
{'url': 'https://example.com/subs/en1', 'ext': 'ext'}, {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'}, {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly' ]}, '`quality` key should sort subtitle list accordingly'
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
{'name': 'de'},
{'name': 'en', 'content': 'content'},
{'url': 'https://example.com/subs/en'},
], [..., {
'id': 'name',
'url': 'url',
'data': 'content',
}, all, {subs_list_to_dict(lang='en')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [
{'data': 'content'},
{'url': 'https://example.com/subs/en'},
],
}, 'optionally provided lang should be used if no id available'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang=None)}]) == {
'de': [
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be ignored for id and ext'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang='de')}]) == {
'de': [
{'url': 'https://example.com/subs/de1'},
{'url': 'https://example.com/subs/de2'},
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be replaced by default id'
def test_trim_str(self): def test_trim_str(self):
with pytest.raises(TypeError): with pytest.raises(TypeError):
@ -525,7 +573,7 @@ def test_trim_str(self):
def test_unpack(self): def test_unpack(self):
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123' assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3' assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3' assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
with pytest.raises(TypeError): with pytest.raises(TypeError):
unpack(join_nonempty)() unpack(join_nonempty)()
with pytest.raises(TypeError): with pytest.raises(TypeError):

View File

@ -21,7 +21,6 @@
from yt_dlp.compat import ( from yt_dlp.compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_HTMLParseError, compat_HTMLParseError,
compat_os_name,
) )
from yt_dlp.utils import ( from yt_dlp.utils import (
Config, Config,
@ -49,7 +48,6 @@
dfxp2srt, dfxp2srt,
encode_base_n, encode_base_n,
encode_compat_str, encode_compat_str,
encodeFilename,
expand_path, expand_path,
extract_attributes, extract_attributes,
extract_basic_auth, extract_basic_auth,
@ -69,10 +67,8 @@
get_elements_html_by_class, get_elements_html_by_class,
get_elements_text_and_html_by_attribute, get_elements_text_and_html_by_attribute,
int_or_none, int_or_none,
intlist_to_bytes,
iri_to_uri, iri_to_uri,
is_html, is_html,
join_nonempty,
js_to_json, js_to_json,
limit_length, limit_length,
locked_file, locked_file,
@ -567,10 +563,10 @@ def test_smuggle_url(self):
self.assertEqual(res_data, {'a': 'b', 'c': 'd'}) self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
def test_shell_quote(self): def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] args = ['ffmpeg', '-i', 'ñ€ß\'.mp4']
self.assertEqual( self.assertEqual(
shell_quote(args), shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if os.name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
def test_float_or_none(self): def test_float_or_none(self):
self.assertEqual(float_or_none('42.42'), 42.42) self.assertEqual(float_or_none('42.42'), 42.42)
@ -1310,15 +1306,10 @@ def test_clean_html(self):
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb') self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
def test_intlist_to_bytes(self):
self.assertEqual(
intlist_to_bytes([0, 1, 127, 128, 255]),
b'\x00\x01\x7f\x80\xff')
def test_args_to_str(self): def test_args_to_str(self):
self.assertEqual( self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', 'foo ba/r -baz \'2 be\' \'\'' if os.name != 'nt' else 'foo ba/r -baz "2 be" ""',
) )
def test_parse_filesize(self): def test_parse_filesize(self):
@ -2118,7 +2109,7 @@ def test_extract_basic_auth(self):
assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') @unittest.skipUnless(os.name == 'nt', 'Only relevant on Windows')
def test_windows_escaping(self): def test_windows_escaping(self):
tests = [ tests = [
'test"&', 'test"&',
@ -2156,11 +2147,7 @@ def test_partial_application(self):
assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially' assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function' assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
assert int_or_none(v=10) == 10, 'keyword passed positional should call function' assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial applicatino should call the function' assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
assert callable(join_nonempty()), 'varargs positional should apply partially'
assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -68,6 +68,16 @@
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
), ),
(
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
),
(
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
),
] ]
_NSIG_TESTS = [ _NSIG_TESTS = [
@ -183,6 +193,14 @@
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
), ),
(
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
),
(
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
),
] ]
@ -254,8 +272,11 @@ def signature(jscode, sig_input):
def n_sig(jscode, sig_input): def n_sig(jscode, sig_input):
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) ie = YoutubeIE(FakeYDL())
return NativeJSI(jscode).call_function(funcname, sig_input) funcname = ie._extract_n_function_name(jscode)
jsi = NativeJSI(jscode)
func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname)))
return func([sig_input])
make_sig_test = t_factory( make_sig_test = t_factory(

View File

@ -26,7 +26,7 @@
from .cache import Cache from .cache import Cache
from .compat import urllib # isort: split from .compat import urllib # isort: split
from .compat import compat_os_name, urllib_req_to_req from .compat import urllib_req_to_req
from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version from .downloader.rtmp import rtmpdump_version
@ -109,7 +109,6 @@
determine_ext, determine_ext,
determine_protocol, determine_protocol,
encode_compat_str, encode_compat_str,
encodeFilename,
escapeHTML, escapeHTML,
expand_path, expand_path,
extract_basic_auth, extract_basic_auth,
@ -167,7 +166,7 @@
) )
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__ from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
if compat_os_name == 'nt': if os.name == 'nt':
import ctypes import ctypes
@ -267,7 +266,9 @@ class YoutubeDL:
outtmpl_na_placeholder: Placeholder for unavailable meta fields. outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names restrictfilenames: Do not allow "&" and spaces in file names
trim_file_name: Limit length of filename (extension excluded) trim_file_name: Limit length of filename (extension excluded)
windowsfilenames: Force the filenames to be windows compatible windowsfilenames: True: Force filenames to be Windows compatible
False: Sanitize filenames only minimally
This option has no effect when running on Windows
ignoreerrors: Do not stop on download/postprocessing errors. ignoreerrors: Do not stop on download/postprocessing errors.
Can be 'only_download' to ignore only download errors. Can be 'only_download' to ignore only download errors.
Default is 'only_download' for CLI, but False for API Default is 'only_download' for CLI, but False for API
@ -643,7 +644,7 @@ def __init__(self, params=None, auto_init=True):
out=stdout, out=stdout,
error=sys.stderr, error=sys.stderr,
screen=sys.stderr if self.params.get('quiet') else stdout, screen=sys.stderr if self.params.get('quiet') else stdout,
console=None if compat_os_name == 'nt' else next( console=None if os.name == 'nt' else next(
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
) )
@ -952,7 +953,7 @@ def to_stderr(self, message, only_once=False):
self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
def _send_console_code(self, code): def _send_console_code(self, code):
if compat_os_name == 'nt' or not self._out_files.console: if os.name == 'nt' or not self._out_files.console:
return return
self._write_string(code, self._out_files.console) self._write_string(code, self._out_files.console)
@ -960,7 +961,7 @@ def to_console_title(self, message):
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
message = remove_terminal_sequences(message) message = remove_terminal_sequences(message)
if compat_os_name == 'nt': if os.name == 'nt':
if ctypes.windll.kernel32.GetConsoleWindow(): if ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is # c_wchar_p() might not be necessary if `message` is
# already of type unicode() # already of type unicode()
@ -1117,7 +1118,7 @@ def report_file_delete(self, file_name):
def raise_no_formats(self, info, forced=False, *, msg=None): def raise_no_formats(self, info, forced=False, *, msg=None):
has_drm = info.get('_has_drm') has_drm = info.get('_has_drm')
ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' msg = msg or (has_drm and 'This video is DRM protected') or 'No video formats found!'
if forced or not ignored: if forced or not ignored:
raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
expected=has_drm or ignored or expected) expected=has_drm or ignored or expected)
@ -1193,8 +1194,7 @@ def _copy_infodict(info_dict):
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
""" Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
@param sanitize Whether to sanitize the output as a filename. @param sanitize Whether to sanitize the output as a filename
For backward compatibility, a function can also be passed
""" """
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
@ -1310,14 +1310,23 @@ def get_value(mdict):
na = self.params.get('outtmpl_na_placeholder', 'NA') na = self.params.get('outtmpl_na_placeholder', 'NA')
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')): def filename_sanitizer(key, value, restricted):
return sanitize_filename(str(value), restricted=restricted, is_id=( return sanitize_filename(str(value), restricted=restricted, is_id=(
bool(re.search(r'(^|[_.])id(\.|$)', key)) bool(re.search(r'(^|[_.])id(\.|$)', key))
if 'filename-sanitization' in self.params['compat_opts'] if 'filename-sanitization' in self.params['compat_opts']
else NO_DEFAULT)) else NO_DEFAULT))
sanitizer = sanitize if callable(sanitize) else filename_sanitizer if callable(sanitize):
sanitize = bool(sanitize) self.deprecation_warning('Passing a callable "sanitize" to YoutubeDL.prepare_outtmpl is deprecated')
elif not sanitize:
pass
elif (sys.platform != 'win32' and not self.params.get('restrictfilenames')
and self.params.get('windowsfilenames') is False):
def sanitize(key, value):
return value.replace('/', '\u29F8').replace('\0', '')
else:
def sanitize(key, value):
return filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames'))
def _dumpjson_default(obj): def _dumpjson_default(obj):
if isinstance(obj, (set, LazyList)): if isinstance(obj, (set, LazyList)):
@ -1400,13 +1409,13 @@ def create_key(outer_mobj):
if sanitize: if sanitize:
# If value is an object, sanitize might convert it to a string # If value is an object, sanitize might convert it to a string
# So we convert it to repr first # So we manually convert it before sanitizing
if fmt[-1] == 'r': if fmt[-1] == 'r':
value, fmt = repr(value), str_fmt value, fmt = repr(value), str_fmt
elif fmt[-1] == 'a': elif fmt[-1] == 'a':
value, fmt = ascii(value), str_fmt value, fmt = ascii(value), str_fmt
if fmt[-1] in 'csra': if fmt[-1] in 'csra':
value = sanitizer(last_field, value) value = sanitize(last_field, value)
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
TMPL_DICT[key] = value TMPL_DICT[key] = value
@ -1948,6 +1957,7 @@ def _playlist_infodict(ie_result, strict=False, **kwargs):
'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_channel': ie_result.get('channel'), 'playlist_channel': ie_result.get('channel'),
'playlist_channel_id': ie_result.get('channel_id'), 'playlist_channel_id': ie_result.get('channel_id'),
'playlist_webpage_url': ie_result.get('webpage_url'),
**kwargs, **kwargs,
} }
if strict: if strict:
@ -2196,7 +2206,7 @@ def _select_formats(self, formats, selector):
def _default_format_spec(self, info_dict): def _default_format_spec(self, info_dict):
prefer_best = ( prefer_best = (
self.params['outtmpl']['default'] == '-' self.params['outtmpl']['default'] == '-'
or info_dict.get('is_live') and not self.params.get('live_from_start')) or (info_dict.get('is_live') and not self.params.get('live_from_start')))
def can_merge(): def can_merge():
merger = FFmpegMergerPP(self) merger = FFmpegMergerPP(self)
@ -2365,7 +2375,7 @@ def _merge(formats_pair):
vexts=[f['ext'] for f in video_fmts], vexts=[f['ext'] for f in video_fmts],
aexts=[f['ext'] for f in audio_fmts], aexts=[f['ext'] for f in audio_fmts],
preferences=(try_call(lambda: self.params['merge_output_format'].split('/')) preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
or self.params.get('prefer_free_formats') and ('webm', 'mkv'))) or (self.params.get('prefer_free_formats') and ('webm', 'mkv'))))
filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
@ -3255,9 +3265,9 @@ def check_max_downloads():
if full_filename is None: if full_filename is None:
return return
if not self._ensure_dir_exists(encodeFilename(full_filename)): if not self._ensure_dir_exists(full_filename):
return return
if not self._ensure_dir_exists(encodeFilename(temp_filename)): if not self._ensure_dir_exists(temp_filename):
return return
if self._write_description('video', info_dict, if self._write_description('video', info_dict,
@ -3289,16 +3299,16 @@ def check_max_downloads():
if self.params.get('writeannotations', False): if self.params.get('writeannotations', False):
annofn = self.prepare_filename(info_dict, 'annotation') annofn = self.prepare_filename(info_dict, 'annotation')
if annofn: if annofn:
if not self._ensure_dir_exists(encodeFilename(annofn)): if not self._ensure_dir_exists(annofn):
return return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): if not self.params.get('overwrites', True) and os.path.exists(annofn):
self.to_screen('[info] Video annotations are already present') self.to_screen('[info] Video annotations are already present')
elif not info_dict.get('annotations'): elif not info_dict.get('annotations'):
self.report_warning('There are no annotations to write.') self.report_warning('There are no annotations to write.')
else: else:
try: try:
self.to_screen('[info] Writing video annotations to: ' + annofn) self.to_screen('[info] Writing video annotations to: ' + annofn)
with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: with open(annofn, 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations']) annofile.write(info_dict['annotations'])
except (KeyError, TypeError): except (KeyError, TypeError):
self.report_warning('There are no annotations to write.') self.report_warning('There are no annotations to write.')
@ -3314,14 +3324,14 @@ def _write_link_file(link_type):
f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown') f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
return True return True
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
if not self._ensure_dir_exists(encodeFilename(linkfn)): if not self._ensure_dir_exists(linkfn):
return False return False
if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): if self.params.get('overwrites', True) and os.path.exists(linkfn):
self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
return True return True
try: try:
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', with open(to_high_limit_path(linkfn), 'w', encoding='utf-8',
newline='\r\n' if link_type == 'url' else '\n') as linkfile: newline='\r\n' if link_type == 'url' else '\n') as linkfile:
template_vars = {'url': url} template_vars = {'url': url}
if link_type == 'desktop': if link_type == 'desktop':
@ -3352,7 +3362,7 @@ def _write_link_file(link_type):
if self.params.get('skip_download'): if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
info_dict['__files_to_move'] = files_to_move info_dict['__files_to_move'] = files_to_move
replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
@ -3482,7 +3492,7 @@ def correct_ext(filename, ext=new_ext):
self.report_file_already_downloaded(dl_filename) self.report_file_already_downloaded(dl_filename)
dl_filename = dl_filename or temp_filename dl_filename = dl_filename or temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
except network_exceptions as err: except network_exceptions as err:
self.report_error(f'unable to download video data: {err}') self.report_error(f'unable to download video data: {err}')
@ -3541,8 +3551,8 @@ def ffmpeg_fixup(cndn, msg, cls):
and info_dict.get('container') == 'm4a_dash', and info_dict.get('container') == 'm4a_dash',
'writing DASH m4a. Only some players support this container', 'writing DASH m4a. Only some players support this container',
FFmpegFixupM4aPP) FFmpegFixupM4aPP)
ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts'))
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None),
'Possible MPEG-TS in MP4 container or malformed AAC timestamps', 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
FFmpegFixupM3u8PP) FFmpegFixupM3u8PP)
ffmpeg_fixup(downloader == 'dashsegments' ffmpeg_fixup(downloader == 'dashsegments'
@ -4297,7 +4307,7 @@ def _write_description(self, label, ie_result, descfn):
else: else:
try: try:
self.to_screen(f'[info] Writing {label} description to: {descfn}') self.to_screen(f'[info] Writing {label} description to: {descfn}')
with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: with open(descfn, 'w', encoding='utf-8') as descfile:
descfile.write(ie_result['description']) descfile.write(ie_result['description'])
except OSError: except OSError:
self.report_error(f'Cannot write {label} description file {descfn}') self.report_error(f'Cannot write {label} description file {descfn}')
@ -4381,7 +4391,9 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
return None return None
for idx, t in list(enumerate(thumbnails))[::-1]: for idx, t in list(enumerate(thumbnails))[::-1]:
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
if multiple:
thumb_ext = f'{t["id"]}.{thumb_ext}'
thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_display_id = f'{label} thumbnail {t["id"]}'
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
@ -4397,7 +4409,7 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
try: try:
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {}))) uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf: with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf) shutil.copyfileobj(uf, thumbf)
ret.append((thumb_filename, thumb_filename_final)) ret.append((thumb_filename, thumb_filename_final))
t['filepath'] = thumb_filename t['filepath'] = thumb_filename

View File

@ -14,7 +14,6 @@
import re import re
import traceback import traceback
from .compat import compat_os_name
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
from .downloader.external import get_external_downloader from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes from .extractor import list_extractor_classes
@ -44,7 +43,6 @@
GeoUtils, GeoUtils,
PlaylistEntries, PlaylistEntries,
SameFileError, SameFileError,
decodeOption,
download_range_func, download_range_func,
expand_path, expand_path,
float_or_none, float_or_none,
@ -883,8 +881,8 @@ def parse_options(argv=None):
'listsubtitles': opts.listsubtitles, 'listsubtitles': opts.listsubtitles,
'subtitlesformat': opts.subtitlesformat, 'subtitlesformat': opts.subtitlesformat,
'subtitleslangs': opts.subtitleslangs, 'subtitleslangs': opts.subtitleslangs,
'matchtitle': decodeOption(opts.matchtitle), 'matchtitle': opts.matchtitle,
'rejecttitle': decodeOption(opts.rejecttitle), 'rejecttitle': opts.rejecttitle,
'max_downloads': opts.max_downloads, 'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats, 'prefer_free_formats': opts.prefer_free_formats,
'trim_file_name': opts.trim_file_name, 'trim_file_name': opts.trim_file_name,
@ -1053,7 +1051,7 @@ def make_row(target, handler):
ydl.warn_if_short_id(args) ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows # Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False): if not args and os.name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes import ctypes.wintypes
import msvcrt import msvcrt
@ -1064,7 +1062,7 @@ def make_row(target, handler):
# If we only have a single process attached, then the executable was double clicked # If we only have a single process attached, then the executable was double clicked
# When using `pyinstaller` with `--onefile`, two processes get attached # When using `pyinstaller` with `--onefile`, two processes get attached
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI') is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
if attached_processes == 1 or is_onefile and attached_processes == 2: if attached_processes == 1 or (is_onefile and attached_processes == 2):
print(parser._generate_error_message( print(parser._generate_error_message(
'Do not double-click the executable, instead call it from a command line.\n' 'Do not double-click the executable, instead call it from a command line.\n'
'Please read the README for further information on how to use yt-dlp: ' 'Please read the README for further information on how to use yt-dlp: '
@ -1111,9 +1109,9 @@ def main(argv=None):
from .extractor import gen_extractors, list_extractors from .extractor import gen_extractors, list_extractors
__all__ = [ __all__ = [
'main',
'YoutubeDL', 'YoutubeDL',
'parse_options',
'gen_extractors', 'gen_extractors',
'list_extractors', 'list_extractors',
'main',
'parse_options',
] ]

View File

@ -3,7 +3,6 @@
from .compat import compat_ord from .compat import compat_ord
from .dependencies import Cryptodome from .dependencies import Cryptodome
from .utils import bytes_to_intlist, intlist_to_bytes
if Cryptodome.AES: if Cryptodome.AES:
def aes_cbc_decrypt_bytes(data, key, iv): def aes_cbc_decrypt_bytes(data, key, iv):
@ -17,15 +16,15 @@ def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
else: else:
def aes_cbc_decrypt_bytes(data, key, iv): def aes_cbc_decrypt_bytes(data, key, iv):
""" Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
""" Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
BLOCK_SIZE_BYTES = 16 BLOCK_SIZE_BYTES = 16
@ -221,7 +220,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
j0 = [*nonce, 0, 0, 0, 1] j0 = [*nonce, 0, 0, 0, 1]
else: else:
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
j0 = ghash(hash_subkey, ghash_in) j0 = ghash(hash_subkey, ghash_in)
# TODO: add nonce support to aes_ctr_decrypt # TODO: add nonce support to aes_ctr_decrypt
@ -234,9 +233,9 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
s_tag = ghash( s_tag = ghash(
hash_subkey, hash_subkey,
data data
+ [0] * pad_len # pad + [0] * pad_len # pad
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data + list((0 * 8).to_bytes(8, 'big') # length of associated data
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data + ((len(data) * 8).to_bytes(8, 'big'))), # length of data
) )
if tag != aes_ctr_encrypt(s_tag, key, j0): if tag != aes_ctr_encrypt(s_tag, key, j0):
@ -300,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
""" """
NONCE_LENGTH_BYTES = 8 NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(base64.b64decode(data)) data = list(base64.b64decode(data))
password = bytes_to_intlist(password.encode()) password = list(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
@ -310,7 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
cipher = data[NONCE_LENGTH_BYTES:] cipher = data[NONCE_LENGTH_BYTES:]
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
return intlist_to_bytes(decrypted_data) return bytes(decrypted_data)
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
@ -535,19 +534,17 @@ def ghash(subkey, data):
__all__ = [ __all__ = [
'aes_cbc_decrypt', 'aes_cbc_decrypt',
'aes_cbc_decrypt_bytes', 'aes_cbc_decrypt_bytes',
'aes_ctr_decrypt',
'aes_decrypt_text',
'aes_decrypt',
'aes_ecb_decrypt',
'aes_gcm_decrypt_and_verify',
'aes_gcm_decrypt_and_verify_bytes',
'aes_cbc_encrypt', 'aes_cbc_encrypt',
'aes_cbc_encrypt_bytes', 'aes_cbc_encrypt_bytes',
'aes_ctr_decrypt',
'aes_ctr_encrypt', 'aes_ctr_encrypt',
'aes_decrypt',
'aes_decrypt_text',
'aes_ecb_decrypt',
'aes_ecb_encrypt', 'aes_ecb_encrypt',
'aes_encrypt', 'aes_encrypt',
'aes_gcm_decrypt_and_verify',
'aes_gcm_decrypt_and_verify_bytes',
'key_expansion', 'key_expansion',
'pad_block', 'pad_block',
'pkcs7_padding', 'pkcs7_padding',

View File

@ -1,5 +1,4 @@
import os import os
import sys
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
@ -24,33 +23,14 @@ def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
compat_os_name = os._name if os.name == 'java' else os.name
def compat_shlex_quote(s):
from ..utils import shell_quote
return shell_quote(s)
def compat_ord(c): def compat_ord(c):
return c if isinstance(c, int) else ord(c) return c if isinstance(c, int) else ord(c)
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
def compat_realpath(path):
while os.path.islink(path):
path = os.path.abspath(os.readlink(path))
return os.path.realpath(path)
else:
compat_realpath = os.path.realpath
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792 # See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser # https://docs.python.org/3/library/os.path.html#os.path.expanduser
if compat_os_name in ('nt', 'ce'): if os.name in ('nt', 'ce'):
def compat_expanduser(path): def compat_expanduser(path):
HOME = os.environ.get('HOME') HOME = os.environ.get('HOME')
if not HOME: if not HOME:

View File

@ -8,16 +8,14 @@
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module del passthrough_module
import base64 import functools # noqa: F401
import urllib.error import os
import urllib.parse
compat_str = str
compat_b64decode = base64.b64decode compat_os_name = os.name
compat_realpath = os.path.realpath
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs def compat_shlex_quote(s):
compat_urllib_parse_unquote = urllib.parse.unquote from ..utils import shell_quote
compat_urllib_parse_urlencode = urllib.parse.urlencode return shell_quote(s)
compat_urllib_parse_urlparse = urllib.parse.urlparse

View File

@ -30,7 +30,7 @@
from re import Pattern as compat_Pattern # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401
from re import match as compat_Match # noqa: F401 from re import match as compat_Match # noqa: F401
from . import compat_expanduser, compat_HTMLParseError, compat_realpath from . import compat_expanduser, compat_HTMLParseError
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401
@ -78,7 +78,7 @@ def compat_setenv(key, value, env=os.environ):
compat_map = map compat_map = map
compat_numeric_types = (int, float, complex) compat_numeric_types = (int, float, complex)
compat_os_path_expanduser = compat_expanduser compat_os_path_expanduser = compat_expanduser
compat_os_path_realpath = compat_realpath compat_os_path_realpath = os.path.realpath
compat_print = print compat_print = print
compat_shlex_split = shlex.split compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection compat_socket_create_connection = socket.create_connection
@ -104,5 +104,12 @@ def compat_setenv(key, value, env=os.environ):
compat_xpath = lambda xpath: xpath compat_xpath = lambda xpath: xpath
compat_zip = zip compat_zip = zip
workaround_optparse_bug9161 = lambda: None workaround_optparse_bug9161 = lambda: None
compat_str = str
compat_b64decode = base64.b64decode
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse
legacy = [] legacy = []

View File

@ -1,7 +0,0 @@
# flake8: noqa: F405
from functools import * # noqa: F403
from .compat_utils import passthrough_module
passthrough_module(__name__, 'functools')
del passthrough_module

View File

@ -7,9 +7,9 @@
del passthrough_module del passthrough_module
from .. import compat_os_name import os
if compat_os_name == 'nt': if os.name == 'nt':
# On older Python versions, proxies are extracted from Windows registry erroneously. [1] # On older Python versions, proxies are extracted from Windows registry erroneously. [1]
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
@ -37,4 +37,4 @@ def getproxies_registry_patched():
def getproxies(): def getproxies():
return getproxies_environment() or getproxies_registry_patched() return getproxies_environment() or getproxies_registry_patched()
del compat_os_name del os

View File

@ -25,7 +25,6 @@
aes_gcm_decrypt_and_verify_bytes, aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7, unpad_pkcs7,
) )
from .compat import compat_os_name
from .dependencies import ( from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON, _SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage, secretstorage,
@ -196,7 +195,10 @@ def _extract_firefox_cookies(profile, container, logger):
def _firefox_browser_dirs(): def _firefox_browser_dirs():
if sys.platform in ('cygwin', 'win32'): if sys.platform in ('cygwin', 'win32'):
yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') yield from map(os.path.expandvars, (
R'%APPDATA%\Mozilla\Firefox\Profiles',
R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles',
))
elif sys.platform == 'darwin': elif sys.platform == 'darwin':
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles') yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
@ -343,7 +345,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
logger.debug(f'cookie version breakdown: {counts}') logger.debug(f'cookie version breakdown: {counts}')
return jar return jar
except PermissionError as error: except PermissionError as error:
if compat_os_name == 'nt' and error.errno == 13: if os.name == 'nt' and error.errno == 13:
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info' message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
logger.error(message) logger.error(message)
raise DownloadError(message) # force exit raise DownloadError(message) # force exit
@ -1277,8 +1279,8 @@ def open(self, file, *, write=False):
def _really_save(self, f, ignore_discard, ignore_expires): def _really_save(self, f, ignore_discard, ignore_expires):
now = time.time() now = time.time()
for cookie in self: for cookie in self:
if (not ignore_discard and cookie.discard if ((not ignore_discard and cookie.discard)
or not ignore_expires and cookie.is_expired(now)): or (not ignore_expires and cookie.is_expired(now))):
continue continue
name, value = cookie.name, cookie.value name, value = cookie.name, cookie.value
if value is None: if value is None:

View File

@ -24,7 +24,7 @@
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
from Crypto.Hash import CMAC, SHA1 # noqa: F401 from Crypto.Hash import CMAC, SHA1 # noqa: F401
from Crypto.PublicKey import RSA # noqa: F401 from Crypto.PublicKey import RSA # noqa: F401
except ImportError: except (ImportError, OSError):
__version__ = f'broken {__version__}'.strip() __version__ = f'broken {__version__}'.strip()

View File

@ -20,9 +20,7 @@
Namespace, Namespace,
RetryManager, RetryManager,
classproperty, classproperty,
decodeArgument,
deprecation_warning, deprecation_warning,
encodeFilename,
format_bytes, format_bytes,
join_nonempty, join_nonempty,
parse_bytes, parse_bytes,
@ -219,7 +217,7 @@ def slow_down(self, start_time, now, byte_counter):
def temp_name(self, filename): def temp_name(self, filename):
"""Returns a temporary filename for the given filename.""" """Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == '-' or \ if self.params.get('nopart', False) or filename == '-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): (os.path.exists(filename) and not os.path.isfile(filename)):
return filename return filename
return filename + '.part' return filename + '.part'
@ -273,7 +271,7 @@ def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file.""" """Try to set the last-modified time of the given file."""
if last_modified_hdr is None: if last_modified_hdr is None:
return return
if not os.path.isfile(encodeFilename(filename)): if not os.path.isfile(filename):
return return
timestr = last_modified_hdr timestr = last_modified_hdr
if timestr is None: if timestr is None:
@ -432,13 +430,13 @@ def download(self, filename, info_dict, subtitle=False):
""" """
nooverwrites_and_exists = ( nooverwrites_and_exists = (
not self.params.get('overwrites', True) not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename)) and os.path.exists(filename)
) )
if not hasattr(filename, 'write'): if not hasattr(filename, 'write'):
continuedl_and_exists = ( continuedl_and_exists = (
self.params.get('continuedl', True) self.params.get('continuedl', True)
and os.path.isfile(encodeFilename(filename)) and os.path.isfile(filename)
and not self.params.get('nopart', False) and not self.params.get('nopart', False)
) )
@ -448,7 +446,7 @@ def download(self, filename, info_dict, subtitle=False):
self._hook_progress({ self._hook_progress({
'filename': filename, 'filename': filename,
'status': 'finished', 'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)), 'total_bytes': os.path.getsize(filename),
}, info_dict) }, info_dict)
self._finish_multiline_status() self._finish_multiline_status()
return True, False return True, False
@ -489,9 +487,7 @@ def _debug_cmd(self, args, exe=None):
if not self.params.get('verbose', False): if not self.params.get('verbose', False):
return return
str_args = [decodeArgument(a) for a in args]
if exe is None: if exe is None:
exe = os.path.basename(str_args[0]) exe = os.path.basename(args[0])
self.write_debug(f'{exe} command line: {shell_quote(str_args)}') self.write_debug(f'{exe} command line: {shell_quote(args)}')

View File

@ -23,7 +23,6 @@
cli_valueless_option, cli_valueless_option,
determine_ext, determine_ext,
encodeArgument, encodeArgument,
encodeFilename,
find_available_port, find_available_port,
remove_end, remove_end,
traverse_obj, traverse_obj,
@ -67,7 +66,7 @@ def real_download(self, filename, info_dict):
'elapsed': time.time() - started, 'elapsed': time.time() - started,
} }
if filename != '-': if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
status.update({ status.update({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,
@ -184,9 +183,9 @@ def _call_downloader(self, tmpfilename, info_dict):
dest.write(decrypt_fragment(fragment, src.read())) dest.write(decrypt_fragment(fragment, src.read()))
src.close() src.close()
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(fragment_filename)) self.try_remove(fragment_filename)
dest.close() dest.close()
self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) self.try_remove(f'{tmpfilename}.frag.urls')
return 0 return 0
def _call_process(self, cmd, info_dict): def _call_process(self, cmd, info_dict):
@ -620,7 +619,7 @@ def _call_downloader(self, tmpfilename, info_dict):
args += self._configuration_args(('_o1', '_o', '')) args += self._configuration_args(('_o1', '_o', ''))
args = [encodeArgument(opt) for opt in args] args = [encodeArgument(opt) for opt in args]
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) args.append(ffpp._ffmpeg_filename_argument(tmpfilename))
self._debug_cmd(args) self._debug_cmd(args)
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)

View File

@ -9,10 +9,9 @@
from .common import FileDownloader from .common import FileDownloader
from .http import HttpFD from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name
from ..networking import Request from ..networking import Request
from ..networking.exceptions import HTTPError, IncompleteRead from ..networking.exceptions import HTTPError, IncompleteRead
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj from ..utils import DownloadError, RetryManager, traverse_obj
from ..utils.networking import HTTPHeaderDict from ..utils.networking import HTTPHeaderDict
from ..utils.progress import ProgressCalculator from ..utils.progress import ProgressCalculator
@ -152,7 +151,7 @@ def _append_fragment(self, ctx, frag_content):
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx) self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) self.try_remove(ctx['fragment_filename_sanitized'])
del ctx['fragment_filename_sanitized'] del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx): def _prepare_frag_download(self, ctx):
@ -188,7 +187,7 @@ def _prepare_frag_download(self, ctx):
}) })
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename']))
continuedl = self.params.get('continuedl', True) continuedl = self.params.get('continuedl', True)
if continuedl and ytdl_file_exists: if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx) self._read_ytdl_file(ctx)
@ -390,7 +389,7 @@ class FTPE(concurrent.futures.ThreadPoolExecutor):
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
pass pass
if compat_os_name == 'nt': if os.name == 'nt':
def future_result(future): def future_result(future):
while True: while True:
try: try:

View File

@ -119,12 +119,12 @@ def real_download(self, filename, info_dict):
self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
def is_ad_fragment_start(s): def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s)
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')))
def is_ad_fragment_end(s): def is_ad_fragment_end(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s)
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')))
fragments = [] fragments = []

View File

@ -15,7 +15,6 @@
ThrottledDownload, ThrottledDownload,
XAttrMetadataError, XAttrMetadataError,
XAttrUnavailableError, XAttrUnavailableError,
encodeFilename,
int_or_none, int_or_none,
parse_http_range, parse_http_range,
try_call, try_call,
@ -58,9 +57,8 @@ class DownloadContext(dict):
if self.params.get('continuedl', True): if self.params.get('continuedl', True):
# Establish possible resume length # Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)): if os.path.isfile(ctx.tmpfilename):
ctx.resume_len = os.path.getsize( ctx.resume_len = os.path.getsize(ctx.tmpfilename)
encodeFilename(ctx.tmpfilename))
ctx.is_resume = ctx.resume_len > 0 ctx.is_resume = ctx.resume_len > 0
@ -241,7 +239,7 @@ def retry(e):
ctx.resume_len = byte_counter ctx.resume_len = byte_counter
else: else:
try: try:
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) ctx.resume_len = os.path.getsize(ctx.tmpfilename)
except FileNotFoundError: except FileNotFoundError:
ctx.resume_len = 0 ctx.resume_len = 0
raise RetryDownload(e) raise RetryDownload(e)

View File

@ -8,7 +8,6 @@
Popen, Popen,
check_executable, check_executable,
encodeArgument, encodeArgument,
encodeFilename,
get_exe_version, get_exe_version,
) )
@ -179,7 +178,7 @@ def run_rtmpdump(args):
return False return False
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename)) prevsize = os.path.getsize(tmpfilename)
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
time.sleep(5.0) # This seems to be needed time.sleep(5.0) # This seems to be needed
args = [*basic_args, '--resume'] args = [*basic_args, '--resume']
@ -187,7 +186,7 @@ def run_rtmpdump(args):
args += ['--skip', '1'] args += ['--skip', '1']
args = [encodeArgument(a) for a in args] args = [encodeArgument(a) for a in args]
retval = run_rtmpdump(args) retval = run_rtmpdump(args)
cursize = os.path.getsize(encodeFilename(tmpfilename)) cursize = os.path.getsize(tmpfilename)
if prevsize == cursize and retval == RD_FAILED: if prevsize == cursize and retval == RD_FAILED:
break break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
@ -196,7 +195,7 @@ def run_rtmpdump(args):
retval = RD_SUCCESS retval = RD_SUCCESS
break break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({

View File

@ -2,7 +2,7 @@
import subprocess import subprocess
from .common import FileDownloader from .common import FileDownloader
from ..utils import check_executable, encodeFilename from ..utils import check_executable
class RtspFD(FileDownloader): class RtspFD(FileDownloader):
@ -26,7 +26,7 @@ def real_download(self, filename, info_dict):
retval = subprocess.call(args) retval = subprocess.call(args)
if retval == 0: if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.to_screen(f'\r[{args[0]}] {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({

View File

@ -123,8 +123,8 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
data, data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live)
or frag_index == 1 and try_refresh_replay_beginning or (frag_index == 1 and try_refresh_replay_beginning)
or parse_actions_replay) or parse_actions_replay)
return (True, *func(live_chat_continuation)) return (True, *func(live_chat_continuation))
except HTTPError as err: except HTTPError as err:

View File

@ -208,6 +208,10 @@
BandcampUserIE, BandcampUserIE,
BandcampWeeklyIE, BandcampWeeklyIE,
) )
from .bandlab import (
BandlabIE,
BandlabPlaylistIE,
)
from .bannedvideo import BannedVideoIE from .bannedvideo import BannedVideoIE
from .bbc import ( from .bbc import (
BBCIE, BBCIE,
@ -708,6 +712,7 @@
GabTVIE, GabTVIE,
) )
from .gaia import GaiaIE from .gaia import GaiaIE
from .gamedevtv import GameDevTVDashboardIE
from .gamejolt import ( from .gamejolt import (
GameJoltCommunityIE, GameJoltCommunityIE,
GameJoltGameIE, GameJoltGameIE,
@ -941,6 +946,10 @@
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,
@ -1130,12 +1139,6 @@
MicrosoftMediusIE, MicrosoftMediusIE,
) )
from .microsoftstream import MicrosoftStreamIE from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
MildomUserVodIE,
MildomVodIE,
)
from .minds import ( from .minds import (
MindsChannelIE, MindsChannelIE,
MindsGroupIE, MindsGroupIE,
@ -1155,6 +1158,7 @@
from .mixch import ( from .mixch import (
MixchArchiveIE, MixchArchiveIE,
MixchIE, MixchIE,
MixchMovieIE,
) )
from .mixcloud import ( from .mixcloud import (
MixcloudIE, MixcloudIE,
@ -1516,8 +1520,8 @@
from .philharmoniedeparis import PhilharmonieDeParisIE from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .pialive import PiaLiveIE
from .piapro import PiaproIE from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE
from .picarto import ( from .picarto import (
PicartoIE, PicartoIE,
PicartoVodIE, PicartoVodIE,
@ -1553,10 +1557,6 @@
) )
from .podchaser import PodchaserIE from .podchaser import PodchaserIE
from .podomatic import PodomaticIE from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
)
from .pokergo import ( from .pokergo import (
PokerGoCollectionIE, PokerGoCollectionIE,
PokerGoIE, PokerGoIE,
@ -1647,6 +1647,7 @@
RadioKapitalIE, RadioKapitalIE,
RadioKapitalShowIE, RadioKapitalShowIE,
) )
from .radioradicale import RadioRadicaleIE
from .radiozet import RadioZetPodcastIE from .radiozet import RadioZetPodcastIE
from .radlive import ( from .radlive import (
RadLiveChannelIE, RadLiveChannelIE,
@ -1938,9 +1939,7 @@
) )
from .spreaker import ( from .spreaker import (
SpreakerIE, SpreakerIE,
SpreakerPageIE,
SpreakerShowIE, SpreakerShowIE,
SpreakerShowPageIE,
) )
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE
@ -2251,6 +2250,10 @@
) )
from .ukcolumn import UkColumnIE from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE from .uktvplay import UKTVPlayIE
from .uliza import (
UlizaPlayerIE,
UlizaPortalIE,
)
from .umg import UMGDeIE from .umg import UMGDeIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .unity import UnityIE from .unity import UnityIE
@ -2279,10 +2282,6 @@
from .varzesh3 import Varzesh3IE from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veo import VeoIE from .veo import VeoIE
from .veoh import (
VeohIE,
VeohUserIE,
)
from .vesti import VestiIE from .vesti import VestiIE
from .vevo import ( from .vevo import (
VevoIE, VevoIE,

View File

@ -6,7 +6,6 @@
import io import io
import json import json
import re import re
import struct
import time import time
import urllib.parse import urllib.parse
import uuid import uuid
@ -18,10 +17,8 @@
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
bytes_to_intlist,
decode_base_n, decode_base_n,
int_or_none, int_or_none,
intlist_to_bytes,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url_query, update_url_query,
@ -72,15 +69,15 @@ def _get_videokey_from_ticket(self, ticket):
}) })
res = decode_base_n(license_response['k'], table=self._STRTABLE) res = decode_base_n(license_response['k'], table=self._STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) encvideokey = list(res.to_bytes(16, 'big'))
h = hmac.new( h = hmac.new(
binascii.unhexlify(self._HKEY), binascii.unhexlify(self._HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode(), (license_response['cid'] + self.ie._DEVICE_ID).encode(),
digestmod=hashlib.sha256) digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest()) enckey = list(h.digest())
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) return bytes(aes_ecb_decrypt(encvideokey, enckey))
class AbemaTVBaseIE(InfoExtractor): class AbemaTVBaseIE(InfoExtractor):

View File

@ -11,11 +11,9 @@
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
ass_subtitles_timecode, ass_subtitles_timecode,
bytes_to_intlist,
bytes_to_long, bytes_to_long,
float_or_none, float_or_none,
int_or_none, int_or_none,
intlist_to_bytes,
join_nonempty, join_nonempty,
long_to_bytes, long_to_bytes,
parse_iso8601, parse_iso8601,
@ -198,16 +196,16 @@ def _real_extract(self, url):
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
self._K = ''.join(random.choices('0123456789abcdef', k=16)) self._K = ''.join(random.choices('0123456789abcdef', k=16))
message = bytes_to_intlist(json.dumps({ message = list(json.dumps({
'k': self._K, 'k': self._K,
't': token, 't': token,
})) }).encode())
# Sometimes authentication fails for no good reason, retry with # Sometimes authentication fails for no good reason, retry with
# a different random padding # a different random padding
links_data = None links_data = None
for _ in range(3): for _ in range(3):
padded_message = intlist_to_bytes(pkcs1pad(message, 128)) padded_message = bytes(pkcs1pad(message, 128))
n, e = self._RSA_KEY n, e = self._RSA_KEY
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode() authorization = base64.b64encode(encrypted_message).decode()
@ -234,7 +232,7 @@ def _real_extract(self, url):
error = self._parse_json(e.cause.response.read(), video_id) error = self._parse_json(e.cause.response.read(), video_id)
message = error.get('message') message = error.get('message')
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
self.raise_geo_restricted(msg=message) self.raise_geo_restricted(msg=message)
raise ExtractorError(message) raise ExtractorError(message)
else: else:

View File

@ -1362,7 +1362,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
headers = self.geo_verification_headers() headers = self.geo_verification_headers()
headers.update(kwargs.get('headers', {})) headers.update(kwargs.get('headers') or {})
kwargs['headers'] = headers kwargs['headers'] = headers
return super()._download_webpage_handle( return super()._download_webpage_handle(
*args, **kwargs) *args, **kwargs)

View File

@ -66,6 +66,14 @@ def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
extensions={'legacy_ssl': True}), display_id, extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON') 'Downloading API JSON', 'Unable to download API JSON')
@staticmethod
def _fixup_thumb(thumb_url):
if not url_or_none(thumb_url):
return None
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
return [{'url': thumb_url, 'ext': 'jpg'}]
class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'soop' IE_NAME = 'soop'
@ -154,8 +162,8 @@ def _real_extract(self, url):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('bj_id', {str}), 'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('total_file_duration', {int_or_none(scale=1000)}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
}) })
entries = [] entries = []
@ -178,7 +186,7 @@ def _real_extract(self, url):
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})', 'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
'formats': formats, 'formats': formats,
**traverse_obj(file_element, { **traverse_obj(file_element, {
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('duration', {int_or_none(scale=1000)}),
'timestamp': ('file_start', {unified_timestamp}), 'timestamp': ('file_start', {unified_timestamp}),
}), }),
}) })
@ -226,19 +234,18 @@ def _real_extract(self, url):
return self.playlist_result(self._entries(data), video_id) return self.playlist_result(self._entries(data), video_id)
@staticmethod def _entries(self, data):
def _entries(data):
# 'files' is always a list with 1 element # 'files' is always a list with 1 element
yield from traverse_obj(data, ( yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch', 'data', lambda _, v: v['story_type'] == 'catch',
'catch_list', lambda _, v: v['files'][0]['file'], { 'catch_list', lambda _, v: v['files'][0]['file'], {
'id': ('files', 0, 'file_info_key', {str}), 'id': ('files', 0, 'file_info_key', {str}),
'url': ('files', 0, 'file', {url_or_none}), 'url': ('files', 0, 'file', {url_or_none}),
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}), 'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
'timestamp': ('write_timestamp', {int_or_none}), 'timestamp': ('write_timestamp', {int_or_none}),
})) }))

View File

@ -71,7 +71,7 @@ def media_url_or_none(path):
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}), 'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
'duration': ('clipLength', {int_or_none}), 'duration': ('clipLength', {int_or_none}),
'filesize': ('clipSizeBytes', {int_or_none}), 'filesize': ('clipSizeBytes', {int_or_none}),
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('createdDate', {int_or_none(scale=1000)}),
'uploader': ('username', {str}), 'uploader': ('username', {str}),
'uploader_id': ('user', '_id', {str}), 'uploader_id': ('user', '_id', {str}),
'view_count': ('views', {int_or_none}), 'view_count': ('views', {int_or_none}),

View File

@ -8,10 +8,8 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_encrypt from ..aes import aes_encrypt
from ..utils import ( from ..utils import (
bytes_to_intlist,
determine_ext, determine_ext,
int_or_none, int_or_none,
intlist_to_bytes,
join_nonempty, join_nonempty,
smuggle_url, smuggle_url,
strip_jsonp, strip_jsonp,
@ -234,8 +232,8 @@ def _get_video_json(self, access_key, video_id, extracted_token):
server_time = self._server_time(access_key, video_id) server_time = self._server_time(access_key, video_id)
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
auth_secret = intlist_to_bytes(aes_encrypt( auth_secret = bytes(aes_encrypt(
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) list(input_data[:64].encode()), list(self._AUTH_KEY)))
query = { query = {
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
'rtyp': 'fp', 'rtyp': 'fp',

View File

@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
}, },
}, },
], ],
}, {
# The reviewbody is None for one of the reviews; just need to extract data without crashing
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'info_dict': {
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'ext': 'mp3',
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
'creators': ['Grateful Dead'],
'duration': 338.31,
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
'display_id': 'gd95-04-02d1t04.shn',
'location': 'Pyramid Arena',
'uploader': 'jon@archive.org',
'album': '1995-04-02 - Pyramid Arena',
'upload_date': '20040519',
'track_number': 4,
'release_date': '19950402',
'timestamp': 1084927901,
},
}] }]
@staticmethod @staticmethod
@ -335,7 +355,7 @@ def _real_extract(self, url):
info['comments'].append({ info['comments'].append({
'id': review.get('review_id'), 'id': review.get('review_id'),
'author': review.get('reviewer'), 'author': review.get('reviewer'),
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'), 'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
'timestamp': unified_timestamp(review.get('createdate')), 'timestamp': unified_timestamp(review.get('createdate')),
'parent': 'root'}) 'parent': 'root'})

View File

@ -1,4 +1,3 @@
import functools
import json import json
import random import random
import re import re
@ -10,7 +9,6 @@
ExtractorError, ExtractorError,
extract_attributes, extract_attributes,
float_or_none, float_or_none,
get_element_html_by_id,
int_or_none, int_or_none,
parse_filesize, parse_filesize,
str_or_none, str_or_none,
@ -21,7 +19,7 @@
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import find_element, traverse_obj
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
@ -45,6 +43,8 @@ class BandcampIE(InfoExtractor):
'uploader_url': 'https://youtube-dl.bandcamp.com', 'uploader_url': 'https://youtube-dl.bandcamp.com',
'uploader_id': 'youtube-dl', 'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
'artists': ['youtube-dl "\'/\\ä↭'],
'album_artists': ['youtube-dl "\'/\\ä↭'],
}, },
'skip': 'There is a limit of 200 free downloads / month for the test song', 'skip': 'There is a limit of 200 free downloads / month for the test song',
}, { }, {
@ -271,6 +271,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311756226, 'timestamp': 1311756226,
'upload_date': '20110727', 'upload_date': '20110727',
'uploader': 'Blazo', 'uploader': 'Blazo',
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'album_artists': ['Blazo'],
'uploader_url': 'https://blazo.bandcamp.com',
'release_date': '20110727',
'release_timestamp': 1311724800.0,
'track': 'Intro',
'uploader_id': 'blazo',
'track_number': 1,
'album': 'Jazz Format Mixtape vol.1',
'artists': ['Blazo'],
'duration': 19.335,
'track_id': '1353101989',
}, },
}, },
{ {
@ -282,6 +294,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311757238, 'timestamp': 1311757238,
'upload_date': '20110727', 'upload_date': '20110727',
'uploader': 'Blazo', 'uploader': 'Blazo',
'track': 'Kero One - Keep It Alive (Blazo remix)',
'release_date': '20110727',
'track_id': '38097443',
'track_number': 2,
'duration': 181.467,
'uploader_url': 'https://blazo.bandcamp.com',
'album': 'Jazz Format Mixtape vol.1',
'uploader_id': 'blazo',
'album_artists': ['Blazo'],
'artists': ['Blazo'],
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'release_timestamp': 1311724800.0,
}, },
}, },
], ],
@ -289,6 +313,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'title': 'Jazz Format Mixtape vol.1', 'title': 'Jazz Format Mixtape vol.1',
'id': 'jazz-format-mixtape-vol-1', 'id': 'jazz-format-mixtape-vol-1',
'uploader_id': 'blazo', 'uploader_id': 'blazo',
'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
}, },
'params': { 'params': {
'playlistend': 2, 'playlistend': 2,
@ -363,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://bandcamp.com/?show=224', 'url': 'https://bandcamp.com/?show=224',
'md5': 'b00df799c733cf7e0c567ed187dea0fd', 'md5': '61acc9a002bed93986b91168aa3ab433',
'info_dict': { 'info_dict': {
'id': '224', 'id': '224',
'ext': 'opus', 'ext': 'mp3',
'title': 'BC Weekly April 4th 2017 - Magic Moments', 'title': 'BC Weekly April 4th 2017 - Magic Moments',
'description': 'md5:5d48150916e8e02d030623a48512c874', 'description': 'md5:5d48150916e8e02d030623a48512c874',
'duration': 5829.77, 'duration': 5829.77,
@ -376,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
'episode_id': '224', 'episode_id': '224',
}, },
'params': { 'params': {
'format': 'opus-lo', 'format': 'mp3-128',
}, },
}, { }, {
'url': 'https://bandcamp.com/?blah/blah@&show=228', 'url': 'https://bandcamp.com/?blah/blah@&show=228',
@ -484,7 +509,7 @@ def _yield_items(self, webpage):
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)) or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
yield from traverse_obj(webpage, ( yield from traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes}, {find_element(id='music-grid', html=True)}, {extract_attributes},
'data-client-items', {json.loads}, ..., 'page_url', {str})) 'data-client-items', {json.loads}, ..., 'page_url', {str}))
def _real_extract(self, url): def _real_extract(self, url):
@ -493,4 +518,4 @@ def _real_extract(self, url):
return self.playlist_from_matches( return self.playlist_from_matches(
self._yield_items(webpage), uploader, f'Discography of {uploader}', self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url)) getter=urljoin(url))

437
yt_dlp/extractor/bandlab.py Normal file
View File

@ -0,0 +1,437 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
format_field,
int_or_none,
parse_iso8601,
parse_qs,
truncate_string,
url_or_none,
)
from ..utils.traversal import traverse_obj, value
class BandlabBaseIE(InfoExtractor):
    # Shared helpers for the BandLab extractors: one API wrapper plus one
    # parser per post flavour (revision, track, video).

    def _call_api(self, endpoint, asset_id, **kwargs):
        """Download JSON for ``asset_id`` from the v1.3 API ``endpoint``.

        A ``headers`` keyword, when given, is merged over the default
        web-client headers; all remaining keyword arguments are forwarded
        to ``_download_json`` unchanged.
        """
        extra_headers = kwargs.pop('headers', None) or {}
        request_headers = {
            'accept': 'application/json',
            'referer': 'https://www.bandlab.com/',
            'x-client-id': 'BandLab-Web',
            'x-client-version': '10.1.124',
            # Caller-supplied headers win over the defaults above
            **extra_headers,
        }
        api_url = f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}'
        return self._download_json(
            api_url, asset_id, headers=request_headers, **kwargs)

    def _parse_revision(self, revision_data, url=None):
        """Build an audio-only info dict from a revision object.

        The result is always attributed to ``BandlabIE`` through the
        ``extractor``/``extractor_key`` fields.
        """
        metadata = traverse_obj(revision_data, {
            # NOTE(review): presumably prefers the passed-in url and falls back
            # to a revision URL built from the id - confirm `value` semantics
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'title': ('song', 'name', {str}),
            'track': ('song', 'name', {str}),
            'url': ('mixdown', 'file', {url_or_none}),
            'thumbnail': ('song', 'picture', 'url', {url_or_none}),
            'description': ('description', {str}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
            'duration': ('mixdown', 'duration', {float_or_none}),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'genres': ('genres', ..., 'name', {str}),
        })
        return {
            'vcodec': 'none',
            'media_type': 'revision',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
            **metadata,
        }

    def _parse_track(self, track_data, url=None):
        """Build an audio-only info dict from a post of type ``Track``.

        The audio URL and duration are read from the post's
        ``track.sample`` object.
        """
        metadata = traverse_obj(track_data, {
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'url': ('track', 'sample', 'audioUrl', {url_or_none}),
            'title': ('track', 'name', {str}),
            'track': ('track', 'name', {str}),
            'description': ('caption', {str}),
            'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('track', 'sample', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
        })
        return {
            'vcodec': 'none',
            'media_type': 'track',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
            **metadata,
        }

    def _parse_video(self, video_data, url=None):
        """Build an info dict from a post of type ``Video``.

        The title is derived from the caption: newlines become spaces and
        the text is then passed through ``truncate_string(left=50)``.
        """
        metadata = traverse_obj(video_data, {
            'id': ('id', {str}),
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'url': ('video', 'url', {url_or_none}),
            'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
            'description': ('caption', {str}),
            'thumbnail': ('video', 'picture', 'url', {url_or_none}),
            # Play and like counters are read from the nested video object,
            # the comment counter from the post itself
            'view_count': ('video', 'counters', 'plays', {int_or_none}),
            'like_count': ('video', 'counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('video', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
        })
        return {
            'media_type': 'video',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
            **metadata,
        }
class BandlabIE(BandlabBaseIE):
    # Extractor for single BandLab items: track/post/revision pages and the
    # single-item embed player.
    _VALID_URL = [
        # The dot in the host name is escaped so it only matches a literal "."
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171963,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # Same track as above but post URL
        'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171973,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # SharedKey Example
        'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
        'md5': '15174b57c44440e2a2008be9cae00250',
        'info_dict': {
            'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Other'],
            'uploader_id': 'user8353034818103753',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
            'timestamp': 1709625771,
            'track': 'PodcastMaerchen4b',
            'duration': 468.14,
            'view_count': int,
            'description': 'Podcast: Neues aus der Märchenwelt',
            'like_count': int,
            'upload_date': '20240305',
            'uploader': 'Erna Wageneder',
            'title': 'PodcastMaerchen4b',
            'media_type': 'revision',
        },
    }, {
        # Different Revision selected
        'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
        'md5': '74e055ef9325d63f37088772fbfe4454',
        'info_dict': {
            'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
            'ext': 'm4a',
            'timestamp': 1588273294,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
            'description': 'Final Revision.',
            'title': 'Replay ( Instrumental)',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
            'view_count': int,
            'comment_count': int,
            'track': 'Replay ( Instrumental)',
            'genres': ['Rock'],
            'upload_date': '20200430',
            'like_count': int,
            'duration': 279.43,
            'media_type': 'revision',
        },
    }, {
        # Video
        'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
        'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
        'info_dict': {
            'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
            'ext': 'mp4',
            'duration': 44.705,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
            'comment_count': int,
            'title': 'backing vocals',
            'uploader_id': 'marliashya',
            'uploader': 'auraa',
            'like_count': int,
            'description': 'backing vocals',
            'media_type': 'video',
        },
    }, {
        # Embed Example
        'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
        'info_dict': {
            'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Electronic'],
            'uploader': 'Charlie Henson',
            'timestamp': 1587328674,
            'upload_date': '20200419',
            'view_count': int,
            'track': 'Positronic Meltdown',
            'duration': 318.55,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
            'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
            'uploader_id': 'microfreaks',
            'title': 'Positronic Meltdown',
            'like_count': int,
            'media_type': 'revision',
        },
    }, {
        # Track without revisions available
        'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
        'md5': 'f05d68a3769952c2d9257c473e14c15f',
        'info_dict': {
            'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
            'ext': 'm4a',
            'track': 'insame',
            'like_count': int,
            'duration': 84.03,
            'title': 'insame',
            'view_count': int,
            'comment_count': int,
            'uploader': 'Sorakime',
            'uploader_id': 'sorakime',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
            'timestamp': 1691162128,
            'upload_date': '20230804',
            'media_type': 'track',
        },
    }, {
        'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://phantomluigi.github.io/',
        'info_dict': {
            'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
            'ext': 'm4a',
            'view_count': int,
            'upload_date': '20240913',
            'uploader_id': 'phantommusicofficial',
            'timestamp': 1726194897,
            'uploader': 'Phantom',
            'comment_count': int,
            'genres': ['Progresive Rock'],
            'description': 'md5:a38cd668f7a2843295ef284114f18429',
            'duration': 225.23,
            'like_count': int,
            'title': 'Vermilion Pt. 2 (Cover)',
            'track': 'Vermilion Pt. 2 (Cover)',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
            'media_type': 'revision',
        },
    }]

    def _real_extract(self, url):
        """Resolve a BandLab URL to a revision, track or video info dict.

        The revision id is taken from the ``revId``/``id`` query parameter or,
        for ``/revision/`` URLs, from the path. Otherwise the post is fetched
        and its embedded revision (or revision id) is used; posts without any
        revision are parsed directly according to their ``type``.

        Raises:
            ExtractorError: the post has no revision and its type is neither
                ``Video`` nor ``Track``.
        """
        display_id, url_type = self._match_valid_url(url).group('id', 'url_type')

        qs = parse_qs(url)
        revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
        if url_type == 'revision':
            revision_id = display_id

        revision_data = None
        if not revision_id:
            post_data = self._call_api(
                'posts', display_id, note='Downloading post data',
                query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))

            revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
            revision_data = traverse_obj(post_data, ('revision', {dict}))

            # No revision at all: the post itself carries the media
            if not revision_data and not revision_id:
                post_type = post_data.get('type')
                if post_type == 'Video':
                    return self._parse_video(post_data, url=url)
                if post_type == 'Track':
                    return self._parse_track(post_data, url=url)
                raise ExtractorError(f'Could not extract data for post type {post_type!r}')

        # Only an id is known so far: fetch the revision itself
        if not revision_data:
            revision_data = self._call_api(
                'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})

        return self._parse_revision(revision_data, url=url)
class BandlabPlaylistIE(BandlabBaseIE):
    # Extractor for BandLab albums and collections, including the
    # collection embed player.
    _VALID_URL = [
        # The dot in the host name is escaped so it only matches a literal "."
        r'https?://(?:www\.)?bandlab\.com/(?:[\w]+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'info_dict': {
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
            'release_date': '20221003',
            'title': 'Remnants',
            'album': 'Remnants',
            'like_count': int,
            'album_type': 'LP',
            'description': 'A collection of some feel good, rock hits.',
            'comment_count': int,
            'view_count': int,
            'id': '89b79ea6-de42-ed11-b495-00224845aac7',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
        'info_dict': {
            'id': '955102d4-1040-ef11-86c3-000d3a42581b',
            'timestamp': 1720762659,
            'view_count': int,
            'title': 'My Shit 🖤',
            'uploader_id': 'slytheband',
            'uploader': '𝓢𝓛𝓨',
            'upload_date': '20240712',
            'like_count': int,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
        },
        'playlist_count': 15,
    }, {
        # Embeds can contain both albums and collections with the same URL pattern. This is an album
        'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
        'info_dict': {
            'id': '12cc6f7f-951b-ee11-907c-00224844f303',
            'release_date': '20230706',
            'description': 'This is a collection of songs I created when I had an Amiga computer.',
            'view_count': int,
            'title': 'Mark Salud The Amiga Collection',
            'uploader_id': 'mssirmooth1962',
            'comment_count': int,
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
            'like_count': int,
            'uploader': 'Mark Salud',
            'album': 'Mark Salud The Amiga Collection',
            'album_type': 'LP',
        },
        'playlist_count': 24,
    }, {
        # Tracks without revision id
        'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
        'info_dict': {
            'like_count': int,
            'uploader_id': 'sorakime',
            'comment_count': int,
            'uploader': 'Sorakime',
            'view_count': int,
            'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
            'release_date': '20230812',
            'title': 'Art',
            'album': 'Art',
            'album_type': 'Album',
            'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'only_matching': True,
    }]

    def _entries(self, album_data):
        """Yield one info dict per post in ``album_data['posts']``.

        Posts without a truthy ``type`` are dropped by the traversal filter;
        posts of an unrecognized type are skipped with a warning.
        """
        for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
            post_type = post['type']
            if post_type == 'Revision':
                yield self._parse_revision(post.get('revision'))
            elif post_type == 'Track':
                yield self._parse_track(post)
            elif post_type == 'Video':
                yield self._parse_video(post)
            else:
                self.report_warning(f'Skipping unknown post type: "{post_type}"')

    def _real_extract(self, url):
        """Download album/collection data and return it as a playlist.

        Embed URLs do not say whether the id is an album or a collection, so
        both endpoints are tried in turn and the first response without an
        ``errorCode`` wins.

        Raises:
            ExtractorError: no endpoint returned usable data, or the last
                response carried an ``errorCode``.
        """
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')

        endpoints = {
            'albums': ['albums'],
            'collections': ['collections'],
            'embed': ['collections', 'albums'],
        }.get(playlist_type)
        playlist_data = None
        for endpoint in endpoints:
            playlist_data = self._call_api(
                endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
                fatal=False, expected_status=404)
            # NOTE(review): with fatal=False the download helper presumably
            # returns a non-dict (e.g. False) on failure - guard before .get()
            if isinstance(playlist_data, dict) and not playlist_data.get('errorCode'):
                playlist_type = endpoint
                break

        if not isinstance(playlist_data, dict):
            raise ExtractorError('Could not download playlist data')
        if error_code := playlist_data.get('errorCode'):
            raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')

        return self.playlist_result(
            self._entries(playlist_data), playlist_id,
            **traverse_obj(playlist_data, {
                'title': ('name', {str}),
                'description': ('description', {str}),
                'uploader': ('creator', 'name', {str}),
                'uploader_id': ('creator', 'username', {str}),
                'timestamp': ('createdOn', {parse_iso8601}),
                'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
                'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
                'like_count': ('counters', 'likes', {int_or_none}),
                'comment_count': ('counters', 'comments', {int_or_none}),
                'view_count': ('counters', 'plays', {int_or_none}),
            }),
            # Album-specific fields are only added when the albums endpoint answered
            **(traverse_obj(playlist_data, {
                'album': ('name', {str}),
                'album_type': ('type', {str}),
            }) if playlist_type == 'albums' else {}))

View File

@ -1284,9 +1284,9 @@ def parse_model(model):
**traverse_obj(model, { **traverse_obj(model, {
'title': ('title', {str}), 'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}), 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any), 'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
'duration': ('versions', 0, 'duration', {int}), 'duration': ('versions', 0, 'duration', {int}),
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
}), }),
} }
@ -1386,7 +1386,7 @@ def parse_media(media):
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), { formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}), 'url': ('url', {url_or_none}),
'ext': ('format', {str}), 'ext': ('format', {str}),
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}), 'tbr': ('bitrate', {int_or_none(scale=1000)}),
})) }))
if formats: if formats:
entry = { entry = {
@ -1398,7 +1398,7 @@ def parse_media(media):
'title': ('title', {str}), 'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}), 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, any), 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
}), }),
} }
done = True done = True
@ -1428,7 +1428,7 @@ def extract_all(pattern):
if not entry.get('timestamp'): if not entry.get('timestamp'):
entry['timestamp'] = traverse_obj(next_data, ( entry['timestamp'] = traverse_obj(next_data, (
..., 'contents', is_type('timestamp'), 'model', ..., 'contents', is_type('timestamp'), 'model',
'timestamp', {functools.partial(int_or_none, scale=1000)}, any)) 'timestamp', {int_or_none(scale=1000)}, any))
entries.append(entry) entries.append(entry)
return self.playlist_result( return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description) entries, playlist_id, playlist_title, playlist_description)

View File

@ -1,4 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -50,7 +49,7 @@ def _extract_base_info(data):
**traverse_obj(data, { **traverse_obj(data, {
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('duration', {int_or_none(scale=1000)}),
'timestamp': ('schedulingStart', {parse_iso8601}), 'timestamp': ('schedulingStart', {parse_iso8601}),
'season_number': 'seasonNumber', 'season_number': 'seasonNumber',
'episode_number': 'episodeNumber', 'episode_number': 'episodeNumber',

View File

@ -18,7 +18,6 @@
InAdvancePagedList, InAdvancePagedList,
OnDemandPagedList, OnDemandPagedList,
bool_or_none, bool_or_none,
clean_html,
determine_ext, determine_ext,
filter_dict, filter_dict,
float_or_none, float_or_none,
@ -63,7 +62,7 @@ def _check_missing_formats(self, play_info, formats):
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ') 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
if missing_formats: if missing_formats:
self.to_screen( self.to_screen(
f'Format(s) {missing_formats} are missing; you have to login or ' f'Format(s) {missing_formats} are missing; you have to '
f'become a premium member to download them. {self._login_hint()}') f'become a premium member to download them. {self._login_hint()}')
def extract_formats(self, play_info): def extract_formats(self, play_info):
@ -109,7 +108,7 @@ def extract_formats(self, play_info):
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), { fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}), 'url': ('url', {url_or_none}),
'duration': ('length', {functools.partial(float_or_none, scale=1000)}), 'duration': ('length', {float_or_none(scale=1000)}),
'filesize': ('size', {int_or_none}), 'filesize': ('size', {int_or_none}),
})) }))
if fragments: if fragments:
@ -124,7 +123,7 @@ def extract_formats(self, play_info):
'quality': ('quality', {int_or_none}), 'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}), 'format_id': ('quality', {str_or_none}),
'format_note': ('quality', {lambda x: format_names.get(x)}), 'format_note': ('quality', {lambda x: format_names.get(x)}),
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}), 'duration': ('timelength', {float_or_none(scale=1000)}),
}), }),
**parse_resolution(format_names.get(play_info.get('quality'))), **parse_resolution(format_names.get(play_info.get('quality'))),
}) })
@ -165,14 +164,18 @@ def _sign_wbi(self, params, video_id):
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
return params return params
def _download_playinfo(self, bvid, cid, headers=None, qn=None): def _download_playinfo(self, bvid, cid, headers=None, query=None):
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048} params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})}
if qn: if self.is_logged_in:
params['qn'] = qn params.pop('try_look', None)
if qn := params.get('qn'):
note = f'Downloading video format {qn} for cid {cid}'
else:
note = f'Downloading video formats for cid {cid}'
return self._download_json( return self._download_json(
'https://api.bilibili.com/x/player/wbi/playurl', bvid, 'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query=self._sign_wbi(params, bvid), headers=headers, query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
def json2srt(self, json_data): def json2srt(self, json_data):
srt_data = '' srt_data = ''
@ -191,7 +194,7 @@ def _get_subtitles(self, video_id, cid, aid=None):
} }
video_info = self._download_json( video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id, 'https://api.bilibili.com/x/player/wbi/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}', headers=self._HEADERS) note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
if traverse_obj(video_info, ('data', 'need_login_subtitle')): if traverse_obj(video_info, ('data', 'need_login_subtitle')):
@ -207,7 +210,7 @@ def _get_subtitles(self, video_id, cid, aid=None):
def _get_chapters(self, aid, cid): def _get_chapters(self, aid, cid):
chapters = aid and cid and self._download_json( chapters = aid and cid and self._download_json(
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid}, 'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid},
note='Extracting chapters', fatal=False, headers=self._HEADERS) note='Extracting chapters', fatal=False, headers=self._HEADERS)
return traverse_obj(chapters, ('data', 'view_points', ..., { return traverse_obj(chapters, ('data', 'view_points', ..., {
'title': 'content', 'title': 'content',
@ -286,7 +289,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
('data', 'interaction', 'graph_version', {int_or_none})) ('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items(): for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid, headers=headers) play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
yield { yield {
**metainfo, **metainfo,
'id': f'{video_id}_{cid}', 'id': f'{video_id}_{cid}',
@ -639,40 +642,29 @@ def _real_extract(self, url):
headers['Referer'] = url headers['Referer'] = url
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
self.raise_login_required()
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
raise ExtractorError(
'This video may be deleted or geo-restricted. '
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
is_festival = 'videoData' not in initial_state is_festival = 'videoData' not in initial_state
if is_festival: if is_festival:
video_data = initial_state['videoInfo'] video_data = initial_state['videoInfo']
else: else:
play_info_obj = self._search_json(
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
if not play_info_obj:
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
self.raise_login_required()
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
raise ExtractorError(
'This video may be deleted or geo-restricted. '
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
play_info = traverse_obj(play_info_obj, ('data', {dict}))
if not play_info:
if traverse_obj(play_info_obj, 'code') == 87007:
toast = get_element_by_class('tips-toast', webpage) or ''
msg = clean_html(
f'{get_element_by_class("belongs-to", toast) or ""}'
+ (get_element_by_class('level', toast) or ''))
raise ExtractorError(
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
raise ExtractorError('Failed to extract play info')
video_data = initial_state['videoData'] video_data = initial_state['videoData']
video_id, title = video_data['bvid'], video_data.get('title') video_id, title = video_data['bvid'], video_data.get('title')
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
page_list_json = not is_festival and traverse_obj( page_list_json = (not is_festival and traverse_obj(
self._download_json( self._download_json(
'https://api.bilibili.com/x/player/pagelist', video_id, 'https://api.bilibili.com/x/player/pagelist', video_id,
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'}, fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
note='Extracting videos in anthology', headers=headers), note='Extracting videos in anthology', headers=headers),
'data', expected_type=list) or [] 'data', expected_type=list)) or []
is_anthology = len(page_list_json) > 1 is_anthology = len(page_list_json) > 1
part_id = int_or_none(parse_qs(url).get('p', [None])[-1]) part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
@ -691,8 +683,6 @@ def _real_extract(self, url):
festival_info = {} festival_info = {}
if is_festival: if is_festival:
play_info = self._download_playinfo(video_id, cid, headers=headers)
festival_info = traverse_obj(initial_state, { festival_info = traverse_obj(initial_state, {
'uploader': ('videoInfo', 'upName'), 'uploader': ('videoInfo', 'upName'),
'uploader_id': ('videoInfo', 'upMid', {str_or_none}), 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
@ -727,62 +717,79 @@ def _real_extract(self, url):
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo, self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
__post_extractor=self.extract_comments(aid)) __post_extractor=self.extract_comments(aid))
else:
formats = self.extract_formats(play_info)
if not traverse_obj(play_info, ('dash')): play_info = None
# we only have legacy formats and need additional work if self.is_logged_in:
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) play_info = traverse_obj(
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
formats.extend(traverse_obj( ('data', {dict}))
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)), if not play_info:
lambda _, v: not has_qn(v['quality']))) play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
self._check_missing_formats(play_info, formats) formats = self.extract_formats(play_info)
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
if flv_formats and len(flv_formats) < len(formats):
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
if not self._configuration_arg('prefer_multi_flv'):
dropped_fmts = ', '.join(
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
if dropped_fmts:
self.to_screen(
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
else:
formats = traverse_obj(
# XXX: Filtering by extractor-arg is for testing purposes
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
) or [max(flv_formats, key=lambda x: x['quality'])]
if traverse_obj(formats, (0, 'fragments')): if video_data.get('is_upower_exclusive'):
# We have flv formats, which are individual short videos with their own timestamps and metainfo high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim="")}. {self._login_hint()}'
return { if not formats:
**metainfo, raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True)
'_type': 'multi_video', if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})):
'entries': [{ self.report_warning(
'id': f'{metainfo["id"]}_{idx}', f'This is a supporter-only video, only the preview will be extracted: {msg}',
'title': metainfo['title'], video_id=video_id)
'http_headers': metainfo['http_headers'],
'formats': [{ if not traverse_obj(play_info, 'dash'):
**fragment, # we only have legacy formats and need additional work
'format_id': formats[0].get('format_id'), has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
}], for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None, formats.extend(traverse_obj(
'__post_extractor': self.extract_comments(aid) if idx == 0 else None, self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})),
} for idx, fragment in enumerate(formats[0]['fragments'])], lambda _, v: not has_qn(v['quality'])))
'duration': float_or_none(play_info.get('timelength'), scale=1000), self._check_missing_formats(play_info, formats)
} flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
else: if flv_formats and len(flv_formats) < len(formats):
return { # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
**metainfo, if not self._configuration_arg('prefer_multi_flv'):
'formats': formats, dropped_fmts = ', '.join(
'duration': float_or_none(play_info.get('timelength'), scale=1000), f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
'chapters': self._get_chapters(aid, cid), formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
'subtitles': self.extract_subtitles(video_id, cid), if dropped_fmts:
'__post_extractor': self.extract_comments(aid), self.to_screen(
} f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
else:
formats = traverse_obj(
# XXX: Filtering by extractor-arg is for testing purposes
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
) or [max(flv_formats, key=lambda x: x['quality'])]
if traverse_obj(formats, (0, 'fragments')):
# We have flv formats, which are individual short videos with their own timestamps and metainfo
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
return {
**metainfo,
'_type': 'multi_video',
'entries': [{
'id': f'{metainfo["id"]}_{idx}',
'title': metainfo['title'],
'http_headers': metainfo['http_headers'],
'formats': [{
**fragment,
'format_id': formats[0].get('format_id'),
}],
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
} for idx, fragment in enumerate(formats[0]['fragments'])],
'duration': float_or_none(play_info.get('timelength'), scale=1000),
}
return {
**metainfo,
'formats': formats,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'__post_extractor': self.extract_comments(aid),
}
class BiliBiliBangumiIE(BilibiliBaseIE): class BiliBiliBangumiIE(BilibiliBaseIE):
@ -860,10 +867,16 @@ def _real_extract(self, url):
self.raise_login_required('This video is for premium members only') self.raise_login_required('This video is for premium members only')
headers['Referer'] = url headers['Referer'] = url
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, play_info = (
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, self._search_json(
headers=headers) r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id,
end_pattern='\n', default=None)
or self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
headers=headers))
premium_only = play_info.get('code') == -10403 premium_only = play_info.get('code') == -10403
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
@ -1585,7 +1598,7 @@ def _real_extract(self, url):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('upper', 'name', {str}), 'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}), 'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}), 'timestamp': ('ctime', {int_or_none}, filter),
'thumbnail': ('cover', {url_or_none}), 'thumbnail': ('cover', {url_or_none}),
})), })),
} }

View File

@ -382,7 +382,7 @@ def _extract_videos(self, root, video_id, embed_path='embed', record_path='recor
'age_limit': ( 'age_limit': (
'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any), 'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
'description': (*record_path, 'text', {str}, filter), 'description': (*record_path, 'text', {str}, filter),
'title': (*record_path, 'text', {lambda x: x.replace('\n', '')}, {truncate_string(left=50)}), 'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
}), }),
}) })
return entries return entries

View File

@ -1,35 +1,20 @@
import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
extract_attributes, extract_attributes,
get_element_text_and_html_by_tag,
get_elements_by_class,
join_nonempty, join_nonempty,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
urljoin, urljoin,
variadic,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import (
find_element,
traverse_obj,
def html_get_element(tag=None, cls=None): )
assert tag or cls, 'One of tag or class is required'
if cls:
func = functools.partial(get_elements_by_class, cls, tag=tag)
else:
func = functools.partial(get_element_text_and_html_by_tag, tag)
def html_get_element_wrapper(html):
return variadic(func(html))[0]
return html_get_element_wrapper
class BpbIE(InfoExtractor): class BpbIE(InfoExtractor):
@ -41,12 +26,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '297', 'id': '297',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Kooperative Berlin', 'creators': ['Kooperative Berlin'],
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6', 'description': r're:Joachim Gauck, .*\n\nKamera: .*',
'release_date': '20160115', 'release_date': '20150716',
'series': 'Interview auf dem Geschichtsforum 1989 | 2009', 'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'], 'tags': [],
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D', 'thumbnail': r're:https?://www\.bpb\.de/cache/images/7/297_teaser_16x9_1240\.jpg.*',
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR', 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -55,11 +40,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '522184', 'id': '522184',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)', 'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
'description': 'md5:f83c795ff8f825a69456a9e51fc15903', 'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
'release_date': '20230621', 'release_date': '20230621',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'], 'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB', 'tags': [],
'thumbnail': r're:https://www\.bpb\.de/cache/images/4/522184_teaser_16x9_1240\.png.*',
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c', 'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -68,11 +54,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '518789', 'id': '518789',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)', 'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8', 'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
'release_date': '20230302', 'release_date': '20230302',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'], 'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D', 'tags': [],
'thumbnail': r're:https://www\.bpb\.de/cache/images/9/518789_teaser_16x9_1240\.jpeg.*',
'title': 'md5:3e956f264bb501f6383f10495a401da4', 'title': 'md5:3e956f264bb501f6383f10495a401da4',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -84,12 +71,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '315813', 'id': '315813',
'ext': 'mp3', 'ext': 'mp3',
'creator': 'Axel Schröder', 'creators': ['Axel Schröder'],
'description': 'md5:eda9d1af34e5912efef5baf54fba4427', 'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
'release_date': '20200921', 'release_date': '20200921',
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager', 'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'], 'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94', 'thumbnail': r're:https://www\.bpb\.de/cache/images/3/315813_teaser_16x9_1240\.png.*',
'title': 'Folge 1: Eine Einführung', 'title': 'Folge 1: Eine Einführung',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -98,12 +85,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '517806', 'id': '517806',
'ext': 'mp3', 'ext': 'mp3',
'creator': 'Bundeszentrale für politische Bildung', 'creators': ['Bundeszentrale für politische Bildung'],
'description': 'md5:594689600e919912aade0b2871cc3fed', 'description': 'md5:594689600e919912aade0b2871cc3fed',
'release_date': '20230127', 'release_date': '20230127',
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"', 'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'], 'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0', 'thumbnail': r're:https://www\.bpb\.de/cache/images/6/517806_teaser_16x9_1240\.png.*',
'title': 'Die Weltanschauung der "Neuen Rechten"', 'title': 'Die Weltanschauung der "Neuen Rechten"',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -147,7 +134,7 @@ def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match})) title_result = traverse_obj(webpage, ({find_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False)) json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
return { return {
@ -156,15 +143,15 @@ def _real_extract(self, url):
# This metadata could be interpreted otherwise, but it fits "series" the most # This metadata could be interpreted otherwise, but it fits "series" the most
'series': traverse_obj(title_result, ('series', {str.strip})) or None, 'series': traverse_obj(title_result, ('series', {str.strip})) or None,
'description': join_nonempty(*traverse_obj(webpage, [( 'description': join_nonempty(*traverse_obj(webpage, [(
{html_get_element(cls='opening-intro')}, {find_element(cls='opening-intro')},
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}], [{find_element(tag='bpb-accordion-item')}, {find_element(cls='text-content')}],
), {clean_html}]), delim='\n\n') or None, ), {clean_html}]), delim='\n\n') or None,
'creator': self._html_search_meta('author', webpage), 'creators': traverse_obj(self._html_search_meta('author', webpage), all),
'uploader': self._html_search_meta('publisher', webpage), 'uploader': self._html_search_meta('publisher', webpage),
'release_date': unified_strdate(self._html_search_meta('date', webpage)), 'release_date': unified_strdate(self._html_search_meta('date', webpage)),
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)), 'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), { **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
'formats': (':sources', ..., {self._process_source}), 'formats': (':sources', ..., {self._process_source}),
'thumbnail': ('poster', {lambda x: urljoin(url, x)}), 'thumbnail': ('poster', {urljoin(url)}),
}), }),
} }

View File

@ -145,10 +145,9 @@ def _real_extract(self, url):
tp_metadata = self._download_json( tp_metadata = self._download_json(
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False) update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
seconds_or_none = lambda x: float_or_none(x, 1000)
chapters = traverse_obj(tp_metadata, ('chapters', ..., { chapters = traverse_obj(tp_metadata, ('chapters', ..., {
'start_time': ('startTime', {seconds_or_none}), 'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {seconds_or_none}), 'end_time': ('endTime', {float_or_none(scale=1000)}),
})) }))
# prune pointless single chapters that span the entire duration from short videos # prune pointless single chapters that span the entire duration from short videos
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')): if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
@ -168,8 +167,8 @@ def _real_extract(self, url):
**merge_dicts(traverse_obj(tp_metadata, { **merge_dicts(traverse_obj(tp_metadata, {
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'duration': ('duration', {seconds_or_none}), 'duration': ('duration', {float_or_none(scale=1000)}),
'timestamp': ('pubDate', {seconds_or_none}), 'timestamp': ('pubDate', {float_or_none(scale=1000)}),
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}), 'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}), 'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}), 'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),

View File

@ -31,6 +31,7 @@
update_url_query, update_url_query,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj
class BrightcoveLegacyIE(InfoExtractor): class BrightcoveLegacyIE(InfoExtractor):
@ -935,8 +936,8 @@ def extract_policy_key():
if content_type == 'playlist': if content_type == 'playlist':
return self.playlist_result( return self.playlist_result(
[self._parse_brightcove_metadata(vid, vid.get('id'), headers) (self._parse_brightcove_metadata(vid, vid['id'], headers)
for vid in json_data.get('videos', []) if vid.get('id')], for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))),
json_data.get('id'), json_data.get('name'), json_data.get('id'), json_data.get('name'),
json_data.get('description')) json_data.get('description'))

View File

@ -8,11 +8,13 @@
bug_reports_message, bug_reports_message,
clean_html, clean_html,
format_field, format_field,
get_element_text_and_html_by_tag,
int_or_none, int_or_none,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import (
find_element,
traverse_obj,
)
class BundestagIE(InfoExtractor): class BundestagIE(InfoExtractor):
@ -115,9 +117,8 @@ def _real_extract(self, url):
note='Downloading metadata overlay', fatal=False, note='Downloading metadata overlay', fatal=False,
), { ), {
'title': ( 'title': (
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0, {find_element(tag='h3')}, {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}), 'description': ({find_element(tag='p')}, {clean_html}),
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
})) }))
return result return result

View File

@ -53,7 +53,7 @@ def _real_extract(self, url):
'like_count': ('like_count', {int_or_none}), 'like_count': ('like_count', {int_or_none}),
'view_count': ('view_count', {int_or_none}), 'view_count': ('view_count', {int_or_none}),
'comment_count': ('comment_count', {int_or_none}), 'comment_count': ('comment_count', {int_or_none}),
'tags': ('tags', ..., {str}, {lambda x: x or None}), 'tags': ('tags', ..., {str}, filter),
'uploader': ('user', 'name', {str}), 'uploader': ('user', 'name', {str}),
'uploader_id': (((None, 'user'), 'username'), {str}, any), 'uploader_id': (((None, 'user'), 'username'), {str}, any),
'is_live': ('is_live', {bool}), 'is_live': ('is_live', {bool}),
@ -62,7 +62,7 @@ def _real_extract(self, url):
'title': ('broadcast_title', {str}), 'title': ('broadcast_title', {str}),
'duration': ('content_duration', {int_or_none}), 'duration': ('content_duration', {int_or_none}),
'timestamp': ('broadcast_start_time', {parse_iso8601}), 'timestamp': ('broadcast_start_time', {parse_iso8601}),
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}), 'thumbnail': ('preview_image_path', {urljoin(url)}),
}), }),
'age_limit': { 'age_limit': {
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system

View File

@ -453,8 +453,8 @@ def _real_extract(self, url):
chapters = traverse_obj(data, ( chapters = traverse_obj(data, (
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, { 'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}), 'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}), 'end_time': ('endTime', {float_or_none(scale=1000)}),
'title': ('name', {str}), 'title': ('name', {str}),
})) }))
# Filter out pointless single chapters with start_time==0 and no end_time # Filter out pointless single chapters with start_time==0 and no end_time
@ -465,8 +465,8 @@ def _real_extract(self, url):
**traverse_obj(data, { **traverse_obj(data, {
'title': ('title', {str}), 'title': ('title', {str}),
'description': ('description', {str.strip}), 'description': ('description', {str.strip}),
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}), 'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}), 'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
'media_type': ('media', 'clipType', {str}), 'media_type': ('media', 'clipType', {str}),
'series': ('showName', {str}), 'series': ('showName', {str}),
'season_number': ('media', 'season', {int_or_none}), 'season_number': ('media', 'season', {int_or_none}),

View File

@ -96,7 +96,7 @@ def get_subtitles(subs_url):
**traverse_obj(item, { **traverse_obj(item, {
'title': (None, ('fulltitle', 'title')), 'title': (None, ('fulltitle', 'title')),
'description': 'dek', 'description': 'dek',
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}), 'timestamp': ('timestamp', {float_or_none(scale=1000)}),
'duration': ('duration', {float_or_none}), 'duration': ('duration', {float_or_none}),
'subtitles': ('captions', {get_subtitles}), 'subtitles': ('captions', {get_subtitles}),
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}), 'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),

View File

@ -5,11 +5,12 @@
ExtractorError, ExtractorError,
lowercase_escape, lowercase_escape,
url_or_none, url_or_none,
urlencode_postdata,
) )
class ChaturbateIE(InfoExtractor): class ChaturbateIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/', 'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': { 'info_dict': {
@ -29,16 +30,58 @@ class ChaturbateIE(InfoExtractor):
}, { }, {
'url': 'https://en.chaturbate.com/siswet19/', 'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://chaturbate.eu/siswet19/',
'only_matching': True,
}, {
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
'only_matching': True,
}, {
'url': 'https://chaturbate.global/siswet19/',
'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ERROR_MAP = {
'offline': 'Room is currently offline',
'private': 'Room is currently in a private show',
'away': 'Performer is currently away',
'password protected': 'Room is password protected',
'hidden': 'Hidden session in progress',
}
def _real_extract(self, url): def _extract_from_api(self, video_id, tld):
video_id = self._match_id(url) response = self._download_json(
f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
data=urlencode_postdata({'room_slug': video_id}),
headers={
**self.geo_verification_headers(),
'X-Requested-With': 'XMLHttpRequest',
'Accept': 'application/json',
}, fatal=False, impersonate=True) or {}
m3u8_url = response.get('url')
if not m3u8_url:
status = response.get('room_status')
if error := self._ERROR_MAP.get(status):
raise ExtractorError(error, expected=True)
if status == 'public':
self.raise_geo_restricted()
self.report_warning(f'Got status "{status}" from API; falling back to webpage extraction')
return None
return {
'id': video_id,
'title': video_id,
'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
'is_live': True,
'age_limit': 18,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
}
def _extract_from_html(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.com/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers(), impersonate=True)
found_m3u8_urls = [] found_m3u8_urls = []
@ -76,8 +119,8 @@ def _real_extract(self, url):
webpage, 'error', group='error', default=None) webpage, 'error', group='error', default=None)
if not error: if not error:
if any(p in webpage for p in ( if any(p in webpage for p in (
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
error = self._ROOM_OFFLINE error = self._ERROR_MAP['offline']
if error: if error:
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
raise ExtractorError('Unable to find stream URL') raise ExtractorError('Unable to find stream URL')
@ -104,3 +147,7 @@ def _real_extract(self, url):
'is_live': True, 'is_live': True,
'formats': formats, 'formats': formats,
} }
def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)

View File

@ -1,5 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
UserNotLive, UserNotLive,
@ -77,7 +75,7 @@ def _real_extract(self, url):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
**traverse_obj(live_detail, { **traverse_obj(live_detail, {
'title': ('liveTitle', {str}), 'title': ('liveTitle', {str}),
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}), 'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
'concurrent_view_count': ('concurrentUserCount', {int_or_none}), 'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
'view_count': ('accumulateCount', {int_or_none}), 'view_count': ('accumulateCount', {int_or_none}),
'channel': ('channel', 'channelName', {str}), 'channel': ('channel', 'channelName', {str}),
@ -176,7 +174,7 @@ def _real_extract(self, url):
**traverse_obj(video_meta, { **traverse_obj(video_meta, {
'title': ('videoTitle', {str}), 'title': ('videoTitle', {str}),
'thumbnail': ('thumbnailImageUrl', {url_or_none}), 'thumbnail': ('thumbnailImageUrl', {url_or_none}),
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}), 'timestamp': ('publishDateAt', {float_or_none(scale=1000)}),
'view_count': ('readCount', {int_or_none}), 'view_count': ('readCount', {int_or_none}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'channel': ('channel', 'channelName', {str}), 'channel': ('channel', 'channelName', {str}),

View File

@ -3,6 +3,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
filter_dict, filter_dict,
float_or_none,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
smuggle_url, smuggle_url,
@ -85,7 +86,7 @@ def _real_extract(self, url):
'title': 'title', 'title': 'title',
'id': ('details', 'item_id'), 'id': ('details', 'item_id'),
'description': ('details', 'description'), 'description': ('details', 'description'),
'duration': ('duration', {lambda x: x / 1000}), 'duration': ('duration', {float_or_none(scale=1000)}),
'cast': ('details', 'cast', {lambda x: x.split(', ')}), 'cast': ('details', 'cast', {lambda x: x.split(', ')}),
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}), 'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
'season_number': ('details', 'season', {int_or_none}), 'season_number': ('details', 'season', {int_or_none}),

View File

@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})' _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [ _EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '0e8e040aec776862e1d632a699edf59e', 'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4', 'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e', 'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': 'eaef9dea5159cf968be84241b5cedfe7', 'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4', 'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7', 'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, domain = self._match_valid_url(url).group('id', 'domain')
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' if domain != 'bytehighway.net':
domain = 'cloudflarestream.com'
base_url = f'https://{domain}/{video_id}/' base_url = f'https://{domain}/{video_id}/'
if '.' in video_id: if '.' in video_id:
video_id = self._parse_json(base64.urlsafe_b64decode( video_id = self._parse_json(base64.urlsafe_b64decode(

View File

@ -1,4 +1,3 @@
import functools
import json import json
import re import re
@ -199,7 +198,7 @@ def _real_extract(self, url):
'timestamp': ('data-publish-date', {parse_iso8601}), 'timestamp': ('data-publish-date', {parse_iso8601}),
'thumbnail': ( 'thumbnail': (
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none}, 'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
{functools.partial(update_url, query='c=original')}), {update_url(query='c=original')}),
'display_id': 'data-video-slug', 'display_id': 'data-video-slug',
}), }),
**traverse_obj(video_data, { **traverse_obj(video_data, {

View File

@ -25,7 +25,6 @@
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_expanduser, compat_expanduser,
compat_os_name,
urllib_req_to_req, urllib_req_to_req,
) )
from ..cookies import LenientSimpleCookie from ..cookies import LenientSimpleCookie
@ -279,6 +278,7 @@ class InfoExtractor:
thumbnails: A list of dictionaries, with the following entries: thumbnails: A list of dictionaries, with the following entries:
* "id" (optional, string) - Thumbnail format ID * "id" (optional, string) - Thumbnail format ID
* "url" * "url"
* "ext" (optional, string) - actual image extension if not given in URL
* "preference" (optional, int) - quality of the image * "preference" (optional, int) - quality of the image
* "width" (optional, int) * "width" (optional, int)
* "height" (optional, int) * "height" (optional, int)
@ -1028,7 +1028,7 @@ def _request_dump_filename(self, url, video_id, data=None):
filename = sanitize_filename(f'{basen}.dump', restricted=True) filename = sanitize_filename(f'{basen}.dump', restricted=True)
# Working around MAX_PATH limitation on Windows (see # Working around MAX_PATH limitation on Windows (see
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
if compat_os_name == 'nt': if os.name == 'nt':
absfilepath = os.path.abspath(filename) absfilepath = os.path.abspath(filename)
if len(absfilepath) > 259: if len(absfilepath) > 259:
filename = fR'\\?\{absfilepath}' filename = fR'\\?\{absfilepath}'
@ -1578,7 +1578,9 @@ def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT):
if default is not NO_DEFAULT: if default is not NO_DEFAULT:
fatal = False fatal = False
for mobj in re.finditer(JSON_LD_RE, html): for mobj in re.finditer(JSON_LD_RE, html):
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal) json_ld_item = self._parse_json(
mobj.group('json_ld'), video_id, fatal=fatal,
errnote=False if default is not NO_DEFAULT else None)
for json_ld in variadic(json_ld_item): for json_ld in variadic(json_ld_item):
if isinstance(json_ld, dict): if isinstance(json_ld, dict):
yield json_ld yield json_ld
@ -1852,12 +1854,26 @@ def _check_formats(self, formats, video_id):
@staticmethod @staticmethod
def _remove_duplicate_formats(formats): def _remove_duplicate_formats(formats):
format_urls = set() seen_urls = set()
seen_fragment_urls = set()
unique_formats = [] unique_formats = []
for f in formats: for f in formats:
if f['url'] not in format_urls: fragments = f.get('fragments')
format_urls.add(f['url']) if callable(fragments):
unique_formats.append(f) unique_formats.append(f)
elif fragments:
fragment_urls = frozenset(
fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path'])
for fragment in fragments)
if fragment_urls not in seen_fragment_urls:
seen_fragment_urls.add(fragment_urls)
unique_formats.append(f)
elif f['url'] not in seen_urls:
seen_urls.add(f['url'])
unique_formats.append(f)
formats[:] = unique_formats formats[:] = unique_formats
def _is_valid_url(self, url, video_id, item='video', headers={}): def _is_valid_url(self, url, video_id, item='video', headers={}):
@ -3765,7 +3781,7 @@ def _merge_subtitles(cls, *dicts, target=None):
""" Merge subtitle dictionaries, language by language. """ """ Merge subtitle dictionaries, language by language. """
if target is None: if target is None:
target = {} target = {}
for d in dicts: for d in filter(None, dicts):
for lang, subs in d.items(): for lang, subs in d.items():
target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs) target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
return target return target
@ -3787,7 +3803,7 @@ def _cookies_passed(self):
def mark_watched(self, *args, **kwargs): def mark_watched(self, *args, **kwargs):
if not self.get_param('mark_watched', False): if not self.get_param('mark_watched', False):
return return
if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed: if (self.supports_login() and self._get_login_info()[0] is not None) or self._cookies_passed:
self._mark_watched(*args, **kwargs) self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs): def _mark_watched(self, *args, **kwargs):

View File

@ -12,6 +12,7 @@
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
try_get, try_get,
urljoin,
) )
@ -112,8 +113,7 @@ def _extract_series(self, url, webpage):
m_paths = re.finditer( m_paths = re.finditer(
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage) r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
paths = orderedSet(m.group(1) for m in m_paths) paths = orderedSet(m.group(1) for m in m_paths)
build_url = lambda path: urllib.parse.urljoin(base_url, path) entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title) return self.playlist_result(entries, playlist_title=title)
def _extract_video_params(self, webpage, display_id): def _extract_video_params(self, webpage, display_id):

View File

@ -456,7 +456,7 @@ def _transform_episode_response(data):
}), }),
}), }),
**traverse_obj(metadata, { **traverse_obj(metadata, {
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}), 'duration': ('duration_ms', {float_or_none(scale=1000)}),
'timestamp': ('upload_date', {parse_iso8601}), 'timestamp': ('upload_date', {parse_iso8601}),
'series': ('series_title', {str}), 'series': ('series_title', {str}),
'series_id': ('series_id', {str}), 'series_id': ('series_id', {str}),
@ -484,7 +484,7 @@ def _transform_movie_response(data):
}), }),
}), }),
**traverse_obj(metadata, { **traverse_obj(metadata, {
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}), 'duration': ('duration_ms', {float_or_none(scale=1000)}),
'age_limit': ('maturity_ratings', -1, {parse_age_limit}), 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
}), }),
} }

View File

@ -1,14 +1,27 @@
import json
import re import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import orderedSet from .ninecninemedia import NineCNineMediaIE
from ..utils import extract_attributes, orderedSet
from ..utils.traversal import find_element, traverse_obj
class CTVNewsIE(InfoExtractor): class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' _BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
_VIDEO_ID_RE = r'(?P<id>\d{5,})'
_PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
_VALID_URL = [
rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995', 'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '9b8624ba66351a23e0b6e1391971f9af', 'md5': 'b608f466c7fa24b9666c6439d766ab7e',
'info_dict': { 'info_dict': {
'id': '901995', 'id': '901995',
'ext': 'flv', 'ext': 'flv',
@ -16,6 +29,33 @@ class CTVNewsIE(InfoExtractor):
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284, 'timestamp': 1467286284,
'upload_date': '20160630', 'upload_date': '20160630',
'categories': [],
'season_number': 0,
'season': 'Season 0',
'tags': [],
'series': 'CTV News National | Archive | Stories 2',
'season_id': '57981',
'thumbnail': r're:https?://.*\.jpg$',
'duration': 764.631,
},
}, {
'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
'info_dict': {
'id': '3030933',
'ext': 'flv',
'title': 'Here’s what’s making news for Nov. 15',
'description': 'Here are the top stories we’re working on for CTV News at 11 for Nov. 15',
'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
'season_id': '58104',
'season_number': 0,
'tags': [],
'season': 'Season 0',
'categories': [],
'series': 'CTV News Barrie',
'upload_date': '20241116',
'duration': 42.943,
'timestamp': 1731722452,
}, },
}, { }, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
@ -31,6 +71,72 @@ class CTVNewsIE(InfoExtractor):
'id': '1.2876780', 'id': '1.2876780',
}, },
'playlist_mincount': 100, 'playlist_mincount': 100,
}, {
'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
'info_dict':
{
'id': '1.5736957',
},
'playlist_mincount': 6,
}, {
'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
'info_dict': {
'id': '2695026',
'ext': 'flv',
'season_id': '89852',
'series': 'From CTV News Channel',
'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
'season': '2023',
'title': 'Bank of Canada asks public about digital currency',
'categories': [],
'tags': [],
'upload_date': '20230526',
'season_number': 2023,
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'timestamp': 1685105157,
'duration': 253.553,
},
}, {
'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
'md5': '135cc592df607d29dddc931f1b756ae2',
'info_dict': {
'id': '582589',
'ext': 'flv',
'categories': [],
'timestamp': 1427906183,
'season_number': 0,
'duration': 125.559,
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'series': 'CTV News Stox',
'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
'title': 'Berlin Wall',
'season_id': '63817',
'season': 'Season 0',
'tags': [],
'upload_date': '20150401',
},
}, {
'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
'md5': 'a14c0603557decc6531260791c23cc5e',
'info_dict': {
'id': '3023759',
'ext': 'flv',
'season_number': 2024,
'timestamp': 1731798000,
'season': '2024',
'episode': 'Episode 125',
'description': 'CTV News Ottawa at Six',
'duration': 2712.076,
'episode_number': 125,
'upload_date': '20241116',
'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'categories': [],
'tags': [],
'series': 'CTV News Ottawa at Six',
'season_id': '92667',
},
}, { }, {
'url': 'http://www.ctvnews.ca/1.810401', 'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True, 'only_matching': True,
@ -42,29 +148,35 @@ class CTVNewsIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _ninecninemedia_url_result(self, clip_id):
return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
def ninecninemedia_url_result(clip_id): if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
return { page_id = mobj.group('id')
'_type': 'url_transparent',
'id': clip_id,
'url': f'9c9media:ctvnews_web:{clip_id}',
'ie_key': 'NineCNineMedia',
}
if page_id.isdigit(): if re.fullmatch(self._VIDEO_ID_RE, page_id):
return ninecninemedia_url_result(page_id) return self._ninecninemedia_url_result(page_id)
else:
webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={ webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
'ot': 'example.AjaxPageLayout.ot', 'ot': 'example.AjaxPageLayout.ot',
'maxItemsPerPage': 1000000, 'maxItemsPerPage': 1000000,
}) })
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( entries = [self._ninecninemedia_url_result(clip_id)
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
if not entries: if not entries:
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
if 'getAuthStates("' in webpage: if 'getAuthStates("' in webpage:
entries = [ninecninemedia_url_result(clip_id) for clip_id in entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')] self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
return self.playlist_result(entries, page_id) else:
entries = [
self._ninecninemedia_url_result(clip_id) for clip_id in
traverse_obj(webpage, (
{find_element(tag='jasper-player-container', html=True)},
{extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId', {str}))
]
return self.playlist_result(entries, page_id)

View File

@ -1,7 +1,4 @@
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import int_or_none from ..utils import int_or_none
@ -31,9 +28,6 @@ def _real_extract(self, url):
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id
# request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request
self._request_webpage(HEADRequest(
'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
movie_data = self._download_json( movie_data = self._download_json(
f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id) f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id)

View File

@ -1,3 +1,4 @@
import functools
import hashlib import hashlib
import re import re
import time import time
@ -51,6 +52,15 @@ class DacastVODIE(DacastBaseIE):
'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2', 'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { # /uspaes/ in hls_url
'url': 'https://iframe.dacast.com/vod/f9823fc6-faba-b98f-0d00-4a7b50a58c5b/348c5c84-b6af-4859-bb9d-1d01009c795b',
'info_dict': {
'id': '348c5c84-b6af-4859-bb9d-1d01009c795b',
'ext': 'mp4',
'title': 'pl1-edyta-rubas-211124.mp4',
'uploader_id': 'f9823fc6-faba-b98f-0d00-4a7b50a58c5b',
'thumbnail': 'https://universe-files.dacast.com/4d0bd042-a536-752d-fc34-ad2fa44bbcbb.png',
},
}] }]
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/', 'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
@ -74,6 +84,15 @@ class DacastVODIE(DacastBaseIE):
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}] }]
@functools.cached_property
def _usp_signing_secret(self):
player_js = self._download_webpage(
'https://player.dacast.com/js/player.js', None, 'Downloading player JS')
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
return self._search_regex(
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
def _real_extract(self, url): def _real_extract(self, url):
user_id, video_id = self._match_valid_url(url).group('user_id', 'id') user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'} query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
@ -94,10 +113,10 @@ def _real_extract(self, url):
if 'DRM_EXT' in hls_url: if 'DRM_EXT' in hls_url:
self.report_drm(video_id) self.report_drm(video_id)
elif '/uspaes/' in hls_url: elif '/uspaes/' in hls_url:
# From https://player.dacast.com/js/player.js # Ref: https://player.dacast.com/js/player.js
ts = int(time.time()) ts = int(time.time())
signature = hashlib.sha1( signature = hashlib.sha1(
f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex() f'{10413792000 - ts}{ts}{self._usp_signing_secret}'.encode()).digest().hex()
hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}' hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
for retry in self.RetryManager(): for retry in self.RetryManager():

View File

@ -261,6 +261,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'tags': [], 'tags': [],
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080',
}, },
}, { }, {
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj # https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
@ -288,6 +289,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'], 'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
}, },
}, {
# https://geo.dailymotion.com/player/xry80.html?video=x8vu47w
'url': 'https://www.metatube.com/en/videos/546765/This-frogs-decorates-Christmas-tree/',
'info_dict': {
'id': 'x8vu47w',
'ext': 'mp4',
'like_count': int,
'uploader': 'Metatube',
'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080',
'upload_date': '20240326',
'view_count': int,
'timestamp': 1711496732,
'age_limit': 0,
'uploader_id': 'x2xpy74',
'title': 'Está lindas ranitas ponen su arbolito',
'duration': 28,
'description': 'Que lindura',
'tags': [],
},
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
_COMMON_MEDIA_FIELDS = '''description _COMMON_MEDIA_FIELDS = '''description
@ -302,7 +322,7 @@ def _extract_embed_urls(cls, url, webpage):
yield from super()._extract_embed_urls(url, webpage) yield from super()._extract_embed_urls(url, webpage)
for mobj in re.finditer( for mobj in re.finditer(
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage): r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id') yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
for mobj in re.finditer( for mobj in re.finditer(
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage): r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
attrs = extract_attributes(mobj.group(0)) attrs = extract_attributes(mobj.group(0))

View File

@ -40,7 +40,7 @@ def _extract_episode_info(self, metadata, episode_slug, series_slug):
'id': ('content_id', {str}), 'id': ('content_id', {str}),
'title': ('display_title', {str}), 'title': ('display_title', {str}),
'episode': ('title', {str}), 'episode': ('title', {str}),
'series': ('show_name', {str}, {lambda x: x or None}), 'series': ('show_name', {str}, filter),
'series_id': ('catalog_id', {str}), 'series_id': ('catalog_id', {str}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'release_timestamp': ('release_date_uts', {int_or_none}), 'release_timestamp': ('release_date_uts', {int_or_none}),

View File

@ -1,7 +1,10 @@
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
jwt_decode_hs256,
parse_codecs, parse_codecs,
try_get, try_get,
url_or_none, url_or_none,
@ -13,9 +16,6 @@
class DigitalConcertHallIE(InfoExtractor): class DigitalConcertHallIE(InfoExtractor):
IE_DESC = 'DigitalConcertHall extractor' IE_DESC = 'DigitalConcertHall extractor'
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?' _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
_ACCESS_TOKEN = None
_NETRC_MACHINE = 'digitalconcerthall' _NETRC_MACHINE = 'digitalconcerthall'
_TESTS = [{ _TESTS = [{
'note': 'Playlist with only one video', 'note': 'Playlist with only one video',
@ -69,59 +69,157 @@ class DigitalConcertHallIE(InfoExtractor):
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'playlist_count': 1, 'playlist_count': 1,
}] }]
_LOGIN_HINT = ('Use --username token --password ACCESS_TOKEN where ACCESS_TOKEN '
'is the "access_token_production" from your browser local storage')
_REFRESH_HINT = 'or else use a "refresh_token" with --username refresh --password REFRESH_TOKEN'
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
_CLIENT_ID = 'dch.webapp'
_CLIENT_SECRET = '2ySLN+2Fwb'
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
_OAUTH_HEADERS = {
'Accept': 'application/json',
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'Origin': 'https://www.digitalconcerthall.com',
'Referer': 'https://www.digitalconcerthall.com/',
'User-Agent': _USER_AGENT,
}
_access_token = None
_access_token_expiry = 0
_refresh_token = None
def _perform_login(self, username, password): @property
login_token = self._download_json( def _access_token_is_expired(self):
self._OAUTH_URL, return self._access_token_expiry - 30 <= int(time.time())
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
def _set_access_token(self, value):
self._access_token = value
self._access_token_expiry = traverse_obj(value, ({jwt_decode_hs256}, 'exp', {int})) or 0
def _cache_tokens(self, /):
self.cache.store(self._NETRC_MACHINE, 'tokens', {
'access_token': self._access_token,
'refresh_token': self._refresh_token,
})
def _fetch_new_tokens(self, invalidate=False):
if invalidate:
self.report_warning('Access token has been invalidated')
self._set_access_token(None)
if not self._access_token_is_expired:
return
if not self._refresh_token:
self._set_access_token(None)
self._cache_tokens()
raise ExtractorError(
'Access token has expired or been invalidated. '
'Get a new "access_token_production" value from your browser '
f'and try again, {self._REFRESH_HINT}', expected=True)
# If we only have a refresh token, we need a temporary "initial token" for the refresh flow
bearer_token = self._access_token or self._download_json(
self._OAUTH_URL, None, 'Obtaining initial token', 'Unable to obtain initial token',
data=urlencode_postdata({
'affiliate': 'none', 'affiliate': 'none',
'grant_type': 'device', 'grant_type': 'device',
'device_vendor': 'unknown', 'device_vendor': 'unknown',
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio # device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio,
'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari', # but this is no longer effective since actual login is not possible anymore
'app_id': 'dch.webapp', 'device_model': 'unknown',
'app_id': self._CLIENT_ID,
'app_distributor': 'berlinphil', 'app_distributor': 'berlinphil',
'app_version': '1.84.0', 'app_version': '1.95.0',
'client_secret': '2ySLN+2Fwb', 'client_secret': self._CLIENT_SECRET,
}), headers={ }), headers=self._OAUTH_HEADERS)['access_token']
'Accept': 'application/json',
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'User-Agent': self._USER_AGENT,
})['access_token']
try: try:
login_response = self._download_json( response = self._download_json(
self._OAUTH_URL, self._OAUTH_URL, None, 'Refreshing token', 'Unable to refresh token',
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({ data=urlencode_postdata({
'grant_type': 'password', 'grant_type': 'refresh_token',
'username': username, 'refresh_token': self._refresh_token,
'password': password, 'client_id': self._CLIENT_ID,
'client_secret': self._CLIENT_SECRET,
}), headers={ }), headers={
'Accept': 'application/json', **self._OAUTH_HEADERS,
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8', 'Authorization': f'Bearer {bearer_token}',
'Referer': 'https://www.digitalconcerthall.com',
'Authorization': f'Bearer {login_token}',
'User-Agent': self._USER_AGENT,
}) })
except ExtractorError as error: except ExtractorError as e:
if isinstance(error.cause, HTTPError) and error.cause.status == 401: if isinstance(e.cause, HTTPError) and e.cause.status == 401:
raise ExtractorError('Invalid username or password', expected=True) self._set_access_token(None)
self._refresh_token = None
self._cache_tokens()
raise ExtractorError('Your tokens have been invalidated', expected=True)
raise raise
self._ACCESS_TOKEN = login_response['access_token']
self._set_access_token(response['access_token'])
if refresh_token := traverse_obj(response, ('refresh_token', {str})):
self.write_debug('New refresh token granted')
self._refresh_token = refresh_token
self._cache_tokens()
def _perform_login(self, username, password):
self.report_login()
if username == 'refresh':
self._refresh_token = password
self._fetch_new_tokens()
if username == 'token':
if not traverse_obj(password, {jwt_decode_hs256}):
raise ExtractorError(
f'The access token passed to yt-dlp is not valid. {self._LOGIN_HINT}', expected=True)
self._set_access_token(password)
self._cache_tokens()
if username in ('refresh', 'token'):
if self.get_param('cachedir') is not False:
token_type = 'access' if username == 'token' else 'refresh'
self.to_screen(f'Your {token_type} token has been cached to disk. To use the cached '
'token next time, pass --username cache along with any password')
return
if username != 'cache':
raise ExtractorError(
'Login with username and password is no longer supported '
f'for this site. {self._LOGIN_HINT}, {self._REFRESH_HINT}', expected=True)
# Try cached access_token
cached_tokens = self.cache.load(self._NETRC_MACHINE, 'tokens', default={})
self._set_access_token(cached_tokens.get('access_token'))
self._refresh_token = cached_tokens.get('refresh_token')
if not self._access_token_is_expired:
return
# Try cached refresh_token
self._fetch_new_tokens(invalidate=True)
def _real_initialize(self): def _real_initialize(self):
if not self._ACCESS_TOKEN: if not self._access_token:
self.raise_login_required(method='password') self.raise_login_required(
'All content on this site is only available for registered users. '
f'{self._LOGIN_HINT}, {self._REFRESH_HINT}', method=None)
def _entries(self, items, language, type_, **kwargs): def _entries(self, items, language, type_, **kwargs):
for item in items: for item in items:
video_id = item['id'] video_id = item['id']
stream_info = self._download_json(
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={ for should_retry in (True, False):
'Accept': 'application/json', self._fetch_new_tokens(invalidate=not should_retry)
'Authorization': f'Bearer {self._ACCESS_TOKEN}', try:
'Accept-Language': language, stream_info = self._download_json(
'User-Agent': self._USER_AGENT, self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
}) 'Accept': 'application/json',
'Authorization': f'Bearer {self._access_token}',
'Accept-Language': language,
'User-Agent': self._USER_AGENT,
})
break
except ExtractorError as error:
if should_retry and isinstance(error.cause, HTTPError) and error.cause.status == 401:
continue
raise
formats = [] formats = []
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
@ -157,7 +255,6 @@ def _real_extract(self, url):
'Accept': 'application/json', 'Accept': 'application/json',
'Accept-Language': language, 'Accept-Language': language,
'User-Agent': self._USER_AGENT, 'User-Agent': self._USER_AGENT,
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
}) })
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...)) videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))

View File

@ -48,32 +48,30 @@ def _real_extract(self, url):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
fn = urllib.parse.unquote(url_basename(url)) fn = urllib.parse.unquote(url_basename(url))
title = os.path.splitext(fn)[0] title = os.path.splitext(fn)[0]
password = self.get_param('videopassword') content_id = None
for part in self._yield_decoded_parts(webpage): for part in self._yield_decoded_parts(webpage):
if '/sm/password' in part: if '/sm/password' in part:
webpage = self._download_webpage( content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID')
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
break break
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required' if content_id:
or 'Enter the password for this link' in webpage): password = self.get_param('videopassword')
if password: if not password:
response = self._download_json(
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
data=urlencode_postdata({
'is_xhr': 'true',
't': self._get_cookies('https://www.dropbox.com')['t'].value,
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
'password': password,
'url': url,
}))
if response.get('status') != 'authed':
raise ExtractorError('Invalid password', expected=True)
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
raise ExtractorError('Password protected video, use --video-password <password>', expected=True) raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
response = self._download_json(
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
data=urlencode_postdata({
'is_xhr': 'true',
't': self._get_cookies('https://www.dropbox.com')['t'].value,
'content_id': content_id,
'password': password,
'url': update_url(url, scheme='', netloc=''),
}))
if response.get('status') != 'authed':
raise ExtractorError('Invalid password', expected=True)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
formats, subtitles = [], {} formats, subtitles = [], {}

View File

@ -5,15 +5,16 @@
get_element_text_and_html_by_tag, get_element_text_and_html_by_tag,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
parse_qs,
str_or_none, str_or_none,
try_call, try_call,
unified_timestamp, unified_timestamp,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import traverse_obj, value
class DuoplayIE(InfoExtractor): class DuoplayIE(InfoExtractor):
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?' _VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)(?:[/?#]|$)'
_TESTS = [{ _TESTS = [{
'note': 'Siberi võmm S02E12', 'note': 'Siberi võmm S02E12',
'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24', 'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
@ -34,15 +35,16 @@ class DuoplayIE(InfoExtractor):
'episode_number': 12, 'episode_number': 12,
'episode_id': '24', 'episode_id': '24',
}, },
'skip': 'No video found',
}, { }, {
'note': 'Empty title', 'note': 'Empty title',
'url': 'https://duoplay.ee/17/uhikarotid?ep=14', 'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
'md5': '6aca68be71112314738dd17cced7f8bf', 'md5': 'cba9f5dabf2582b224d80ac44fb80e47',
'info_dict': { 'info_dict': {
'id': '17_14', 'id': '17_14',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ühikarotid', 'title': 'Episode 14',
'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$', 'thumbnail': r're:https?://.+\.jpg',
'description': 'md5:4719b418e058c209def41d48b601276e', 'description': 'md5:4719b418e058c209def41d48b601276e',
'upload_date': '20100916', 'upload_date': '20100916',
'timestamp': 1284661800, 'timestamp': 1284661800,
@ -52,6 +54,8 @@ class DuoplayIE(InfoExtractor):
'season_number': 2, 'season_number': 2,
'episode_id': '14', 'episode_id': '14',
'release_year': 2010, 'release_year': 2010,
'episode': 'Episode 14',
'episode_number': 14,
}, },
}, { }, {
'note': 'Movie without expiry', 'note': 'Movie without expiry',
@ -68,10 +72,32 @@ class DuoplayIE(InfoExtractor):
'timestamp': 1671054000, 'timestamp': 1671054000,
'release_year': 2018, 'release_year': 2018,
}, },
'skip': 'No video found',
}, {
'note': 'Episode url without show name',
'url': 'https://duoplay.ee/9644?ep=185',
'md5': '63f324b4fe2dbd8194dca16a6d52184a',
'info_dict': {
'id': '9644_185',
'ext': 'mp4',
'title': 'Episode 185',
'thumbnail': r're:https?://.+\.jpg',
'description': 'md5:ed25ba4e9e5d54bc291a4a0cdd241467',
'upload_date': '20241120',
'timestamp': 1732077000,
'episode': 'Episode 63',
'episode_id': '185',
'episode_number': 63,
'season': 'Season 2',
'season_number': 2,
'series': 'Telehommik',
'series_id': '9644',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
telecast_id, episode = self._match_valid_url(url).group('id', 'ep') telecast_id = self._match_id(url)
episode = traverse_obj(parse_qs(url), ('ep', 0, {int_or_none}, {str_or_none}))
video_id = join_nonempty(telecast_id, episode, delim='_') video_id = join_nonempty(telecast_id, episode, delim='_')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_player = try_call(lambda: extract_attributes( video_player = try_call(lambda: extract_attributes(
@ -79,25 +105,33 @@ def _real_extract(self, url):
if not video_player or not video_player.get('manifest-url'): if not video_player or not video_player.get('manifest-url'):
raise ExtractorError('No video found', expected=True) raise ExtractorError('No video found', expected=True)
manifest_url = video_player['manifest-url']
session_token = self._download_json(
'https://sts.postimees.ee/session/register', video_id, 'Registering session',
'Unable to register session', headers={
'Accept': 'application/json',
'X-Original-URI': manifest_url,
})['session']
episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {} episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}
return { return {
'id': video_id, 'id': video_id,
'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'), 'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4', query={'s': session_token}),
**traverse_obj(episode_attr, { **traverse_obj(episode_attr, {
'title': 'title', 'title': ('title', {str}),
'description': 'synopsis', 'description': ('synopsis', {str}),
'thumbnail': ('images', 'original'), 'thumbnail': ('images', 'original'),
'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}), 'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
'cast': ('cast', {lambda x: x.split(', ')}), 'cast': ('cast', filter, {lambda x: x.split(', ')}),
'release_year': ('year', {int_or_none}), 'release_year': ('year', {int_or_none}),
}), }),
**(traverse_obj(episode_attr, { **(traverse_obj(episode_attr, {
'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))), 'title': (None, (('subtitle', {str}, filter), {value(f'Episode {episode}' if episode else None)})),
'series': 'title', 'series': ('title', {str}),
'series_id': ('telecast_id', {str_or_none}), 'series_id': ('telecast_id', {str_or_none}),
'season_number': ('season_id', {int_or_none}), 'season_number': ('season_id', {int_or_none}),
'episode': 'subtitle', 'episode': ('subtitle', {str}, filter),
'episode_number': ('episode_nr', {int_or_none}), 'episode_number': ('episode_nr', {int_or_none}),
'episode_id': ('episode_id', {str_or_none}), 'episode_id': ('episode_id', {str_or_none}),
}, get_all=False) if episode_attr.get('category') != 'movies' else {}), }, get_all=False) if episode_attr.get('category') != 'movies' else {}),

View File

@ -162,7 +162,7 @@ def _real_extract(self, url):
items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage) items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
if items: if items:
return self.playlist_result( return self.playlist_result(
[self._parse_video_metadata(i, video_id, timestamp) for i in items], (self._parse_video_metadata(i, video_id, timestamp) for i in items),
video_id, self._html_search_meta('twitter:title', webpage)) video_id, self._html_search_meta('twitter:title', webpage))
item = self._search_regex( item = self._search_regex(

View File

@ -207,7 +207,7 @@ def _real_extract(self, url):
**traverse_obj(data, { **traverse_obj(data, {
'title': ('heading', {str}), 'title': ('heading', {str}),
'alt_title': ('subHeading', {str}), 'alt_title': ('subHeading', {str}),
'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}), 'description': (('lead', 'body'), {clean_html}, filter),
'timestamp': ('created', {int_or_none}), 'timestamp': ('created', {int_or_none}),
'modified_timestamp': ('updated', {int_or_none}), 'modified_timestamp': ('updated', {int_or_none}),
'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}), 'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),

View File

@ -50,7 +50,7 @@ class FacebookIE(InfoExtractor):
[^/]+/videos/(?:[^/]+/)?| [^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/| [^/]+/posts/|
events/(?:[^/]+/)?| events/(?:[^/]+/)?|
groups/[^/]+/(?:permalink|posts)/| groups/[^/]+/(?:permalink|posts)/(?:[\da-f]+/)?|
watchparty/ watchparty/
)| )|
facebook: facebook:
@ -410,6 +410,9 @@ class FacebookIE(InfoExtractor):
'uploader': 'Comitato Liberi Pensatori', 'uploader': 'Comitato Liberi Pensatori',
'uploader_id': '100065709540881', 'uploader_id': '100065709540881',
}, },
}, {
'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl',
'only_matching': True,
}] }]
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)' _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
_api_config = { _api_config = {
@ -563,13 +566,13 @@ def extract_from_jsmods_instances(js_data):
return extract_video_data(try_get( return extract_video_data(try_get(
js_data, lambda x: x['jsmods']['instances'], list) or []) js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats): def extract_dash_manifest(vid_data, formats, mpd_url=None):
dash_manifest = traverse_obj( dash_manifest = traverse_obj(
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str) vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
if dash_manifest: if dash_manifest:
formats.extend(self._parse_mpd_formats( formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)), compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
mpd_url=url_or_none(video.get('dash_manifest_url')))) mpd_url=url_or_none(vid_data.get('dash_manifest_url')) or mpd_url))
def process_formats(info): def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around # Downloads with browser's User-Agent are rate limited. Working around
@ -619,9 +622,12 @@ def parse_graphql_video(video):
video = video['creation_story'] video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner')) video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info) video.update(reel_info)
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
formats = [] formats = []
q = qualities(['sd', 'hd']) q = qualities(['sd', 'hd'])
# Legacy formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'), ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')): ('browser_native_sd_url', 'sd')):
@ -629,7 +635,7 @@ def parse_graphql_video(video):
if not playable_url: if not playable_url:
continue continue
if determine_ext(playable_url) == 'mpd': if determine_ext(playable_url) == 'mpd':
formats.extend(self._extract_mpd_formats(playable_url, video_id)) formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
else: else:
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
@ -638,6 +644,28 @@ def parse_graphql_video(video):
'url': playable_url, 'url': playable_url,
}) })
extract_dash_manifest(fmt_data, formats) extract_dash_manifest(fmt_data, formats)
# New videoDeliveryResponse formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
for idx, dash_manifest in enumerate(dash_manifests):
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
if not dash_manifests:
# Only extract from MPD URLs if the manifests are not already provided
for mpd_url in mpd_urls:
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
formats.append({
'format_id': format_id,
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': prog_fmt['progressive_url'],
})
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
if not formats: if not formats:
# Do not append false positive entry w/o any formats # Do not append false positive entry w/o any formats
return return

View File

@ -193,9 +193,9 @@ def _real_extract(self, url):
for lang, version, fmt in self._get_experiences(episode): for lang, version, fmt in self._get_experiences(episode):
experience_id = str(fmt['experienceId']) experience_id = str(fmt['experienceId'])
if (only_initial_experience and experience_id != initial_experience_id if ((only_initial_experience and experience_id != initial_experience_id)
or requested_languages and lang.lower() not in requested_languages or (requested_languages and lang.lower() not in requested_languages)
or requested_versions and version.lower() not in requested_versions): or (requested_versions and version.lower() not in requested_versions)):
continue continue
thumbnails.append({'url': fmt.get('poster')}) thumbnails.append({'url': fmt.get('poster')})
duration = max(duration, fmt.get('duration', 0)) duration = max(duration, fmt.get('duration', 0))

View File

@ -0,0 +1,141 @@
import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
join_nonempty,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class GameDevTVDashboardIE(InfoExtractor):
    """Extractor for GameDev.tv dashboard course pages.

    A URL carrying only a course id is returned as a playlist of the course's
    lectures; a URL that also carries a lecture id returns that single lecture.
    """

    _VALID_URL = r'https?://(?:www\.)?gamedev\.tv/dashboard/courses/(?P<course_id>\d+)(?:/(?P<lecture_id>\d+))?'
    _NETRC_MACHINE = 'gamedevtv'
    _TESTS = [{
        'url': 'https://www.gamedev.tv/dashboard/courses/25',
        'info_dict': {
            'id': '25',
            'title': 'Complete Blender Creator 3: Learn 3D Modelling for Beginners',
            'tags': ['blender', 'course', 'all', 'box modelling', 'sculpting'],
            'categories': ['Blender', '3D Art'],
            'thumbnail': 'https://gamedev-files.b-cdn.net/courses/qisc9pmu1jdc.jpg',
            'upload_date': '20220516',
            'timestamp': 1652694420,
            'modified_date': '20241027',
            'modified_timestamp': 1730049658,
        },
        'playlist_count': 100,
    }, {
        'url': 'https://www.gamedev.tv/dashboard/courses/63/2279',
        'info_dict': {
            'id': 'df04f4d8-68a4-4756-a71b-9ca9446c3a01',
            'ext': 'mp4',
            'modified_timestamp': 1701695752,
            'upload_date': '20230504',
            'episode': 'MagicaVoxel Community Course Introduction',
            'series_id': '63',
            'title': 'MagicaVoxel Community Course Introduction',
            'timestamp': 1683195397,
            'modified_date': '20231204',
            'categories': ['3D Art', 'MagicaVoxel'],
            'season': 'MagicaVoxel Community Course',
            'tags': ['MagicaVoxel', 'all', 'course'],
            'series': 'MagicaVoxel 3D Art Mini Course',
            'duration': 1405,
            'episode_number': 1,
            'season_number': 1,
            'season_id': '219',
            'description': 'md5:a378738c5bbec1c785d76c067652d650',
            'display_id': '63-219-2279',
            'alt_title': '1_CC_MVX MagicaVoxel Community Course Introduction.mp4',
            'thumbnail': 'https://vz-23691c65-6fa.b-cdn.net/df04f4d8-68a4-4756-a71b-9ca9446c3a01/thumbnail.jpg',
        },
    }]
    # Headers sent with course API requests; _perform_login() stores the
    # 'Authorization' value in this (class-level) dict
    _API_HEADERS = {}

    def _perform_login(self, username, password):
        """Log in through the students/login endpoint and keep the returned token.

        An HTTP 401 from the endpoint is reported as an expected
        'Invalid username/password' error; any other failure is re-raised as is.
        """
        credentials = json.dumps({
            'email': username,
            'password': password,
            'cart_items': [],
        }).encode()

        try:
            auth = self._download_json(
                'https://api.gamedev.tv/api/students/login', None, 'Logging in',
                headers={'Content-Type': 'application/json'}, data=credentials)
        except ExtractorError as e:
            # Only a 401 is translated into a credentials error
            if not isinstance(e.cause, HTTPError) or e.cause.status != 401:
                raise
            raise ExtractorError('Invalid username/password', expected=True)

        self._API_HEADERS['Authorization'] = f'{auth["token_type"]} {auth["access_token"]}'

    def _real_initialize(self):
        """Demand a password login when no Authorization header has been stored."""
        if self._API_HEADERS.get('Authorization'):
            return
        self.raise_login_required(
            'This content is only available with purchase', method='password')

    def _entries(self, data, course_id, course_info, selected_lecture):
        """Yield an info dict for every lecture of the course that has a valid playlist URL.

        When `selected_lecture` is set, lectures whose stringified id differs
        from it are skipped.
        """
        for section in traverse_obj(data, ('sections', ..., {dict})):
            season_info = traverse_obj(section, {
                'season_id': ('id', {str_or_none}),
                'season': ('title', {str}),
                'season_number': ('order', {int_or_none}),
            })
            playable_lectures = traverse_obj(
                section, ('lectures', lambda _, v: url_or_none(v['video']['playListUrl'])))

            for lecture in playable_lectures:
                lecture_id = lecture.get('id')
                if selected_lecture and str(lecture_id) != selected_lecture:
                    continue

                display_id = join_nonempty(course_id, season_info.get('season_id'), lecture_id)
                playlist_url = lecture['video']['playListUrl']
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    playlist_url, display_id, 'mp4', m3u8_id='hls')

                yield {
                    **course_info,
                    **season_info,
                    # Replaced by the video GUID below whenever the lecture provides one
                    'id': display_id,
                    'display_id': display_id,
                    'formats': formats,
                    'subtitles': subtitles,
                    'series': course_info.get('title'),
                    'series_id': course_id,
                    **traverse_obj(lecture, {
                        'id': ('video', 'guid', {str}),
                        'title': ('title', {str}),
                        'alt_title': ('video', 'title', {str}),
                        'description': ('description', {clean_html}),
                        'episode': ('title', {str}),
                        'episode_number': ('order', {int_or_none}),
                        'duration': ('video', 'duration_in_sec', {int_or_none}),
                        'timestamp': ('video', 'created_at', {parse_iso8601}),
                        'modified_timestamp': ('video', 'updated_at', {parse_iso8601}),
                        'thumbnail': ('video', 'thumbnailUrl', {url_or_none}),
                    }),
                }

    def _real_extract(self, url):
        """Fetch the course data and return either the whole course or one lecture."""
        course_id, lecture_id = self._match_valid_url(url).group('course_id', 'lecture_id')
        data = self._download_json(
            f'https://api.gamedev.tv/api/courses/my/{course_id}', course_id,
            headers=self._API_HEADERS)['data']

        course_info = traverse_obj(data, {
            'title': ('title', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'categories': ('categories', ..., 'title', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('image', {url_or_none}),
        })

        entries = self._entries(data, course_id, course_info, lecture_id)
        if not lecture_id:
            return self.playlist_result(entries, course_id, **course_info)

        # With a lecture id, _entries() only yields matching lectures; take the first
        lecture = next(entries, None)
        if lecture:
            return lecture
        raise ExtractorError('Lecture not found')

View File

@ -5,56 +5,63 @@
import hmac import hmac
import json import json
import os import os
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
js_to_json,
remove_end,
traverse_obj, traverse_obj,
unescapeHTML,
) )
class GoPlayIE(InfoExtractor): class GoPlayIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)' _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
_NETRC_MACHINE = 'goplay' _NETRC_MACHINE = 'goplay'
_TESTS = [{ _TESTS = [{
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
'info_dict': { 'info_dict': {
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', 'id': '2baa4560-87a0-421b-bffc-359914e3c387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S3 - Aflevering 2', 'title': 'S22 - Aflevering 1',
'series': 'De Container Cup', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
'season': 'Season 3', 'series': 'De Slimste Mens ter Wereld',
'season_number': 3, 'episode': 'Episode 1',
'episode': 'Episode 2', 'season_number': 22,
'episode_number': 2, 'episode_number': 1,
'season': 'Season 22',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/1917',
'info_dict': { 'info_dict': {
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
'ext': 'mp4', 'ext': 'mp4',
'title': 'A Family for the Holidays', 'title': '1917',
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': { 'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S11 - Aflevering 1', 'title': 'S11 - Aflevering 1',
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
'episode': 'Episode 1', 'episode': 'Episode 1',
'series': 'De Mol', 'series': 'De Mol',
'season_number': 11, 'season_number': 11,
'episode_number': 1, 'episode_number': 1,
'season': 'Season 11', 'season': 'Season 11',
}, },
'params': { 'params': {'skip_download': True},
'skip_download': True,
},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}] }]
@ -69,27 +76,42 @@ def _real_initialize(self):
if not self._id_token: if not self._id_token:
raise self.raise_login_required(method='password') raise self.raise_login_required(method='password')
def _real_extract(self, url): def _find_json(self, s):
url, display_id = self._match_valid_url(url).group(0, 'display_id') return self._search_json(
webpage = self._download_webpage(url, display_id) r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
movie = video_data.get('movie') def _real_extract(self, url):
if movie: display_id = self._match_id(url)
video_id = movie['videoUuid'] webpage = self._download_webpage(url, display_id)
info_dict = {
'title': movie.get('title'), nextjs_data = traverse_obj(
} re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
else: (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False) meta = traverse_obj(nextjs_data, (
video_id = episode['videoUuid'] ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
info_dict = {
'title': episode.get('episodeTitle'), video_id = meta['uuid']
'series': traverse_obj(episode, ('program', 'title')), info_dict = traverse_obj(meta, {
'season_number': episode.get('seasonNumber'), 'title': ('title', {str}),
'episode_number': episode.get('episodeNumber'), 'description': ('description', {str.strip}),
} })
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
episode_data = traverse_obj(
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
if not episode_data:
continue
episode_title = traverse_obj(
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
info_dict.update({
'title': episode_title or info_dict.get('title'),
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
})
break
api = self._download_json( api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',

View File

@ -1,4 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -63,7 +62,7 @@ def _real_extract(self, url):
'url': ('podcast_raw_url', {url_or_none}), 'url': ('podcast_raw_url', {url_or_none}),
'thumbnail': ('image', {url_or_none}), 'thumbnail': ('image', {url_or_none}),
'timestamp': ('timestamp', {int_or_none}), 'timestamp': ('timestamp', {int_or_none}),
'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}), 'duration': ('milliseconds', {float_or_none(scale=1000)}),
'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}), 'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
}), }),
} }

View File

@ -254,7 +254,7 @@ def _real_extract(self, url):
class InstagramIE(InstagramBaseIE): class InstagramIE(InstagramBaseIE):
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))' _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/(?!share/)[^/?#]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1'] _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
_TESTS = [{ _TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',

View File

@ -326,11 +326,11 @@ def _real_extract(self, url):
# fallback metadata # fallback metadata
'title': ('name', {str}), 'title': ('name', {str}),
'description': ('fullSynopsis', {str}), 'description': ('fullSynopsis', {str}),
'series': ('show', 'name', {str}, {lambda x: x or None}), 'series': ('show', 'name', {str}, filter),
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}), 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}), 'season_number': ('episode', 'season', {int_or_none}, filter),
'episode': ('fullTitle', {str}), 'episode': ('fullTitle', {str}),
'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}), 'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
'age_limit': ('ageNemonic', {parse_age_limit}), 'age_limit': ('ageNemonic', {parse_age_limit}),
'duration': ('totalDuration', {float_or_none}), 'duration': ('totalDuration', {float_or_none}),
'thumbnail': ('images', {url_or_none}), 'thumbnail': ('images', {url_or_none}),
@ -338,10 +338,10 @@ def _real_extract(self, url):
**traverse_obj(metadata, ('result', 0, { **traverse_obj(metadata, ('result', 0, {
'title': ('fullTitle', {str}), 'title': ('fullTitle', {str}),
'description': ('fullSynopsis', {str}), 'description': ('fullSynopsis', {str}),
'series': ('showName', {str}, {lambda x: x or None}), 'series': ('showName', {str}, filter),
'season': ('seasonName', {str}, {lambda x: x or None}), 'season': ('seasonName', {str}, filter),
'season_number': ('season', {int_or_none}), 'season_number': ('season', {int_or_none}),
'season_id': ('seasonId', {str}, {lambda x: x or None}), 'season_id': ('seasonId', {str}, filter),
'episode': ('fullTitle', {str}), 'episode': ('fullTitle', {str}),
'episode_number': ('episode', {int_or_none}), 'episode_number': ('episode', {int_or_none}),
'timestamp': ('uploadTime', {int_or_none}), 'timestamp': ('uploadTime', {int_or_none}),

160
yt_dlp/extractor/kenh14.py Normal file
View File

@ -0,0 +1,160 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
int_or_none,
parse_duration,
parse_iso8601,
remove_start,
strip_or_none,
unescapeHTML,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class Kenh14VideoIE(InfoExtractor):
    """Extractor for single video pages on video.kenh14.vn."""

    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's VideoStream element and the related JSON endpoints.

        The direct file URL is always offered as the 'http' format; HLS and
        DASH formats are added when the per-video JSON lists valid URLs for them.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_html = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(player_html or '')
        # Scheme-less location of the media file, as read from the element's data-vid attribute
        direct_url = attrs['data-vid']

        file_name = remove_start(direct_url, 'kenh14cdn.com/')
        metadata = self._download_json(
            f'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={file_name}',
            video_id, fatal=False)

        formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
        subtitles = {}
        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)

        hls_url = traverse_obj(video_data, ('hls', {url_or_none}))
        if hls_url:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        dash_url = traverse_obj(video_data, ('mpd', {url_or_none}))
        if dash_url:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                dash_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subtitles, target=subtitles)

        api_info = traverse_obj(metadata, {
            'duration': ('duration', {parse_duration}),
            'uploader': ('author', {strip_or_none}),
            'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
            'view_count': ('views', {int_or_none}),
        })
        # API title first, then OpenGraph, then the on-page title element
        title = (
            traverse_obj(metadata, ('title', {strip_or_none}))
            or clean_html(self._og_search_title(webpage))
            or clean_html(get_element_by_class('vdbw-title', webpage)))
        description = (
            clean_html(self._og_search_description(webpage))
            or clean_html(get_element_by_class('vdbw-sapo', webpage)))
        thumbnail = self._og_search_thumbnail(webpage) or attrs.get('data-thumb')
        # The keywords meta value is split on ';' and empty pieces are dropped
        tags = traverse_obj(self._html_search_meta('keywords', webpage), (
            {lambda x: x.split(';')}, ..., filter))

        return {
            **api_info,
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'tags': tags,
        }
class Kenh14PlaylistIE(InfoExtractor):
    """Extractor for playlist pages on video.kenh14.vn."""

    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are the page's 'video-item' elements.

        Title and description come from the 'category-detail' element, falling
        back to the first JSON-LD object that has both 'name' and 'alternateName'.
        """
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        category_detail = get_element_by_class('category-detail', webpage) or ''
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        def video_page_url(item_html):
            # Each item is turned into a video page URL built from its data-id attribute
            item_id = extract_attributes(item_html)['data-id']
            return f'https://video.kenh14.vn/video/video-{item_id}.chn'

        title = (
            clean_html(get_element_by_class('name', category_detail))
            or unescapeHTML(embed_info.get('name')))
        description = (
            clean_html(get_element_by_class('description', category_detail))
            or unescapeHTML(embed_info.get('alternateName')))
        # The OpenGraph thumbnail is validated as a URL and its query string removed
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        return self.playlist_from_matches(
            get_elements_html_by_class('video-item', webpage), playlist_id, title,
            getter=video_page_url, ie=Kenh14VideoIE,
            playlist_description=description, thumbnail=thumbnail)

View File

@ -1,4 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
@ -137,7 +136,7 @@ def _real_extract(self, url):
'uploader': ('livestream', 'channel', 'user', 'username', {str}), 'uploader': ('livestream', 'channel', 'user', 'username', {str}),
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}), 'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
'timestamp': ('created_at', {parse_iso8601}), 'timestamp': ('created_at', {parse_iso8601}),
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}), 'duration': ('livestream', 'duration', {float_or_none(scale=1000)}),
'thumbnail': ('livestream', 'thumbnail', {url_or_none}), 'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
'categories': ('livestream', 'categories', ..., 'name', {str}), 'categories': ('livestream', 'categories', ..., 'name', {str}),
'view_count': ('views', {int_or_none}), 'view_count': ('views', {int_or_none}),

View File

@ -119,7 +119,7 @@ def _extract_formats(self, media_info, video_id):
'width': ('frameWidth', {int_or_none}), 'width': ('frameWidth', {int_or_none}),
'height': ('frameHeight', {int_or_none}), 'height': ('frameHeight', {int_or_none}),
# NB: filesize is 0 if unknown, bitrate is -1 if unknown # NB: filesize is 0 if unknown, bitrate is -1 if unknown
'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}), 'filesize': ('fileSize', {int_or_none}, filter),
'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}), 'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}), 'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
}), }),

View File

@ -32,7 +32,7 @@ def _parse_episode(self, episode):
VimeoIE, url_transparent=True, VimeoIE, url_transparent=True,
**traverse_obj(episode, { **traverse_obj(episode, {
'id': ('id', {int}, {str_or_none}), 'id': ('id', {int}, {str_or_none}),
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}), 'webpage_url': ('path', {urljoin('https://laracasts.com')}),
'title': ('title', {clean_html}), 'title': ('title', {clean_html}),
'season_number': ('chapter', {int_or_none}), 'season_number': ('chapter', {int_or_none}),
'episode_number': ('position', {int_or_none}), 'episode_number': ('position', {int_or_none}),
@ -104,7 +104,7 @@ def _real_extract(self, url):
'description': ('body', {clean_html}), 'description': ('body', {clean_html}),
'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any), 'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
'duration': ('runTime', {parse_duration}), 'duration': ('runTime', {parse_duration}),
'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}), 'categories': ('taxonomy', 'name', {str}, all, filter),
'tags': ('topics', ..., 'name', {str}), 'tags': ('topics', ..., 'name', {str}),
'modified_date': ('lastUpdated', {unified_strdate}), 'modified_date': ('lastUpdated', {unified_strdate}),
}), }),

View File

@ -66,7 +66,7 @@ def _parse_stream(self, stream, url):
'license': ('value', 'license', {str}), 'license': ('value', 'license', {str}),
'timestamp': ('timestamp', {int_or_none}), 'timestamp': ('timestamp', {int_or_none}),
'release_timestamp': ('value', 'release_time', {int_or_none}), 'release_timestamp': ('value', 'release_time', {int_or_none}),
'tags': ('value', 'tags', ..., {lambda x: x or None}), 'tags': ('value', 'tags', ..., filter),
'duration': ('value', stream_type, 'duration', {int_or_none}), 'duration': ('value', stream_type, 'duration', {int_or_none}),
'channel': ('signing_channel', 'value', 'title', {str}), 'channel': ('signing_channel', 'value', 'title', {str}),
'channel_id': ('signing_channel', 'claim_id', {str}), 'channel_id': ('signing_channel', 'claim_id', {str}),

View File

@ -6,13 +6,11 @@
ExtractorError, ExtractorError,
clean_html, clean_html,
extract_attributes, extract_attributes,
get_element_by_class,
get_element_html_by_id,
join_nonempty, join_nonempty,
parse_duration, parse_duration,
unified_timestamp, unified_timestamp,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import find_element, traverse_obj
class LearningOnScreenIE(InfoExtractor): class LearningOnScreenIE(InfoExtractor):
@ -32,28 +30,24 @@ class LearningOnScreenIE(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'): if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
self.raise_login_required( self.raise_login_required(method='session_cookies')
'Use --cookies for authentication. See '
' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
'for how to manually pass cookies', method=None)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
details = traverse_obj(webpage, ( details = traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'programme-details')}, { {find_element(id='programme-details', html=True)}, {
'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}), 'title': ({find_element(tag='h2')}, {clean_html}),
'timestamp': ( 'timestamp': (
{functools.partial(get_element_by_class, 'broadcast-date')}, {find_element(cls='broadcast-date')},
{functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}), {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
'duration': ( 'duration': (
{functools.partial(get_element_by_class, 'prog-running-time')}, {find_element(cls='prog-running-time')}, {clean_html}, {parse_duration}),
{clean_html}, {parse_duration}),
})) }))
title = details.pop('title', None) or traverse_obj(webpage, ( title = details.pop('title', None) or traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'add-to-existing-playlist')}, {find_element(id='add-to-existing-playlist', html=True)},
{extract_attributes}, 'data-record-title', {clean_html})) {extract_attributes}, 'data-record-title', {clean_html}))
entries = self._parse_html5_media_entries( entries = self._parse_html5_media_entries(

View File

@ -6,12 +6,10 @@
extract_attributes, extract_attributes,
get_element_by_class, get_element_by_class,
get_element_html_by_id, get_element_html_by_id,
get_element_text_and_html_by_tag,
parse_duration, parse_duration,
strip_or_none, strip_or_none,
traverse_obj,
try_call,
) )
from ..utils.traversal import find_element, traverse_obj
class ListenNotesIE(InfoExtractor): class ListenNotesIE(InfoExtractor):
@ -22,14 +20,14 @@ class ListenNotesIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'KrDgvNb_u1n', 'id': 'KrDgvNb_u1n',
'ext': 'mp3', 'ext': 'mp3',
'title': 'md5:32236591a921adf17bbdbf0441b6c0e9', 'title': r're:Tim OReilly on noticing things other people .{113}',
'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd', 'description': r're:(?s)We shape reality by what we notice and .{27459}',
'duration': 2148.0, 'duration': 2215.0,
'channel': 'Thriving on Overload', 'channel': 'Amplifying Cognition',
'channel_id': 'ed84wITivxF', 'channel_id': 'ed84wITivxF',
'episode_id': 'e1312583fa7b4e24acfbb5131050be00', 'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg', 'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/amplifying-cognition-ross-dawson-Iemft4Gdr0k-ed84wITivxF.300x300.jpg',
'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/', 'channel_url': 'https://www.listennotes.com/podcasts/amplifying-cognition-ross-dawson-ed84wITivxF/',
'cast': ['Tim OReilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'], 'cast': ['Tim OReilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
}, },
}, { }, {
@ -39,13 +37,13 @@ class ListenNotesIE(InfoExtractor):
'id': 'lwEA3154JzG', 'id': 'lwEA3154JzG',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Episode 177: WireGuard with Jason Donenfeld', 'title': 'Episode 177: WireGuard with Jason Donenfeld',
'description': 'md5:24744f36456a3e95f83c1193a3458594', 'description': r're:(?s)Jason Donenfeld lead developer joins us this hour to discuss WireGuard, .{3169}',
'duration': 3861.0, 'duration': 3861.0,
'channel': 'Ask Noah Show', 'channel': 'Ask Noah Show',
'channel_id': '4DQTzdS5-j7', 'channel_id': '4DQTzdS5-j7',
'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4', 'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/', 'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg', 'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-gD7vG150cxf-4DQTzdS5-j7.300x300.jpg',
'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'], 'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
}, },
}] }]
@ -70,7 +68,7 @@ def _real_extract(self, url):
'id': audio_id, 'id': audio_id,
'url': data['audio'], 'url': data['audio'],
'title': (data.get('data-title') 'title': (data.get('data-title')
or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0]) or traverse_obj(webpage, ({find_element(tag='h1')}, {clean_html}))
or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')), or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
'description': (self._clean_description(get_element_by_class('ln-text-p', webpage)) 'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
or strip_or_none(description)), or strip_or_none(description)),

View File

@ -1,30 +1,32 @@
import json import json
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
join_nonempty,
smuggle_url, smuggle_url,
traverse_obj, traverse_obj,
try_call, try_call,
unsmuggle_url, unsmuggle_url,
urljoin,
) )
class LiTVIE(InfoExtractor): class LiTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P<id>[\w-]+)'
_URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s'
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s' _GEO_COUNTRIES = ['TW']
_TESTS = [{ _TESTS = [{
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', 'url': 'https://www.litv.tv/drama/watch/VOD00041610',
'info_dict': { 'info_dict': {
'id': 'VOD00041606', 'id': 'VOD00041606',
'title': '花千骨', 'title': '花千骨',
}, },
'playlist_count': 51, # 50 episodes + 1 trailer 'playlist_count': 51, # 50 episodes + 1 trailer
}, { }, {
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', 'url': 'https://www.litv.tv/drama/watch/VOD00041610',
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a', 'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
'info_dict': { 'info_dict': {
'id': 'VOD00041610', 'id': 'VOD00041610',
@ -32,16 +34,15 @@ class LiTVIE(InfoExtractor):
'title': '花千骨第1集', 'title': '花千骨第1集',
'thumbnail': r're:https?://.*\.jpg$', 'thumbnail': r're:https?://.*\.jpg$',
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。', 'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
'categories': ['奇幻', '愛情', '中國', '仙俠'], 'categories': ['奇幻', '愛情', '仙俠', '古裝'],
'episode': 'Episode 1', 'episode': 'Episode 1',
'episode_number': 1, 'episode_number': 1,
}, },
'params': { 'params': {
'noplaylist': True, 'noplaylist': True,
}, },
'skip': 'Georestricted to Taiwan',
}, { }, {
'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&', 'url': 'https://www.litv.tv/drama/watch/VOD00044841',
'md5': '88322ea132f848d6e3e18b32a832b918', 'md5': '88322ea132f848d6e3e18b32a832b918',
'info_dict': { 'info_dict': {
'id': 'VOD00044841', 'id': 'VOD00044841',
@ -55,94 +56,62 @@ class LiTVIE(InfoExtractor):
def _extract_playlist(self, playlist_data, content_type): def _extract_playlist(self, playlist_data, content_type):
all_episodes = [ all_episodes = [
self.url_result(smuggle_url( self.url_result(smuggle_url(
self._URL_TEMPLATE % (content_type, episode['contentId']), self._URL_TEMPLATE % (content_type, episode['content_id']),
{'force_noplaylist': True})) # To prevent infinite recursion {'force_noplaylist': True})) # To prevent infinite recursion
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))] for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))]
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title')) return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title'))
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
if self._search_regex( program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {}
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"', playlist_data = traverse_obj(vod_data, ('seriesTree'))
webpage, 'meta refresh redirect', default=False, group=0): if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data):
raise ExtractorError('No such content found', expected=True) return self._extract_playlist(playlist_data, program_info.get('content_type'))
program_info = self._parse_json(self._search_regex( asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str}))
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), if asset_id: # This is a VOD
video_id) media_type = 'vod'
else: # This is a live stream
asset_id = program_info['content_id']
media_type = program_info['content_type']
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
if puid:
endpoint = 'get-urls'
else:
puid = str(uuid.uuid4())
endpoint = 'get-urls-no-auth'
video_data = self._download_json(
f'https://www.litv.tv/api/{endpoint}', video_id,
data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(),
headers={'Content-Type': 'application/json'})
# In browsers `getProgramInfo` request is always issued. Usually this if error := traverse_obj(video_data, ('error', {dict})):
# endpoint gives the same result as the data embedded in the webpage. error_msg = traverse_obj(error, ('message', {str}))
# If, for some reason, there are no embedded data, we do an extra request. if error_msg and 'OutsideRegionError' in error_msg:
if 'assetId' not in program_info:
program_info = self._download_json(
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
query={'contentId': video_id},
headers={'Accept': 'application/json'})
series_id = program_info['seriesId']
if self._yes_playlist(series_id, video_id, smuggled_data):
playlist_data = self._download_json(
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
return self._extract_playlist(playlist_data, program_info['contentType'])
video_data = self._parse_json(self._search_regex(
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
webpage, 'video data', default='{}'), video_id)
if not video_data:
payload = {'assetId': program_info['assetId']}
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
if puid:
payload.update({
'type': 'auth',
'puid': puid,
})
endpoint = 'getUrl'
else:
payload.update({
'watchDevices': program_info['watchDevices'],
'contentType': program_info['contentType'],
})
endpoint = 'getMainUrlNoAuth'
video_data = self._download_json(
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
data=json.dumps(payload).encode(),
headers={'Content-Type': 'application/json'})
if not video_data.get('fullpath'):
error_msg = video_data.get('errorMessage')
if error_msg == 'vod.error.outsideregionerror':
self.raise_geo_restricted('This video is available in Taiwan only') self.raise_geo_restricted('This video is available in Taiwan only')
if error_msg: elif error_msg:
raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True) raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True)
raise ExtractorError(f'Unexpected result from {self.IE_NAME}') raise ExtractorError(f'Unexpected error from {self.IE_NAME}')
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
video_data['fullpath'], video_id, ext='mp4', video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls')
entry_protocol='m3u8_native', m3u8_id='hls')
for a_format in formats: for a_format in formats:
# LiTV HLS segments doesn't like compressions # LiTV HLS segments doesn't like compressions
a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity' a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
title = program_info['title'] + program_info.get('secondaryMark', '')
description = program_info.get('description')
thumbnail = program_info.get('imageFile')
categories = [item['name'] for item in program_info.get('category', [])]
episode = int_or_none(program_info.get('episode'))
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': title, 'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info),
'description': description, **traverse_obj(program_info, {
'thumbnail': thumbnail, 'description': ('description', {str}),
'categories': categories, 'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}),
'episode_number': episode, 'categories': ('genres', ..., 'name', {str}),
'episode_number': ('episode', {int_or_none}),
}),
} }

View File

@ -114,7 +114,7 @@ class LSMLREmbedIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
query = parse_qs(url) query = parse_qs(url)
video_id = traverse_obj(query, ( video_id = traverse_obj(query, (
('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False) ('show', 'id'), 0, {int_or_none}, filter, {str_or_none}), get_all=False)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_data, media_data = self._search_regex( player_data, media_data = self._search_regex(

View File

@ -57,6 +57,6 @@ def _real_extract(self, url):
'duration': ('runtimeInSeconds', {int_or_none}), 'duration': ('runtimeInSeconds', {int_or_none}),
'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}), 'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
'release_year': ('yearOfProduction', {int_or_none}), 'release_year': ('yearOfProduction', {int_or_none}),
'categories': ('mainGenre', {str}, {lambda x: x and [x]}), 'categories': ('mainGenre', {str}, all, filter),
})), })),
} }

Some files were not shown because too many files have changed in this diff Show More