diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8012ebb8cd..e523154c41 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -242,7 +242,7 @@ jobs: permissions: contents: read actions: write # For cleaning up cache - runs-on: macos-13 + runs-on: macos-14 steps: - uses: actions/checkout@v4 @@ -261,6 +261,8 @@ jobs: - name: Install Requirements run: | brew install coreutils + # We need to use system Python in order to roll our own universal2 curl_cffi wheel + brew uninstall --ignore-dependencies python3 python3 -m venv ~/yt-dlp-build-venv source ~/yt-dlp-build-venv/bin/activate python3 devscripts/install_deps.py -o --include build diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index dd2c6f481e..86036989c0 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -37,7 +37,7 @@ jobs: matrix: os: [ubuntu-latest] # CPython 3.9 is in quick-test - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest @@ -49,7 +49,7 @@ jobs: - os: windows-latest python-version: '3.13' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 6849fba9b6..594a664c9c 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,13 +28,13 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.9' - os: windows-latest - python-version: pypy-3.10 + python-version: pypy-3.11 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml index 203172e0b9..42c65db353 100644 --- a/.github/workflows/signature-tests.yml +++ b/.github/workflows/signature-tests.yml @@ -25,7 +25,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest] - python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.11] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fd7b0f1210..8822907b79 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -126,7 +126,7 @@ ### Are you willing to share account details if needed? While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow. - Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages. -- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator). +- Change the password before sharing the account to something random. - Change the password after receiving the account back. ### Is the website primarily used for piracy? @@ -272,7 +272,7 @@ ## Adding support for a new site You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.11. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ba23b66dc5..f20b4ce172 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -784,3 +784,12 @@ eason1478 ceandreasen chauhantirth helpimnotdrowning +adamralph +averageFOSSenjoyer +bubo +flanter21 +Georift +moonshinerd +R0hanW +ShockedPlot7560 +swayll diff --git a/Changelog.md b/Changelog.md index 5a5c18cf34..7205b95aa3 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,97 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.07.21 + +#### Important changes +- **Default behaviour changed from `--mtime` to `--no-mtime`** +yt-dlp no longer applies the server modified time to downloaded files by default. [Read more](https://github.com/yt-dlp/yt-dlp/issues/12780) +- Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56) + - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped + +#### Core changes +- [Allow extractors to designate formats/subtitles for impersonation](https://github.com/yt-dlp/yt-dlp/commit/32809eb2da92c649e540a5b714f6235036026161) ([#13778](https://github.com/yt-dlp/yt-dlp/issues/13778)) by [bashonly](https://github.com/bashonly) (With fixes in [3e49bc8](https://github.com/yt-dlp/yt-dlp/commit/3e49bc8a1bdb4109b857f2c361c358e86fa63405), [2ac3eb9](https://github.com/yt-dlp/yt-dlp/commit/2ac3eb98373d1c31341c5e918c83872c7ff409c6)) +- [Don't let format testing alter the return code](https://github.com/yt-dlp/yt-dlp/commit/4919051e447c7f8ae9df8ba5c4208b6b5c04915a) ([#13767](https://github.com/yt-dlp/yt-dlp/issues/13767)) by [bashonly](https://github.com/bashonly) +- [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/commit/959ac99e98c3215437e573c22d64be42d361e863) by [Grub4K](https://github.com/Grub4K) +- [No longer enable `--mtime` by default](https://github.com/yt-dlp/yt-dlp/commit/f3008bc5f89d2691f2f8dfc51b406ef4e25281c3) ([#12781](https://github.com/yt-dlp/yt-dlp/issues/12781)) by [seproDev](https://github.com/seproDev) +- [Warn when skipping formats](https://github.com/yt-dlp/yt-dlp/commit/1f27a9f8baccb9105f2476154557540efe09a937) ([#13090](https://github.com/yt-dlp/yt-dlp/issues/13090)) by [bashonly](https://github.com/bashonly) +- **jsinterp** + - [Cache undefined variable names](https://github.com/yt-dlp/yt-dlp/commit/b342d27f3f82d913976509ddf5bff539ad8567ec) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly) (With fixes in [805519b](https://github.com/yt-dlp/yt-dlp/commit/805519bfaa7cb5443912dfe45ac774834ba65a16)) + - [Fix variable scoping](https://github.com/yt-dlp/yt-dlp/commit/b6328ca05030d815222b25d208cc59a964623bf9) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) +- **utils** + - `mimetype2ext`: [Always parse `flac` from `audio/flac`](https://github.com/yt-dlp/yt-dlp/commit/b8abd255e454acbe0023cdb946f9eb461ced7eeb) ([#13748](https://github.com/yt-dlp/yt-dlp/issues/13748)) by [bashonly](https://github.com/bashonly) + - `unified_timestamp`: [Return `int` values](https://github.com/yt-dlp/yt-dlp/commit/6be26626f7cfa71d28e0fac2861eb04758810c5d) ([#13796](https://github.com/yt-dlp/yt-dlp/issues/13796)) by [doe1080](https://github.com/doe1080) + - `urlhandle_detect_ext`: [Use `x-amz-meta-file-type` headers](https://github.com/yt-dlp/yt-dlp/commit/28bf46b7dafe2e241137763bf570a2f91ba8a53a) ([#13749](https://github.com/yt-dlp/yt-dlp/issues/13749)) by [bashonly](https://github.com/bashonly) + +#### Extractor changes +- [Add `_search_nextjs_v13_data` helper](https://github.com/yt-dlp/yt-dlp/commit/5245231e4a39ecd5595d4337d46d85e150e2430a) ([#13398](https://github.com/yt-dlp/yt-dlp/issues/13398)) by [bashonly](https://github.com/bashonly) (With fixes in [b5fea53](https://github.com/yt-dlp/yt-dlp/commit/b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec)) +- [Detect invalid m3u8 playlist data](https://github.com/yt-dlp/yt-dlp/commit/e99c0b838a9c5feb40c0dcd291bd7b8620b8d36d) ([#13601](https://github.com/yt-dlp/yt-dlp/issues/13601)) by [Grub4K](https://github.com/Grub4K) +- **10play**: [Support new site domain](https://github.com/yt-dlp/yt-dlp/commit/790c286ce3e0b534ca2d8f6648ced220d888f139) ([#13611](https://github.com/yt-dlp/yt-dlp/issues/13611)) by [Georift](https://github.com/Georift) +- **9gag**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/0b359b184dee0c7052be482857bf562de67e4928) ([#13678](https://github.com/yt-dlp/yt-dlp/issues/13678)) by [bashonly](https://github.com/bashonly) +- **aenetworks**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/5f951ce929b56a822514f1a02cc06af030855ec7) ([#13747](https://github.com/yt-dlp/yt-dlp/issues/13747)) by [bashonly](https://github.com/bashonly) +- **archive.org**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf) ([#13706](https://github.com/yt-dlp/yt-dlp/issues/13706)) by [rdamas](https://github.com/rdamas) +- **bandaichannel**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/23e9389f936ec5236a87815b8576e5ce567b2f77) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **bandcamp**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/f9dff95cb1c138913011417b3bba020c0a691bba) ([#13480](https://github.com/yt-dlp/yt-dlp/issues/13480)) by [WouterGordts](https://github.com/WouterGordts) +- **bellmedia**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6fb3947c0dc6d0e3eab5077c5bada8402f47a277) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **bilibili**: [Pass newer user-agent with API requests](https://github.com/yt-dlp/yt-dlp/commit/d3edc5d52a7159eda2331dbc7e14bf40a6585c81) ([#13736](https://github.com/yt-dlp/yt-dlp/issues/13736)) by [c-basalt](https://github.com/c-basalt) +- **bilibilibangumi** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7) ([#13800](https://github.com/yt-dlp/yt-dlp/issues/13800)) by [bashonly](https://github.com/bashonly) + - [Fix geo-block detection](https://github.com/yt-dlp/yt-dlp/commit/884f35d54a64f1e6e7be49459842f573fc3a2701) ([#13667](https://github.com/yt-dlp/yt-dlp/issues/13667)) by [bashonly](https://github.com/bashonly) +- **blackboardcollaborate**: [Support subtitles and authwalled videos](https://github.com/yt-dlp/yt-dlp/commit/dcc4cba39e2a79d3efce16afa28dbe245468489f) ([#12473](https://github.com/yt-dlp/yt-dlp/issues/12473)) by [flanter21](https://github.com/flanter21) +- **btvplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae61e0f313dd03a09060abc7a212775c3717818) ([#13541](https://github.com/yt-dlp/yt-dlp/issues/13541)) by [bubo](https://github.com/bubo) +- **ctv**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9f54ea38984788811773ca2ceaca73864acf0e8a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **dangalplay**: [Support other login regions](https://github.com/yt-dlp/yt-dlp/commit/09982bc33e2f1f9a1ff66e6738df44f15b36f6a6) ([#13768](https://github.com/yt-dlp/yt-dlp/issues/13768)) by [bashonly](https://github.com/bashonly) +- **francetv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/ade876efb31d55d3394185ffc56942fdc8d325cc) ([#13726](https://github.com/yt-dlp/yt-dlp/issues/13726)) by [bashonly](https://github.com/bashonly) +- **hotstar** + - [Fix support for free accounts](https://github.com/yt-dlp/yt-dlp/commit/07d1d85f6387e4bdb107096f0131c7054f078bb9) ([#13700](https://github.com/yt-dlp/yt-dlp/issues/13700)) by [chauhantirth](https://github.com/chauhantirth) + - [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9) ([#13727](https://github.com/yt-dlp/yt-dlp/issues/13727)) by [bashonly](https://github.com/bashonly) (With fixes in [ef103b2](https://github.com/yt-dlp/yt-dlp/commit/ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d)) +- **joqrag**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6d39c420f7774562a106d90253e2ed5b75036321) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080) +- **limelight**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/5d693446e882931618c40c99bb593f0b87b30eb9) ([#13267](https://github.com/yt-dlp/yt-dlp/issues/13267)) by [doe1080](https://github.com/doe1080) +- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b4b4486effdcb96bb6b8148171a49ff579b69a4a) ([#13717](https://github.com/yt-dlp/yt-dlp/issues/13717)) by [Pawka](https://github.com/Pawka) +- **mir24.tv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7b4c96e0898db048259ef5fdf12ed14e3605dce3) ([#13651](https://github.com/yt-dlp/yt-dlp/issues/13651)) by [swayll](https://github.com/swayll) +- **mixlr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0f33950c778331bf4803c76e8b0ba1862df93431) ([#13561](https://github.com/yt-dlp/yt-dlp/issues/13561)) by [seproDev](https://github.com/seproDev), [ShockedPlot7560](https://github.com/ShockedPlot7560) +- **mlbtv**: [Make formats downloadable with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226) ([#13761](https://github.com/yt-dlp/yt-dlp/issues/13761)) by [bashonly](https://github.com/bashonly) +- **newspicks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2aaf1aa71d174700859c9ec1a81109b78e34961c) ([#13612](https://github.com/yt-dlp/yt-dlp/issues/13612)) by [doe1080](https://github.com/doe1080) +- **nhkradiru**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7c49a937887756efcfa162abdcf17e48c244cb0c) ([#12708](https://github.com/yt-dlp/yt-dlp/issues/12708)) by [garret1317](https://github.com/garret1317) +- **noovo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/d57a0b5aa78d59324b037d37492fe86aa4fbf58a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080) +- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d88b304d44c599d81acfa4231502270c8b9fe2f8) ([#13712](https://github.com/yt-dlp/yt-dlp/issues/13712)) by [bashonly](https://github.com/bashonly) +- **playerfm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3) ([#13016](https://github.com/yt-dlp/yt-dlp/issues/13016)) by [R0hanW](https://github.com/R0hanW) +- **rai**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c8329fc572903eeed7edad1642773b2268b71a62) ([#13572](https://github.com/yt-dlp/yt-dlp/issues/13572)) by [moonshinerd](https://github.com/moonshinerd), [seproDev](https://github.com/seproDev) +- **raisudtirol**: [Support alternative domain](https://github.com/yt-dlp/yt-dlp/commit/85c3fa1925a9057ef4ae8af682686d5b3eb8e568) ([#13718](https://github.com/yt-dlp/yt-dlp/issues/13718)) by [barsnick](https://github.com/barsnick) +- **skeb**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/060c6a4501a0b8a92f1b9c12788f556d902c83c6) ([#13593](https://github.com/yt-dlp/yt-dlp/issues/13593)) by [doe1080](https://github.com/doe1080) +- **soundcloud**: [Always extract original format extension](https://github.com/yt-dlp/yt-dlp/commit/c1ac543c8166ff031d62e340b3244ca8556e3fb9) ([#13746](https://github.com/yt-dlp/yt-dlp/issues/13746)) by [bashonly](https://github.com/bashonly) +- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0b41746964e1d0470ac286ce09408940a3a51147) ([#13610](https://github.com/yt-dlp/yt-dlp/issues/13610)) by [bashonly](https://github.com/bashonly) +- **thehighwire**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3a84be9d1660ef798ea28f929a20391bef6afda4) ([#13505](https://github.com/yt-dlp/yt-dlp/issues/13505)) by [swayll](https://github.com/swayll) +- **twitch**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/422cc8cb2ff2bd3b4c2bc64e23507b7e6f522c35) ([#13618](https://github.com/yt-dlp/yt-dlp/issues/13618)) by [bashonly](https://github.com/bashonly) +- **unitednationswebtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/630f3389c33f0f7f6ec97e8917d20aeb4e4078da) ([#13538](https://github.com/yt-dlp/yt-dlp/issues/13538)) by [averageFOSSenjoyer](https://github.com/averageFOSSenjoyer) +- **vimeo** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a5d697f62d8be78ffd472acb2f52c8bc32833003) ([#13692](https://github.com/yt-dlp/yt-dlp/issues/13692)) by [bashonly](https://github.com/bashonly) + - [Handle age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/a6db1d297ab40cc346de24aacbeab93112b2f4e1) ([#13719](https://github.com/yt-dlp/yt-dlp/issues/13719)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Do not require PO Token for premium accounts](https://github.com/yt-dlp/yt-dlp/commit/5b57b72c1a7c6bd249ffcebdf5630761ec664c10) ([#13640](https://github.com/yt-dlp/yt-dlp/issues/13640)) by [coletdjnz](https://github.com/coletdjnz) + - [Ensure context params are consistent for web clients](https://github.com/yt-dlp/yt-dlp/commit/6e5bee418bc108565108153fd745c8e7a59f16dd) ([#13701](https://github.com/yt-dlp/yt-dlp/issues/13701)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract global nsig helper functions](https://github.com/yt-dlp/yt-dlp/commit/fca94ac5d63ed6578b5cd9c8129d97a8a713c39a) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/0e68332bcb9fba87c42805b7a051eeb2bed36206) ([#13659](https://github.com/yt-dlp/yt-dlp/issues/13659)) by [bashonly](https://github.com/bashonly) + - [Log bad playability statuses of player responses](https://github.com/yt-dlp/yt-dlp/commit/aa9f1f4d577e99897ac16cd19d4e217d688ea75d) ([#13647](https://github.com/yt-dlp/yt-dlp/issues/13647)) by [coletdjnz](https://github.com/coletdjnz) + - [Use impersonation for downloading subtitles](https://github.com/yt-dlp/yt-dlp/commit/8820101aa3152e5f4811541c645f8b5de231ba8c) ([#13786](https://github.com/yt-dlp/yt-dlp/issues/13786)) by [bashonly](https://github.com/bashonly) + - tab: [Fix subscriptions feed extraction](https://github.com/yt-dlp/yt-dlp/commit/c23d837b6524d1e7a4595948871ba1708cba4dfa) ([#13665](https://github.com/yt-dlp/yt-dlp/issues/13665)) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **hls**: [Do not fall back to ffmpeg when native is required](https://github.com/yt-dlp/yt-dlp/commit/a7113722ec33f30fc898caee9242af2b82188a53) ([#13655](https://github.com/yt-dlp/yt-dlp/issues/13655)) by [bashonly](https://github.com/bashonly) + +#### Networking changes +- **Request Handler** + - requests + - [Refactor default headers](https://github.com/yt-dlp/yt-dlp/commit/a4561c7a66c39d88efe7ae51e7fa1986faf093fb) ([#13785](https://github.com/yt-dlp/yt-dlp/issues/13785)) by [bashonly](https://github.com/bashonly) + - [Work around partial read dropping data](https://github.com/yt-dlp/yt-dlp/commit/c2ff2dbaec7929015373fe002e9bd4849931a4ce) ([#13599](https://github.com/yt-dlp/yt-dlp/issues/13599)) by [Grub4K](https://github.com/Grub4K) (With fixes in [c316416](https://github.com/yt-dlp/yt-dlp/commit/c316416b972d1b05e58fbcc21e80428b900ce102)) + +#### Misc. changes +- **cleanup** + - [Bump ruff to 0.12.x](https://github.com/yt-dlp/yt-dlp/commit/ca5cce5b07d51efe7310b449cdefeca8d873e9df) ([#13596](https://github.com/yt-dlp/yt-dlp/issues/13596)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [9951fdd](https://github.com/yt-dlp/yt-dlp/commit/9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e) by [adamralph](https://github.com/adamralph), [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080), [hseg](https://github.com/hseg), [InvalidUsernameException](https://github.com/InvalidUsernameException), [seproDev](https://github.com/seproDev) +- **devscripts**: [Fix filename/directory Bash completions](https://github.com/yt-dlp/yt-dlp/commit/99093e96fd6a26dea9d6e4bd1e4b16283b6ad1ee) ([#13620](https://github.com/yt-dlp/yt-dlp/issues/13620)) by [barsnick](https://github.com/barsnick) +- **test**: download: [Support `playlist_maxcount`](https://github.com/yt-dlp/yt-dlp/commit/fd36b8f31bafbd8096bdb92a446a0c9c6081209c) ([#13433](https://github.com/yt-dlp/yt-dlp/issues/13433)) by [InvalidUsernameException](https://github.com/InvalidUsernameException) + ### 2025.06.30 #### Core changes diff --git a/README.md b/README.md index 26a27ce7e4..1f8c7936e4 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ # To install nightly with pip: ``` ## DEPENDENCIES -Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly. yt-dlp [OPTIONS] [--] URL [URL...] -`Ctrl+F` is your friend :D +Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords @@ -640,9 +640,9 @@ ## Filesystem Options: --no-part Do not use .part files - write directly into output file --mtime Use the Last-modified header to set the file - modification time (default) + modification time --no-mtime Do not use the Last-modified header to set - the file modification time + the file modification time (default) --write-description Write video description to a .description file --no-write-description Do not write video description (default) --write-info-json Write video metadata to a .info.json file @@ -1903,6 +1903,10 @@ #### sonylivseries #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) +#### vimeo +* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens +* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability + **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index c2f6511210..0597f602d0 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -62,16 +62,22 @@ def parse_options(): def exe(onedir): """@returns (name, path)""" + platform_name, machine, extension = { + 'win32': (None, MACHINE, '.exe'), + 'darwin': ('macos', None, None), + }.get(OS_NAME, (OS_NAME, MACHINE, None)) + name = '_'.join(filter(None, ( 'yt-dlp', - {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), - MACHINE, + platform_name, + machine, ))) + return name, ''.join(filter(None, ( 'dist/', onedir and f'{name}/', name, - OS_NAME == 'win32' and '.exe', + extension, ))) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index d7296bf309..c22ea94bfc 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -262,5 +262,15 @@ { "action": "remove", "when": "500761e41acb96953a5064e951d41d190c287e46" + }, + { + "action": "add", + "when": "f3008bc5f89d2691f2f8dfc51b406ef4e25281c3", + "short": "[priority] **Default behaviour changed from `--mtime` to `--no-mtime`**\nyt-dlp no longer applies the server modified time to downloaded files by default. [Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)" + }, + { + "action": "add", + "when": "959ac99e98c3215437e573c22d64be42d361e863", + "short": "[priority] Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)\n - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped" } ] diff --git a/supportedsites.md b/supportedsites.md index 8e48135d22..3e0bef4bcf 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -133,7 +133,6 @@ # Supported sites - **BaiduVideo**: 百度视频 - **BanBye** - **BanByeChannel** - - **bandaichannel** - **Bandcamp** - **Bandcamp:album** - **Bandcamp:user** @@ -157,7 +156,6 @@ # Supported sites - **Beeg** - **BehindKink**: (**Currently broken**) - **Bellator** - - **BellMedia** - **BerufeTV** - **Bet**: (**Currently broken**) - **bfi:player**: (**Currently broken**) @@ -197,6 +195,7 @@ # Supported sites - **BitChute** - **BitChuteChannel** - **BlackboardCollaborate** + - **BlackboardCollaborateLaunch** - **BleacherReport**: (**Currently broken**) - **BleacherReportCMS**: (**Currently broken**) - **blerp** @@ -225,6 +224,7 @@ # Supported sites - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen + - **BTVPlus** - **Bundesliga** - **Bundestag** - **BunnyCdn** @@ -317,7 +317,6 @@ # Supported sites - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 - - **CTV** - **CTVNews** - **cu.ntv.co.jp**: 日テレ無料TADA! - **CultureUnplugged** @@ -652,7 +651,6 @@ # Supported sites - **jiosaavn:​show:playlist** - **jiosaavn:song** - **Joj** - - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR) - **Jove** - **JStream** - **JTBC**: jtbc.co.kr @@ -723,9 +721,6 @@ # Supported sites - **life:embed** - **likee** - **likee:user** - - **limelight** - - **limelight:channel** - - **limelight:channel_list** - **LinkedIn**: [*linkedin*](## "netrc machine") - **linkedin:events**: [*linkedin*](## "netrc machine") - **linkedin:learning**: [*linkedin*](## "netrc machine") @@ -807,6 +802,7 @@ # Supported sites - **minds:channel** - **minds:group** - **Minoto** + - **mir24.tv** - **mirrativ** - **mirrativ:user** - **MirrorCoUK** @@ -817,6 +813,8 @@ # Supported sites - **mixcloud** - **mixcloud:playlist** - **mixcloud:user** + - **Mixlr** + - **MixlrRecoring** - **MLB** - **MLBArticle** - **MLBTV**: [*mlb*](## "netrc machine") @@ -973,7 +971,6 @@ # Supported sites - **NoicePodcast** - **NonkTube** - **NoodleMagazine** - - **Noovo** - **NOSNLArticle** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** @@ -1097,6 +1094,7 @@ # Supported sites - **Platzi**: [*platzi*](## "netrc machine") - **PlatziCourse**: [*platzi*](## "netrc machine") - **player.sky.it** + - **PlayerFm** - **playeur** - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlaySuisse**: [*playsuisse*](## "netrc machine") @@ -1472,11 +1470,12 @@ # Supported sites - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - **TF1** - - **TFO** + - **TFO**: (**Currently broken**) - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") - **TheGuardianPodcast** - **TheGuardianPodcastPlaylist** + - **TheHighWire** - **TheHoleTv** - **TheIntercept** - **ThePlatform** @@ -1544,8 +1543,8 @@ # Supported sites - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - **TV5MONDE** - - **tv5unis** - - **tv5unis:video** + - **tv5unis**: (**Currently broken**) + - **tv5unis:video**: (**Currently broken**) - **tv8.it** - **tv8.it:live**: TV8 Live - **tv8.it:playlist**: TV8 Playlist @@ -1600,6 +1599,7 @@ # Supported sites - **UlizaPortal**: ulizaportal.jp - **umg:de**: Universal Music Deutschland - **Unistra** + - **UnitedNationsWebTv** - **Unity**: (**Currently broken**) - **uol.com.br** - **uplynk** diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c9f70431f7..40dd05e136 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1959,6 +1959,37 @@ def test_search_nextjs_data(self): with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) + def test_search_nextjs_v13_data(self): + HTML = R''' + + + + + + + + ''' + EXPECTED = { + '18': { + 'foo': 'bar', + }, + '16': { + 'meta': { + 'dateCreated': 1730489700, + 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + }, + }, + '19': { + 'duplicated_field_name': {'x': 1}, + }, + '20': { + 'duplicated_field_name': {'y': 2}, + }, + } + self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED) + self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {}) + self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {}) + def test_search_nuxt_json(self): HTML_TMPL = '' VALID_DATA = ''' diff --git a/test/test_compat.py b/test/test_compat.py index b1cc2a8187..3aa9c0c518 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -21,9 +21,6 @@ def test_compat_passthrough(self): with self.assertWarns(DeprecationWarning): _ = compat.compat_basestring - with self.assertWarns(DeprecationWarning): - _ = compat.WINDOWS_VT_MODE - self.assertEqual(urllib.request.getproxies, getproxies) with self.assertWarns(DeprecationWarning): diff --git a/test/test_download.py b/test/test_download.py index c7842735c2..1714cb52ec 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -66,10 +66,6 @@ def _file_md5(fn): @is_download_test class TestDownload(unittest.TestCase): - # Parallel testing in nosetests. See - # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html - _multiprocess_shared_ = True - maxDiff = None COMPLETED_TESTS = {} diff --git a/test/test_utils.py b/test/test_utils.py index aedb565ec1..44747efda6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1373,6 +1373,7 @@ def test_parse_resolution(self): self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index abf6507b30..37d6eae4c5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -36,6 +36,7 @@ from .globals import ( IN_CLI, LAZY_EXTRACTORS, + WINDOWS_VT_MODE, plugin_ies, plugin_ies_overrides, plugin_pps, @@ -52,7 +53,7 @@ SSLError, network_exceptions, ) -from .networking.impersonate import ImpersonateRequestHandler +from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget from .plugins import directories as plugin_directories, load_all_plugins from .postprocessor import ( EmbedThumbnailPP, @@ -529,6 +530,7 @@ class YoutubeDL: discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. + Argument values must always be a list of string(s). E.g. {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). Only for YouTube @@ -2195,7 +2197,7 @@ def _filter(f): return op(actual_value, comparison_value) return _filter - def _check_formats(self, formats): + def _check_formats(self, formats, warning=True): for f in formats: working = f.get('__working') if working is not None: @@ -2208,6 +2210,9 @@ def _check_formats(self, formats): continue temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None) temp_file.close() + # If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code. + # Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750 + original_retcode = self._download_retcode try: success, _ = self.dl(temp_file.name, f, test=True) except (DownloadError, OSError, ValueError, *network_exceptions): @@ -2218,12 +2223,18 @@ def _check_formats(self, formats): os.remove(temp_file.name) except OSError: self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') + # Restore the actual return code + self._download_retcode = original_retcode f['__working'] = success if success: f.pop('__needs_testing', None) yield f else: - self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) + msg = f'Unable to download format {f["format_id"]}. Skipping...' + if warning: + self.report_warning(msg) + else: + self.to_screen(f'[info] {msg}') def _select_formats(self, formats, selector): return list(selector({ @@ -2949,7 +2960,7 @@ def is_wellformed(f): ) if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1]), reverse=True) + formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them @@ -3222,6 +3233,7 @@ def dl(self, name, info, subtitle=False, test=False): } else: params = self.params + fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: @@ -3697,6 +3709,8 @@ def filter_fn(obj): return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} elif isinstance(obj, (list, tuple, set, LazyList)): return list(map(filter_fn, obj)) + elif isinstance(obj, ImpersonateTarget): + return str(obj) elif obj is None or isinstance(obj, (str, int, float, bool)): return obj elif callable(obj): @@ -4029,8 +4043,7 @@ def get_encoding(stream): if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') if not supports_terminal_sequences(stream): - from .utils import WINDOWS_VT_MODE # Must be imported locally - additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI') + additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI') if additional_info: ret = f'{ret} ({",".join(additional_info)})' return ret @@ -4176,6 +4189,31 @@ def _impersonate_target_available(self, target): for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler)) + def _parse_impersonate_targets(self, impersonate): + if impersonate in (True, ''): + impersonate = ImpersonateTarget() + + requested_targets = [ + t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) + for t in variadic(impersonate) + ] if impersonate else [] + + available_target = next(filter(self._impersonate_target_available, requested_targets), None) + + return available_target, requested_targets + + @staticmethod + def _unavailable_targets_message(requested_targets, note=None, is_error=False): + note = note or 'The extractor specified to use impersonation for this download' + specific_targets = ', '.join(filter(None, map(str, requested_targets))) + message = ( + 'no impersonate target is available' if not specific_targets + else f'none of these impersonate targets are available: {specific_targets}') + return ( + f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}' + f' https://github.com/yt-dlp/yt-dlp#impersonation ' + f'for information on installing the required dependencies') + def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index dae2c14592..2f3e35d4a8 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -37,7 +37,7 @@ from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..networking.exceptions import HTTPError as compat_HTTPError -passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) +passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',)) # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 5675445ace..459a4b7de0 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -1335,7 +1335,7 @@ def prepare_line(line): if len(cookie_list) != self._ENTRY_LEN: raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) - if cookie.expires_at and not cookie.expires_at.isdigit(): + if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at): raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 8f575ece4c..4ad0b6a0f6 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -105,7 +105,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default): if external_downloader is None: if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): return FFmpegFD - elif external_downloader.lower() != 'native': + elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None: ed = get_external_downloader(external_downloader) if ed.can_download(info_dict, external_downloader): return ed diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index bb9303f8a1..7bc70a51a2 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -495,3 +495,14 @@ def _debug_cmd(self, args, exe=None): exe = os.path.basename(args[0]) self.write_debug(f'{exe} command line: {shell_quote(args)}') + + def _get_impersonate_target(self, info_dict): + impersonate = info_dict.get('impersonate') + if impersonate is None: + return None + available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate) + if available_target: + return available_target + elif requested_targets: + self.report_warning(self.ydl._unavailable_targets_message(requested_targets)) + return None diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ee73ac043e..65ed83991b 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -572,7 +572,21 @@ def _call_downloader(self, tmpfilename, info_dict): if end_time: args += ['-t', str(end_time - start_time)] - args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] + url = fmt['url'] + if self.params.get('enable_file_urls') and url.startswith('file:'): + # The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs, + # so only local segments can be read unless we also include 'http,https,tcp,tls' + args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls'] + # ffmpeg incorrectly handles 'file:' URLs by only removing the + # 'file:' prefix and treating the rest as if it's a normal filepath. + # FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs: + # - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:' + # - On *nix, replace 'file://localhost/' with 'file:/' + # Ref: https://github.com/yt-dlp/yt-dlp/issues/13781 + # https://trac.ffmpeg.org/ticket/2702 + url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url) + + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 2256305785..58cfbbf163 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -205,7 +205,7 @@ def is_ad_fragment_end(s): line = line.strip() if line: if not line.startswith('#'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if ad_frag_next: continue @@ -231,7 +231,7 @@ def is_ad_fragment_end(s): byte_range = {} elif line.startswith('#EXT-X-MAP'): - if format_index and discontinuity_count != format_index: + if format_index is not None and discontinuity_count != format_index: continue if frag_index > 0: self.report_error( diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 90bfcaf552..c388deb7ea 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -27,6 +27,10 @@ class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) + request_extensions = {} + impersonate_target = self._get_impersonate_target(info_dict) + if impersonate_target is not None: + request_extensions['impersonate'] = impersonate_target class DownloadContext(dict): __getattr__ = dict.get @@ -109,7 +113,7 @@ def establish_connection(): if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 - request = Request(url, request_data, headers) + request = Request(url, request_data, headers, extensions=request_extensions) has_range = range_start is not None if has_range: request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 84da570b0a..3eea0cdf6b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -201,7 +201,6 @@ BanByeChannelIE, BanByeIE, ) -from .bandaichannel import BandaiChannelIE from .bandcamp import ( BandcampAlbumIE, BandcampIE, @@ -229,7 +228,6 @@ from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE -from .bellmedia import BellMediaIE from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE @@ -275,7 +273,10 @@ BitChuteChannelIE, BitChuteIE, ) -from .blackboardcollaborate import BlackboardCollaborateIE +from .blackboardcollaborate import ( + BlackboardCollaborateIE, + BlackboardCollaborateLaunchIE, +) from .bleacherreport import ( BleacherReportCMSIE, BleacherReportIE, @@ -309,6 +310,7 @@ BrilliantpalaClassesIE, BrilliantpalaElearnIE, ) +from .btvplus import BTVPlusIE from .bundesliga import BundesligaIE from .bundestag import BundestagIE from .bunnycdn import BunnyCdnIE @@ -446,7 +448,6 @@ CSpanIE, ) from .ctsnews import CtsNewsIE -from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( @@ -570,10 +571,6 @@ DWIE, DWArticleIE, ) -from .eagleplatform import ( - ClipYouEmbedIE, - EaglePlatformIE, -) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( @@ -639,6 +636,7 @@ FancodeVodIE, ) from .fathom import FathomIE +from .faulio import FaulioLiveIE from .faz import FazIE from .fc2 import ( FC2IE, @@ -928,7 +926,6 @@ JioSaavnSongIE, ) from .joj import JojIE -from .joqrag import JoqrAgIE from .jove import JoveIE from .jstream import JStreamIE from .jtbc import ( @@ -1031,11 +1028,6 @@ LikeeIE, LikeeUserIE, ) -from .limelight import ( - LimelightChannelIE, - LimelightChannelListIE, - LimelightMediaIE, -) from .linkedin import ( LinkedInEventsIE, LinkedInIE, @@ -1168,6 +1160,10 @@ MixcloudPlaylistIE, MixcloudUserIE, ) +from .mixlr import ( + MixlrIE, + MixlrRecoringIE, +) from .mlb import ( MLBIE, MLBTVIE, @@ -1378,7 +1374,6 @@ from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE -from .noovo import NoovoIE from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, @@ -1559,6 +1554,7 @@ PlatziCourseIE, PlatziIE, ) +from .playerfm import PlayerFmIE from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE @@ -1569,6 +1565,7 @@ ) from .plutotv import PlutoTVIE from .plvideo import PlVideoIE +from .plyr import PlyrEmbedIE from .podbayfm import ( PodbayFMChannelIE, PodbayFMIE, @@ -1784,6 +1781,7 @@ RTVEALaCartaIE, RTVEAudioIE, RTVELiveIE, + RTVEProgramIE, RTVETelevisionIE, ) from .rtvs import RTVSIE @@ -2097,6 +2095,7 @@ TheGuardianPodcastIE, TheGuardianPodcastPlaylistIE, ) +from .thehighwire import TheHighWireIE from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( @@ -2166,7 +2165,6 @@ from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE -from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import ( TubeTuGrazIE, @@ -2237,6 +2235,7 @@ from .tvplayer import TVPlayerIE from .tvw import ( TvwIE, + TvwNewsIE, TvwTvChannelsIE, ) from .tweakers import TweakersIE @@ -2285,6 +2284,7 @@ ) from .umg import UMGDeIE from .unistra import UnistraIE +from .unitednations import UnitedNationsWebTvIE from .unity import UnityIE from .unsupported import ( KnownDRMIE, diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 8c2d9d9340..eb45734ec0 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -48,7 +48,6 @@ 'username_field': 'user', 'password_field': 'passwd', 'login_hostname': 'login.xfinity.com', - 'needs_newer_ua': True, }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -1379,11 +1378,8 @@ def _download_webpage_handle(self, *args, **kwargs): @staticmethod def _get_mso_headers(mso_info): - # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - return { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', - } if mso_info.get('needs_newer_ua') else {} + # Not needed currently + return {} @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index 4608e5c13d..997e1b92cb 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -84,9 +84,10 @@ def _parse_video_data(self, video_data): class AdobeTVEmbedIE(AdobeTVBaseIE): + _WORKING = False IE_NAME = 'adobetv:embed' _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://tv.adobe.com/embed/22/4153', 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a', 'info_dict': { @@ -94,12 +95,12 @@ class AdobeTVEmbedIE(AdobeTVBaseIE): 'ext': 'flv', 'title': 'Creating Graphics Optimized for BlackBerry', 'description': 'md5:eac6e8dced38bdaae51cd94447927459', - 'thumbnail': r're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.+\.jpg', 'upload_date': '20091109', 'duration': 377, 'view_count': int, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -110,10 +111,10 @@ def _real_extract(self, url): class AdobeTVIE(AdobeTVBaseIE): + _WORKING = False IE_NAME = 'adobetv' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?watch/(?P[^/]+)/(?P[^/]+)' - - _TEST = { + _TESTS = [{ 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/', 'md5': '9bc5727bcdd55251f35ad311ca74fa1e', 'info_dict': { @@ -121,12 +122,12 @@ class AdobeTVIE(AdobeTVBaseIE): 'ext': 'mp4', 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', - 'thumbnail': r're:https?://.*\.jpg$', + 'thumbnail': r're:https?://.+\.jpg', 'upload_date': '20110914', 'duration': 60, 'view_count': int, }, - } + }] def _real_extract(self, url): language, show_urlname, urlname = self._match_valid_url(url).groups() @@ -159,10 +160,10 @@ def _extract_playlist_entries(self, display_id, query): class AdobeTVShowIE(AdobeTVPlaylistBaseIE): + _WORKING = False IE_NAME = 'adobetv:show' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?show/(?P[^/]+)' - - _TEST = { + _TESTS = [{ 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost', 'info_dict': { 'id': '36', @@ -170,7 +171,7 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE): 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27', }, 'playlist_mincount': 136, - } + }] _RESOURCE = 'episode' _process_data = AdobeTVBaseIE._parse_video_data @@ -195,16 +196,16 @@ def _real_extract(self, url): class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): + _WORKING = False IE_NAME = 'adobetv:channel' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?channel/(?P[^/]+)(?:/(?P[^/]+))?' - - _TEST = { + _TESTS = [{ 'url': 'http://tv.adobe.com/channel/development', 'info_dict': { 'id': 'development', }, 'playlist_mincount': 96, - } + }] _RESOURCE = 'show' def _process_data(self, show_data): @@ -231,8 +232,7 @@ class AdobeTVVideoIE(AdobeTVBaseIE): IE_NAME = 'adobetv:video' _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P\d+)' _EMBED_REGEX = [r']+src=[\'"](?P(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]'] - - _TEST = { + _TESTS = [{ # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners 'url': 'https://video.tv.adobe.com/v/2456/', 'md5': '43662b577c018ad707a63766462b1e87', @@ -242,8 +242,20 @@ class AdobeTVVideoIE(AdobeTVBaseIE): 'title': 'New experience with Acrobat DC', 'description': 'New experience with Acrobat DC', 'duration': 248.667, + 'thumbnail': r're:https?://images-tv\.adobe\.com/.+\.jpg', }, - } + }] + _WEBPAGE_TESTS = [{ + # FIXME: Invalid extension + 'url': 'https://www.adobe.com/learn/acrobat/web/customize-toolbar', + 'info_dict': { + 'id': '3463980', + 'ext': 'm3u8', + 'title': 'Adobe Acrobat: How to Customize the Toolbar for Faster PDF Editing', + 'description': 'md5:94368ab95ae24f9c1bee0cb346e03dc3', + 'duration': 97.557, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index e5c922b41f..a4a5f409ec 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -111,11 +111,9 @@ class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P - shows/[^/]+/season-\d+/episode-\d+| - (?: - (?:movie|special)s/[^/]+| - (?:shows/[^/]+/)?videos - )/[^/?#&]+ + shows/[^/?#]+/season-\d+/episode-\d+| + (?Pmovie|special)s/[^/?#]+(?P/[^/?#]+)?| + (?:shows/[^/?#]+/)?videos/[^/?#]+ )''' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', @@ -128,7 +126,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20120529', 'uploader': 'AENE-NEW', 'duration': 2592.0, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:5', 'tags': 'count:14', 'categories': ['Mountain Men'], @@ -139,10 +137,7 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Mountain Men', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], 'skip': 'Geo-restricted - This content is not available in your location.', }, { @@ -156,7 +151,7 @@ class AENetworksIE(AENetworksBaseIE): 'upload_date': '20160112', 'uploader': 'AENE-NEW', 'duration': 1277.695, - 'thumbnail': r're:^https?://.*\.jpe?g$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'chapters': 'count:4', 'tags': 'count:23', 'episode': 'Inlawful Entry', @@ -166,10 +161,53 @@ class AENetworksIE(AENetworksBaseIE): 'series': 'Duck Dynasty', 'age_limit': 0, }, - 'params': { - # m3u8 download - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams', + 'info_dict': { + 'id': '1590627395981', + 'ext': 'mp4', + 'title': 'VC Andrews\' Web of Dreams', + 'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5253.665, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:8', + 'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"], + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'VC Andrews\' Web of Dreams', + 'episode_number': 0, + 'timestamp': 1566489703.0, + 'upload_date': '20190822', }, + 'params': {'skip_download': 'm3u8'}, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story', + 'info_dict': { + 'id': '1488235587551', + 'ext': 'mp4', + 'title': 'Hunting JonBenet\'s Killer: The Untold Story', + 'description': 'md5:209869425ee392d74fe29201821e48b4', + 'uploader': 'AENE-NEW', + 'age_limit': 14, + 'duration': 5003.903, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'chapters': 'count:10', + 'tags': 'count:11', + 'series': '', + 'season': 'Season 0', + 'season_number': 0, + 'episode': 'Hunting JonBenet\'s Killer: The Untold Story', + 'episode_number': 0, + 'timestamp': 1554987697.0, + 'upload_date': '20190411', + }, + 'params': {'skip_download': 'm3u8'}, 'add_ie': ['ThePlatform'], }, { 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', @@ -198,7 +236,9 @@ class AENetworksIE(AENetworksBaseIE): }] def _real_extract(self, url): - domain, canonical = self._match_valid_url(url).groups() + domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra') + if url_type in ('movie', 'special') and not extra: + canonical += f'/full-{url_type}' return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index fed597042a..db82b56187 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -11,12 +11,11 @@ class APAIE(InfoExtractor): _EMBED_REGEX = [r']+\bsrc=(["\'])(?P(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1'] _TESTS = [{ 'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029', - 'md5': '2b12292faeb0a7d930c778c7a5b4759b', 'info_dict': { 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029', 'ext': 'mp4', 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:https?://kf-vn\.sf\.apa\.at/vn/.+\.jpg', }, }, { 'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78', @@ -28,6 +27,15 @@ class APAIE(InfoExtractor): 'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.vol.at/blue-man-group/5593454', + 'info_dict': { + 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029', + 'ext': 'mp4', + 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029', + 'thumbnail': r're:https?://kf-vn\.sf\.apa\.at/vn/.+\.jpg', + }, + }] def _real_extract(self, url): mobj = self._match_valid_url(url) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 2849d9fd5b..1864ddbfd9 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -16,6 +16,7 @@ dict_get, extract_attributes, get_element_by_id, + get_element_text_and_html_by_tag, int_or_none, join_nonempty, js_to_json, @@ -32,7 +33,6 @@ unified_timestamp, url_or_none, urlhandle_detect_ext, - variadic, ) @@ -72,6 +72,7 @@ class ArchiveOrgIE(InfoExtractor): 'display_id': 'Cops-v2.mp4', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'duration': 1091.96, + 'track': 'Cops-v2', }, }, { 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', @@ -86,6 +87,7 @@ class ArchiveOrgIE(InfoExtractor): 'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'duration': 59.77, 'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg', + 'track': 'Commercial-JFK1960ElectionAdCampaignJingle', }, }, { 'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg', @@ -102,6 +104,7 @@ class ArchiveOrgIE(InfoExtractor): 'duration': 59.51, 'license': 'http://creativecommons.org/licenses/publicdomain/', 'thumbnail': r're:https://archive\.org/download/.*\.jpg', + 'track': 'Commercial-Nixon1960ElectionAdToughonDefense', }, }, { 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16', @@ -182,6 +185,7 @@ class ArchiveOrgIE(InfoExtractor): 'duration': 130.46, 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg', 'display_id': 'irelandthemakingofarepublicreel1_01.mov', + 'track': 'irelandthemakingofarepublicreel1 01', }, }, { 'md5': '67335ee3b23a0da930841981c1e79b02', @@ -192,6 +196,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'irelandthemakingofarepublicreel1_02.mov', 'display_id': 'irelandthemakingofarepublicreel1_02.mov', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg', + 'track': 'irelandthemakingofarepublicreel1 02', }, }, { 'md5': 'e470e86787893603f4a341a16c281eb5', @@ -202,6 +207,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'irelandthemakingofarepublicreel2.mov', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'display_id': 'irelandthemakingofarepublicreel2.mov', + 'track': 'irelandthemakingofarepublicreel2', }, }, ], @@ -225,19 +231,29 @@ class ArchiveOrgIE(InfoExtractor): 'release_date': '19950402', 'timestamp': 1084927901, }, + }, { + # metadata['metadata']['description'] is a list of strings instead of str + 'url': 'https://archive.org/details/pra-KZ1908.02', + 'info_dict': { + 'id': 'pra-KZ1908.02', + 'ext': 'mp3', + 'display_id': 'KZ1908.02_01.wav', + 'title': 'Crips and Bloods speak about gang life', + 'description': 'md5:2b56b35ff021311e3554b47a285e70b3', + 'uploader': 'jake@archive.org', + 'duration': 1733.74, + 'track': 'KZ1908.02 01', + 'track_number': 1, + 'timestamp': 1336026026, + 'upload_date': '20120503', + 'release_year': 1992, + }, }] @staticmethod def _playlist_data(webpage): - element = re.findall(r'''(?xs) - - ''', webpage)[0] - - return json.loads(extract_attributes(element)['value']) + element = get_element_text_and_html_by_tag('play-av', webpage)[1] + return json.loads(extract_attributes(element)['playlist']) def _real_extract(self, url): video_id = urllib.parse.unquote_plus(self._match_id(url)) @@ -274,34 +290,40 @@ def _real_extract(self, url): m = metadata['metadata'] identifier = m['identifier'] - info = { + info = traverse_obj(m, { + 'title': ('title', {str}), + 'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any), + 'uploader': (('uploader', 'adder'), {str}, any), + 'creators': ('creator', (None, ...), {str}, filter, all, filter), + 'license': ('licenseurl', {url_or_none}), + 'release_date': ('date', {unified_strdate}), + 'timestamp': (('publicdate', 'addeddate'), {unified_timestamp}, any), + 'location': ('venue', {str}), + 'release_year': ('year', {int_or_none}), + }) + info.update({ 'id': identifier, - 'title': m['title'], - 'description': clean_html(m.get('description')), - 'uploader': dict_get(m, ['uploader', 'adder']), - 'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})), - 'license': m.get('licenseurl'), - 'release_date': unified_strdate(m.get('date')), - 'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])), 'webpage_url': f'https://archive.org/details/{identifier}', - 'location': m.get('venue'), - 'release_year': int_or_none(m.get('year'))} + }) for f in metadata['files']: if f['name'] in entries: entries[f['name']] = merge_dicts(entries[f['name']], { 'id': identifier + '/' + f['name'], - 'title': f.get('title') or f['name'], - 'display_id': f['name'], - 'description': clean_html(f.get('description')), - 'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})), - 'duration': parse_duration(f.get('length')), - 'track_number': int_or_none(f.get('track')), - 'album': f.get('album'), - 'discnumber': int_or_none(f.get('disc')), - 'release_year': int_or_none(f.get('year'))}) + **traverse_obj(f, { + 'title': (('title', 'name'), {str}, any), + 'display_id': ('name', {str}), + 'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any), + 'creators': ('creator', (None, ...), {str}, filter, all, filter), + 'duration': ('length', {parse_duration}), + 'track_number': ('track', {int_or_none}), + 'album': ('album', {str}), + 'discnumber': ('disc', {int_or_none}), + 'release_year': ('year', {int_or_none}), + }), + }) entry = entries[f['name']] - elif traverse_obj(f, 'original', expected_type=str) in entries: + elif traverse_obj(f, ('original', {str})) in entries: entry = entries[f['original']] else: continue diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index 8da9bc4ccb..903f9c739a 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -62,6 +62,20 @@ class ArcPublishingIE(InfoExtractor): 'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.uppermichiganssource.com/2025/07/18/scattered-showers-storms-bring-heavy-rain-potential/', + 'info_dict': { + 'id': '508116f7-e999-48db-b7c2-60a04842679b', + 'ext': 'mp4', + 'title': 'Scattered showers & storms bring heavy rain potential', + 'description': 'Scattered showers & storms bring heavy rain potential', + 'duration': 2016, + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1752881287, + 'upload_date': '20250718', + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + }] _POWA_DEFAULTS = [ (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'), ([ diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 142d4b066b..3f17da463d 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -51,8 +51,8 @@ class ArteTVIE(ArteTVBaseIE): 'id': '109067-000-A', 'ext': 'mp4', 'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739', + 'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+', 'timestamp': 1713927600, - 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530', 'duration': 7599, 'title': 'La loi de Téhéran', 'upload_date': '20240424', @@ -62,6 +62,7 @@ class ArteTVIE(ArteTVBaseIE): 'fr-forced': 'mincount:1', }, }, + 'skip': 'Invalid URL', }, { 'note': 'age-restricted', 'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/', @@ -69,9 +70,9 @@ class ArteTVIE(ArteTVBaseIE): 'id': '006785-000-A', 'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba', 'title': 'The Element of Crime', + 'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+', 'timestamp': 1696111200, 'duration': 5849, - 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530', 'upload_date': '20230930', 'ext': 'mp4', }, @@ -252,6 +253,30 @@ class ArteTVEmbedIE(InfoExtractor): 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # FIXME: Embed detection + 'url': 'https://timesofmalta.com/article/watch-sunken-warships-north-sea-arte.1108358', + 'info_dict': { + 'id': '110288-000-A', + 'ext': 'mp4', + 'title': 'Danger on the Seabed', + 'alt_title': 'Sunken Warships in the North Sea', + 'description': 'md5:a2c84cbad37d280bddb6484087120add', + 'duration': 3148, + 'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+', + 'timestamp': 1741686820, + 'upload_date': '20250311', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + # FIXME: Embed detection + 'url': 'https://www.eurockeennes.fr/en-live/', + 'info_dict': { + 'id': 'en-live', + 'title': 'Les Eurocks en live | Les Eurockéennes de Belfort – 3-4-5-6 juillet 2025 sur la Presqu'Île du Malsaucy', + }, + 'playlist_count': 4, + }] def _real_extract(self, url): qs = parse_qs(url) @@ -304,9 +329,9 @@ class ArteTVCategoryIE(ArteTVBaseIE): 'info_dict': { 'id': 'politics-and-society', 'title': 'Politics and society', - 'description': 'Investigative documentary series, geopolitical analysis, and international commentary', + 'description': 'Watch documentaries and reportage about politics, society and current affairs.', }, - 'playlist_mincount': 13, + 'playlist_mincount': 3, }] @classmethod diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py deleted file mode 100644 index d7fcf44bd9..0000000000 --- a/yt_dlp/extractor/bandaichannel.py +++ /dev/null @@ -1,33 +0,0 @@ -from .brightcove import BrightcoveNewBaseIE -from ..utils import extract_attributes - - -class BandaiChannelIE(BrightcoveNewBaseIE): - IE_NAME = 'bandaichannel' - _VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P\d+/\d+)' - _TESTS = [{ - 'url': 'https://www.b-ch.com/titles/514/001', - 'md5': 'a0f2d787baa5729bed71108257f613a4', - 'info_dict': { - 'id': '6128044564001', - 'ext': 'mp4', - 'title': 'メタルファイターMIKU 第1話', - 'timestamp': 1580354056, - 'uploader_id': '5797077852001', - 'upload_date': '20200130', - 'duration': 1387.733, - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - attrs = extract_attributes(self._search_regex( - r'(]+\bid="bcplayer"[^>]*>)', webpage, 'player')) - bc = self._download_json( - 'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'], - video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc'] - return self._parse_brightcove_metadata(bc, bc['id']) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 939c2800e6..0a8f88fa8c 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -7,6 +7,7 @@ from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, + clean_html, extract_attributes, float_or_none, int_or_none, @@ -19,7 +20,7 @@ url_or_none, urljoin, ) -from ..utils.traversal import find_element, traverse_obj +from ..utils.traversal import find_element, find_elements, traverse_obj class BandcampIE(InfoExtractor): @@ -35,14 +36,12 @@ class BandcampIE(InfoExtractor): 'duration': 9.8485, 'uploader': 'youtube-dl "\'/\\ä↭', 'upload_date': '20121129', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1354224127, 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', - 'album_artist': 'youtube-dl "\'/\\ä↭', 'track_id': '1812978515', - 'artist': 'youtube-dl "\'/\\ä↭', 'uploader_url': 'https://youtube-dl.bandcamp.com', 'uploader_id': 'youtube-dl', - 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', 'artists': ['youtube-dl "\'/\\ä↭'], 'album_artists': ['youtube-dl "\'/\\ä↭'], }, @@ -53,10 +52,9 @@ class BandcampIE(InfoExtractor): 'info_dict': { 'id': '2650410135', 'ext': 'm4a', - 'acodec': r're:[fa]lac', 'title': 'Ben Prunty - Lanius (Battle)', - 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Ben Prunty', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1396508491, 'upload_date': '20140403', 'release_timestamp': 1396483200, @@ -65,11 +63,12 @@ class BandcampIE(InfoExtractor): 'track': 'Lanius (Battle)', 'track_number': 1, 'track_id': '2650410135', - 'artist': 'Ben Prunty', - 'album_artist': 'Ben Prunty', 'album': 'FTL: Advanced Edition Soundtrack', 'uploader_url': 'https://benprunty.bandcamp.com', 'uploader_id': 'benprunty', + 'tags': ['soundtrack', 'chiptunes', 'cinematic', 'electronic', 'video game music', 'California'], + 'artists': ['Ben Prunty'], + 'album_artists': ['Ben Prunty'], }, }, { # no free download, mp3 128 @@ -79,8 +78,8 @@ class BandcampIE(InfoExtractor): 'id': '2584466013', 'ext': 'mp3', 'title': 'Mastodon - Hail to Fire', - 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Mastodon', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1322005399, 'upload_date': '20111122', 'release_timestamp': 1076112000, @@ -89,11 +88,12 @@ class BandcampIE(InfoExtractor): 'track': 'Hail to Fire', 'track_number': 5, 'track_id': '2584466013', - 'artist': 'Mastodon', - 'album_artist': 'Mastodon', 'album': 'Call of the Mastodon', 'uploader_url': 'https://relapsealumni.bandcamp.com', 'uploader_id': 'relapsealumni', + 'tags': ['Philadelphia'], + 'artists': ['Mastodon'], + 'album_artists': ['Mastodon'], }, }, { # track from compilation album (artist/album_artist difference) @@ -103,8 +103,8 @@ class BandcampIE(InfoExtractor): 'id': '1978174799', 'ext': 'mp3', 'title': 'submerse - submerse - Safehouse', - 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'submerse', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1480779297, 'upload_date': '20161203', 'release_timestamp': 1481068800, @@ -113,11 +113,36 @@ class BandcampIE(InfoExtractor): 'track': 'submerse - Safehouse', 'track_number': 3, 'track_id': '1978174799', - 'artist': 'submerse', - 'album_artist': 'Diskotopia', 'album': 'DSK F/W 2016-2017 Free Compilation', 'uploader_url': 'https://diskotopia.bandcamp.com', 'uploader_id': 'diskotopia', + 'tags': ['Japan'], + 'artists': ['submerse'], + 'album_artists': ['Diskotopia'], + }, + }] + _WEBPAGE_TESTS = [{ + # FIXME: Embed detection + 'url': 'https://www.punknews.org/article/85809/stay-inside-super-sonic', + 'info_dict': { + 'id': '2475540375', + 'ext': 'mp3', + 'title': 'Stay Inside - Super Sonic', + 'album': 'Lunger', + 'album_artists': ['Stay Inside'], + 'artists': ['Stay Inside'], + 'duration': 166.157, + 'release_date': '20251003', + 'release_timestamp': 1759449600.0, + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', + 'timestamp': 1749473029.0, + 'track': 'Super Sonic', + 'track_id': '2475540375', + 'track_number': 3, + 'upload_date': '20250609', + 'uploader': 'Stay Inside', + 'uploader_id': 'stayinside', + 'uploader_url': 'https://stayinside.bandcamp.com', }, }] @@ -252,6 +277,7 @@ def _real_extract(self, url): 'album': embed.get('album_title'), 'album_artist': album_artist, 'formats': formats, + 'tags': traverse_obj(webpage, ({find_elements(cls='tag')}, ..., {clean_html})), } @@ -268,10 +294,10 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'id': '1353101989', 'ext': 'mp3', 'title': 'Blazo - Intro', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1311756226, 'upload_date': '20110727', 'uploader': 'Blazo', - 'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg', 'album_artists': ['Blazo'], 'uploader_url': 'https://blazo.bandcamp.com', 'release_date': '20110727', @@ -291,6 +317,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'id': '38097443', 'ext': 'mp3', 'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)', + 'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg', 'timestamp': 1311757238, 'upload_date': '20110727', 'uploader': 'Blazo', @@ -304,7 +331,6 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'blazo', 'album_artists': ['Blazo'], 'artists': ['Blazo'], - 'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg', 'release_timestamp': 1311724800.0, }, }, diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py deleted file mode 100644 index ac45dd4779..0000000000 --- a/yt_dlp/extractor/bellmedia.py +++ /dev/null @@ -1,91 +0,0 @@ -from .common import InfoExtractor - - -class BellMediaIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)? - (?P - (?: - ctv| - tsn| - bnn(?:bloomberg)?| - thecomedynetwork| - discovery| - discoveryvelocity| - sciencechannel| - investigationdiscovery| - animalplanet| - bravo| - mtv| - space| - etalk| - marilyn - )\.ca| - (?:much|cp24)\.com - )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' - _TESTS = [{ - 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070', - 'md5': '3e5b8e38370741d5089da79161646635', - 'info_dict': { - 'id': '1403070', - 'ext': 'flv', - 'title': 'David Cockfield\'s Top Picks', - 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3', - 'upload_date': '20180525', - 'timestamp': 1527288600, - 'season_id': '73997', - 'season': '2018', - 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg', - 'tags': [], - 'categories': ['ETFs'], - 'season_number': 8, - 'duration': 272.038, - 'series': 'Market Call Tonight', - }, - }, { - 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', - 'only_matching': True, - }, { - 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', - 'only_matching': True, - }, { - 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', - 'only_matching': True, - }, { - 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', - 'only_matching': True, - }, { - 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', - 'only_matching': True, - }, { - 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', - 'only_matching': True, - }, { - 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', - 'only_matching': True, - }, { - 'url': 'http://www.etalk.ca/video?videoid=663455', - 'only_matching': True, - }, { - 'url': 'https://www.cp24.com/video?clipId=1982548', - 'only_matching': True, - }] - _DOMAINS = { - 'thecomedynetwork': 'comedy', - 'discoveryvelocity': 'discvel', - 'sciencechannel': 'discsci', - 'investigationdiscovery': 'invdisc', - 'animalplanet': 'aniplan', - 'etalk': 'ctv', - 'bnnbloomberg': 'bnn', - 'marilyn': 'ctv_marilyn', - } - - def _real_extract(self, url): - domain, video_id = self._match_valid_url(url).groups() - domain = domain.split('.')[0] - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', - 'ie_key': 'NineCNineMedia', - } diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0c6535fc72..d00ac63176 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -353,7 +353,7 @@ class BiliBiliIE(BilibiliBaseIE): 'id': 'BV1bK411W797', 'title': '物语中的人物是如何吐槽自己的OP的', }, - 'playlist_count': 18, + 'playlist_count': 23, 'playlist': [{ 'info_dict': { 'id': 'BV1bK411W797_p1', @@ -373,6 +373,7 @@ class BiliBiliIE(BilibiliBaseIE): '_old_archive_ids': ['bilibili 498159642_part1'], }, }], + 'params': {'playlist_items': '2'}, }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -899,13 +900,26 @@ def _real_extract(self, url): 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id}, headers=headers)) - geo_blocked = traverse_obj(play_info, ( - ('result', ('raw', 'data')), 'plugins', - lambda _, v: v['name'] == 'AreaLimitPanel', - 'config', 'is_block', {bool}, any)) - premium_only = play_info.get('code') == -10403 + # play_info can be structured in at least three different ways, e.g.: + # 1.) play_info['result']['video_info'] and play_info['code'] + # 2.) play_info['raw']['data']['video_info'] and play_info['code'] + # 3.) play_info['data']['result']['video_info'] and play_info['data']['code'] + # So we need to transform any of the above into a common structure + status_code = play_info.get('code') + if 'raw' in play_info: + play_info = play_info['raw'] + if 'data' in play_info: + play_info = play_info['data'] + if status_code is None: + status_code = play_info.get('code') + if 'result' in play_info: + play_info = play_info['result'] - video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {} + geo_blocked = traverse_obj(play_info, ( + 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any)) + premium_only = status_code == -10403 + + video_info = traverse_obj(play_info, ('video_info', {dict})) or {} formats = self.extract_formats(video_info) if not formats: @@ -915,8 +929,8 @@ def _real_extract(self, url): self.raise_login_required('This video is for premium members only') if traverse_obj(play_info, (( - ('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' - (('result', ('raw', 'data')), 'play_video_type'), # 'preview' vs 'whole' vs 'none' + ('play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' vs 'PLAY_NONE' + 'play_video_type', # 'preview' vs 'whole' vs 'none' ), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})): self.report_warning( 'Only preview format is available, ' @@ -1002,6 +1016,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1057,6 +1072,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], + 'params': {'playlist_items': '2'}, }] def _real_extract(self, url): @@ -1847,7 +1863,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): 'thumbnail': r're:^https?://.+\.jpg', 'timestamp': 1564836614, 'upload_date': '20190803', - 'uploader': 'tsukimi-つきみぐー', + 'uploader': '十六夜tsukimiつきみぐ', 'view_count': int, }, } @@ -1902,10 +1918,10 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE): 'url': 'https://www.bilibili.com/audio/am10624', 'info_dict': { 'id': '10624', - 'title': '每日新曲推荐(每日11:00更新)', + 'title': '新曲推荐', 'description': '每天11:00更新,为你推送最新音乐', }, - 'playlist_count': 19, + 'playlist_count': 16, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 535890979b..c14ff1f142 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -1,16 +1,27 @@ from .common import InfoExtractor -from ..utils import parse_iso8601 +from ..utils import ( + UnsupportedError, + float_or_none, + int_or_none, + join_nonempty, + jwt_decode_hs256, + mimetype2ext, + parse_iso8601, + parse_qs, + url_or_none, +) +from ..utils.traversal import traverse_obj class BlackboardCollaborateIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// - (?P[a-z-]+)\.bbcollab\.com/ + (?P[a-z]+)(?:-lti)?\.bbcollab\.com/ (?: collab/ui/session/playback/load| recording )/ - (?P[^/]+)''' + (?P[^/?#]+)''' _TESTS = [ { 'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256', @@ -19,9 +30,55 @@ class BlackboardCollaborateIE(InfoExtractor): 'id': '0a633b6a88824deb8c918f470b22b256', 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1', 'ext': 'mp4', - 'duration': 1896000, - 'timestamp': 1620331399, + 'duration': 1896, + 'timestamp': 1620333295, 'upload_date': '20210506', + 'subtitles': { + 'live_chat': 'mincount:1', + }, + }, + }, + { + 'url': 'https://eu.bbcollab.com/collab/ui/session/playback/load/4bde2dee104f40289a10f8e554270600', + 'md5': '108db6a8f83dcb0c2a07793649581865', + 'info_dict': { + 'id': '4bde2dee104f40289a10f8e554270600', + 'title': 'Meeting - Azerbaycanca erize formasi', + 'ext': 'mp4', + 'duration': 880, + 'timestamp': 1671176868, + 'upload_date': '20221216', + }, + }, + { + 'url': 'https://eu.bbcollab.com/recording/f83be390ecff46c0bf7dccb9dddcf5f6', + 'md5': 'e3b0b88ddf7847eae4b4c0e2d40b83a5', + 'info_dict': { + 'id': 'f83be390ecff46c0bf7dccb9dddcf5f6', + 'title': 'Keynote lecture by Laura Carvalho - recording_1', + 'ext': 'mp4', + 'duration': 5506, + 'timestamp': 1662721705, + 'upload_date': '20220909', + 'subtitles': { + 'live_chat': 'mincount:1', + }, + }, + }, + { + 'url': 'https://eu.bbcollab.com/recording/c3e1e7c9e83d4cd9981c93c74888d496', + 'md5': 'fdb2d8c43d66fbc0b0b74ef5e604eb1f', + 'info_dict': { + 'id': 'c3e1e7c9e83d4cd9981c93c74888d496', + 'title': 'International Ally User Group - recording_18', + 'ext': 'mp4', + 'duration': 3479, + 'timestamp': 1721919621, + 'upload_date': '20240725', + 'subtitles': { + 'en': 'mincount:1', + 'live_chat': 'mincount:1', + }, }, }, { @@ -42,22 +99,81 @@ class BlackboardCollaborateIE(InfoExtractor): }, ] + def _call_api(self, region, video_id, path=None, token=None, note=None, fatal=False): + # Ref: https://github.com/blackboard/BBDN-Collab-Postman-REST + return self._download_json( + join_nonempty(f'https://{region}.bbcollab.com/collab/api/csa/recordings', video_id, path, delim='/'), + video_id, note or 'Downloading JSON metadata', fatal=fatal, + headers={'Authorization': f'Bearer {token}'} if token else None) + def _real_extract(self, url): mobj = self._match_valid_url(url) region = mobj.group('region') video_id = mobj.group('id') - info = self._download_json( - f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id) - duration = info.get('duration') - title = info['name'] - upload_date = info.get('created') - streams = info['streams'] - formats = [{'format_id': k, 'url': url} for k, url in streams.items()] + token = parse_qs(url).get('authToken', [None])[-1] + + video_info = self._call_api(region, video_id, path='data/secure', token=token, note='Trying auth token') + if video_info: + video_extra = self._call_api(region, video_id, token=token, note='Retrieving extra attributes') + else: + video_info = self._call_api(region, video_id, path='data', note='Trying fallback', fatal=True) + video_extra = {} + + formats = traverse_obj(video_info, ('extStreams', lambda _, v: url_or_none(v['streamUrl']), { + 'url': 'streamUrl', + 'ext': ('contentType', {mimetype2ext}), + 'aspect_ratio': ('aspectRatio', {float_or_none}), + })) + + if filesize := traverse_obj(video_extra, ('storageSize', {int_or_none})): + for fmt in formats: + fmt['filesize'] = filesize + + subtitles = {} + for subs in traverse_obj(video_info, ('subtitles', lambda _, v: url_or_none(v['url']))): + subtitles.setdefault(subs.get('lang') or 'und', []).append({ + 'name': traverse_obj(subs, ('label', {str})), + 'url': subs['url'], + }) + + for live_chat_url in traverse_obj(video_info, ('chats', ..., 'url', {url_or_none})): + subtitles.setdefault('live_chat', []).append({'url': live_chat_url}) return { - 'duration': duration, + **traverse_obj(video_info, { + 'title': ('name', {str}), + 'timestamp': ('created', {parse_iso8601}), + 'duration': ('duration', {int_or_none(scale=1000)}), + }), 'formats': formats, 'id': video_id, - 'timestamp': parse_iso8601(upload_date), - 'title': title, + 'subtitles': subtitles, } + + +class BlackboardCollaborateLaunchIE(InfoExtractor): + _VALID_URL = r'https?://[a-z]+\.bbcollab\.com/launch/(?P[^/?#]+)' + + _TESTS = [ + { + 'url': 'https://au.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzQwNDE2NDgzLCJpYXQiOjE3NDA0MTYxODMsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3MzI4YzRjZTNmM2U0ZTcwYmY3MTY3N2RkZTgzMzk2NSIsImNvbnN1bWVySWQiOiJhM2Q3NGM0Y2QyZGU0MGJmODFkMjFlODNlMmEzNzM5MCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.xuELw4EafEwUMoYcCHidGn4Tw9O1QCbYHzYGJUl0kKk', + 'only_matching': True, + }, + { + 'url': 'https://us.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNjk0NDgxOTc3LCJpYXQiOjE2OTQ0ODE2NzcsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3YWU0MTFhNTU3NjU0OWFiOTZlYjVmMTM1YmY3MWU5MCIsImNvbnN1bWVySWQiOiJBRUU2MEI4MDI2QzM3ODU2RjMwMzNEN0ZEOTQzMTFFNSIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.yOhRZNaIjXYoMYMpcTzgjZJCnIFaYf2cAzbco8OAxlY', + 'only_matching': True, + }, + { + 'url': 'https://eu.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzUyNjgyODYwLCJpYXQiOjE3NTI2ODI1NjAsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI4MjQzYjFiODg2Nzk0NTZkYjkwN2NmNDZmZmE1MmFhZiIsImNvbnN1bWVySWQiOiI5ZTY4NzYwZWJiNzM0MzRiYWY3NTQyZjA1YmJkOTMzMCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.Xj4ymojYLwZ1vKPKZ-KxjpqQvFXoJekjRaG0npngwWs', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + token = self._match_id(url) + video_id = jwt_decode_hs256(token)['resourceAccessTicket']['resourceId'] + + redirect_url = self._request_webpage(url, video_id).url + if self.suitable(redirect_url): + raise UnsupportedError(redirect_url) + return self.url_result(redirect_url, BlackboardCollaborateIE, video_id) diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index 1614b6f947..6ff72e70d2 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -19,8 +19,19 @@ class BloggerIE(InfoExtractor): 'id': 'BLOGGER-video-3c740e3a49197e16-796', 'title': 'BLOGGER-video-3c740e3a49197e16-796', 'ext': 'mp4', - 'thumbnail': r're:^https?://.*', 'duration': 76.068, + 'thumbnail': r're:https?://i9\.ytimg\.com/vi_blogger/.+', + }, + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-12203', + 'ext': 'mp4', + 'title': 'BLOGGER-video-3c740e3a49197e16-12203', + 'duration': 76.068, + 'thumbnail': r're:https?://i9\.ytimg\.com/vi_blogger/.+', }, }] diff --git a/yt_dlp/extractor/btvplus.py b/yt_dlp/extractor/btvplus.py new file mode 100644 index 0000000000..531ace1471 --- /dev/null +++ b/yt_dlp/extractor/btvplus.py @@ -0,0 +1,73 @@ +from .common import InfoExtractor +from ..utils import ( + bug_reports_message, + clean_html, + get_element_by_class, + js_to_json, + mimetype2ext, + strip_or_none, + url_or_none, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class BTVPlusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?btvplus\.bg/produkt/(?:predavaniya|seriali|novini)/(?P\d+)' + _TESTS = [{ + 'url': 'https://btvplus.bg/produkt/predavaniya/67271/btv-reporterite/btv-reporterite-12-07-2025-g', + 'info_dict': { + 'ext': 'mp4', + 'id': '67271', + 'title': 'bTV Репортерите - 12.07.2025 г.', + 'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jul2025/2113606319.jpg', + }, + }, { + 'url': 'https://btvplus.bg/produkt/seriali/66942/sezon-2/plen-sezon-2-epizod-55', + 'info_dict': { + 'ext': 'mp4', + 'id': '66942', + 'title': 'Плен - сезон 2, епизод 55', + 'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jun2025/2113595104.jpg', + }, + }, { + 'url': 'https://btvplus.bg/produkt/novini/67270/btv-novinite-centralna-emisija-12-07-2025', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + player_url = self._search_regex( + r'var\s+videoUrl\s*=\s*[\'"]([^\'"]+)[\'"]', + webpage, 'player URL') + + player_config = self._download_json( + urljoin('https://btvplus.bg', player_url), video_id)['config'] + + videojs_data = self._search_json( + r'videojs\(["\'][^"\']+["\'],', player_config, 'videojs data', + video_id, transform_source=js_to_json) + formats = [] + subtitles = {} + for src in traverse_obj(videojs_data, ('sources', lambda _, v: url_or_none(v['src']))): + ext = mimetype2ext(src.get('type')) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + src['src'], video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + self.report_warning(f'Unknown format type {ext}{bug_reports_message()}') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': ( + strip_or_none(self._og_search_title(webpage, default=None)) + or clean_html(get_element_by_class('product-title', webpage))), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + } diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index 9e9e89a801..a90fceaacf 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -19,18 +19,16 @@ class CloudflareStreamIE(InfoExtractor): 'id': '31c9291ab41fac05471db4e73aa11717', 'ext': 'mp4', 'title': '31c9291ab41fac05471db4e73aa11717', - 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', - }, - 'params': { - 'skip_download': 'm3u8', + 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg', }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e', 'info_dict': { 'id': '0e8e040aec776862e1d632a699edf59e', 'ext': 'mp4', 'title': '0e8e040aec776862e1d632a699edf59e', - 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', + 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg', }, }, { 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', @@ -54,11 +52,21 @@ class CloudflareStreamIE(InfoExtractor): 'id': 'eaef9dea5159cf968be84241b5cedfe7', 'ext': 'mp4', 'title': 'eaef9dea5159cf968be84241b5cedfe7', - 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', + 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg', }, 'params': { + 'extractor_args': {'generic': {'impersonate': ['chrome']}}, 'skip_download': 'm3u8', }, + }, { + # FIXME: Embed detection + 'url': 'https://www.cloudflare.com/developer-platform/products/cloudflare-stream/', + 'info_dict': { + 'id': 'e7bd2dd67e0f8860b4ae81e33a966049', + 'ext': 'mp4', + 'title': 'e7bd2dd67e0f8860b4ae81e33a966049', + 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b75e806233..4a4b5416d0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -38,7 +38,6 @@ TransportError, network_exceptions, ) -from ..networking.impersonate import ImpersonateTarget from ..utils import ( IDENTITY, JSON_LD_RE, @@ -259,6 +258,11 @@ class InfoExtractor: * key The key (as hex) used to decrypt fragments. If `key` is given, any key URI will be ignored * iv The IV (as hex) used to decrypt fragments + * impersonate Impersonate target(s). Can be any of the following entities: + * an instance of yt_dlp.networking.impersonate.ImpersonateTarget + * a string in the format of CLIENT[:OS] + * a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances + * a boolean value; True means any impersonate target is sufficient * downloader_options A dictionary of downloader options (For internal use only) * http_chunk_size Chunk size for HTTP downloads @@ -336,6 +340,7 @@ class InfoExtractor: * "name": Name or description of the subtitles * "http_headers": A dictionary of additional HTTP headers to add to the request. + * "impersonate": Impersonate target(s); same as the "formats" field "ext" will be calculated from URL if missing automatic_captions: Like 'subtitles'; contains automatically generated captions instead of normal subtitles @@ -392,6 +397,8 @@ class InfoExtractor: chapters: A list of dictionaries, with the following entries: * "start_time" - The start time of the chapter in seconds * "end_time" - The end time of the chapter in seconds + (optional: core code can determine this value from + the next chapter's start_time or the video's duration) * "title" (optional, string) heatmap: A list of dictionaries, with the following entries: * "start_time" - The start time of the data point in seconds @@ -406,7 +413,8 @@ class InfoExtractor: 'unlisted' or 'public'. Use 'InfoExtractor._availability' to set it media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer" - _old_archive_ids: A list of old archive ids needed for backward compatibility + _old_archive_ids: A list of old archive ids needed for backward + compatibility. Use yt_dlp.utils.make_archive_id to generate ids _format_sort_fields: A list of fields to use for sorting formats __post_extractor: A function to be called just before the metadata is written to either disk, logger or console. The function @@ -884,26 +892,17 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa extensions = {} - if impersonate in (True, ''): - impersonate = ImpersonateTarget() - requested_targets = [ - t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t) - for t in variadic(impersonate) - ] if impersonate else [] - - available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None) + available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate) if available_target: extensions['impersonate'] = available_target elif requested_targets: - message = 'The extractor is attempting impersonation, but ' - message += ( - 'no impersonate target is available' if not str(impersonate) - else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"') - info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation ' - 'for information on installing the required dependencies') + msg = 'The extractor is attempting impersonation' if require_impersonation: - raise ExtractorError(f'{message}; {info_msg}', expected=True) - self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True) + raise ExtractorError( + self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True), + expected=True) + self.report_warning( + self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True) try: return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions)) @@ -1783,6 +1782,59 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU r']+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data', video_id, end_pattern='', fatal=fatal, default=default, **kw) + def _search_nextjs_v13_data(self, webpage, video_id, fatal=True): + """Parses Next.js app router flight data that was introduced in Next.js v13""" + nextjs_data = {} + if not fatal and not isinstance(webpage, str): + return nextjs_data + + def flatten(flight_data): + if not isinstance(flight_data, list): + return + if len(flight_data) == 4 and flight_data[0] == '$': + _, name, _, data = flight_data + if not isinstance(data, dict): + return + children = data.pop('children', None) + if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name): + # It is useful hydration JSON data + nextjs_data[name[2:]] = data + flatten(children) + return + for f in flight_data: + flatten(f) + + flight_text = '' + # The pattern for the surrounding JS/tag should be strict as it's a hardcoded string in the next.js source + # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L189 + for flight_segment in re.findall(r']*>self\.__next_f\.push\((\[.+?\])\)', webpage): + segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False) + # Some earlier versions of next.js "optimized" away this array structure; this is unsupported + # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761 + if not isinstance(segment, list) or len(segment) != 2: + self.write_debug( + f'{video_id}: Unsupported next.js flight data structure detected', only_once=True) + continue + # Only use the relevant payload type (1 == data) + # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L11-L14 + payload_type, chunk = segment + if payload_type == 1: + flight_text += chunk + + for f in flight_text.splitlines(): + prefix, _, body = f.lstrip().partition(':') + if not re.fullmatch(r'[0-9a-f]+', prefix): + continue + # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal + if body.startswith('[') and body.endswith(']'): + flatten(self._parse_json(body, video_id, fatal=False, errnote=False)) + elif body.startswith('{') and body.endswith('}'): + data = self._parse_json(body, video_id, fatal=False, errnote=False) + if data is not None: + nextjs_data[prefix] = data + + return nextjs_data + def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) diff --git a/yt_dlp/extractor/condenast.py b/yt_dlp/extractor/condenast.py index 0c84cfdab7..318fa8943b 100644 --- a/yt_dlp/extractor/condenast.py +++ b/yt_dlp/extractor/condenast.py @@ -96,6 +96,24 @@ class CondeNastIE(InfoExtractor): 'upload_date': '20150916', 'timestamp': 1442434920, }, + }, { + # FIXME: Subtitles + 'url': 'https://www.vanityfair.com/video/watch/vf-quiz-show-squid-game-s3', + 'info_dict': { + 'id': '6862f999c1afbc5ff06b4803', + 'ext': 'mp4', + 'title': '\'Squid Game\' Cast Tests How Well They Know Each Other', + 'categories': ['Arts & Culture', 'Hollywood'], + 'description': 'md5:7a9c668a1fc87648e77da13842ec1534', + 'duration': 955, + 'season': 'Season 1', + 'series': 'Quizzing Each Other', + 'tags': 'count:2', + 'thumbnail': r're:https?://dwgyu36up6iuz\.cloudfront\.net/.+\.jpg', + 'timestamp': 1751341306, + 'upload_date': '20250701', + 'uploader': 'vanityfair', + }, }, { 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', 'only_matching': True, diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py index abd3322a95..29bbc2fe6a 100644 --- a/yt_dlp/extractor/crooksandliars.py +++ b/yt_dlp/extractor/crooksandliars.py @@ -8,7 +8,6 @@ class CrooksAndLiarsIE(InfoExtractor): _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P[A-Za-z0-9]+)' _EMBED_REGEX = [r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1'] - _TESTS = [{ 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', 'info_dict': { @@ -16,7 +15,7 @@ class CrooksAndLiarsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:https?://crooksandliars\.com/files/.+', 'timestamp': 1428207000, 'upload_date': '20150405', 'uploader': 'Heather', @@ -26,6 +25,20 @@ class CrooksAndLiarsIE(InfoExtractor): 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', + 'info_dict': { + 'id': '8RUoRhRi', + 'ext': 'mp4', + 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', + 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', + 'duration': 236, + 'thumbnail': r're:https?://crooksandliars\.com/files/.+', + 'timestamp': 1428207000, + 'upload_date': '20150405', + 'uploader': 'Heather', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py deleted file mode 100644 index a41dab11b1..0000000000 --- a/yt_dlp/extractor/ctv.py +++ /dev/null @@ -1,49 +0,0 @@ -from .common import InfoExtractor - - -class CTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P(?:show|movie)s/[^/]+/[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88', - 'info_dict': { - 'id': '2102249', - 'ext': 'flv', - 'title': 'Wednesday, December 23, 2020', - 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.', - 'timestamp': 1608732000, - 'upload_date': '20201223', - 'series': 'Your Morning', - 'season': '2020-2021', - 'season_number': 5, - 'episode_number': 88, - 'tags': ['Your Morning'], - 'categories': ['Talk Show'], - 'duration': 7467.126, - }, - }, { - 'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - content = self._download_json( - 'https://www.ctv.ca/space-graphql/graphql', display_id, query={ - 'query': '''{ - resolvedPath(path: "/%s") { - lastSegment { - content { - ... on AxisContent { - axisId - videoPlayerDestCode - } - } - } - } -}''' % display_id, # noqa: UP031 - })['data']['resolvedPath']['lastSegment']['content'] - video_id = content['axisId'] - return self.url_result( - '9c9media:{}:{}'.format(content['videoPlayerDestCode'], video_id), - 'NineCNineMedia', video_id) diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 540676ac0f..3058a0f7b5 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -19,11 +19,22 @@ class DailyMailIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', + 'thumbnail': r're:https?://i\.dailymail\.co\.uk/.+\.jpg', }, }, { 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.daily-news.gr/lifestyle/%ce%b7-%cf%84%cf%81%ce%b1%ce%b3%ce%bf%cf%85%ce%b4%ce%af%cf%83%cf%84%cf%81%ce%b9%ce%b1-jessie-j-%ce%bc%ce%bf%ce%b9%cf%81%ce%ac%cf%83%cf%84%ce%b7%ce%ba%ce%b5-%cf%83%cf%85%ce%b3%ce%ba%ce%bb%ce%bf%ce%bd/', + 'info_dict': { + 'id': '3463585', + 'ext': 'mp4', + 'title': 'Jessie J reveals she has undergone surgery as she shares clips', + 'description': 'md5:9fa9a25feca5b656b0b4a39c922fad1e', + 'thumbnail': r're:https?://i\.dailymail\.co\.uk/.+\.jpg', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index a81f0a26dd..d27a027702 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -119,13 +119,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor): _EMBED_REGEX = [rf'(?ix)<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)["\'](?P{_VALID_URL[5:]})'] _TESTS = [{ 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', - 'md5': '074b95bdee76b9e3654137aee9c79dfe', 'info_dict': { 'id': 'x5kesuj', 'ext': 'mp4', 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller', 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller', 'duration': 187, + 'tags': 'count:5', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1493651285, 'upload_date': '20170501', 'uploader': 'Deadline', @@ -133,18 +134,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'age_limit': 0, 'view_count': int, 'like_count': int, - 'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'], - 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080', }, }, { 'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true', - 'md5': 'e2f9717c6604773f963f069ca53a07f8', 'info_dict': { 'id': 'x89eyek', 'ext': 'mp4', - 'title': "En quête d'esprit du 27/03/2022", + 'title': 'En quête d\'esprit du 27/03/2022', 'description': 'md5:66542b9f4df2eb23f314fc097488e553', 'duration': 2756, + 'tags': 'count:1', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1648383669, 'upload_date': '20220327', 'uploader': 'CNEWS', @@ -152,8 +152,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'age_limit': 0, 'view_count': int, 'like_count': int, - 'tags': ['en_quete_d_esprit'], - 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080', }, }, { 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', @@ -163,8 +161,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', 'description': 'Several come bundled with the Steam Controller.', - 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', 'duration': 74, + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1425657362, 'upload_date': '20150306', 'uploader': 'IGN', @@ -183,10 +181,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader': 'Katy Perry', 'upload_date': '20130905', }, - 'params': { - 'skip_download': True, - }, - 'skip': 'VEVO is only available in some countries', + 'skip': 'Invalid URL', }, { # age-restricted video 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', @@ -259,9 +254,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader_id': 'x2vtgmm', 'age_limit': 0, 'tags': [], + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'view_count': int, 'like_count': int, - 'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080', }, }, { # https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj @@ -276,18 +271,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'info_dict': { 'id': 'x8u4owg', 'ext': 'mp4', + 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', 'like_count': int, 'uploader': 'Le Parisien', - 'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg', 'upload_date': '20240309', 'view_count': int, + 'tags': 'count:7', + 'thumbnail': r're:https?://www\.leparisien\.fr/.+\.jpg', 'timestamp': 1709997866, 'age_limit': 0, 'uploader_id': 'x32f7b', 'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes', 'duration': 428.0, - 'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne', - 'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'], }, }, { # https://geo.dailymotion.com/player/xry80.html?video=x8vu47w @@ -297,9 +292,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'ext': 'mp4', 'like_count': int, 'uploader': 'Metatube', - 'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080', 'upload_date': '20240326', 'view_count': int, + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1711496732, 'age_limit': 0, 'uploader_id': 'x2xpy74', @@ -308,6 +303,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'description': 'Que lindura', 'tags': [], }, + 'skip': 'Invalid URL', }, { # //geo.dailymotion.com/player/xysxq.html?video=k2Y4Mjp7krAF9iCuINM 'url': 'https://lcp.fr/programmes/avant-la-catastrophe-la-naissance-de-la-dictature-nazie-1933-1936-346819', @@ -322,11 +318,30 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'like_count': int, 'age_limit': 0, 'duration': 3220, - 'thumbnail': 'https://s1.dmcdn.net/v/Xvumk1djJBUZfjj2a/x1080', 'tags': [], + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', 'timestamp': 1739919947, 'upload_date': '20250218', }, + 'skip': 'Invalid URL', + }, { + 'url': 'https://forum.ionicframework.com/t/ionic-2-jw-player-dailymotion-player/83248', + 'info_dict': { + 'id': 'xwr14q', + 'ext': 'mp4', + 'title': 'Macklemore & Ryan Lewis - Thrift Shop (feat. Wanz)', + 'age_limit': 0, + 'description': 'md5:47fbe168b5a6ddc4a205e20dd6c841b2', + 'duration': 234, + 'like_count': int, + 'tags': 'count:5', + 'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+', + 'timestamp': 1358177670, + 'upload_date': '20130114', + 'uploader': 'Macklemore Official', + 'uploader_id': 'x19qlwr', + 'view_count': int, + }, }] _GEO_BYPASS = False _COMMON_MEDIA_FIELDS = '''description @@ -540,7 +555,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE): 'id': 'king of turtles', 'title': 'king of turtles', }, - 'playlist_mincount': 90, + 'playlist_mincount': 0, }] _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } ' @@ -584,7 +599,7 @@ class DailymotionUserIE(DailymotionPlaylistBaseIE): 'info_dict': { 'id': 'nqtv', }, - 'playlist_mincount': 152, + 'playlist_mincount': 148, }, { 'url': 'http://www.dailymotion.com/user/UnderProject', 'info_dict': { diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py index f7b243234a..3b0dc1f607 100644 --- a/yt_dlp/extractor/dangalplay.py +++ b/yt_dlp/extractor/dangalplay.py @@ -11,8 +11,14 @@ class DangalPlayBaseIE(InfoExtractor): _NETRC_MACHINE = 'dangalplay' + _REGION = 'IN' _OTV_USER_ID = None - _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _LOGIN_HINT = ( + 'Pass credentials as -u "token" -p "USER_ID" ' + '(where USER_ID is the value of "otv_user_id" in your browser local storage). ' + 'Your login region can be optionally suffixed to the username as @REGION ' + '(where REGION is the two-letter "region" code found in your browser local storage), ' + 'e.g.: -u "token@IN" -p "USER_ID"') _API_BASE = 'https://ottapi.dangalplay.com' _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above @@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor): def _perform_login(self, username, password): if self._OTV_USER_ID: return - if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + mobj = re.fullmatch(r'token(?:@(?P[A-Z]{2}))?', username) + if not mobj or not re.fullmatch(r'[\da-f]{32}', password): raise ExtractorError(self._LOGIN_HINT, expected=True) + if region := mobj.group('region'): + self._REGION = region + self.write_debug(f'Setting login region to "{self._REGION}"') self._OTV_USER_ID = password def _real_initialize(self): @@ -52,7 +62,7 @@ def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=Tr f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, headers={'Accept': 'application/json'}, query={ 'auth_token': self._AUTH_TOKEN, - 'region': 'IN', + 'region': self._REGION, **query, }) @@ -106,7 +116,7 @@ def _generate_api_data(self, data): 'catalog_id': catalog_id, 'content_id': content_id, 'category': '', - 'region': 'IN', + 'region': self._REGION, 'auth_token': self._AUTH_TOKEN, 'id': self._OTV_USER_ID, 'md5': hashlib.md5(unhashed.encode()).hexdigest(), @@ -129,11 +139,14 @@ def _real_extract(self, url): except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 422: error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} - if error_info.get('code') == '1016': + error_code = error_info.get('code') + if error_code == '1016': self.raise_login_required( f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) - elif msg := error_info.get('message'): - raise ExtractorError(msg) + elif error_code == '4028': + self.raise_login_required( + f'Your login region is unspecified or incorrect. {self._LOGIN_HINT}', method=None) + raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': ')) raise m3u8_url = traverse_obj(details, ( diff --git a/yt_dlp/extractor/dbtv.py b/yt_dlp/extractor/dbtv.py index 795fbacc41..ff93c642d2 100644 --- a/yt_dlp/extractor/dbtv.py +++ b/yt_dlp/extractor/dbtv.py @@ -12,13 +12,13 @@ class DBTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', - 'thumbnail': r're:https?://.*\.jpg', + 'thumbnail': r're:https?://.+\.jpg', 'upload_date': '20160916', 'duration': 69, 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', 'uploader': 'Dagbladet', }, - 'add_ie': ['Youtube'], + 'skip': 'Invalid URL', }, { 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'only_matching': True, @@ -26,6 +26,20 @@ class DBTVIE(InfoExtractor): 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # FIXME: Embed detection + 'url': 'https://www.dagbladet.no/nyheter/rekordstort-russisk-angrep/83325693', + 'info_dict': { + 'id': '1HW7fYry', + 'ext': 'mp4', + 'title': 'Putin taler - så skjer dette', + 'description': 'md5:3e8bacee33de861a9663d9a3fcc54e5e', + 'display_id': 'putin-taler-sa-skjer-dette', + 'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+', + 'timestamp': 1751043600, + 'upload_date': '20250627', + }, + }] def _real_extract(self, url): display_id, video_id = self._match_valid_url(url).groups() diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py deleted file mode 100644 index 685f8c0590..0000000000 --- a/yt_dlp/extractor/eagleplatform.py +++ /dev/null @@ -1,215 +0,0 @@ -import functools -import re - -from .common import InfoExtractor -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - smuggle_url, - unsmuggle_url, - url_or_none, -) - - -class EaglePlatformIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - eagleplatform:(?P[^/]+):| - https?://(?P.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id= - ) - (?P\d+) - ''' - _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1'] - _TESTS = [{ - # http://lenta.ru/news/2015/03/06/navalny/ - 'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used - 'info_dict': { - 'id': '227304', - 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, - }, - }, { - # http://muz-tv.ru/play/7129/ - # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true - 'url': 'eagleplatform:media.clipyou.ru:12820', - 'md5': '358597369cf8ba56675c1df15e7af624', - 'info_dict': { - 'id': '12820', - 'ext': 'mp4', - 'title': "'O Sole Mio", - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 216, - 'view_count': int, - }, - 'skip': 'Georestricted', - }, { - # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/) - 'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306', - 'only_matching': True, - }] - - @classmethod - def _extract_embed_urls(cls, url, webpage): - add_referer = functools.partial(smuggle_url, data={'referrer': url}) - - res = tuple(super()._extract_embed_urls(url, webpage)) - if res: - return map(add_referer, res) - - PLAYER_JS_RE = r''' - ]+ - src=(?P["\'])(?:https?:)?//(?P(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs) - .+? - ''' - # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) - mobj = re.search( - rf'''(?xs) - {PLAYER_JS_RE} - ]+ - class=(?P["\'])eagleplayer(?P=qclass)[^>]+ - data-id=["\'](?P\d+) - ''', webpage) - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - # Generalization of "Javascript code usage", "Combined usage" and - # "Usage without attaching to DOM" embeddings (see - # http://dultonmedia.github.io/eplayer/) - mobj = re.search( - r'''(?xs) - %s - - ''' % PLAYER_JS_RE, webpage) # noqa: UP031 - if mobj is not None: - return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] - - @staticmethod - def _handle_error(response): - status = int_or_none(response.get('status', 200)) - if status != 200: - raise ExtractorError(' '.join(response['errors']), expected=True) - - def _download_json(self, url_or_request, video_id, *args, **kwargs): - try: - response = super()._download_json( - url_or_request, video_id, *args, **kwargs) - except ExtractorError as ee: - if isinstance(ee.cause, HTTPError): - response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id) - self._handle_error(response) - raise - return response - - def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'): - return self._download_json(url_or_request, video_id, note)['data'][0] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = self._match_valid_url(url) - host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') - - headers = {} - query = { - 'id': video_id, - } - - referrer = smuggled_data.get('referrer') - if referrer: - headers['Referer'] = referrer - query['referrer'] = referrer - - player_data = self._download_json( - f'http://{host}/api/player_data', video_id, - headers=headers, query=query) - - media = player_data['data']['playlist']['viewports'][0]['medialist'][0] - - title = media['title'] - description = media.get('description') - thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:') - duration = int_or_none(media.get('duration')) - view_count = int_or_none(media.get('views')) - - age_restriction = media.get('age_restriction') - age_limit = None - if age_restriction: - age_limit = 0 if age_restriction == 'allow_all' else 18 - - secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:') - - formats = [] - - m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON') - m3u8_formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False) - formats.extend(m3u8_formats) - - m3u8_formats_dict = {} - for f in m3u8_formats: - if f.get('height') is not None: - m3u8_formats_dict[f['height']] = f - - mp4_data = self._download_json( - # Secure mp4 URL is constructed according to Player.prototype.mp4 from - # http://lentaru.media.eagleplatform.com/player/player.js - re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8), - video_id, 'Downloading mp4 JSON', fatal=False) - if mp4_data: - for format_id, format_url in mp4_data.get('data', {}).items(): - if not url_or_none(format_url): - continue - height = int_or_none(format_id) - if height is not None and m3u8_formats_dict.get(height): - f = m3u8_formats_dict[height].copy() - f.update({ - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - else: - f = { - 'format_id': f'http-{format_id}', - 'height': int_or_none(format_id), - } - f['url'] = format_url - formats.append(f) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'view_count': view_count, - 'age_limit': age_limit, - 'formats': formats, - } - - -class ClipYouEmbedIE(InfoExtractor): - _VALID_URL = False - - @classmethod - def _extract_embed_urls(cls, url, webpage): - mobj = re.search( - r']+src="https?://(?Pmedia\.clipyou\.ru)/index/player\?.*\brecord_id=(?P\d+).*"', webpage) - if mobj is not None: - yield smuggle_url('eagleplatform:{host}:{id}'.format(**mobj.groupdict()), {'referrer': url}) diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 6f3f60ff43..ba68d5324d 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -64,14 +64,12 @@ class ERTFlixCodenameIE(ERTFlixBaseIE): _VALID_URL = r'ertflix:(?P[\w-]+)' _TESTS = [{ 'url': 'ertflix:monogramma-praxitelis-tzanoylinos', - 'md5': '5b9c2cd171f09126167e4082fc1dd0ef', 'info_dict': { 'id': 'monogramma-praxitelis-tzanoylinos', 'ext': 'mp4', - 'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e', + 'title': 'monogramma-praxitelis-tzanoylinos', }, - }, - ] + }] def _extract_formats_and_subs(self, video_id): media_info = self._call_api(video_id, codename=video_id) @@ -131,13 +129,14 @@ class ERTFlixIE(ERTFlixBaseIE): 'duration': 3166, 'age_limit': 8, }, + 'skip': 'Invalid URL', }, { 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma', 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, 'playlist_mincount': 64, }, { @@ -145,28 +144,28 @@ class ERTFlixIE(ERTFlixBaseIE): 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, - 'playlist_count': 22, + 'playlist_mincount': 66, }, { 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022', 'info_dict': { 'id': 'ser.3448', 'age_limit': 8, - 'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', - 'title': 'Μονόγραμμα', + 'title': 'Monogramma', + 'description': 'md5:e30cc640e6463da87f210a8ed10b2439', }, - 'playlist_mincount': 36, + 'playlist_mincount': 25, }, { 'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9', 'info_dict': { 'id': 'ser.164991', 'age_limit': 8, - 'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.', - 'title': 'Το δίκτυο', + 'title': 'The Network', + 'description': 'The first Greek show featuring topics exclusively around the internet.', }, - 'playlist_mincount': 9, + 'playlist_mincount': 0, }, { 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari', 'only_matching': True, @@ -282,6 +281,16 @@ class ERTWebtvEmbedIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg', }, + 'skip': 'Invalid URL', + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/', + 'info_dict': { + 'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4', + 'ext': 'mp4', + 'title': 'VOD - 2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4', + 'thumbnail': r're:https?://www\.ert\.gr/themata/photos/.+\.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 24ecb03505..2c35013faa 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -81,13 +81,14 @@ class FacebookIE(InfoExtractor): 'description': 'md5:34675bda53336b1d16400265c2bb9b3b', 'uploader': 'RADIO KICKS FM', 'upload_date': '20230818', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1692346159, - 'thumbnail': r're:^https?://.*', 'uploader_id': '100063551323670', 'duration': 3133.583, 'view_count': int, 'concurrent_view_count': 0, }, + 'expected_warnings': ['Cannot parse data'], }, { 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'md5': '6a40d33c0eccbb1af76cf0485a052659', @@ -106,17 +107,18 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '274175099429670', 'ext': 'mp4', - 'title': 'Asif', + 'title': '119 reactions · 1.4K shares | Asif Nawab Butt on Reels', 'description': '', 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1399398998, - 'thumbnail': r're:^https?://.*', - 'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl', + 'uploader_id': 'pfbid028xue38TBXRyNbiqBSV2LFs3QK3yopvKjupbqFoL6U9SKbx4p2SMdJjQSBvnjsHGWl', 'duration': 131.03, 'concurrent_view_count': int, 'view_count': int, }, + 'expected_warnings': ['Cannot parse data'], }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', @@ -158,7 +160,7 @@ class FacebookIE(InfoExtractor): 'id': '10153664894881749', 'ext': 'mp4', 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1456259628, 'upload_date': '20160223', 'uploader': 'Barack Obama', @@ -168,7 +170,7 @@ class FacebookIE(InfoExtractor): # have 1080P, but only up to 720p in swf params # data.video.story.attachments[].media 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', - 'md5': '1659aa21fb3dd1585874f668e81a72c8', + 'md5': '70b82ebf5f0e9b91b2a49d3db3563611', 'info_dict': { 'id': '10155529876156509', 'ext': 'mp4', @@ -177,7 +179,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477818095, 'upload_date': '20161030', 'uploader': 'CNN', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'view_count': int, 'uploader_id': '100059479812265', 'concurrent_view_count': int, @@ -198,13 +200,11 @@ class FacebookIE(InfoExtractor): 'uploader': 'Yaroslav Korpan', 'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl', 'concurrent_view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'view_count': int, 'duration': 11736.446, }, - 'params': { - 'skip_download': True, - }, + 'skip': 'Invalid URL', }, { # FIXME: Cannot parse data error 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471', @@ -215,7 +215,7 @@ class FacebookIE(InfoExtractor): 'timestamp': 1477305000, 'upload_date': '20161024', 'uploader': 'La Guía Del Varón', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', }, 'skip': 'Requires logging in', }, { @@ -244,9 +244,10 @@ class FacebookIE(InfoExtractor): 'upload_date': '20171124', 'uploader': 'Vickie Gentry', 'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'duration': 148.224, }, + 'skip': 'Invalid URL', }, { # data.node.comet_sections.content.story.attachments[].styles.attachment.media 'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl', @@ -260,7 +261,7 @@ class FacebookIE(InfoExtractor): 'duration': 132.675, 'uploader_id': '100064451419378', 'view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'timestamp': 1701975646, }, }, { @@ -271,9 +272,9 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'Lela Evans', 'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'uploader': 'Lela Evans', - 'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl', + 'uploader_id': 'pfbid02wjMpknobSMnyynK3TNKN4Ww1StcpAKXgowqTyge3bz7LwHZMQ68uiXzzbu7xeryBl', 'upload_date': '20231228', 'timestamp': 1703804085, 'duration': 394.347, @@ -326,28 +327,27 @@ class FacebookIE(InfoExtractor): 'uploader_id': '100066514874195', 'duration': 4524.001, 'view_count': int, - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'concurrent_view_count': int, }, - 'params': { - 'skip_download': True, - }, + 'params': {'skip_download': True}, }, { # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', 'info_dict': { 'id': '106560053808006', 'ext': 'mp4', - 'title': 'Josef', - 'thumbnail': r're:^https?://.*', + 'title': 'Josef Novak on Reels', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'concurrent_view_count': int, - 'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl', + 'uploader_id': 'pfbid0cjYJYXpePWqhZ9DgpB6gKXrN2q3obwducdKm4wT7K5nkhbfKg5cneocYbsdaji7fl', 'timestamp': 1549275572, 'duration': 3.283, 'uploader': 'Josef Novak', 'description': '', 'upload_date': '20190204', }, + 'expected_warnings': ['Cannot parse data'], }, { # data.video.story.attachments[].media 'url': 'https://www.facebook.com/watch/?v=647537299265662', @@ -406,7 +406,7 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"', 'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022', - 'thumbnail': r're:^https?://.*', + 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+', 'uploader': 'Comitato Liberi Pensatori', 'uploader_id': '100065709540881', }, @@ -414,6 +414,56 @@ class FacebookIE(InfoExtractor): 'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + #