diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4b71a621c..e2411ecfa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -256,7 +256,7 @@ jobs: with: path: | ~/yt-dlp-build-venv - key: cache-reqs-${{ github.job }} + key: cache-reqs-${{ github.job }}-${{ github.ref }} - name: Install Requirements run: | @@ -331,19 +331,16 @@ jobs: if: steps.restore-cache.outputs.cache-hit == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - cache_key: cache-reqs-${{ github.job }} - repository: ${{ github.repository }} - branch: ${{ github.ref }} + cache_key: cache-reqs-${{ github.job }}-${{ github.ref }} run: | - gh extension install actions/gh-actions-cache - gh actions-cache delete "${cache_key}" -R "${repository}" -B "${branch}" --confirm + gh cache delete "${cache_key}" - name: Cache requirements uses: actions/cache/save@v4 with: path: | ~/yt-dlp-build-venv - key: cache-reqs-${{ github.job }} + key: cache-reqs-${{ github.job }}-${{ github.ref }} macos_legacy: needs: process diff --git a/.gitignore b/.gitignore index 8fcd0de64..40bb34d2a 100644 --- a/.gitignore +++ b/.gitignore @@ -105,6 +105,8 @@ README.txt *.zsh *.spec test/testdata/sigs/player-*.js +test/testdata/thumbnails/empty.webp +test/testdata/thumbnails/foo\ %d\ bar/foo_%d.* # Binary /youtube-dl diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 5710f9a9e..00d4d15aa 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -770,3 +770,14 @@ NeonMan pj47x troex WouterGordts +baierjan +GeoffreyFrogeye +Pawka +v3DJG6GL +yozel +brian6932 +iednod55 +maxbin123 +nullpos +anlar +eason1478 diff --git a/Changelog.md b/Changelog.md index 513724bf4..d37852658 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,126 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.06.25 + +#### Extractor changes +- [Add `_search_nuxt_json` helper](https://github.com/yt-dlp/yt-dlp/commit/51887484e46ab6015c041cb1ab626a55f25a03bd) ([#13386](https://github.com/yt-dlp/yt-dlp/issues/13386)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- **brightcove**: new: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e6bd4a3da295b760ab20b39c18ce8934d312c2bf) ([#13461](https://github.com/yt-dlp/yt-dlp/issues/13461)) by [doe1080](https://github.com/doe1080) +- **huya**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2600849badb0d08c55b58dcc77a13af6ba423da6) ([#13520](https://github.com/yt-dlp/yt-dlp/issues/13520)) by [doe1080](https://github.com/doe1080) +- **hypergryph**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1722c55400ff30bb5aee5dd7a262f0b7e9ce2f0e) ([#13415](https://github.com/yt-dlp/yt-dlp/issues/13415)) by [doe1080](https://github.com/doe1080), [eason1478](https://github.com/eason1478) +- **lsm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c57412d1f9cf0124adc972a47858ac42b740c61d) ([#13126](https://github.com/yt-dlp/yt-dlp/issues/13126)) by [Caesim404](https://github.com/Caesim404) +- **mave**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1838a1ce5d4ade80770ba9162eaffc9a1607dc70) ([#13380](https://github.com/yt-dlp/yt-dlp/issues/13380)) by [anlar](https://github.com/anlar) +- **sportdeutschland**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ce4327c9836691d3b6b00e44a90b6741601ed8) ([#13519](https://github.com/yt-dlp/yt-dlp/issues/13519)) by [DTrombett](https://github.com/DTrombett) +- **sproutvideo**: [Fix 
extractor](https://github.com/yt-dlp/yt-dlp/commit/5b559d0072b7164daf06bacdc41c6f11283452c8) ([#13544](https://github.com/yt-dlp/yt-dlp/issues/13544)) by [bashonly](https://github.com/bashonly) +- **tv8.it**: [Support slugless URLs](https://github.com/yt-dlp/yt-dlp/commit/3bd30291601c47fa4a257983473884103ecab0c7) ([#13478](https://github.com/yt-dlp/yt-dlp/issues/13478)) by [DTrombett](https://github.com/DTrombett) +- **youtube** + - [Check any `ios` m3u8 formats prior to download](https://github.com/yt-dlp/yt-dlp/commit/8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f) ([#13524](https://github.com/yt-dlp/yt-dlp/issues/13524)) by [bashonly](https://github.com/bashonly) + - [Improve player context payloads](https://github.com/yt-dlp/yt-dlp/commit/ff6f94041aeee19c5559e1c1cd693960a1c1dd14) ([#13539](https://github.com/yt-dlp/yt-dlp/issues/13539)) by [bashonly](https://github.com/bashonly) + +#### Misc. changes +- **test**: `traversal`: [Fix morsel tests for Python 3.14](https://github.com/yt-dlp/yt-dlp/commit/73bf10211668e4a59ccafd790e06ee82d9fea9ea) ([#13471](https://github.com/yt-dlp/yt-dlp/issues/13471)) by [Grub4K](https://github.com/Grub4K) + +### 2025.06.09 + +#### Extractor changes +- [Improve JSON LD thumbnails extraction](https://github.com/yt-dlp/yt-dlp/commit/85c8a405e3651dc041b758f4744d4fb3c4c55e01) ([#13368](https://github.com/yt-dlp/yt-dlp/issues/13368)) by [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080) +- **10play**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6d265388c6e943419ac99e9151cf75a3265f980f) ([#13349](https://github.com/yt-dlp/yt-dlp/issues/13349)) by [bashonly](https://github.com/bashonly) +- **adobepass** + - [Add Fubo MSO](https://github.com/yt-dlp/yt-dlp/commit/eee90acc47d7f8de24afaa8b0271ccaefdf6e88c) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) + - [Always add newer user-agent when required](https://github.com/yt-dlp/yt-dlp/commit/0ee1102268cf31b07f8a8318a47424c66b2f7378) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) + - [Fix Philo MSO authentication](https://github.com/yt-dlp/yt-dlp/commit/943083edcd3df45aaa597a6967bc6c95b720f54c) ([#13335](https://github.com/yt-dlp/yt-dlp/issues/13335)) by [Sipherdrakon](https://github.com/Sipherdrakon) + - [Rework to require software statement](https://github.com/yt-dlp/yt-dlp/commit/711c5d5d098fee2992a1a624b1c4b30364b91426) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123) + - [Validate login URL before sending credentials](https://github.com/yt-dlp/yt-dlp/commit/89c1b349ad81318d9d3bea76c01c891696e58d38) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **aenetworks** + - [Fix playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/f37d599a697e82fe68b423865897d55bae34f373) ([#13408](https://github.com/yt-dlp/yt-dlp/issues/13408)) by [Sipherdrakon](https://github.com/Sipherdrakon) + - [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/6693d6603358ae6beca834dbd822a7917498b813) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) +- **bilibilibangumi**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/13e55162719528d42d2133e16b65ff59a667a6e4) ([#13416](https://github.com/yt-dlp/yt-dlp/issues/13416)) by [c-basalt](https://github.com/c-basalt) +- 
**brightcove**: new: [Adapt to new AdobePass requirement](https://github.com/yt-dlp/yt-dlp/commit/98f8eec956e3b16cb66a3d49cc71af3807db795e) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **cu.ntv.co.jp**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/aa863ddab9b1d104678e9cf39bb76f5b14fca660) ([#13302](https://github.com/yt-dlp/yt-dlp/issues/13302)) by [doe1080](https://github.com/doe1080), [nullpos](https://github.com/nullpos) +- **go**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5bf002dad16f5ce35aa2023d392c9e518fcd8f) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123) +- **nbc**: [Rework and adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/2d7949d5642bc37d1e71bf00c9a55260e5505d58) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **nobelprize**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/97ddfefeb4faba6e61cd80996c16952b8eab16f3) ([#13205](https://github.com/yt-dlp/yt-dlp/issues/13205)) by [doe1080](https://github.com/doe1080) +- **odnoklassniki**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/148a1eb4c59e127965396c7a6e6acf1979de459e) ([#13361](https://github.com/yt-dlp/yt-dlp/issues/13361)) by [bashonly](https://github.com/bashonly) +- **patreon**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e0d6c0822930f6e63f574d46d946a58b73ecd10c) ([#13266](https://github.com/yt-dlp/yt-dlp/issues/13266)) by [bashonly](https://github.com/bashonly) (With fixes in [1a8a03e](https://github.com/yt-dlp/yt-dlp/commit/1a8a03ea8d827107319a18076ee3505090667c5a)) +- **podchaser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/538eb305673c26bff6a2b12f1c96375fe02ce41a) ([#13271](https://github.com/yt-dlp/yt-dlp/issues/13271)) by [bashonly](https://github.com/bashonly) +- **sr**: mediathek: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e3c605a61f4cc2de9059f37434fa108c3c20f58e) ([#13294](https://github.com/yt-dlp/yt-dlp/issues/13294)) by [doe1080](https://github.com/doe1080) +- **stacommu**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/5d96527be80dc1ed1702d9cd548ff86de570ad70) ([#13412](https://github.com/yt-dlp/yt-dlp/issues/13412)) by [bashonly](https://github.com/bashonly) +- **startrek**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a8bf0011bde92b3f1324a98bfbd38932fd3ebe18) ([#13188](https://github.com/yt-dlp/yt-dlp/issues/13188)) by [doe1080](https://github.com/doe1080) +- **svt**: play: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e1b6062f8c4a3fa33c65269d48d09ec78de765a2) ([#13329](https://github.com/yt-dlp/yt-dlp/issues/13329)) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly) +- **telecinco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03dba2012d9bd3f402fa8c2f122afba89bbd22a4) ([#13379](https://github.com/yt-dlp/yt-dlp/issues/13379)) by [bashonly](https://github.com/bashonly) +- **theplatform**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/ed108b3ea481c6a4b5215a9302ba92d74baa2425) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **toutiao**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8051e3a61686c5db1de5f5746366ecfbc3ad20c) 
([#13246](https://github.com/yt-dlp/yt-dlp/issues/13246)) by [doe1080](https://github.com/doe1080) +- **turner**: [Adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/0daddc780d3ac5bebc3a3ec5b884d9243cbc0745) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **twitcasting**: [Fix password-protected livestream support](https://github.com/yt-dlp/yt-dlp/commit/52f9729c9a92ad4656d746ff0b1acecb87b3e96d) ([#13097](https://github.com/yt-dlp/yt-dlp/issues/13097)) by [bashonly](https://github.com/bashonly) +- **twitter**: broadcast: [Support events URLs](https://github.com/yt-dlp/yt-dlp/commit/7794374de8afb20499b023107e2abfd4e6b93ee4) ([#13248](https://github.com/yt-dlp/yt-dlp/issues/13248)) by [doe1080](https://github.com/doe1080) +- **umg**: de: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/4e7c1ea346b510280218b47e8653dbbca3a69870) ([#13373](https://github.com/yt-dlp/yt-dlp/issues/13373)) by [doe1080](https://github.com/doe1080) +- **vice**: [Mark extractors as broken](https://github.com/yt-dlp/yt-dlp/commit/6121559e027a04574690799c1776bc42bb51af31) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **vimeo**: [Extract subtitles from player subdomain](https://github.com/yt-dlp/yt-dlp/commit/c723c4e5e78263df178dbe69844a3d05f3ef9e35) ([#13350](https://github.com/yt-dlp/yt-dlp/issues/13350)) by [bashonly](https://github.com/bashonly) +- **watchespn**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/b094747e93cfb0a2c53007120e37d0d84d41f030) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) +- **weverse**: [Support login with oauth refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/3fe72e9eea38d9a58211cde42cfaa577ce020e2c) ([#13284](https://github.com/yt-dlp/yt-dlp/issues/13284)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add `tv_simply` player client](https://github.com/yt-dlp/yt-dlp/commit/1fd0e88b67db53ad163393d6965f68e908fa70e3) ([#13389](https://github.com/yt-dlp/yt-dlp/issues/13389)) by [gamer191](https://github.com/gamer191) + - [Extract srt subtitles](https://github.com/yt-dlp/yt-dlp/commit/231349786e8c42089c2e079ec94c0ea866c37999) ([#13411](https://github.com/yt-dlp/yt-dlp/issues/13411)) by [gamer191](https://github.com/gamer191) + - [Fix `--mark-watched` support](https://github.com/yt-dlp/yt-dlp/commit/b5be29fa58ec98226e11621fd9c58585bcff6879) ([#13222](https://github.com/yt-dlp/yt-dlp/issues/13222)) by [brian6932](https://github.com/brian6932), [iednod55](https://github.com/iednod55) + - [Fix automatic captions for some client combinations](https://github.com/yt-dlp/yt-dlp/commit/53ea743a9c158f8ca2d75a09ca44ba68606042d8) ([#13268](https://github.com/yt-dlp/yt-dlp/issues/13268)) by [bashonly](https://github.com/bashonly) + - [Improve signature extraction debug output](https://github.com/yt-dlp/yt-dlp/commit/d30a49742cfa22e61c47df4ac0e7334d648fb85d) ([#13327](https://github.com/yt-dlp/yt-dlp/issues/13327)) by [bashonly](https://github.com/bashonly) + - [Rework nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/9e38b273b7ac942e7e9fc05a651ed810ab7d30ba) ([#13403](https://github.com/yt-dlp/yt-dlp/issues/13403)) by [Grub4K](https://github.com/Grub4K) + - [nsig code improvements and cleanup](https://github.com/yt-dlp/yt-dlp/commit/f7bbf5a617f9ab54ef51eaef99be36e175b5e9c3) 
([#13280](https://github.com/yt-dlp/yt-dlp/issues/13280)) by [bashonly](https://github.com/bashonly) +- **zdf**: [Fix language extraction and format sorting](https://github.com/yt-dlp/yt-dlp/commit/db162b76f6bdece50babe2e0cacfe56888c2e125) ([#13313](https://github.com/yt-dlp/yt-dlp/issues/13313)) by [InvalidUsernameException](https://github.com/InvalidUsernameException) + +#### Misc. changes +- **build** + - [Exclude `pkg_resources` from being collected](https://github.com/yt-dlp/yt-dlp/commit/cc749a8a3b8b6e5c05318868c72a403f376a1b38) ([#13320](https://github.com/yt-dlp/yt-dlp/issues/13320)) by [bashonly](https://github.com/bashonly) + - [Fix macOS requirements caching](https://github.com/yt-dlp/yt-dlp/commit/201812100f315c6727a4418698d5b4e8a79863d4) ([#13328](https://github.com/yt-dlp/yt-dlp/issues/13328)) by [bashonly](https://github.com/bashonly) +- **cleanup**: Miscellaneous: [339614a](https://github.com/yt-dlp/yt-dlp/commit/339614a173c74b42d63e858c446a9cae262a13af) by [bashonly](https://github.com/bashonly) +- **test**: postprocessors: [Remove binary thumbnail test data](https://github.com/yt-dlp/yt-dlp/commit/a9b370069838e84d44ac7ad095d657003665885a) ([#13341](https://github.com/yt-dlp/yt-dlp/issues/13341)) by [bashonly](https://github.com/bashonly) + +### 2025.05.22 + +#### Core changes +- **cookies**: [Fix Linux desktop environment detection](https://github.com/yt-dlp/yt-dlp/commit/e491fd4d090db3af52a82863fb0553dd5e17fb85) ([#13197](https://github.com/yt-dlp/yt-dlp/issues/13197)) by [mbway](https://github.com/mbway) +- **jsinterp**: [Fix increment/decrement evaluation](https://github.com/yt-dlp/yt-dlp/commit/167d7a9f0ffd1b4fe600193441bdb7358db2740b) ([#13238](https://github.com/yt-dlp/yt-dlp/issues/13238)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + +#### Extractor changes +- **1tv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/41c0a1fb89628696f8bb88e2b9f3a68f355b8c26) ([#13168](https://github.com/yt-dlp/yt-dlp/issues/13168)) by [bashonly](https://github.com/bashonly) +- **amcnetworks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/464c84fedf78eef822a431361155f108b5df96d7) ([#13147](https://github.com/yt-dlp/yt-dlp/issues/13147)) by [bashonly](https://github.com/bashonly) +- **bitchute**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1d0f6539c47e5d5c68c3c47cdb7075339e2885ac) ([#13081](https://github.com/yt-dlp/yt-dlp/issues/13081)) by [bashonly](https://github.com/bashonly) +- **cartoonnetwork**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/7dbb47f84f0ee1266a3a01f58c9bc4c76d76794a) ([#13148](https://github.com/yt-dlp/yt-dlp/issues/13148)) by [bashonly](https://github.com/bashonly) +- **iprima**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/a7d9a5eb79ceeecb851389f3f2c88597871ca3f2) ([#12937](https://github.com/yt-dlp/yt-dlp/issues/12937)) by [baierjan](https://github.com/baierjan) +- **jiosaavn** + - artist: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/586b557b124f954d3f625360ebe970989022ad97) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima) + - playlist, show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/317f4b8006c2c0f0f64f095b1485163ad97c9053) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima) + - show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6839276496d8814cf16f58b637e45663467928e6) 
([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima) +- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/abf58dcd6a09e14eec4ea82ae12f79a0337cb383) ([#13200](https://github.com/yt-dlp/yt-dlp/issues/13200)) by [Pawka](https://github.com/Pawka) +- **nebula**: [Support `--mark-watched`](https://github.com/yt-dlp/yt-dlp/commit/20f288bdc2173c7cc58d709d25ca193c1f6001e7) ([#13120](https://github.com/yt-dlp/yt-dlp/issues/13120)) by [GeoffreyFrogeye](https://github.com/GeoffreyFrogeye) +- **niconico** + - [Fix error handling](https://github.com/yt-dlp/yt-dlp/commit/f569be4602c2a857087e495d5d7ed6060cd97abe) ([#13236](https://github.com/yt-dlp/yt-dlp/issues/13236)) by [bashonly](https://github.com/bashonly) + - live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7a7b85c9014d96421e18aa7ea5f4c1bee5ceece0) ([#13045](https://github.com/yt-dlp/yt-dlp/issues/13045)) by [doe1080](https://github.com/doe1080) +- **nytimesarticle**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/b26bc32579c00ef579d75a835807ccc87d20ee0a) ([#13104](https://github.com/yt-dlp/yt-dlp/issues/13104)) by [bashonly](https://github.com/bashonly) +- **once**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f475e8b529d18efdad603ffda02a56e707fe0e2c) ([#13164](https://github.com/yt-dlp/yt-dlp/issues/13164)) by [bashonly](https://github.com/bashonly) +- **picarto**: vod: [Support `/profile/` video URLs](https://github.com/yt-dlp/yt-dlp/commit/31e090cb787f3504ec25485adff9a2a51d056734) ([#13227](https://github.com/yt-dlp/yt-dlp/issues/13227)) by [subrat-lima](https://github.com/subrat-lima) +- **playsuisse**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/d880e060803ae8ed5a047e578cca01e1f0e630ce) ([#12466](https://github.com/yt-dlp/yt-dlp/issues/12466)) by [v3DJG6GL](https://github.com/v3DJG6GL) +- **sprout**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/cbcfe6378dde33a650e3852ab17ad4503b8e008d) ([#13149](https://github.com/yt-dlp/yt-dlp/issues/13149)) by [bashonly](https://github.com/bashonly) +- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ea8498ed534642dd7e925961b97b934987142fd3) ([#12957](https://github.com/yt-dlp/yt-dlp/issues/12957)) by [diman8](https://github.com/diman8) +- **twitch**: [Support `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/00b1bec55249cf2ad6271d36492c51b34b6459d1) ([#13202](https://github.com/yt-dlp/yt-dlp/issues/13202)) by [bashonly](https://github.com/bashonly) +- **vimeo**: event: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/545c1a5b6f2fe88722b41aef0e7485bf3be3f3f9) ([#13216](https://github.com/yt-dlp/yt-dlp/issues/13216)) by [bashonly](https://github.com/bashonly) +- **wat.tv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/f123cc83b3aea45053f5fa1d9141048b01fc2774) ([#13111](https://github.com/yt-dlp/yt-dlp/issues/13111)) by [bashonly](https://github.com/bashonly) +- **weverse**: [Fix live extraction](https://github.com/yt-dlp/yt-dlp/commit/5328eda8820cc5f21dcf917684d23fbdca41831d) ([#13084](https://github.com/yt-dlp/yt-dlp/issues/13084)) by [bashonly](https://github.com/bashonly) +- **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/83fabf352489d52843f67e6e9cc752db86d27e6e) ([#13245](https://github.com/yt-dlp/yt-dlp/issues/13245)) by [garret1317](https://github.com/garret1317) +- **youtube** + - [Add PO token support for 
subtitles](https://github.com/yt-dlp/yt-dlp/commit/32ed5f107c6c641958d1cd2752e130de4db55a13) ([#13234](https://github.com/yt-dlp/yt-dlp/issues/13234)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz) + - [Add `web_embedded` client for age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/0feec6dc131f488428bf881519e7c69766fbb9ae) ([#13089](https://github.com/yt-dlp/yt-dlp/issues/13089)) by [bashonly](https://github.com/bashonly) + - [Add a PO Token Provider Framework](https://github.com/yt-dlp/yt-dlp/commit/2685654a37141cca63eda3a92da0e2706e23ccfd) ([#12840](https://github.com/yt-dlp/yt-dlp/issues/12840)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract `media_type` for all videos](https://github.com/yt-dlp/yt-dlp/commit/ded11ebc9afba6ba33923375103e9be2d7c804e7) ([#13136](https://github.com/yt-dlp/yt-dlp/issues/13136)) by [bashonly](https://github.com/bashonly) + - [Fix `--live-from-start` support for premieres](https://github.com/yt-dlp/yt-dlp/commit/8f303afb43395be360cafd7ad4ce2b6e2eedfb8a) ([#13079](https://github.com/yt-dlp/yt-dlp/issues/13079)) by [arabcoders](https://github.com/arabcoders) + - [Fix geo-restriction error handling](https://github.com/yt-dlp/yt-dlp/commit/c7e575e31608c19c5b26c10a4229db89db5fc9a8) ([#13217](https://github.com/yt-dlp/yt-dlp/issues/13217)) by [yozel](https://github.com/yozel) + +#### Misc. changes +- **build** + - [Bump PyInstaller to v6.13.0](https://github.com/yt-dlp/yt-dlp/commit/17cf9088d0d535e4a7feffbf02bd49cd9dae5ab9) ([#13082](https://github.com/yt-dlp/yt-dlp/issues/13082)) by [bashonly](https://github.com/bashonly) + - [Bump run-on-arch-action to v3](https://github.com/yt-dlp/yt-dlp/commit/9064d2482d1fe722bbb4a49731fe0711c410d1c8) ([#13088](https://github.com/yt-dlp/yt-dlp/issues/13088)) by [bashonly](https://github.com/bashonly) +- **cleanup**: Miscellaneous: [7977b32](https://github.com/yt-dlp/yt-dlp/commit/7977b329ed97b216e37bd402f4935f28c00eac9e) by [bashonly](https://github.com/bashonly) + ### 2025.04.30 #### Important changes diff --git a/Makefile b/Makefile index 6c72ead1e..273cb3cc0 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ tar pypi-files lazy-extractors install uninstall clean-test: - rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ + rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \ - *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp + *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp \ + test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."* clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS diff --git a/README.md b/README.md index 9f542844e..0f9a7d556 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ * [Post-processing Options](#post-processing-options) * [SponsorBlock Options](#sponsorblock-options) * [Extractor 
Options](#extractor-options) + * [Preset Aliases](#preset-aliases) * [CONFIGURATION](#configuration) * [Configuration file encoding](#configuration-file-encoding) * [Authentication with netrc](#authentication-with-netrc) @@ -348,8 +349,8 @@ ## General Options: --no-flat-playlist Fully extract the videos of a playlist (default) --live-from-start Download livestreams from the start. - Currently only supported for YouTube - (Experimental) + Currently experimental and only supported + for YouTube and Twitch --no-live-from-start Download livestreams from the current time (default) --wait-for-video MIN[-MAX] Wait for scheduled streams to become @@ -375,12 +376,12 @@ ## General Options: an alias starts with a dash "-", it is prefixed with "--". Arguments are parsed according to the Python string formatting - mini-language. E.g. --alias get-audio,-X - "-S=aext:{0},abr -x --audio-format {0}" - creates options "--get-audio" and "-X" that - takes an argument (ARG0) and expands to - "-S=aext:ARG0,abr -x --audio-format ARG0". - All defined aliases are listed in the --help + mini-language. E.g. --alias get-audio,-X "-S + aext:{0},abr -x --audio-format {0}" creates + options "--get-audio" and "-X" that takes an + argument (ARG0) and expands to "-S + aext:ARG0,abr -x --audio-format ARG0". All + defined aliases are listed in the --help output. Alias options can trigger more aliases; so be careful to avoid defining recursive options. As a safety measure, each @@ -1105,6 +1106,10 @@ ## Extractor Options: arguments for different extractors ## Preset Aliases: +Predefined aliases for convenience and ease of use. Note that future + versions of yt-dlp may add or adjust presets, but the existing preset + names will not be changed or removed + -t mp3 -f 'ba[acodec^=mp3]/ba/b' -x --audio-format mp3 @@ -1790,9 +1795,9 @@ # EXTRACTOR ARGUMENTS The following extractors use this feature: #### youtube -* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes +* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. 
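(Aside: the `player_client` selection documented in this section can also be set when embedding yt-dlp, through the `extractor_args` parameter of `YoutubeDL`, the API counterpart of `--extractor-args`. A minimal sketch; the URL is a placeholder and the client list is only an example.)

```python
import yt_dlp

ydl_opts = {
    # Equivalent to: --extractor-args "youtube:player_client=tv,web"
    # Argument values must always be given as a list of strings.
    'extractor_args': {
        'youtube': {
            'player_client': ['tv', 'web'],
        },
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=example'])  # placeholder URL
```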
Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` +* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual` @@ -1805,7 +1810,7 @@ #### youtube * `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning * `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage` * `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID) -* `po_token`: Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be either `gvs` (Google Video Server URLs) or `player` (Innertube player request) +* `po_token`: Proof of Origin (PO) Token(s) to use. 
Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles) * `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default) * `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context) diff --git a/bundle/docker/static/entrypoint.sh b/bundle/docker/static/entrypoint.sh index 220275974..8049e6820 100755 --- a/bundle/docker/static/entrypoint.sh +++ b/bundle/docker/static/entrypoint.sh @@ -2,6 +2,7 @@ set -e source ~/.local/share/pipx/venvs/pyinstaller/bin/activate +python -m devscripts.install_deps -o --include build python -m devscripts.install_deps --include secretstorage --include curl-cffi python -m devscripts.make_lazy_extractors python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}" diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index 4184c4bc9..c2f651121 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -36,6 +36,9 @@ def main(): f'--name={name}', '--icon=devscripts/logo.ico', '--upx-exclude=vcruntime140.dll', + # Ref: https://github.com/yt-dlp/yt-dlp/issues/13311 + # https://github.com/pyinstaller/pyinstaller/issues/9149 + '--exclude-module=pkg_resources', '--noconfirm', '--additional-hooks-dir=yt_dlp/__pyinstaller', *opts, diff --git a/pyproject.toml b/pyproject.toml index 7accaeeb9..3775251e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ build = [ "build", "hatchling", "pip", - "setuptools>=71.0.2", # 71.0.0 broke pyinstaller + "setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149 "wheel", ] dev = [ diff --git a/supportedsites.md b/supportedsites.md index 03bd8a7c3..b3fe01173 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -5,6 +5,8 @@ # Supported sites Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them. The only reliable way to check if a site is supported is to try it. + - **10play**: [*10play*](## "netrc machine") + - **10play:season** - **17live** - **17live:clip** - **17live:vod** @@ -246,7 +248,6 @@ # Supported sites - **Canalplus**: mycanal.fr and piwiplus.fr - **Canalsurmas** - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine") - - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** - **cbc.ca:​player:playlist** @@ -296,7 +297,7 @@ # Supported sites - **CNNIndonesia** - **ComedyCentral** - **ComedyCentralTV** - - **ConanClassic** + - **ConanClassic**: (**Currently broken**) - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CONtv** - **CookingChannel** @@ -318,7 +319,7 @@ # Supported sites - **CtsNews**: 華視新聞 - **CTV** - **CTVNews** - - **cu.ntv.co.jp**: Nippon Television Network + - **cu.ntv.co.jp**: 日テレ無料TADA! 
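(Aside on the `po_token` extractor argument documented in the README hunk above: each entry follows `CLIENT.CONTEXT+PO_TOKEN`, and with this release the context may be `gvs`, `player` or `subs`. A rough illustration of splitting such a comma-separated spec; this is a hypothetical helper for clarity, not yt-dlp's own parser.)

```python
def split_po_token_specs(value):
    """Split 'CLIENT.CONTEXT+PO_TOKEN' entries from a comma-separated string."""
    tokens = {}
    for spec in value.split(','):
        binding, _, token = spec.partition('+')
        client, _, context = binding.partition('.')
        # context is expected to be one of: gvs, player, subs
        tokens[(client, context)] = token
    return tokens

print(split_po_token_specs('web.gvs+XXX,web_safari.gvs+YYY'))
# {('web', 'gvs'): 'XXX', ('web_safari', 'gvs'): 'YYY'}
```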
- **CultureUnplugged** - **curiositystream**: [*curiositystream*](## "netrc machine") - **curiositystream:collections**: [*curiositystream*](## "netrc machine") @@ -589,7 +590,7 @@ # Supported sites - **Hungama** - **HungamaAlbumPlaylist** - **HungamaSong** - - **huya:live**: huya.com + - **huya:live**: 虎牙直播 - **huya:video**: 虎牙视频 - **Hypem** - **Hytale** @@ -649,7 +650,10 @@ # Supported sites - **jiocinema**: [*jiocinema*](## "netrc machine") - **jiocinema:series**: [*jiocinema*](## "netrc machine") - **jiosaavn:album** + - **jiosaavn:artist** - **jiosaavn:playlist** + - **jiosaavn:show** + - **jiosaavn:​show:playlist** - **jiosaavn:song** - **Joj** - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR) @@ -772,6 +776,7 @@ # Supported sites - **massengeschmack.tv** - **Masters** - **MatchTV** + - **Mave** - **MBN**: mbn.co.kr (매일방송) - **MDR**: MDR.DE - **MedalTV** @@ -828,7 +833,7 @@ # Supported sites - **Mojevideo**: mojevideo.sk - **Mojvideo** - **Monstercat** - - **MonsterSirenHypergryphMusic** + - **monstersiren**: 塞壬唱片 - **Motherless** - **MotherlessGallery** - **MotherlessGroup** @@ -880,19 +885,19 @@ # Supported sites - **Naver** - **Naver:live** - **navernow** - - **nba** - - **nba:channel** - - **nba:embed** - - **nba:watch** - - **nba:​watch:collection** - - **nba:​watch:embed** + - **nba**: (**Currently broken**) + - **nba:channel**: (**Currently broken**) + - **nba:embed**: (**Currently broken**) + - **nba:watch**: (**Currently broken**) + - **nba:​watch:collection**: (**Currently broken**) + - **nba:​watch:embed**: (**Currently broken**) - **NBC** - **NBCNews** - **nbcolympics** - - **nbcolympics:stream** - - **NBCSports** - - **NBCSportsStream** - - **NBCSportsVPlayer** + - **nbcolympics:stream**: (**Currently broken**) + - **NBCSports**: (**Currently broken**) + - **NBCSportsStream**: (**Currently broken**) + - **NBCSportsVPlayer**: (**Currently broken**) - **NBCStations** - **ndr**: NDR.de - Norddeutscher Rundfunk - **ndr:embed** @@ -968,7 +973,7 @@ # Supported sites - **Nitter** - **njoy**: N-JOY - **njoy:embed** - - **NobelPrize**: (**Currently broken**) + - **NobelPrize** - **NoicePodcast** - **NonkTube** - **NoodleMagazine** @@ -1081,8 +1086,8 @@ # Supported sites - **Photobucket** - **PiaLive** - **Piapro**: [*piapro*](## "netrc machine") - - **Picarto** - - **PicartoVod** + - **picarto** + - **picarto:vod** - **Piksel** - **Pinkbike** - **Pinterest** @@ -1390,16 +1395,15 @@ # Supported sites - **Spreaker** - **SpreakerShow** - **SpringboardPlatform** - - **Sprout** - **SproutVideo** - - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**) + - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **StacommuLive**: [*stacommu*](## "netrc machine") - **StacommuVOD**: [*stacommu*](## "netrc machine") - **StagePlusVODConcert**: [*stageplus*](## "netrc machine") - **stanfordoc**: Stanford Open ClassRoom - - **StarTrek**: (**Currently broken**) + - **startrek**: STAR TREK - **startv** - **Steam** - **SteamCommunityBroadcast** @@ -1422,12 +1426,11 @@ # Supported sites - **SunPorno** - **sverigesradio:episode** - **sverigesradio:publication** - - **SVT** - - **SVTPage** - - **SVTPlay**: SVT Play and Öppet arkiv - - **SVTSeries** + - **svt:page** + - **svt:play**: SVT Play and Öppet arkiv + - **svt:​play:series** - **SwearnetEpisode** - - **Syfy**: (**Currently broken**) + - **Syfy** - **SYVDK** - **SztvHu** - **t-online.de**: (**Currently broken**) @@ -1471,8 
+1474,6 @@ # Supported sites - **Telewebion**: (**Currently broken**) - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - - **TenPlay**: [*10play*](## "netrc machine") - - **TenPlaySeason** - **TF1** - **TFO** - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") @@ -1510,6 +1511,7 @@ # Supported sites - **tokfm:podcast** - **ToonGoggles** - **tou.tv**: [*toutv*](## "netrc machine") + - **toutiao**: 今日头条 - **Toypics**: Toypics video (**Currently broken**) - **ToypicsUser**: Toypics user profile (**Currently broken**) - **TrailerAddict**: (**Currently broken**) @@ -1599,7 +1601,7 @@ # Supported sites - **UKTVPlay** - **UlizaPlayer** - **UlizaPortal**: ulizaportal.jp - - **umg:de**: Universal Music Deutschland (**Currently broken**) + - **umg:de**: Universal Music Deutschland - **Unistra** - **Unity**: (**Currently broken**) - **uol.com.br** @@ -1622,9 +1624,9 @@ # Supported sites - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **vhx:embed**: [*vimeo*](## "netrc machine") - - **vice** - - **vice:article** - - **vice:show** + - **vice**: (**Currently broken**) + - **vice:article**: (**Currently broken**) + - **vice:show**: (**Currently broken**) - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video @@ -1656,6 +1658,7 @@ # Supported sites - **vimeo**: [*vimeo*](## "netrc machine") - **vimeo:album**: [*vimeo*](## "netrc machine") - **vimeo:channel**: [*vimeo*](## "netrc machine") + - **vimeo:event**: [*vimeo*](## "netrc machine") - **vimeo:group**: [*vimeo*](## "netrc machine") - **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes - **vimeo:ondemand**: [*vimeo*](## "netrc machine") diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c6ff6209a..e6c8d574e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -314,6 +314,20 @@ def test_search_json_ld_realworld(self): }, {}, ), + ( + # test thumbnail_url key without URL scheme + r''' +''', + { + 'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}], + }, + {}, + ), ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( @@ -1933,6 +1947,137 @@ def test_search_nextjs_data(self): with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) + def test_search_nuxt_json(self): + HTML_TMPL = '' + VALID_DATA = ''' + ["ShallowReactive",1], + {"data":2,"state":21,"once":25,"_errors":28,"_server_errors":30}, + ["ShallowReactive",3], + {"$abcdef123456":4}, + {"podcast":5,"activeEpisodeData":7}, + {"podcast":6,"seasons":14}, + {"title":10,"id":11}, + ["Reactive",8], + {"episode":9,"creators":18,"empty_list":20}, + {"title":12,"id":13,"refs":34,"empty_refs":35}, + "Series Title", + "podcast-id-01", + "Episode Title", + "episode-id-99", + [15,16,17], + 1, + 2, + 3, + [19], + "Podcast Creator", + [], + {"$ssite-config":22}, + {"env":23,"name":24,"map":26,"numbers":14}, + "production", + "podcast-website", + ["Set"], + ["Reactive",27], + ["Map"], + ["ShallowReactive",29], + {}, + ["NuxtError",31], + {"status":32,"message":33}, + 503, + "Service Unavailable", + [36,37], + [38,39], + ["Ref",40], + ["ShallowRef",41], + ["EmptyRef",42], + ["EmptyShallowRef",43], + "ref", + "shallow_ref", + "{\\"ref\\":1}", + "{\\"shallow_ref\\":2}" + ''' + PAYLOAD = { + 'data': { + '$abcdef123456': { + 'podcast': { + 'podcast': { + 'title': 'Series Title', + 'id': 'podcast-id-01', + }, + 'seasons': [1, 2, 3], + }, + 'activeEpisodeData': { + 'episode': { + 
'title': 'Episode Title', + 'id': 'episode-id-99', + 'refs': ['ref', 'shallow_ref'], + 'empty_refs': [{'ref': 1}, {'shallow_ref': 2}], + }, + 'creators': ['Podcast Creator'], + 'empty_list': [], + }, + }, + }, + 'state': { + '$ssite-config': { + 'env': 'production', + 'name': 'podcast-website', + 'map': [], + 'numbers': [1, 2, 3], + }, + }, + 'once': [], + '_errors': {}, + '_server_errors': { + 'status': 503, + 'message': 'Service Unavailable', + }, + } + PARTIALLY_INVALID = [( + ''' + {"data":1}, + {"invalid_raw_list":2}, + [15,16,17] + ''', + {'data': {'invalid_raw_list': [None, None, None]}}, + ), ( + ''' + {"data":1}, + ["EmptyRef",2], + "not valid JSON" + ''', + {'data': None}, + ), ( + ''' + {"data":1}, + ["EmptyShallowRef",2], + "not valid JSON" + ''', + {'data': None}, + )] + INVALID = [ + ''' + [] + ''', + ''' + ["unsupported",1], + {"data":2}, + {} + ''', + ] + DEFAULT = object() + + self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD) + self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {}) + self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT) + + for data, expected in PARTIALLY_INVALID: + self.assertEqual( + self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected) + + for data in INVALID: + self.assertIs( + self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) + if __name__ == '__main__': unittest.main() diff --git a/test/test_cookies.py b/test/test_cookies.py index 4b9b9b5a9..f956ab187 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -58,6 +58,14 @@ def test_get_desktop_environment(self): ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3), ({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE), + + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'my_custom_de', 'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME), + ({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME), ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3), ({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4), diff --git a/test/test_devalue.py b/test/test_devalue.py new file mode 100644 index 000000000..29eb89e87 --- /dev/null +++ b/test/test_devalue.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import datetime as dt +import json +import math +import re +import unittest + +from yt_dlp.utils.jslib import devalue + + +TEST_CASES_EQUALS = [{ + 'name': 'int', + 'unparsed': [-42], + 'parsed': -42, +}, { + 'name': 'str', + 'unparsed': ['woo!!!'], + 'parsed': 'woo!!!', +}, { + 'name': 'Number', + 'unparsed': [['Object', 42]], + 'parsed': 42, +}, { + 'name': 'String', + 'unparsed': [['Object', 'yar']], + 'parsed': 'yar', +}, { + 'name': 'Infinity', + 'unparsed': -4, + 'parsed': math.inf, +}, { + 'name': 'negative Infinity', + 'unparsed': -5, + 
'parsed': -math.inf, +}, { + 'name': 'negative zero', + 'unparsed': -6, + 'parsed': -0.0, +}, { + 'name': 'RegExp', + 'unparsed': [['RegExp', 'regexp', 'gim']], # XXX: flags are ignored + 'parsed': re.compile('regexp'), +}, { + 'name': 'Date', + 'unparsed': [['Date', '2001-09-09T01:46:40.000Z']], + 'parsed': dt.datetime.fromtimestamp(1e9, tz=dt.timezone.utc), +}, { + 'name': 'Array', + 'unparsed': [[1, 2, 3], 'a', 'b', 'c'], + 'parsed': ['a', 'b', 'c'], +}, { + 'name': 'Array (empty)', + 'unparsed': [[]], + 'parsed': [], +}, { + 'name': 'Array (sparse)', + 'unparsed': [[-2, 1, -2], 'b'], + 'parsed': [None, 'b', None], +}, { + 'name': 'Object', + 'unparsed': [{'foo': 1, 'x-y': 2}, 'bar', 'z'], + 'parsed': {'foo': 'bar', 'x-y': 'z'}, +}, { + 'name': 'Set', + 'unparsed': [['Set', 1, 2, 3], 1, 2, 3], + 'parsed': [1, 2, 3], +}, { + 'name': 'Map', + 'unparsed': [['Map', 1, 2], 'a', 'b'], + 'parsed': [['a', 'b']], +}, { + 'name': 'BigInt', + 'unparsed': [['BigInt', '1']], + 'parsed': 1, +}, { + 'name': 'Uint8Array', + 'unparsed': [['Uint8Array', 'AQID']], + 'parsed': [1, 2, 3], +}, { + 'name': 'ArrayBuffer', + 'unparsed': [['ArrayBuffer', 'AQID']], + 'parsed': [1, 2, 3], +}, { + 'name': 'str (repetition)', + 'unparsed': [[1, 1], 'a string'], + 'parsed': ['a string', 'a string'], +}, { + 'name': 'None (repetition)', + 'unparsed': [[1, 1], None], + 'parsed': [None, None], +}, { + 'name': 'dict (repetition)', + 'unparsed': [[1, 1], {}], + 'parsed': [{}, {}], +}, { + 'name': 'Object without prototype', + 'unparsed': [['null']], + 'parsed': {}, +}, { + 'name': 'cross-realm POJO', + 'unparsed': [{}], + 'parsed': {}, +}] + +TEST_CASES_IS = [{ + 'name': 'bool', + 'unparsed': [True], + 'parsed': True, +}, { + 'name': 'Boolean', + 'unparsed': [['Object', False]], + 'parsed': False, +}, { + 'name': 'undefined', + 'unparsed': -1, + 'parsed': None, +}, { + 'name': 'null', + 'unparsed': [None], + 'parsed': None, +}, { + 'name': 'NaN', + 'unparsed': -3, + 'parsed': math.nan, +}] + +TEST_CASES_INVALID = [{ + 'name': 'empty string', + 'unparsed': '', + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'hole', + 'unparsed': -2, + 'error': ValueError, + 'pattern': r'invalid integer input', +}, { + 'name': 'string', + 'unparsed': 'hello', + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'number', + 'unparsed': 42, + 'error': ValueError, + 'pattern': r'invalid integer input', +}, { + 'name': 'boolean', + 'unparsed': True, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'null', + 'unparsed': None, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'object', + 'unparsed': {}, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'empty array', + 'unparsed': [], + 'error': ValueError, + 'pattern': r'expected a non-empty list as input', +}, { + 'name': 'Python negative indexing', + 'unparsed': [[1, 2, 3, 4, 5, 6, 7, -7], 1, 2, 3, 4, 5, 6, 7], + 'error': IndexError, + 'pattern': r'invalid index: -7', +}] + + +class TestDevalue(unittest.TestCase): + def test_devalue_parse_equals(self): + for tc in TEST_CASES_EQUALS: + self.assertEqual(devalue.parse(tc['unparsed']), tc['parsed'], tc['name']) + + def test_devalue_parse_is(self): + for tc in TEST_CASES_IS: + self.assertIs(devalue.parse(tc['unparsed']), tc['parsed'], tc['name']) + + def test_devalue_parse_invalid(self): + for tc in TEST_CASES_INVALID: + with self.assertRaisesRegex(tc['error'], 
tc['pattern'], msg=tc['name']): + devalue.parse(tc['unparsed']) + + def test_devalue_parse_cyclical(self): + name = 'Map (cyclical)' + result = devalue.parse([['Map', 1, 0], 'self']) + self.assertEqual(result[0][0], 'self', name) + self.assertIs(result, result[0][1], name) + + name = 'Set (cyclical)' + result = devalue.parse([['Set', 0, 1], 42]) + self.assertEqual(result[1], 42, name) + self.assertIs(result, result[0], name) + + result = devalue.parse([[0]]) + self.assertIs(result, result[0], 'Array (cyclical)') + + name = 'Object (cyclical)' + result = devalue.parse([{'self': 0}]) + self.assertIs(result, result['self'], name) + + name = 'Object with null prototype (cyclical)' + result = devalue.parse([['null', 'self', 0]]) + self.assertIs(result, result['self'], name) + + name = 'Objects (cyclical)' + result = devalue.parse([[1, 2], {'second': 2}, {'first': 1}]) + self.assertIs(result[0], result[1]['first'], name) + self.assertIs(result[1], result[0]['second'], name) + + def test_devalue_parse_revivers(self): + self.assertEqual( + devalue.parse([['indirect', 1], {'a': 2}, 'b'], revivers={'indirect': lambda x: x}), + {'a': 'b'}, 'revivers (indirect)') + + self.assertEqual( + devalue.parse([['parse', 1], '{"a":0}'], revivers={'parse': lambda x: json.loads(x)}), + {'a': 0}, 'revivers (parse)') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index b14069ccc..2e3cdc2a5 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -478,6 +478,14 @@ def test_extract_function_with_global_stack(self): func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000}) self.assertEqual(func([1]), 1111) + def test_increment_decrement(self): + self._test('function f() { var x = 1; return ++x; }', 2) + self._test('function f() { var x = 1; return x++; }', 1) + self._test('function f() { var x = 1; x--; return x }', 0) + self._test('function f() { var y; var x = 1; x++, --x, x--, x--, y="z", "abc", x++; return --x }', -1) + self._test('function f() { var a = "test--"; return a; }', 'test--') + self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--') + if __name__ == '__main__': unittest.main() diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 603f85c65..ecc73e39e 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -8,6 +8,8 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import subprocess + from yt_dlp import YoutubeDL from yt_dlp.utils import shell_quote from yt_dlp.postprocessor import ( @@ -47,7 +49,18 @@ def test_escaping(self): print('Skipping: ffmpeg not found') return - file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}' + test_data_dir = 'test/testdata/thumbnails' + generated_file = f'{test_data_dir}/empty.webp' + + subprocess.check_call([ + pp.executable, '-y', '-f', 'lavfi', '-i', 'color=c=black:s=320x320', + '-c:v', 'libwebp', '-pix_fmt', 'yuv420p', '-vframes', '1', generated_file, + ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + file = test_data_dir + '/foo %d bar/foo_%d.{}' + initial_file = file.format('webp') + os.replace(generated_file, initial_file) + tests = (('webp', 'png'), ('png', 'jpg')) for inp, out in tests: @@ -55,11 +68,13 @@ def test_escaping(self): if os.path.exists(out_file): os.remove(out_file) pp.convert_thumbnail(file.format(inp), out) - assert os.path.exists(out_file) + self.assertTrue(os.path.exists(out_file)) for _, out in tests: os.remove(file.format(out)) + os.remove(initial_file) + 
class TestExec(unittest.TestCase): def test_parse_cmd(self): @@ -610,3 +625,7 @@ def test_quote_for_concat_QuotesAtEnd(self): self.assertEqual( r"'special '\'' characters '\'' galore'\'\'\'", self._pp._quote_for_ffmpeg("special ' characters ' galore'''")) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pot/test_pot_builtin_utils.py b/test/test_pot/test_pot_builtin_utils.py index 1682e42a1..7645ba601 100644 --- a/test/test_pot/test_pot_builtin_utils.py +++ b/test/test_pot/test_pot_builtin_utils.py @@ -11,10 +11,11 @@ class TestGetWebPoContentBinding: @pytest.mark.parametrize('client_name, context, is_authenticated, expected', [ *[(client, context, is_authenticated, expected) for client in [ - 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER'] + 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY'] for context, is_authenticated, expected in [ (PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)), (PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)), + (PoTokenContext.SUBS, False, ('example-video-id', ContentBindingType.VIDEO_ID)), (PoTokenContext.GVS, True, ('example-data-sync-id', ContentBindingType.DATASYNC_ID)), ]], ('WEB_REMIX', PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)), diff --git a/test/test_pot/test_pot_builtin_webpospec.py b/test/test_pot/test_pot_builtin_webpospec.py index c5fb6f382..078008415 100644 --- a/test/test_pot/test_pot_builtin_webpospec.py +++ b/test/test_pot/test_pot_builtin_webpospec.py @@ -49,7 +49,7 @@ def test_not_supports(self, ie, logger, pot_request, client_name, context, is_au @pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [ *[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [ - 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER'] + 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY'] for context, is_authenticated, remote_host, source_address, request_proxy, expected in [ (PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}), (PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}), diff --git a/test/test_traversal.py b/test/test_traversal.py index bc433029d..52215f5a7 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -416,18 +416,8 @@ def test_traversal_unbranching(self): '`any` should allow further branching' def test_traversal_morsel(self): - values = { - 'expires': 'a', - 'path': 'b', - 'comment': 'c', - 'domain': 'd', - 'max-age': 'e', - 'secure': 'f', - 'httponly': 'g', - 'version': 'h', - 'samesite': 'i', - } morsel = http.cookies.Morsel() + values = dict(zip(morsel, 'abcdefghijklmnop')) morsel.set('item_key', 'item_value', 'coded_value') morsel.update(values) values['key'] = 'item_key' diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 
0f0885366..3336b6bff 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -316,6 +316,18 @@ 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js', 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE', ), + ( + 'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js', + 'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h', + ), + ( + 'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js', + 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u', + ), + ( + 'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js', + 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u', + ), ] diff --git a/test/testdata/thumbnails/foo %d bar/foo_%d.webp b/test/testdata/thumbnails/foo %d bar/foo_%d.webp deleted file mode 100644 index d64d0839f..000000000 Binary files a/test/testdata/thumbnails/foo %d bar/foo_%d.webp and /dev/null differ diff --git a/test/testdata/thumbnails/foo %d bar/placeholder b/test/testdata/thumbnails/foo %d bar/placeholder new file mode 100644 index 000000000..e69de29bb diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index ea6264a0d..67ca90349 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -490,7 +490,7 @@ class YoutubeDL: The template is mapped on a dictionary with keys 'progress' and 'info' retry_sleep_functions: Dictionary of functions that takes the number of attempts as argument and returns the time to sleep in seconds. - Allowed keys are 'http', 'fragment', 'file_access' + Allowed keys are 'http', 'fragment', 'file_access', 'extractor' download_ranges: A callback function that gets called for every video with the signature (info_dict, ydl) -> Iterable[Section]. Only the returned sections will be downloaded. @@ -2219,6 +2219,7 @@ def _check_formats(self, formats): self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: + f.pop('__needs_testing', None) yield f else: self.to_screen('[info] Unable to download format {}. 
Skipping...'.format(f['format_id'])) @@ -3963,6 +3964,7 @@ def simplified_codec(f, field): self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None), + self._format_out('Untested', self.Styles.WARNING) if f.get('__needs_testing') else None, format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), delim=', '), delim=' '), diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index fad323c90..5675445ac 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -764,11 +764,11 @@ def _get_linux_desktop_environment(env, logger): GetDesktopEnvironment """ xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) - desktop_session = env.get('DESKTOP_SESSION', None) + desktop_session = env.get('DESKTOP_SESSION', '') if xdg_current_desktop is not None: for part in map(str.strip, xdg_current_desktop.split(':')): if part == 'Unity': - if desktop_session is not None and 'gnome-fallback' in desktop_session: + if 'gnome-fallback' in desktop_session: return _LinuxDesktopEnvironment.GNOME else: return _LinuxDesktopEnvironment.UNITY @@ -797,35 +797,34 @@ def _get_linux_desktop_environment(env, logger): return _LinuxDesktopEnvironment.UKUI elif part == 'LXQt': return _LinuxDesktopEnvironment.LXQT - logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') + logger.debug(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') - elif desktop_session is not None: - if desktop_session == 'deepin': - return _LinuxDesktopEnvironment.DEEPIN - elif desktop_session in ('mate', 'gnome'): - return _LinuxDesktopEnvironment.GNOME - elif desktop_session in ('kde4', 'kde-plasma'): + if desktop_session == 'deepin': + return _LinuxDesktopEnvironment.DEEPIN + elif desktop_session in ('mate', 'gnome'): + return _LinuxDesktopEnvironment.GNOME + elif desktop_session in ('kde4', 'kde-plasma'): + return _LinuxDesktopEnvironment.KDE4 + elif desktop_session == 'kde': + if 'KDE_SESSION_VERSION' in env: return _LinuxDesktopEnvironment.KDE4 - elif desktop_session == 'kde': - if 'KDE_SESSION_VERSION' in env: - return _LinuxDesktopEnvironment.KDE4 - else: - return _LinuxDesktopEnvironment.KDE3 - elif 'xfce' in desktop_session or desktop_session == 'xubuntu': - return _LinuxDesktopEnvironment.XFCE - elif desktop_session == 'ukui': - return _LinuxDesktopEnvironment.UKUI else: - logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') - + return _LinuxDesktopEnvironment.KDE3 + elif 'xfce' in desktop_session or desktop_session == 'xubuntu': + return _LinuxDesktopEnvironment.XFCE + elif desktop_session == 'ukui': + return _LinuxDesktopEnvironment.UKUI else: - if 'GNOME_DESKTOP_SESSION_ID' in env: - return _LinuxDesktopEnvironment.GNOME - elif 'KDE_FULL_SESSION' in env: - if 'KDE_SESSION_VERSION' in env: - return _LinuxDesktopEnvironment.KDE4 - else: - return _LinuxDesktopEnvironment.KDE3 + logger.debug(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') + + if 'GNOME_DESKTOP_SESSION_ID' in env: + return _LinuxDesktopEnvironment.GNOME + elif 'KDE_FULL_SESSION' in env: + if 'KDE_SESSION_VERSION' in env: + return _LinuxDesktopEnvironment.KDE4 + else: + return _LinuxDesktopEnvironment.KDE3 + return _LinuxDesktopEnvironment.OTHER diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 
33cf15df8..35a12b555 100644
--- a/yt_dlp/downloader/niconico.py
+++ b/yt_dlp/downloader/niconico.py
@@ -5,47 +5,46 @@
from .common import FileDownloader
from .external import FFmpegFD
from ..networking import Request
-from ..utils import DownloadError, str_or_none, try_get
+from ..networking.websocket import WebSocketResponse
+from ..utils import DownloadError, str_or_none, truncate_string
+from ..utils.traversal import traverse_obj
class NiconicoLiveFD(FileDownloader):
""" Downloads niconico live without being stopped """
def real_download(self, filename, info_dict):
- video_id = info_dict['video_id']
- ws_url = info_dict['url']
- ws_extractor = info_dict['ws']
- ws_origin_host = info_dict['origin']
- live_quality = info_dict.get('live_quality', 'high')
- live_latency = info_dict.get('live_latency', 'high')
+ video_id = info_dict['id']
+ opts = info_dict['downloader_options']
+ quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url']
dl = FFmpegFD(self.ydl, self.params or {})
new_info_dict = info_dict.copy()
- new_info_dict.update({
- 'protocol': 'm3u8',
- })
+ new_info_dict['protocol'] = 'm3u8'
def communicate_ws(reconnect):
- if reconnect:
- ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
+ # Support --load-info-json as if it is a reconnect attempt
+ if reconnect or not isinstance(ws_extractor, WebSocketResponse):
+ ws = self.ydl.urlopen(Request(
+ ws_url, headers={'Origin': 'https://live.nicovideo.jp'}))
if self.ydl.params.get('verbose', False):
- self.to_screen('[debug] Sending startWatching request')
+ self.write_debug('Sending startWatching request')
ws.send(json.dumps({
- 'type': 'startWatching',
'data': {
+ 'reconnect': True,
+ 'room': {
+ 'commentable': True,
+ 'protocol': 'webSocket',
+ },
'stream': {
- 'quality': live_quality,
- 'protocol': 'hls+fmp4',
- 'latency': live_latency,
'accessRightMethod': 'single_cookie',
'chasePlay': False,
+ 'latency': 'high',
+ 'protocol': 'hls',
+ 'quality': quality,
},
- 'room': {
- 'protocol': 'webSocket',
- 'commentable': True,
- },
- 'reconnect': True,
},
+ 'type': 'startWatching',
}))
else:
ws = ws_extractor
@@ -58,7 +57,6 @@ def communicate_ws(reconnect):
if not data or not isinstance(data, dict):
continue
if data.get('type') == 'ping':
- # pong back
ws.send(r'{"type":"pong"}')
ws.send(r'{"type":"keepSeat"}')
elif data.get('type') == 'disconnect':
@@ -66,12 +64,10 @@
return True
elif data.get('type') == 'error':
self.write_debug(data)
- message = try_get(data, lambda x: x['body']['code'], str) or recv
+ message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
return DownloadError(message)
elif self.ydl.params.get('verbose', False):
- if len(recv) > 100:
- recv = recv[:100] + '...'
- self.to_screen(f'[debug] Server said: {recv}')
+ self.write_debug(f'Server response: {truncate_string(recv, 100)}')
def ws_main():
reconnect = False
@@ -81,7 +77,8 @@ def ws_main():
if ret is True:
return
except BaseException as e:
- self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
+ self.to_screen(
+ f'[niconico:live] {video_id}: Connection error occurred, reconnecting after 10 seconds: {e}')
time.sleep(10)
continue
finally:
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 14a006893..fbbd9571f 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -300,7 +300,6 @@
BrainPOPIlIE,
BrainPOPJrIE,
)
-from .bravotv import BravoTVIE
from .breitbart import BreitBartIE
from .brightcove import (
BrightcoveLegacyIE,
@@ -1108,6 +1107,7 @@
from .massengeschmacktv import MassengeschmackTVIE
from .masters import MastersIE
from .matchtv import MatchTVIE
+from .mave import MaveIE
from .mbn import MBNIE
from .mdr import MDRIE
from .medaltv import MedalTVIE
@@ -1262,6 +1262,7 @@
)
from .nbc import (
NBCIE,
+ BravoTVIE,
NBCNewsIE,
NBCOlympicsIE,
NBCOlympicsStreamIE,
@@ -1269,6 +1270,7 @@
NBCSportsStreamIE,
NBCSportsVPlayerIE,
NBCStationsIE,
+ SyfyIE,
)
from .ndr import (
NDRIE,
@@ -2016,13 +2018,11 @@
SverigesRadioPublicationIE,
)
from .svt import (
- SVTIE,
SVTPageIE,
SVTPlayIE,
SVTSeriesIE,
)
from .swearnet import SwearnetEpisodeIE
-from .syfy import SyfyIE
from .syvdk import SYVDKIE
from .sztvhu import SztvHuIE
from .tagesschau import TagesschauIE
@@ -2147,6 +2147,7 @@
from .toggo import ToggoIE
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
+from .toutiao import ToutiaoIE
from .toutv import TouTvIE
from .toypics import (
ToypicsIE,
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index f1b877927..8c2d9d934 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -3,6 +3,7 @@
import re
import time
import urllib.parse
+import uuid
import xml.etree.ElementTree as etree
from .common import InfoExtractor
@@ -10,6 +11,7 @@
from ..utils import (
NO_DEFAULT,
ExtractorError,
+ parse_qs,
unescapeHTML,
unified_timestamp,
urlencode_postdata,
@@ -45,6 +47,8 @@
'name': 'Comcast XFINITY',
'username_field': 'user',
'password_field': 'passwd',
+ 'login_hostname': 'login.xfinity.com',
+ 'needs_newer_ua': True,
},
'TWC': {
'name': 'Time Warner Cable | Spectrum',
@@ -74,6 +78,12 @@
'name': 'Verizon FiOS',
'username_field': 'IDToken1',
'password_field': 'IDToken2',
+ 'login_hostname': 'ssoauth.verizon.com',
+ },
+ 'Fubo': {
+ 'name': 'Fubo',
+ 'username_field': 'username',
+ 'password_field': 'password',
},
'Cablevision': {
'name': 'Optimum/Cablevision',
@@ -1338,6 +1348,7 @@
'name': 'Sling TV',
'username_field': 'username',
'password_field': 'password',
+ 'login_hostname': 'identity.sling.com',
},
'Suddenlink': {
'name': 'Suddenlink',
@@ -1355,7 +1366,6 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
- _MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
_MVPD_CACHE = 'ap-mvpd'
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
@@ -1367,6 +1377,14 @@ def _download_webpage_handle(self, *args, **kwargs):
return
super()._download_webpage_handle( *args, **kwargs) + @staticmethod + def _get_mso_headers(mso_info): + # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO + # See: https://github.com/yt-dlp/yt-dlp/issues/10848 + return { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', + } if mso_info.get('needs_newer_ua') else {} + @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): channel = etree.Element('channel') @@ -1382,7 +1400,13 @@ def _get_mvpd_resource(provider_id, title, guid, rating): resource_rating.text = rating return '' + etree.tostring(channel).decode() + '' - def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): + def _extract_mvpd_auth(self, url, video_id, requestor_id, resource, software_statement): + mso_id = self.get_param('ap_mso') + if mso_id: + mso_info = MSO_INFO[mso_id] + else: + mso_info = {} + def xml_text(xml_str, tag): return self._search_regex( f'<{tag}>(.+?)', xml_str, tag) @@ -1391,15 +1415,27 @@ def is_expired(token, date_ele): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) - def post_form(form_page_res, note, data={}): + def post_form(form_page_res, note, data={}, validate_url=False): form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') if not re.match(r'https?://', post_url): post_url = urllib.parse.urljoin(urlh.url, post_url) + if validate_url: + # This request is submitting credentials so we should validate it when possible + url_parsed = urllib.parse.urlparse(post_url) + expected_hostname = mso_info.get('login_hostname') + if expected_hostname and expected_hostname != url_parsed.hostname: + raise ExtractorError( + f'Unexpected login URL hostname; expected "{expected_hostname}" but got ' + f'"{url_parsed.hostname}". 
Aborting before submitting credentials') + if url_parsed.scheme != 'https': + self.write_debug('Upgrading login URL scheme to https') + post_url = urllib.parse.urlunparse(url_parsed._replace(scheme='https')) form_data = self._hidden_inputs(form_page) form_data.update(data) return self._download_webpage_handle( post_url, video_id, note, data=urlencode_postdata(form_data), headers={ + **self._get_mso_headers(mso_info), 'Content-Type': 'application/x-www-form-urlencoded', }) @@ -1432,40 +1468,72 @@ def extract_redirect_url(html, url=None, fatal=False): } guid = xml_text(resource, 'guid') if '<' in resource else resource - count = 0 - while count < 2: + for _ in range(2): requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None if not authn_token: - mso_id = self.get_param('ap_mso') - if mso_id: - username, password = self._get_login_info('ap_username', 'ap_password', mso_id) - if not username or not password: - raise_mvpd_required() - mso_info = MSO_INFO[mso_id] - - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, - }, headers={ - # yt-dlp's default user-agent is usually too old for Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - 'User-Agent': self._MODERN_USER_AGENT, - } if mso_id == 'Comcast_SSO' else None) - elif not self._cookies_passed: + if not mso_id: + raise_mvpd_required() + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: raise_mvpd_required() - if not mso_id: - pass - elif mso_id == 'Comcast_SSO': + device_info, urlh = self._download_json_handle( + 'https://sp.auth.adobe.com/indiv/devices', + video_id, 'Registering device with Adobe', + data=json.dumps({'fingerprint': uuid.uuid4().hex}).encode(), + headers={'Content-Type': 'application/json; charset=UTF-8'}) + + device_id = device_info['deviceId'] + mvpd_headers['pass_sfp'] = urlh.get_header('pass_sfp') + mvpd_headers['Ap_21'] = device_id + + registration = self._download_json( + 'https://sp.auth.adobe.com/o/client/register', + video_id, 'Registering client with Adobe', + data=json.dumps({'software_statement': software_statement}).encode(), + headers={'Content-Type': 'application/json; charset=UTF-8'}) + + access_token = self._download_json( + 'https://sp.auth.adobe.com/o/client/token', video_id, + 'Obtaining access token', data=urlencode_postdata({ + 'grant_type': 'client_credentials', + 'client_id': registration['client_id'], + 'client_secret': registration['client_secret'], + }), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + })['access_token'] + mvpd_headers['Authorization'] = f'Bearer {access_token}' + + reg_code = self._download_json( + f'https://sp.auth.adobe.com/reggie/v1/{requestor_id}/regcode', + video_id, 'Obtaining registration code', + data=urlencode_postdata({ + 'requestor': requestor_id, + 'deviceId': device_id, + 'format': 'json', + }), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Authorization': f'Bearer {access_token}', + })['code'] + + provider_redirect_page_res = self._download_webpage_handle( + self._SERVICE_PROVIDER_TEMPLATE % 
'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, + 'reg_code': reg_code, + }, headers=self._get_mso_headers(mso_info)) + + if mso_id == 'Comcast_SSO': # Comcast page flow varies by video site and whether you # are on Comcast's network. provider_redirect_page, urlh = provider_redirect_page_res @@ -1489,8 +1557,8 @@ def extract_redirect_url(html, url=None, fatal=False): oauth_redirect_url = extract_redirect_url( provider_redirect_page, fatal=True) provider_login_page_res = self._download_webpage_handle( - oauth_redirect_url, video_id, - self._DOWNLOADING_LOGIN_PAGE) + oauth_redirect_url, video_id, self._DOWNLOADING_LOGIN_PAGE, + headers=self._get_mso_headers(mso_info)) else: provider_login_page_res = post_form( provider_redirect_page_res, @@ -1500,24 +1568,35 @@ def extract_redirect_url(html, url=None, fatal=False): provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) mvpd_confirm_page, urlh = mvpd_confirm_page_res if '' in mvpd_confirm_page: post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Philo': # Philo has very unique authentication method - self._download_webpage( - 'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({ + self._request_webpage( + 'https://idp.philo.com/auth/init/login_code', video_id, + 'Requesting Philo auth code', data=json.dumps({ 'ident': username, 'device': 'web', 'send_confirm_link': False, 'send_token': True, - })) + 'device_ident': f'web-{uuid.uuid4().hex}', + 'include_login_link': True, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + philo_code = getpass.getpass('Type auth code you have received [Return]: ') - self._download_webpage( - 'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({ - 'token': philo_code, - })) + self._request_webpage( + 'https://idp.philo.com/auth/update/login_code', video_id, + 'Submitting token', data=json.dumps({'token': philo_code}).encode(), + headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login') post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Verizon': @@ -1539,7 +1618,7 @@ def extract_redirect_url(html, url=None, fatal=False): provider_redirect_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) saml_login_page, urlh = saml_login_page_res if 'Please try again.' in saml_login_page: raise ExtractorError( @@ -1560,7 +1639,7 @@ def extract_redirect_url(html, url=None, fatal=False): [saml_login_page, saml_redirect_url], 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) if 'Please try again.' 
in saml_login_page: raise ExtractorError( 'Failed to login, incorrect User ID or Password.') @@ -1631,7 +1710,7 @@ def extract_redirect_url(html, url=None, fatal=False): provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) provider_refresh_redirect_url = extract_redirect_url( provider_association_redirect, url=urlh.url) @@ -1682,7 +1761,7 @@ def extract_redirect_url(html, url=None, fatal=False): provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) provider_refresh_redirect_url = extract_redirect_url( provider_association_redirect, url=urlh.url) @@ -1699,6 +1778,27 @@ def extract_redirect_url(html, url=None, fatal=False): query=hidden_data) post_form(mvpd_confirm_page_res, 'Confirming Login') + elif mso_id == 'Fubo': + _, urlh = provider_redirect_page_res + + fubo_response = self._download_json( + 'https://api.fubo.tv/partners/tve/connect', video_id, + 'Authenticating with Fubo', 'Unable to authenticate with Fubo', + query=parse_qs(urlh.url), data=json.dumps({ + 'username': username, + 'password': password, + }).encode(), headers={ + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }) + + self._request_webpage( + 'https://sp.auth.adobe.com/adobe-services/oauth2', video_id, + 'Authenticating with Adobe', 'Failed to authenticate with Adobe', + query={ + 'code': fubo_response['code'], + 'state': fubo_response['state'], + }) else: # Some providers (e.g. DIRECTV NOW) have another meta refresh # based redirect that should be followed. @@ -1717,7 +1817,8 @@ def extract_redirect_url(html, url=None, fatal=False): } if mso_id in ('Cablevision', 'AlticeOne'): form_data['_eventId_proceed'] = '' - mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data) + mvpd_confirm_page_res = post_form( + provider_login_page_res, 'Logging in', form_data, validate_url=True) if mso_id != 'Rogers': post_form(mvpd_confirm_page_res, 'Confirming Login') @@ -1727,6 +1828,7 @@ def extract_redirect_url(html, url=None, fatal=False): 'Retrieving Session', data=urlencode_postdata({ '_method': 'GET', 'requestor_id': requestor_id, + 'reg_code': reg_code, }), headers=mvpd_headers) except ExtractorError as e: if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401: @@ -1734,7 +1836,6 @@ def extract_redirect_url(html, url=None, fatal=False): raise if 'bravotv|oxygen)\.com/(?:[^/]+/)+(?P[^/?#]+)' - _TESTS = [{ - 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is', - 'info_dict': { - 'id': '3923059', - 'ext': 'mp4', - 'title': 'The Top Chef Season 16 Winner Is...', - 'description': 'Find out who takes the title of Top Chef!', - 'upload_date': '20190314', - 'timestamp': 1552591860, - 'season_number': 16, - 'episode_number': 15, - 'series': 'Top Chef', - 'episode': 'The Top Chef Season 16 Winner Is...', - 'duration': 190.357, - 'season': 'Season 16', - 'thumbnail': r're:^https://.+\.jpg', - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling', - 'info_dict': { - 'id': '9000234570', - 'ext': 'mp4', - 'title': 'London Calling', - 'description': 'md5:5af95a8cbac1856bd10e7562f86bb759', - 'upload_date': '20230310', - 'timestamp': 1678410000, - 'season_number': 20, - 'episode_number': 1, - 'series': 'Top Chef', - 'episode': 'London Calling', - 'duration': 3266.03, - 
'season': 'Season 20', - 'chapters': 'count:7', - 'thumbnail': r're:^https://.+\.jpg', - 'age_limit': 14, - }, - 'params': {'skip_download': 'm3u8'}, - 'skip': 'This video requires AdobePass MSO credentials', - }, { - 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night', - 'info_dict': { - 'id': '3692045', - 'ext': 'mp4', - 'title': 'Closing Night', - 'description': 'md5:3170065c5c2f19548d72a4cbc254af63', - 'upload_date': '20180401', - 'timestamp': 1522623600, - 'season_number': 1, - 'episode_number': 1, - 'series': 'In Ice Cold Blood', - 'episode': 'Closing Night', - 'duration': 2629.051, - 'season': 'Season 1', - 'chapters': 'count:6', - 'thumbnail': r're:^https://.+\.jpg', - 'age_limit': 14, - }, - 'params': {'skip_download': 'm3u8'}, - 'skip': 'This video requires AdobePass MSO credentials', - }, { - 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2', - 'info_dict': { - 'id': '3974019', - 'ext': 'mp4', - 'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)', - 'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5', - 'upload_date': '20190617', - 'timestamp': 1560790800, - 'season_number': 2, - 'episode_number': 16, - 'series': 'In Ice Cold Blood', - 'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)', - 'duration': 68.235, - 'season': 'Season 2', - 'thumbnail': r're:^https://.+\.jpg', - 'age_limit': 14, - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', - 'only_matching': True, - }] - - def _real_extract(self, url): - site, display_id = self._match_valid_url(url).group('site', 'id') - webpage = self._download_webpage(url, display_id) - settings = self._search_json( - r']+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id) - tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '') - query = { - 'manifest': 'm3u', - 'formats': 'm3u,mpeg4', - } - - if tve: - account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC' - account_id = tve['data-mpx-media-account-id'] - metadata = self._parse_json( - tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML) - video_id = tve.get('data-guid') or metadata['guid'] - if tve.get('data-entitlement') == 'auth': - auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {} - site = remove_end(site, 'tv') - release_pid = tve['data-release-pid'] - resource = self._get_mvpd_resource( - tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site, - tve['data-title'], release_pid, tve.get('data-rating')) - query.update({ - 'switch': 'HLSServiceSecure', - 'auth': self._extract_mvpd_auth( - url, release_pid, auth.get('adobePassRequestorId') or site, resource), - }) - - else: - ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {} - account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B' - account_id = ls_playlist['mpxMediaAccountId'] - video_id = ls_playlist['defaultGuid'] - metadata = traverse_obj( - ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False) - - tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}' - tp_metadata = self._download_json( - update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False) - - chapters = traverse_obj(tp_metadata, ('chapters', ..., { - 'start_time': 
('startTime', {float_or_none(scale=1000)}), - 'end_time': ('endTime', {float_or_none(scale=1000)}), - })) - # prune pointless single chapters that span the entire duration from short videos - if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')): - chapters = None - - m3u8_url = self._request_webpage(HEADRequest( - update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url - if 'mpeg_cenc' in m3u8_url: - self.report_drm(video_id) - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') - - return { - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - 'chapters': chapters, - **merge_dicts(traverse_obj(tp_metadata, { - 'title': 'title', - 'description': 'description', - 'duration': ('duration', {float_or_none(scale=1000)}), - 'timestamp': ('pubDate', {float_or_none(scale=1000)}), - 'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}), - 'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}), - 'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}), - 'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}), - 'age_limit': ('ratings', ..., 'rating', {parse_age_limit}), - }, get_all=False), traverse_obj(metadata, { - 'title': 'title', - 'description': 'description', - 'duration': ('durationInSeconds', {int_or_none}), - 'timestamp': ('airDate', {unified_timestamp}), - 'thumbnail': ('thumbnailUrl', {url_or_none}), - 'season_number': ('seasonNumber', {int_or_none}), - 'episode_number': ('episodeNumber', {int_or_none}), - 'episode': 'episodeTitle', - 'series': 'show', - })), - } diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 3ada1fd5d..c0f2f8b57 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -495,8 +495,6 @@ def _real_extract(self, url): class BrightcoveNewBaseIE(AdobePassIE): def _parse_brightcove_metadata(self, json_data, video_id, headers={}): - title = json_data['name'].strip() - formats, subtitles = [], {} sources = json_data.get('sources') or [] for source in sources: @@ -600,16 +598,18 @@ def build_format_id(kind): return { 'id': video_id, - 'title': title, - 'description': clean_html(json_data.get('description')), 'thumbnails': thumbnails, 'duration': duration, - 'timestamp': parse_iso8601(json_data.get('published_at')), - 'uploader_id': json_data.get('account_id'), 'formats': formats, 'subtitles': subtitles, - 'tags': json_data.get('tags', []), 'is_live': is_live, + **traverse_obj(json_data, { + 'title': ('name', {clean_html}), + 'description': ('description', {clean_html}), + 'tags': ('tags', ..., {str}, filter, all, filter), + 'timestamp': ('published_at', {parse_iso8601}), + 'uploader_id': ('account_id', {str}), + }), } @@ -645,10 +645,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'uploader_id': '4036320279001', 'formats': 'mincount:39', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'skip': '404 Not Found', }, { # playlist stream 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', @@ -709,7 +706,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'ext': 'mp4', 'title': 'TGD_01-032_5', 'thumbnail': r're:^https?://.*\.jpg$', - 'tags': [], 'timestamp': 1646078943, 'uploader_id': '1569565978001', 'upload_date': '20220228', @@ -721,7 +717,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'ext': 'mp4', 'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5', 'thumbnail': 
r're:^https?://.*\.jpg$', - 'tags': [], 'timestamp': 1651604591, 'uploader_id': '1569565978001', 'upload_date': '20220503', @@ -923,10 +918,18 @@ def extract_policy_key(): errors = json_data.get('errors') if errors and errors[0].get('error_subcode') == 'TVE_AUTH': custom_fields = json_data['custom_fields'] + missing_fields = ', '.join( + key for key in ('source_url', 'software_statement') if not smuggled_data.get(key)) + if missing_fields: + raise ExtractorError( + f'Missing fields in smuggled data: {missing_fields}. ' + f'This video can be only extracted from the webpage where it is embedded. ' + f'Pass the URL of the embedding webpage instead of the Brightcove URL', expected=True) tve_token = self._extract_mvpd_auth( smuggled_data['source_url'], video_id, custom_fields['bcadobepassrequestorid'], - custom_fields['bcadobepassresourceid']) + custom_fields['bcadobepassresourceid'], + smuggled_data['software_statement']) json_data = self._download_json( api_url, video_id, headers={ 'Accept': f'application/json;pk={policy_key}', diff --git a/yt_dlp/extractor/cloudycdn.py b/yt_dlp/extractor/cloudycdn.py index 6e757d79e..a9a539274 100644 --- a/yt_dlp/extractor/cloudycdn.py +++ b/yt_dlp/extractor/cloudycdn.py @@ -11,7 +11,7 @@ class CloudyCDNIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P[^/?#]+)/media/(?P[\w-]+)' + _VALID_URL = r'(?:https?:)?//embed\.(?Pcloudycdn\.services|backscreen\.com)/(?P[^/?#]+)/media/(?P[\w-]+)' _EMBED_REGEX = [rf']+\bsrc=[\'"](?P{_VALID_URL})'] _TESTS = [{ 'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?', @@ -23,7 +23,7 @@ class CloudyCDNIE(InfoExtractor): 'duration': 1442, 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', - 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg', }, }, { 'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1', @@ -33,7 +33,7 @@ class CloudyCDNIE(InfoExtractor): 'ext': 'mp4', 'title': 'LV-8-5-1', 'timestamp': 1669767167, - 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/488306/placeholder1679423604.jpg', 'duration': 1205, 'upload_date': '20221130', }, @@ -48,9 +48,21 @@ class CloudyCDNIE(InfoExtractor): 'duration': 1673, 'title': 'D24-6000-074-cetstud', 'timestamp': 1718902233, - 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/788392/placeholder1718903938.jpg', }, 'params': {'format': 'bv'}, + }, { + 'url': 'https://embed.backscreen.com/ltv/media/32j_z25-0600-127?', + 'md5': '9b6fa09ac1a4de53d4f42b94affc3b42', + 'info_dict': { + 'id': '32j_z25-0600-127', + 'ext': 'mp4', + 'title': 'Z25-0600-127-DZ', + 'duration': 1906, + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/977427/placeholder1746633646.jpg', + 'timestamp': 1746632402, + 'upload_date': '20250507', + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/', @@ -60,17 +72,17 @@ class CloudyCDNIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20230223', 'duration': 629, - 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/518407/placeholder1678748124.jpg', 'timestamp': 
1677181513,
'title': 'LIB-2',
},
}]
def _real_extract(self, url):
- site_id, video_id = self._match_valid_url(url).group('site_id', 'id')
+ domain, site_id, video_id = self._match_valid_url(url).group('domain', 'site_id', 'id')
data = self._download_json(
- f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
+ f'https://player.{domain}/player/{site_id}/media/{video_id}/',
video_id, data=urlencode_postdata({
'version': '6.4.0',
'referer': url,
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index d5607296d..32b4680b7 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -101,6 +101,7 @@
xpath_with_ns,
)
from ..utils._utils import _request_dump_filename
+from ..utils.jslib import devalue
class InfoExtractor:
@@ -262,6 +263,9 @@ class InfoExtractor:
* http_chunk_size Chunk size for HTTP downloads
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
+ * ws (NiconicoLiveFD only) WebSocketResponse
+ * ws_url (NiconicoLiveFD only) Websockets URL
+ * max_quality (NiconicoLiveFD only) Max stream quality string
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
RTMP formats can also have the additional fields: page_url,
@@ -1675,9 +1679,9 @@ def extract_video_object(e):
'ext': mimetype2ext(e.get('encodingFormat')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
- 'thumbnails': [{'url': unescapeHTML(url)}
- for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
- if url_or_none(url)],
+ 'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), {
+ 'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
+ })),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
# author can be an instance of 'Organization' or 'Person' types.
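The hunk below adds the new `_search_nuxt_json` and `_resolve_nuxt_array` helpers, which handle the `__NUXT_DATA__` payload that Nuxt embeds as a flat, devalue-style JSON array in which container values refer to other entries of the same array by index. As a rough, illustrative sketch of that indexed-reference idea only: the sample payload and the `resolve` helper here are invented for demonstration and are not part of this patch, and the real helpers additionally deal with revivers, special values and error reporting via `devalue.parse_iter`.

```python
import json

# A made-up __NUXT_DATA__-style payload: dict and list values hold indices
# into the same flat array, and index 0 is the root node.
SAMPLE = '[{"data": 1}, {"video": 2}, {"title": 3, "url": 4}, "Example title", "https://example.invalid/stream.m3u8"]'


def resolve(nodes, index=0):
    """Naively dereference index-based entries (ignores revivers, cycles and special values)."""
    node = nodes[index]
    if isinstance(node, dict):
        return {key: resolve(nodes, value) for key, value in node.items()}
    if isinstance(node, list):
        return [resolve(nodes, value) for value in node]
    return node


print(resolve(json.loads(SAMPLE)))
# {'data': {'video': {'title': 'Example title', 'url': 'https://example.invalid/stream.m3u8'}}}
```

In an extractor, the entry point added by this patch would be a call such as `self._search_nuxt_json(webpage, video_id)`, which locates the embedded array in the page and resolves it to a plain dict.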
@@ -1795,6 +1799,63 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
return traverse_obj(ret, traverse) or {}
+ def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Resolves Nuxt rich JSON payload arrays"""
+ # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
+ # https://github.com/nuxt/nuxt/pull/19205
+ if default is not NO_DEFAULT:
+ fatal = False
+
+ if not isinstance(array, list) or not array:
+ error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
+ if fatal:
+ raise ExtractorError(error_msg, video_id=video_id)
+ elif default is NO_DEFAULT:
+ self.report_warning(error_msg, video_id=video_id)
+ return {} if default is NO_DEFAULT else default
+
+ def indirect_reviver(data):
+ return data
+
+ def json_reviver(data):
+ return json.loads(data)
+
+ gen = devalue.parse_iter(array, revivers={
+ 'NuxtError': indirect_reviver,
+ 'EmptyShallowRef': json_reviver,
+ 'EmptyRef': json_reviver,
+ 'ShallowRef': indirect_reviver,
+ 'ShallowReactive': indirect_reviver,
+ 'Ref': indirect_reviver,
+ 'Reactive': indirect_reviver,
+ })
+
+ while True:
+ try:
+ error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}'
+ if fatal:
+ raise ExtractorError(error_msg, video_id=video_id)
+ elif default is NO_DEFAULT:
+ self.report_warning(error_msg, video_id=video_id, only_once=True)
+ else:
+ self.write_debug(f'{video_id}: {error_msg}', only_once=True)
+ except StopIteration as error:
+ return error.value or ({} if default is NO_DEFAULT else default)
+
+ def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Parses metadata from Nuxt rich JSON payloads embedded in HTML"""
+ passed_default = default is not NO_DEFAULT
+
+ array = self._search_json(
+ r']+\bid="__NUXT_DATA__"[^>]*>', webpage,
+ 'Nuxt JSON data', video_id, contains_pattern=r'\[(?s:.+)\]',
+ fatal=fatal, default=NO_DEFAULT if not passed_default else None)
+
+ if not array:
+ return default if passed_default else {}
+
+ return self._resolve_nuxt_array(array, video_id, fatal=fatal, default=default)
+
@staticmethod
def _hidden_inputs(html):
html = re.sub(r'', '', html)
diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py
index e36eac919..68ace240c 100644
--- a/yt_dlp/extractor/douyutv.py
+++ b/yt_dlp/extractor/douyutv.py
@@ -206,7 +206,7 @@ def _real_extract(self, url):
'is_live': True,
**traverse_obj(room, {
'display_id': ('url', {str}, {lambda i: i[1:]}),
- 'title': ('room_name', {unescapeHTML}),
+ 'title': ('room_name', {str}, {unescapeHTML}),
'description': ('show_details', {str}),
'uploader': ('nickname', {str}),
'thumbnail': ('room_src', {url_or_none}),
diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py
index edd66e46c..fb8a8e87c 100644
--- a/yt_dlp/extractor/dreisat.py
+++ b/yt_dlp/extractor/dreisat.py
@@ -64,7 +64,7 @@ class DreiSatIE(ZDFBaseIE):
'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844',
'duration': 5399.0,
- 'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903',
+ 'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1747256996338',
'chapters': 'count:24',
'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb',
diff --git
a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index 37e74bc08..ceba024bc 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -329,6 +329,7 @@ class WatchESPNIE(AdobePassIE): }] _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c' + _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyZGJmZWM4My03OWE1LTQyNzEtYTVmZC04NTZjYTMxMjRjNjMiLCJuYmYiOjE1NDAyMTI3NjEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQwMjEyNzYxfQ.yaK3r4AI2uLVvsyN1GLzqzgzRlxMPtasSaiYYBV0wIstqih5tvjTmeoLmi8Xy9Kp_U7Md-bOffwiyK3srHkpUkhhwXLH2x6RPjmS1tPmhaG7-3LBcHTf2ySPvXhVf7cN4ngldawK4tdtLtsw6rF_JoZE2yaC6XbS2F51nXSFEDDnOQWIHEQRG3aYAj-38P2CLGf7g-Yfhbp5cKXeksHHQ90u3eOO4WH0EAjc9oO47h33U8KMEXxJbvjV5J8Va2G2fQSgLDZ013NBI3kQnE313qgqQh2feQILkyCENpB7g-TVBreAjOaH1fU471htSoGGYepcAXv-UDtpgitDiLy7CQ' def _call_bamgrid_api(self, path, video_id, payload=None, headers={}): if 'Authorization' not in headers: @@ -405,8 +406,8 @@ def _real_extract(self, url): # TV Provider required else: - resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None) - auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode() + resource = self._get_mvpd_resource('espn1', video_data['name'], video_id, None) + auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource, self._SOFTWARE_STATEMENT).encode() asset = self._download_json( f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb', diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index 83c1979db..4e138a828 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -7,161 +7,157 @@ int_or_none, join_nonempty, parse_age_limit, - remove_end, - remove_start, - traverse_obj, - try_get, unified_timestamp, urlencode_postdata, ) +from ..utils.traversal import traverse_obj class GoIE(AdobePassIE): _SITE_INFO = { 'abc': { 'brand': '001', - 'requestor_id': 'ABC', + 'requestor_id': 'dtci', + 'provider_id': 'ABC', + 'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI4OTcwMjlkYS0yYjM1LTQyOWUtYWQ0NS02ZjZiZjVkZTdhOTUiLCJuYmYiOjE2MjAxNzM5NjksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNjIwMTczOTY5fQ.SC69DVJWSL8sIe-vVUrP6xS_kzHKqwz9PdKYexs_y-f7Vin6mM-7S-W1TE_-K55O0pyf-TL4xYgvm6LIye8CckG-nZfVwNPV4huduov0jmIcxCQFeUwkHULG2IaA44wfBVUBdaHgkhPweZ2amjycO_IXtez-gBXOLbE3B7Gx9j_5ISCFtyVUblThKfoGyQv6KT6t8Vpmc4ZSKCCQp74KWFFypydb9ucego1taW_nQD06Cdf4yByLd6NaTBceMcIKbug9b9gxFm3XBgJ5q3z7KGo1Kr6XalAV5j4m-fQ91wczlTilX8FM4AljMupyRM9mA_aEADILQ4hS79q4SM0w6w', }, 'freeform': { 'brand': '002', 'requestor_id': 'ABCFamily', - }, - 'watchdisneychannel': { - 'brand': '004', - 'resource_id': 'Disney', - }, - 'watchdisneyjunior': { - 'brand': '008', - 'resource_id': 'DisneyJunior', - }, - 'watchdisneyxd': { - 'brand': '009', - 'resource_id': 'DisneyXD', + 'provider_id': 'ABCFamily', + 'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZWM2MGYyNC0xYzRjLTQ1NzQtYjc0Zi03ZmM4N2E5YWMzMzgiLCJuYmYiOjE1ODc2NjU5MjMsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NjY1OTIzfQ.flCn3dhvmvPnWmV0JV8Fm0YFyj07yPez9-n1GFEwVIm_S2wQVWbWyJhqsAyLZVFrhOMZYTqmPS3OHxGwTwXkEYn6PD7o_vIVG3oqi-Xn1m5jRt_Gazw5qEtpat6VE7bvKGSD3ZhcidOrsCk8NcYyq75u61NHDvSl81pcedJjVRVUpsqrEwmo0aVbA0C8PX3ri0mEbGvkMKvHn8E60xp-PSE-VK8SDT0plwPu_TwUszkZ6-_I8_2xcv_WBqcXFkAVg7Q-iNJXgQvmNsrpcrYuLvi6hEH4ZLtoDcXU6MhwTQAJTiHSo8x9aHX1_qFP09CzlNOFQbC2ZEJdP9SvA53SLQ', }, 'disneynow': { - 'brand': '011', + 'brand': '011', # also: '004', '008', '009' + 'requestor_id': 'DisneyChannels', + 'provider_id': 'DisneyChannels', + 'software_statement': 
'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzAzNTRiOS04NDNiLTRkNjAtYTQ3ZS0yNzk1MzlkOTIyNTciLCJuYmYiOjE1NTg5ODc0NDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTU4OTg3NDQ5fQ.Jud6YS6-J2h0h6po0oMheDym0qRTJQGj4kzacrz4DFuEwhcBkkykW6pF5pKuAUJy9HCZ40oDAHe2KcTlDJjCZF5tDaUEfdihakZ9cC_rG7MU-QoRne8qaB_dPDKwGuk-ZyWD8eV3zwTJmbGo8hDxYTEU81YNCxwhyc_BPDr5TYiubbmpP3_pTnXmSpuL58isJ2peSKWlX9BacuXtBY25c_QnPFKk-_EETm7IHkTpDazde1QfHWGu4s4yJpKGk8RVVujVG6h6ELlL-ZeYLilBm7iS7h1TYG1u7fJhyZRL7isaom6NvAzsvN3ngss1fLwt8decP8wzdFHrbYTdTjW8qw', 'resource_id': 'Disney', }, - 'fxnow.fxnetworks': { - 'brand': '025', + 'fxnetworks': { + 'brand': '025', # also: '020' 'requestor_id': 'dtci', + 'provider_id': 'fx', # also 'fxx', 'fxm' + 'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIzYWRhYWZiNC02OTAxLTRlYzktOTdmNy1lYWZkZTJkODJkN2EiLCJuYmYiOjE1NjIwMjQwNzYsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDI0MDc2fQ.dhKMpZK50AObbZYrMiYPSfWtzXHUaeMP3jrIY4Cgfvh0GaEgk0Mns_zp78jypFeZgRtPVleQMQDNq2YEloRLcAGqP1aa6WVDglnK77ZWUm4IKai14Rwf3A6YBhSRoO2_lMmUGkuTf6gZY-kMIPqBYKqzTQiQl4HbniPFodIzFRiuI9QJVrkoyTGrJL4oqiX08PoFI3Z-TOti1Heu3EbFC-GveQHhlinYrzU7rbiAqLEz7FImtfBDsnXX1Y3uJDLYM3Bq4Oh0nrzTv1Fd62wNsCNErHHIbELidh1zZF0ujvt7ReuZUwAitm0UhEJ7OxNOUbEQWtae6pVNscvdvTFMpg', + }, + 'nationalgeographic': { + 'brand': '026', # also '023' + 'requestor_id': 'dtci', + 'provider_id': 'ngc', # also 'ngw' + 'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxMzE4YTM1Ni05Mjc4LTQ4NjEtYTFmNi1jMTIzMzg1ZWMzYzMiLCJuYmYiOjE1NjIwMjM4MjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDIzODI4fQ.Le-2OzF9-jrhJ7ZfWtLWk5iSHGVZoxeU1w0_fO--Heli0OwRZsRq2slSmx-oZTzxuWmAgDEiBkWSDcDK6sM25DrCLsdsJa3MBuZ-slBRtH8aq3HpNoqqLkU-vg6gRUEKMtwBUtwCu_9aKUCayYtndWv4b1DjVQeSrteOW5NNudWVYleAe0kxeNJQHo5If9SCzDudKVJktFUjhNks4QPOC_uONPkRRlL9D0fNvtOY-LRFckfcHhf5z9l1iZjeukV0YhdKnuw1wyiaWrQXBUDiBfbkCRd2DM-KnelqPxfiXCaTjGKDURRBO3pz33ebge3IFXSiU5vl4qHQ8xvunzGpFw', }, } - _VALID_URL = r'''(?x) - https?:// - (?P - (?:{}\.)?go|fxnow\.fxnetworks| - (?:www\.)?(?:abc|freeform|disneynow) - )\.com/ - (?: - (?:[^/]+/)*(?P[Vv][Dd][Kk][Aa]\w+)| - (?:[^/]+/)*(?P[^/?\#]+) - ) - '''.format(r'\.|'.join(list(_SITE_INFO.keys()))) + _URL_PATH_RE = r'(?:video|episode|movies-and-specials)/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' + _VALID_URL = [ + fr'https?://(?:www\.)?(?Pabc)\.com/{_URL_PATH_RE}', + fr'https?://(?:www\.)?(?Pfreeform)\.com/{_URL_PATH_RE}', + fr'https?://(?:www\.)?(?Pdisneynow)\.com/{_URL_PATH_RE}', + fr'https?://fxnow\.(?Pfxnetworks)\.com/{_URL_PATH_RE}', + fr'https?://(?:www\.)?(?Pnationalgeographic)\.com/tv/{_URL_PATH_RE}', + ] _TESTS = [{ - 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', + 'url': 'https://abc.com/episode/4192c0e6-26e5-47a8-817b-ce8272b9e440/playlist/PL551127435', 'info_dict': { - 'id': 'VDKA3807643', + 'id': 'VDKA10805898', 'ext': 'mp4', - 'title': 'The Traitor in the White House', - 'description': 'md5:05b009d2d145a1e85d25111bd37222e8', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'This content is no longer available.', - }, { - 'url': 'https://disneynow.com/shows/big-hero-6-the-series', - 'info_dict': { - 'title': 'Doraemon', - 'id': 'SH55574025', - }, - 'playlist_mincount': 51, - }, { - 'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood', - 'info_dict': { - 'id': 'VDKA3609139', - 'title': 'This Guilty Blood', - 'description': 'md5:f18e79ad1c613798d95fdabfe96cd292', + 'title': 'Switch the Flip', + 'description': 'To help get Brian’s life in order, Stewie and Brian swap bodies 
using a machine that Stewie invents.', 'age_limit': 14, + 'duration': 1297, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'Family Guy', + 'season': 'Season 16', + 'season_number': 16, + 'episode': 'Episode 17', + 'episode_number': 17, + 'timestamp': 1746082800.0, + 'upload_date': '20250501', + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'This video requires AdobePass MSO credentials', + }, { + 'url': 'https://disneynow.com/episode/21029660-ba06-4406-adb0-a9a78f6e265e/playlist/PL553044961', + 'info_dict': { + 'id': 'VDKA39546942', + 'ext': 'mp4', + 'title': 'Zero Friends Again', + 'description': 'Relationships fray under the pressures of a difficult journey.', + 'age_limit': 0, + 'duration': 1721, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'Star Wars: Skeleton Crew', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 6', + 'episode_number': 6, + 'timestamp': 1746946800.0, + 'upload_date': '20250511', + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'This video requires AdobePass MSO credentials', + }, { + 'url': 'https://fxnow.fxnetworks.com/episode/09f4fa6f-c293-469e-aebe-32c9ca5842a7/playlist/PL554408064', + 'info_dict': { + 'id': 'VDKA38112033', + 'ext': 'mp4', + 'title': 'The Return of Jerry', + 'description': 'The vampires’ long-lost fifth roommate returns. Written by Paul Simms; directed by Kyle Newacheck.', + 'age_limit': 17, + 'duration': 1493, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'What We Do in the Shadows', + 'season': 'Season 6', + 'season_number': 6, 'episode': 'Episode 1', - 'upload_date': '20170102', - 'season': 'Season 2', - 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abcf/Shadowhunters/video/201/ae5f75608d86bf88aa4f9f4aa76ab1b7/579x325-Q100_ae5f75608d86bf88aa4f9f4aa76ab1b7.jpg', - 'duration': 2544, - 'season_number': 2, - 'series': 'Shadowhunters', 'episode_number': 1, - 'timestamp': 1483387200, - 'ext': 'mp4', - }, - 'params': { - 'geo_bypass_ip_block': '3.244.239.0/24', - # m3u8 download - 'skip_download': True, + 'timestamp': 1729573200.0, + 'upload_date': '20241022', }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'This video requires AdobePass MSO credentials', }, { - 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-04/12-the-knock', + 'url': 'https://www.freeform.com/episode/bda0eaf7-761a-4838-aa44-96f794000844/playlist/PL553044961', 'info_dict': { - 'id': 'VDKA26050359', - 'title': 'The Knock', - 'description': 'md5:0c2947e3ada4c31f28296db7db14aa64', - 'age_limit': 14, + 'id': 'VDKA39007340', 'ext': 'mp4', - 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abc/TheRookie/video/412/daf830d06e83b11eaf5c0a299d993ae3/1556x876-Q75_daf830d06e83b11eaf5c0a299d993ae3.jpg', - 'episode': 'Episode 12', - 'season_number': 4, - 'season': 'Season 4', - 'timestamp': 1642975200, - 'episode_number': 12, - 'upload_date': '20220123', - 'series': 'The Rookie', - 'duration': 2572, - }, - 'params': { - 'geo_bypass_ip_block': '3.244.239.0/24', - # m3u8 download - 'skip_download': True, + 'title': 'Angel\'s Landing', + 'description': 'md5:91bf084e785c968fab16734df7313446', + 'age_limit': 14, + 'duration': 2523, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'How I Escaped My Cult', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 2', + 'episode_number': 2, + 'timestamp': 1740038400.0, + 'upload_date': '20250220', }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841', + 'url': 
'https://www.nationalgeographic.com/tv/episode/ca694661-1186-41ae-8089-82f64d69b16d/playlist/PL554408064', 'info_dict': { - 'id': 'VDKA12782841', - 'title': 'First Look: Better Things - Season 2', - 'description': 'md5:fa73584a95761c605d9d54904e35b407', + 'id': 'VDKA39492078', 'ext': 'mp4', - 'age_limit': 14, - 'upload_date': '20170825', - 'duration': 161, - 'series': 'Better Things', - 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/fx/BetterThings/video/12782841/b6b05e58264121cc2c98811318e6d507/1556x876-Q75_b6b05e58264121cc2c98811318e6d507.jpg', - 'timestamp': 1503661074, - }, - 'params': { - 'geo_bypass_ip_block': '3.244.239.0/24', - # m3u8 download - 'skip_download': True, + 'title': 'Heart of the Emperors', + 'description': 'md5:4fc50a2878f030bb3a7eac9124dca677', + 'age_limit': 0, + 'duration': 2775, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'Secrets of the Penguins', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1745204400.0, + 'upload_date': '20250421', }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', + 'url': 'https://www.freeform.com/movies-and-specials/c38281fc-9f8f-47c7-8220-22394f9df2e1', 'only_matching': True, }, { - 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland', - 'only_matching': True, - }, { - # brand 004 - 'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915', - 'only_matching': True, - }, { - # brand 008 - 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', - 'only_matching': True, - }, { - 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', - 'only_matching': True, - }, { - 'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner', + 'url': 'https://abc.com/video/219a454a-172c-41bf-878a-d169e6bc0bdc/playlist/PL5523098420', 'only_matching': True, }] @@ -171,58 +167,29 @@ def _extract_videos(self, brand, video_id='-1', show_id='-1'): f'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/{brand}/001/-1/{show_id}/-1/{video_id}/-1/-1.json', display_id)['video'] + def _extract_global_var(self, name, webpage, video_id): + return self._search_json( + fr'window\[["\']{re.escape(name)}["\']\]\s*=', + webpage, f'{name.strip("_")} JSON', video_id) + def _real_extract(self, url): - mobj = self._match_valid_url(url) - sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.') - video_id, display_id = mobj.group('id', 'display_id') - site_info = self._SITE_INFO.get(sub_domain, {}) - brand = site_info.get('brand') - if not video_id or not site_info: - webpage = self._download_webpage(url, display_id or video_id) - data = self._parse_json( - self._search_regex( - r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage, - 'data', default='{}'), - display_id or video_id, fatal=False) - # https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot - layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict) - video_id = None - if layout: - video_id = try_get( - layout, - (lambda x: x['videoid'], lambda x: x['video']['id']), - str) - if not video_id: - video_id = self._search_regex( - ( - # There may be inner quotes, e.g. 
data-video-id="'VDKA3609139'" - # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood - r'data-video-id=["\']*(VDKA\w+)', - # page.analytics.videoIdCode - r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)', - # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet - r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)', - ), webpage, 'video id', default=video_id) - if not site_info: - brand = self._search_regex( - (r'data-brand=\s*["\']\s*(\d+)', - r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand', - default='004') - site_info = next( - si for _, si in self._SITE_INFO.items() - if si.get('brand') == brand) - if not video_id: - # show extraction works for Disney, DisneyJunior and DisneyXD - # ABC and Freeform has different layout - show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id') - videos = self._extract_videos(brand, show_id=show_id) - show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False) - entries = [] - for video in videos: - entries.append(self.url_result( - video['url'], 'Go', video.get('id'), video.get('title'))) - entries.reverse() - return self.playlist_result(entries, show_id, show_title) + site, display_id = self._match_valid_url(url).group('site', 'id') + webpage = self._download_webpage(url, display_id) + config = self._extract_global_var('__CONFIG__', webpage, display_id) + data = self._extract_global_var(config['globalVar'], webpage, display_id) + video_id = traverse_obj(data, ( + 'page', 'content', 'video', 'layout', (('video', 'id'), 'videoid'), {str}, any)) + if not video_id: + video_id = self._search_regex([ + # data-track-video_id="VDKA39492078" + # data-track-video_id_code="vdka39492078" + # data-video-id="'VDKA3609139'" + r'data-(?:track-)?video[_-]id(?:_code)?=["\']*((?:vdka|VDKA)\d+)', + # page.analytics.videoIdCode + r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\d+)'], webpage, 'video ID') + + site_info = self._SITE_INFO[site] + brand = site_info['brand'] video_data = self._extract_videos(brand, video_id)[0] video_id = video_data['id'] title = video_data['title'] @@ -238,26 +205,31 @@ def _real_extract(self, url): if ext == 'm3u8': video_type = video_data.get('type') data = { - 'video_id': video_data['id'], + 'video_id': video_id, 'video_type': video_type, 'brand': brand, 'device': '001', + 'app_name': 'webplayer-abc', } if video_data.get('accesslevel') == '1': - requestor_id = site_info.get('requestor_id', 'DisneyChannels') + provider_id = site_info['provider_id'] + software_statement = traverse_obj(data, ('app', 'config', ( + ('features', 'auth', 'softwareStatement'), + ('tvAuth', 'SOFTWARE_STATEMENTS', 'PRODUCTION'), + ), {str}, any)) or site_info['software_statement'] resource = site_info.get('resource_id') or self._get_mvpd_resource( - requestor_id, title, video_id, None) + provider_id, title, video_id, None) auth = self._extract_mvpd_auth( - url, video_id, requestor_id, resource) + url, video_id, site_info['requestor_id'], resource, software_statement) data.update({ 'token': auth, 'token_type': 'ap', - 'adobe_requestor_id': requestor_id, + 'adobe_requestor_id': provider_id, }) else: self._initialize_geo_bypass({'countries': ['US']}) entitlement = self._download_json( - 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', + 'https://prod.gatekeeper.us-abc.symphony.edgedatg.go.com/vp2/ws-secure/entitlement/2020/playmanifest_secure.json', video_id, data=urlencode_postdata(data)) errors = 
entitlement.get('errors', {}).get('errors', []) if errors: @@ -267,7 +239,7 @@ def _real_extract(self, url): error['message'], countries=['US']) error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True) - asset_url += '?' + entitlement['uplynkData']['sessionKey'] + asset_url += '?' + entitlement['entitlement']['uplynkData']['sessionKey'] fmts, subs = self._extract_m3u8_formats_and_subtitles( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False) formats.extend(fmts) diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index f79e032e4..65099b062 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -7,12 +7,13 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, + clean_html, int_or_none, parse_duration, str_or_none, try_get, unescapeHTML, - unified_strdate, + update_url, update_url_query, url_or_none, ) @@ -22,8 +23,8 @@ class HuyaLiveIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P[^/#?&]+)(?:\D|$)' IE_NAME = 'huya:live' - IE_DESC = 'huya.com' - TESTS = [{ + IE_DESC = '虎牙直播' + _TESTS = [{ 'url': 'https://www.huya.com/572329', 'info_dict': { 'id': '572329', @@ -149,63 +150,94 @@ class HuyaVideoIE(InfoExtractor): 'id': '1002412640', 'ext': 'mp4', 'title': '8月3日', - 'thumbnail': r're:https?://.*\.jpg', - 'duration': 14, + 'categories': ['主机游戏'], + 'duration': 14.0, 'uploader': '虎牙-ATS欧卡车队青木', 'uploader_id': '1564376151', 'upload_date': '20240803', 'view_count': int, 'comment_count': int, 'like_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1722675433, }, - }, - { + }, { 'url': 'https://www.huya.com/video/play/556054543.html', 'info_dict': { 'id': '556054543', 'ext': 'mp4', 'title': '我不挑事 也不怕事', - 'thumbnail': r're:https?://.*\.jpg', - 'duration': 1864, + 'categories': ['英雄联盟'], + 'description': 'md5:58184869687d18ce62dc7b4b2ad21201', + 'duration': 1864.0, 'uploader': '卡尔', 'uploader_id': '367138632', 'upload_date': '20210811', 'view_count': int, 'comment_count': int, 'like_count': int, + 'tags': 'count:4', + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1628675950, + }, + }, { + # Only m3u8 available + 'url': 'https://www.huya.com/video/play/1063345618.html', + 'info_dict': { + 'id': '1063345618', + 'ext': 'mp4', + 'title': '峡谷第一中!黑铁上钻石顶级教学对抗elo', + 'categories': ['英雄联盟'], + 'comment_count': int, + 'duration': 21603.0, + 'like_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1749668803, + 'upload_date': '20250611', + 'uploader': '北枫CC', + 'uploader_id': '2183525275', + 'view_count': int, }, }] def _real_extract(self, url: str): video_id = self._match_id(url) - video_data = self._download_json( - 'https://liveapi.huya.com/moment/getMomentContent', video_id, - query={'videoId': video_id})['data']['moment']['videoInfo'] + moment = self._download_json( + 'https://liveapi.huya.com/moment/getMomentContent', + video_id, query={'videoId': video_id})['data']['moment'] formats = [] - for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))): - formats.append({ - 'url': definition['url'], - **traverse_obj(definition, { - 'format_id': ('defName', {str}), - 'width': ('width', {int_or_none}), - 'height': ('height', {int_or_none}), + for definition in traverse_obj(moment, ( + 'videoInfo', 'definitions', lambda _, v: url_or_none(v['m3u8']), + )): + fmts = self._extract_m3u8_formats(definition['m3u8'], video_id, 'mp4', fatal=False) + for 
+                fmt.update(**traverse_obj(definition, {
                     'filesize': ('size', {int_or_none}),
-                }),
-            })
+                    'format_id': ('defName', {str}),
+                    'height': ('height', {int_or_none}),
+                    'quality': ('definition', {int_or_none}),
+                    'width': ('width', {int_or_none}),
+                }))
+            formats.extend(fmts)

         return {
             'id': video_id,
             'formats': formats,
-            **traverse_obj(video_data, {
+            **traverse_obj(moment, {
+                'comment_count': ('commentCount', {int_or_none}),
+                'description': ('content', {clean_html}, filter),
+                'like_count': ('favorCount', {int_or_none}),
+                'timestamp': ('cTime', {int_or_none}),
+            }),
+            **traverse_obj(moment, ('videoInfo', {
                 'title': ('videoTitle', {str}),
-                'thumbnail': ('videoCover', {url_or_none}),
+                'categories': ('category', {str}, filter, all, filter),
                 'duration': ('videoDuration', {parse_duration}),
+                'tags': ('tags', ..., {str}, filter, all, filter),
+                'thumbnail': (('videoBigCover', 'videoCover'), {url_or_none}, {update_url(query=None)}, any),
                 'uploader': ('nickName', {str}),
                 'uploader_id': ('uid', {str_or_none}),
-                'upload_date': ('videoUploadTime', {unified_strdate}),
                 'view_count': ('videoPlayNum', {int_or_none}),
-                'comment_count': ('videoCommentNum', {int_or_none}),
-                'like_count': ('favorCount', {int_or_none}),
-            }),
+            })),
         }
diff --git a/yt_dlp/extractor/hypergryph.py b/yt_dlp/extractor/hypergryph.py
index 1fb2e9a98..f405d14b5 100644
--- a/yt_dlp/extractor/hypergryph.py
+++ b/yt_dlp/extractor/hypergryph.py
@@ -1,32 +1,66 @@
 from .common import InfoExtractor
-from ..utils import js_to_json, traverse_obj
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    url_or_none,
+)
+from ..utils.traversal import subs_list_to_dict, traverse_obj


 class MonsterSirenHypergryphMusicIE(InfoExtractor):
+    IE_NAME = 'monstersiren'
+    IE_DESC = '塞壬唱片'
+    _API_BASE = 'https://monster-siren.hypergryph.com/api'
     _VALID_URL = r'https?://monster-siren\.hypergryph\.com/music/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://monster-siren.hypergryph.com/music/514562',
         'info_dict': {
             'id': '514562',
             'ext': 'wav',
-            'artists': ['塞壬唱片-MSR'],
-            'album': 'Flame Shadow',
             'title': 'Flame Shadow',
+            'album': 'Flame Shadow',
+            'artists': ['塞壬唱片-MSR'],
+            'description': 'md5:19e2acfcd1b65b41b29e8079ab948053',
+            'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg',
+        },
+    }, {
+        'url': 'https://monster-siren.hypergryph.com/music/514518',
+        'info_dict': {
+            'id': '514518',
+            'ext': 'wav',
+            'title': 'Heavenly Me (Instrumental)',
+            'album': 'Heavenly Me',
+            'artists': ['塞壬唱片-MSR', 'AIYUE blessed : 理名'],
+            'description': 'md5:ce790b41c932d1ad72eb791d1d8ae598',
+            'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg',
         },
     }]

     def _real_extract(self, url):
         audio_id = self._match_id(url)
-        webpage = self._download_webpage(url, audio_id)
-        json_data = self._search_json(
-            r'window\.g_initialProps\s*=', webpage, 'data', audio_id, transform_source=js_to_json)
+        song = self._download_json(f'{self._API_BASE}/song/{audio_id}', audio_id)
+        if traverse_obj(song, 'code') != 0:
+            msg = traverse_obj(song, ('msg', {str}, filter))
+            raise ExtractorError(
+                msg or 'API returned an error response', expected=bool(msg))
+
+        album = None
+        if album_id := traverse_obj(song, ('data', 'albumCid', {str})):
+            album = self._download_json(
+                f'{self._API_BASE}/album/{album_id}/detail', album_id, fatal=False)

         return {
             'id': audio_id,
-            'title': traverse_obj(json_data, ('player', 'songDetail', 'name')),
-            'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')),
-            'ext': 'wav',
             'vcodec': 'none',
-            'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)),
-            'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')),
+            **traverse_obj(song, ('data', {
+                'title': ('name', {str}),
+                'artists': ('artists', ..., {str}),
+                'subtitles': ({'url': 'lyricUrl'}, all, {subs_list_to_dict(lang='en')}),
+                'url': ('sourceUrl', {url_or_none}),
+            })),
+            **traverse_obj(album, ('data', {
+                'album': ('name', {str}),
+                'description': ('intro', {clean_html}),
+                'thumbnail': ('coverUrl', {url_or_none}),
+            })),
         }
diff --git a/yt_dlp/extractor/lsm.py b/yt_dlp/extractor/lsm.py
index 56c06d745..93ea2e35d 100644
--- a/yt_dlp/extractor/lsm.py
+++ b/yt_dlp/extractor/lsm.py
@@ -167,11 +167,11 @@ class LSMLTVEmbedIE(InfoExtractor):
             'duration': 1442,
             'upload_date': '20231121',
             'title': 'D23-6000-105_cetstud',
-            'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
+            'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
         },
     }, {
         'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
-        'md5': 'a1711e190fe680fdb68fd8413b378e87',
+        'md5': 'f236cef2fd5953612754e4e66be51e7a',
         'info_dict': {
             'id': 'wUnFArIPDSY',
             'ext': 'mp4',
@@ -198,6 +198,8 @@ class LSMLTVEmbedIE(InfoExtractor):
             'uploader_url': 'https://www.youtube.com/@LTV16plus',
             'like_count': int,
             'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
+            'media_type': 'livestream',
+            'timestamp': 1652550741,
         },
     }]

@@ -208,7 +210,7 @@ def _real_extract(self, url):
             r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
         embed_type = traverse_obj(data, ('source', 'name', {str}))

-        if embed_type == 'telia':
+        if embed_type in ('backscreen', 'telia'):  # 'telia' only for backwards compat
             ie_key = 'CloudyCDN'
             embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
         elif embed_type == 'youtube':
@@ -226,9 +228,9 @@ def _real_extract(self, url):


 class LSMReplayIE(InfoExtractor):
-    _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:skaties/|klausies/)?(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
     _TESTS = [{
-        'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
+        'url': 'https://replay.lsm.lv/lv/skaties/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
         'md5': '64f72a360ca530d5ed89c77646c9eee5',
         'info_dict': {
             'id': '46k_d23-6000-105',
@@ -241,20 +243,23 @@ class LSMReplayIE(InfoExtractor):
             'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
         },
     }, {
-        'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
-        'md5': '719b33875cd1429846eeeaeec6df2830',
+        'url': 'https://replay.lsm.lv/lv/klausies/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
+        'md5': '84feb80fd7e6ec07744726a9f01cda4d',
         'info_dict': {
-            'id': 'a342781',
-            'ext': 'mp3',
+            'id': '183522',
+            'ext': 'm4a',
             'duration': 1823,
             'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
             'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
             'upload_date': '20231102',
-            'timestamp': 1698921060,
+            'timestamp': 1698913860,
             'description': 'md5:7bac3b2dd41e44325032943251c357b1',
         },
     }, {
-        'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
+        'url': 'https://replay.lsm.lv/ru/skaties/statja/ltv/355067/v-kengaragse-nacalas-ukladka-relsov',
+        'only_matching': True,
+    }, {
+        'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
         'only_matching': True,
     }]

@@ -267,12 +272,24 @@ def _real_extract(self, url):

         data = self._search_nuxt_data(
             self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__')

+        playback_type = data['playback']['type']
+
+        if playback_type == 'playable_audio_lr':
+            playback_data = {
+                'formats': self._extract_m3u8_formats(data['playback']['service']['hls_url'], video_id),
+            }
+        elif playback_type == 'embed':
+            playback_data = {
+                '_type': 'url_transparent',
+                'url': data['playback']['service']['url'],
+            }
+        else:
+            raise ExtractorError(f'Unsupported playback type "{playback_type}"')
+
         return {
-            '_type': 'url_transparent',
             'id': video_id,
+            **playback_data,
             **traverse_obj(data, {
-                'url': ('playback', 'service', 'url', {url_or_none}),
                 'title': ('mediaItem', 'title'),
                 'description': ('mediaItem', ('lead', 'body')),
                 'duration': ('mediaItem', 'duration', {int_or_none}),
diff --git a/yt_dlp/extractor/mave.py b/yt_dlp/extractor/mave.py
new file mode 100644
index 000000000..86d8d8b7c
--- /dev/null
+++ b/yt_dlp/extractor/mave.py
@@ -0,0 +1,107 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    parse_iso8601,
+    urljoin,
+)
+from ..utils.traversal import require, traverse_obj
+
+
+class MaveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
+    _TESTS = [{
+        'url': 'https://ochenlichnoe.mave.digital/ep-25',
+        'md5': 'aa3e513ef588b4366df1520657cbc10c',
+        'info_dict': {
+            'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
+            'ext': 'mp3',
+            'display_id': 'ochenlichnoe-ep-25',
+            'title': 'Между мной и миром: психология самооценки',
+            'description': 'md5:4b7463baaccb6982f326bce5c700382a',
+            'uploader': 'Самарский университет',
+            'channel': 'Очень личное',
+            'channel_id': 'ochenlichnoe',
+            'channel_url': 'https://ochenlichnoe.mave.digital/',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'duration': 3744,
+            'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg',
+            'series': 'Очень личное',
+            'series_id': '2e0c3749-6df2-4946-82f4-50691419c065',
+            'season': 'Season 3',
+            'season_number': 3,
+            'episode': 'Episode 3',
+            'episode_number': 3,
+            'timestamp': 1747817300,
+            'upload_date': '20250521',
+        },
+    }, {
+        'url': 'https://budem.mave.digital/ep-12',
+        'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f',
+        'info_dict': {
+            'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
+            'ext': 'mp3',
+            'display_id': 'budem-ep-12',
+            'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
+            'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
+            'uploader': 'Полина Цветкова+Евгения Акопова',
+            'channel': 'Все там будем',
+            'channel_id': 'budem',
+            'channel_url': 'https://budem.mave.digital/',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
+            'age_limit': 18,
+            'duration': 3664,
+            'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg',
+            'series': 'Все там будем',
+            'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746',
+            'season': 'Season 2',
+            'season_number': 2,
+            'episode': 'Episode 5',
+            'episode_number': 5,
+            'timestamp': 1735538400,
+            'upload_date': '20241230',
+        },
+    }]
+    _API_BASE_URL = 'https://api.mave.digital/'
+
+    def _real_extract(self, url):
+        channel_id, slug = self._match_valid_url(url).group('channel', 'id')
+        display_id = f'{channel_id}-{slug}'
+        webpage = self._download_webpage(url, display_id)
+        data = traverse_obj(
+            self._search_nuxt_json(webpage, display_id),
+            ('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))
+
+        return {
+            'display_id': display_id,
+            'channel_id': channel_id,
+            'channel_url': f'https://{channel_id}.mave.digital/',
+            'vcodec': 'none',
+            'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None,
+            **traverse_obj(data, ('activeEpisodeData', {
+                'url': ('audio', {urljoin(self._API_BASE_URL)}),
+                'id': ('id', {str}),
+                'title': ('title', {str}),
+                'description': ('description', {clean_html}),
+                'duration': ('duration', {int_or_none}),
+                'season_number': ('season', {int_or_none}),
+                'episode_number': ('number', {int_or_none}),
+                'view_count': ('listenings', {int_or_none}),
+                'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
+                'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
+                'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
+                'timestamp': ('publish_date', {parse_iso8601}),
+            })),
+            **traverse_obj(data, ('podcast', 'podcast', {
+                'series_id': ('id', {str}),
+                'series': ('title', {str}),
+                'channel': ('title', {str}),
+                'uploader': ('author', {str}),
+            })),
+        }
diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py
index 55fa83b51..0dded38c6 100644
--- a/yt_dlp/extractor/mitele.py
+++ b/yt_dlp/extractor/mitele.py
@@ -1,7 +1,5 @@
 from .telecinco import TelecincoBaseIE
-from ..networking.exceptions import HTTPError
 from ..utils import (
-    ExtractorError,
     int_or_none,
     parse_iso8601,
 )
@@ -81,17 +79,7 @@ class MiTeleIE(TelecincoBaseIE):

     def _real_extract(self, url):
         display_id = self._match_id(url)
-
-        try:  # yt-dlp's default user-agents are too old and blocked by akamai
-            webpage = self._download_webpage(url, display_id, headers={
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
-            })
-        except ExtractorError as e:
-            if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
-                raise
-            # Retry with impersonation if hardcoded UA is insufficient to bypass akamai
-            webpage = self._download_webpage(url, display_id, impersonate=True)
-
+        webpage = self._download_akamai_webpage(url, display_id)
         pre_player = self._search_json(
             r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', webpage,
             'Pre Player', display_id)['prePlayer']
diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py
index 91ae1d14c..c87046d8e 100644
--- a/yt_dlp/extractor/nba.py
+++ b/yt_dlp/extractor/nba.py
@@ -19,7 +19,8 @@ class NBACVPBaseIE(TurnerBaseIE):

     def _extract_nba_cvp_info(self, path, video_id, fatal=False):
         return self._extract_cvp_info(
-            f'http://secure.nba.com/{path}', video_id, {
+            # XXX: The 3rd argument (None) needs to be the AdobePass software_statement
+            f'http://secure.nba.com/{path}', video_id, None, {
                 'default': {
                     'media_src': 'http://nba.cdn.turner.com/nba/big',
                 },
@@ -94,6 +95,7 @@ def _extract_video(self, filter_key, filter_value):


 class NBAWatchEmbedIE(NBAWatchBaseIE):
+    _WORKING = False
     IE_NAME = 'nba:watch:embed'
     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
     _TESTS = [{
@@ -115,6 +117,7 @@ def _real_extract(self, url):


 class NBAWatchIE(NBAWatchBaseIE):
+    _WORKING = False
     IE_NAME = 'nba:watch'
     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
     _TESTS = [{
@@ -167,6 +170,7 @@ def _real_extract(self, url):


 class NBAWatchCollectionIE(NBAWatchBaseIE):
+    _WORKING = False
     IE_NAME = 'nba:watch:collection'
     _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
     _TESTS = [{
@@ -336,6 +340,7 @@ def _real_extract(self, url):


 class NBAEmbedIE(NBABaseIE):
+    _WORKING = False
     IE_NAME = 'nba:embed'
     _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
     _TESTS = [{
@@ -358,6 +363,7 @@ def _real_extract(self, url):


 class NBAIE(NBABaseIE):
+    _WORKING = False
     IE_NAME = 'nba'
     _VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?!{NBABaseIE._CHANNEL_PATH_REGEX})video/(?P<id>(?:[^/]+/)*[^/?#&]+)'
     _TESTS = [{
@@ -385,6 +391,7 @@ def _extract_url_results(self, team, content_id):


 class NBAChannelIE(NBABaseIE):
+    _WORKING = False
     IE_NAME = 'nba:channel'
     _VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?:{NBABaseIE._CHANNEL_PATH_REGEX})/(?P<id>[^/?#&]+)'
     _TESTS = [{
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index d9aded09e..bd4862bde 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -6,7 +6,7 @@
 from .adobepass import AdobePassIE
 from .common import InfoExtractor
-from .theplatform import ThePlatformIE, default_ns
+from .theplatform import ThePlatformBaseIE, ThePlatformIE, default_ns
 from ..networking import HEADRequest
 from ..utils import (
     ExtractorError,
@@ -14,26 +14,130 @@
     UserNotLive,
     clean_html,
     determine_ext,
+    extract_attributes,
     float_or_none,
+    get_element_html_by_class,
     int_or_none,
     join_nonempty,
+    make_archive_id,
     mimetype2ext,
     parse_age_limit,
     parse_duration,
+    parse_iso8601,
     remove_end,
-    smuggle_url,
-    traverse_obj,
     try_get,
     unescapeHTML,
     unified_timestamp,
     update_url_query,
     url_basename,
+    url_or_none,
 )
+from ..utils.traversal import require, traverse_obj


-class NBCIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
-    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
+class NBCUniversalBaseIE(ThePlatformBaseIE):
+    _GEO_COUNTRIES = ['US']
+    _GEO_BYPASS = False
+    _M3U8_RE = r'https?://[^/?#]+/prod/[\w-]+/(?P<folders>[^?#]+/)cmaf/mpeg_(?:cbcs|cenc)\w*/master_cmaf\w*\.m3u8'
+
+    def _download_nbcu_smil_and_extract_m3u8_url(self, tp_path, video_id, query):
+        smil = self._download_xml(
+            f'https://link.theplatform.com/s/{tp_path}', video_id,
+            'Downloading SMIL manifest', 'Failed to download SMIL manifest', query={
+                **query,
+                'format': 'SMIL',  # XXX: Do not confuse "format" with "formats"
+                'manifest': 'm3u',
+                'switch': 'HLSServiceSecure',  # Or else we get broken mp4 http URLs instead of HLS
+            }, headers=self.geo_verification_headers())
+
+        ns = f'//{{{default_ns}}}'
+        if url := traverse_obj(smil, (f'{ns}video/@src', lambda _, v: determine_ext(v) == 'm3u8', any)):
+            return url
+
+        exc = traverse_obj(smil, (f'{ns}param', lambda _, v: v.get('name') == 'exception', '@value', any))
+        if exc == 'GeoLocationBlocked':
+            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+        raise ExtractorError(traverse_obj(smil, (f'{ns}ref/@abstract', ..., any)), expected=exc == 'Expired')
+
+    def _extract_nbcu_formats_and_subtitles(self, tp_path, video_id, query):
+        # formats='mpeg4' will return either a working m3u8 URL or an m3u8 template for non-DRM HLS
+        # formats='m3u+none,mpeg4' may return DRM HLS but w/the "folders" needed for non-DRM template
+        query['formats'] = 'm3u+none,mpeg4'
+        m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
+
+        if mobj := re.fullmatch(self._M3U8_RE, m3u8_url):
+            query['formats'] = 'mpeg4'
+            m3u8_tmpl = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
+            # Example: https://vod-lf-oneapp-prd.akamaized.net/prod/video/{folders}master_hls.m3u8
+            if '{folders}' in m3u8_tmpl:
+                self.write_debug('Found m3u8 URL template, formatting URL path')
+                m3u8_url = m3u8_tmpl.format(folders=mobj.group('folders'))
+
+        if '/mpeg_cenc' in m3u8_url or '/mpeg_cbcs' in m3u8_url:
+            self.report_drm(video_id)
+
+        return self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+
+    def _extract_nbcu_video(self, url, display_id, old_ie_key=None):
+        webpage = self._download_webpage(url, display_id)
+        settings = self._search_json(
+            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>',
+            webpage, 'settings', display_id)
+
+        query = {}
+        tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
+        if tve:
+            account_pid = tve.get('data-mpx-media-account-pid') or tve['data-mpx-account-pid']
+            account_id = tve['data-mpx-media-account-id']
+            metadata = self._parse_json(
+                tve.get('data-normalized-video') or '', display_id, fatal=False, transform_source=unescapeHTML)
+            video_id = tve.get('data-guid') or metadata['guid']
+            if tve.get('data-entitlement') == 'auth':
+                auth = settings['tve_adobe_auth']
+                release_pid = tve['data-release-pid']
+                resource = self._get_mvpd_resource(
+                    tve.get('data-adobe-pass-resource-id') or auth['adobePassResourceId'],
+                    tve['data-title'], release_pid, tve.get('data-rating'))
+                query['auth'] = self._extract_mvpd_auth(
+                    url, release_pid, auth['adobePassRequestorId'],
+                    resource, auth['adobePassSoftwareStatement'])
+        else:
+            ls_playlist = traverse_obj(settings, (
+                'ls_playlist', lambda _, v: v['defaultGuid'], any, {require('LS playlist')}))
+            video_id = ls_playlist['defaultGuid']
+            account_pid = ls_playlist.get('mpxMediaAccountPid') or ls_playlist['mpxAccountPid']
+            account_id = ls_playlist['mpxMediaAccountId']
+            metadata = traverse_obj(ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, any)) or {}
+
+        tp_path = f'{account_pid}/media/guid/{account_id}/{video_id}'
+        formats, subtitles = self._extract_nbcu_formats_and_subtitles(tp_path, video_id, query)
+        tp_metadata = self._download_theplatform_metadata(tp_path, video_id, fatal=False)
+        parsed_info = self._parse_theplatform_metadata(tp_metadata)
+        self._merge_subtitles(parsed_info['subtitles'], target=subtitles)
+
+        return {
+            **parsed_info,
+            **traverse_obj(metadata, {
+                'title': ('title', {str}),
+                'description': ('description', {str}),
+                'duration': ('durationInSeconds', {int_or_none}),
+                'timestamp': ('airDate', {parse_iso8601}),
+                'thumbnail': ('thumbnailUrl', {url_or_none}),
+                'season_number': ('seasonNumber', {int_or_none}),
+                'episode_number': ('episodeNumber', {int_or_none}),
+                'episode': ('episodeTitle', {str}),
+                'series': ('show', {str}),
+            }),
+            'id': video_id,
+            'display_id': display_id,
+            'formats': formats,
+            'subtitles': subtitles,
+            '_old_archive_ids': [make_archive_id(old_ie_key, video_id)] if old_ie_key else None,
+        }
+
+
+class NBCIE(NBCUniversalBaseIE):
+    _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/?#]+/video/[^/?#]+/(?P<id>\w+))'
     _TESTS = [
         {
             'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
@@ -49,47 +153,20 @@ class NBCIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                 'episode_number': 86,
                 'season': 'Season 2',
                 'season_number': 2,
-                'series': 'Tonight Show: Jimmy Fallon',
-                'duration': 237.0,
-                'chapters': 'count:1',
-                'tags': 'count:4',
+                'series': 'Tonight',
+                'duration': 236.504,
+                'tags': 'count:2',
                 'thumbnail': r're:https?://.+\.jpg',
                 'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
                 'media_type': 'Full Episode',
+                'age_limit': 14,
+                '_old_archive_ids': ['theplatform 2848237'],
             },
             'params': {
                 'skip_download': 'm3u8',
             },
         },
         {
-            'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
-            'info_dict': {
-                'id': '2832821',
-                'ext': 'mp4',
-                'title': 'Star Wars Teaser',
-                'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
-                'timestamp': 1417852800,
-                'upload_date': '20141206',
-                'uploader': 'NBCU-COM',
-            },
-            'skip': 'page not found',
-        },
-        {
-            # HLS streams requires the 'hdnea3' cookie
-            'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
-            'info_dict': {
-                'id': '101528f5a9e8127b107e98c5e6ce4638',
-                'ext': 'mp4',
-                'title': 'Goliath',
-                'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
-                'timestamp': 1237100400,
-                'upload_date': '20090315',
-                'uploader': 'NBCU-COM',
-            },
-            'skip': 'page not found',
-        },
-        {
-            # manifest url does not have extension
             'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
             'info_dict': {
                 'id': '3646439',
@@ -99,48 +176,47 @@ class NBCIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
                 'episode_number': 1,
                 'season': 'Season 75',
                 'season_number': 75,
-                'series': 'The Golden Globe Awards',
+                'series': 'Golden Globes',
                 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
                 'uploader': 'NBCU-COM',
                 'upload_date': '20180107',
                 'timestamp': 1515312000,
-                'duration': 570.0,
+                'duration': 569.703,
                 'tags': 'count:8',
                 'thumbnail': r're:https?://.+\.jpg',
-                'chapters': 'count:1',
+                'media_type': 'Highlight',
+                'age_limit': 0,
+                'categories': ['Series/The Golden Globe Awards'],
+                '_old_archive_ids': ['theplatform 3646439'],
             },
             'params': {
                 'skip_download': 'm3u8',
             },
         },
         {
-            # new video_id format
-            'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
+            # Needs to be extracted from webpage instead of GraphQL
+            'url': 'https://www.nbc.com/paris2024/video/ali-truwit-found-purpose-pool-after-her-life-changed/para24_sww_alitruwittodayshow_240823',
             'info_dict': {
-                'id': 'NBCE125189978',
+                'id': 'para24_sww_alitruwittodayshow_240823',
                 'ext': 'mp4',
-                'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
-                'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
-                'uploader': 'NBCU-COM',
-                'series': 'Quantum Leap',
-                'season': 'Season 1',
-                'season_number': 1,
-                'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
-                'episode_number': 1,
-                'duration': 170.171,
-                'chapters': [],
-                'timestamp': 1663956155,
-                'upload_date': '20220923',
-                'tags': 'count:10',
-                'age_limit': 0,
+                'title': 'Ali Truwit found purpose in the pool after her life changed',
+                'description': 'md5:c16d7489e1516593de1cc5d3f39b9bdb',
+                'uploader': 'NBCU-SPORTS',
+                'duration': 311.077,
                 'thumbnail': r're:https?://.+\.jpg',
-                'categories': ['Series/Quantum Leap 2022'],
-                'media_type': 'Highlight',
+                'episode': 'Ali Truwit found purpose in the pool after her life changed',
+                'timestamp': 1724435902.0,
+                'upload_date': '20240823',
+                '_old_archive_ids': ['theplatform para24_sww_alitruwittodayshow_240823'],
             },
             'params': {
                 'skip_download': 'm3u8',
             },
         },
+        {
+            'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
+            'only_matching': True,
+        },
         {
             'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
             'only_matching': True,
@@ -151,6 +227,7 @@ class NBCIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
             'only_matching': True,
         },
     ]
+    _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1Yzg2YjdkYy04NDI3LTRjNDUtOGQwZi1iNDkzYmE3MmQwYjQiLCJuYmYiOjE1Nzg3MDM2MzEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTc4NzAzNjMxfQ.QQKIsBhAjGQTMdAqRTqhcz2Cddr4Y2hEjnSiOeKKki4nLrkDOsjQMmqeTR0hSRarraxH54wBgLvsxI7LHwKMvr7G8QpynNAxylHlQD3yhN9tFhxt4KR5wW3as02B-W2TznK9bhNWPKIyHND95Uo2Mi6rEQoq8tM9O09WPWaanE5BX_-r6Llr6dPq5F0Lpx2QOn2xYRb1T4nFxdFTNoss8GBds8OvChTiKpXMLHegLTc1OS4H_1a8tO_37jDwSdJuZ8iTyRLV4kZ2cpL6OL5JPMObD4-HQiec_dfcYgMKPiIfP9ZqdXpec2SVaCLsWEk86ZYvD97hLIQrK5rrKd1y-A'

     def _real_extract(self, url):
         permalink, video_id = self._match_valid_url(url).groups()
@@ -196,62 +273,50 @@ def _real_extract(self, url):
                     'userId': '0',
                 }),
             })['data']['bonanzaPage']['metadata']
-        query = {
-            'mbr': 'true',
-            'manifest': 'm3u',
-            'switch': 'HLSServiceSecure',
-        }
+
+        if not video_data:
+            # Some videos are not available via GraphQL API
+            webpage = self._download_webpage(url, video_id)
+            video_data = self._search_json(
+                r'