diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4b71a621c..e2411ecfa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -256,7 +256,7 @@ jobs: with: path: | ~/yt-dlp-build-venv - key: cache-reqs-${{ github.job }} + key: cache-reqs-${{ github.job }}-${{ github.ref }} - name: Install Requirements run: | @@ -331,19 +331,16 @@ jobs: if: steps.restore-cache.outputs.cache-hit == 'true' env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - cache_key: cache-reqs-${{ github.job }} - repository: ${{ github.repository }} - branch: ${{ github.ref }} + cache_key: cache-reqs-${{ github.job }}-${{ github.ref }} run: | - gh extension install actions/gh-actions-cache - gh actions-cache delete "${cache_key}" -R "${repository}" -B "${branch}" --confirm + gh cache delete "${cache_key}" - name: Cache requirements uses: actions/cache/save@v4 with: path: | ~/yt-dlp-build-venv - key: cache-reqs-${{ github.job }} + key: cache-reqs-${{ github.job }}-${{ github.ref }} macos_legacy: needs: process diff --git a/.gitignore b/.gitignore index 8fcd0de64..40bb34d2a 100644 --- a/.gitignore +++ b/.gitignore @@ -105,6 +105,8 @@ README.txt *.zsh *.spec test/testdata/sigs/player-*.js +test/testdata/thumbnails/empty.webp +test/testdata/thumbnails/foo\ %d\ bar/foo_%d.* # Binary /youtube-dl diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 5710f9a9e..ea391bc15 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -770,3 +770,12 @@ NeonMan pj47x troex WouterGordts +baierjan +GeoffreyFrogeye +Pawka +v3DJG6GL +yozel +brian6932 +iednod55 +maxbin123 +nullpos diff --git a/Changelog.md b/Changelog.md index 513724bf4..dd95abc86 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,107 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.06.09 + +#### Extractor changes +- [Improve JSON LD thumbnails 
extraction](https://github.com/yt-dlp/yt-dlp/commit/85c8a405e3651dc041b758f4744d4fb3c4c55e01) ([#13368](https://github.com/yt-dlp/yt-dlp/issues/13368)) by [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080) +- **10play**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6d265388c6e943419ac99e9151cf75a3265f980f) ([#13349](https://github.com/yt-dlp/yt-dlp/issues/13349)) by [bashonly](https://github.com/bashonly) +- **adobepass** + - [Add Fubo MSO](https://github.com/yt-dlp/yt-dlp/commit/eee90acc47d7f8de24afaa8b0271ccaefdf6e88c) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) + - [Always add newer user-agent when required](https://github.com/yt-dlp/yt-dlp/commit/0ee1102268cf31b07f8a8318a47424c66b2f7378) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) + - [Fix Philo MSO authentication](https://github.com/yt-dlp/yt-dlp/commit/943083edcd3df45aaa597a6967bc6c95b720f54c) ([#13335](https://github.com/yt-dlp/yt-dlp/issues/13335)) by [Sipherdrakon](https://github.com/Sipherdrakon) + - [Rework to require software statement](https://github.com/yt-dlp/yt-dlp/commit/711c5d5d098fee2992a1a624b1c4b30364b91426) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123) + - [Validate login URL before sending credentials](https://github.com/yt-dlp/yt-dlp/commit/89c1b349ad81318d9d3bea76c01c891696e58d38) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **aenetworks** + - [Fix playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/f37d599a697e82fe68b423865897d55bae34f373) ([#13408](https://github.com/yt-dlp/yt-dlp/issues/13408)) by [Sipherdrakon](https://github.com/Sipherdrakon) + - [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/6693d6603358ae6beca834dbd822a7917498b813) 
([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) +- **bilibilibangumi**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/13e55162719528d42d2133e16b65ff59a667a6e4) ([#13416](https://github.com/yt-dlp/yt-dlp/issues/13416)) by [c-basalt](https://github.com/c-basalt) +- **brightcove**: new: [Adapt to new AdobePass requirement](https://github.com/yt-dlp/yt-dlp/commit/98f8eec956e3b16cb66a3d49cc71af3807db795e) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **cu.ntv.co.jp**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/aa863ddab9b1d104678e9cf39bb76f5b14fca660) ([#13302](https://github.com/yt-dlp/yt-dlp/issues/13302)) by [doe1080](https://github.com/doe1080), [nullpos](https://github.com/nullpos) +- **go**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5bf002dad16f5ce35aa2023d392c9e518fcd8f) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123) +- **nbc**: [Rework and adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/2d7949d5642bc37d1e71bf00c9a55260e5505d58) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **nobelprize**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/97ddfefeb4faba6e61cd80996c16952b8eab16f3) ([#13205](https://github.com/yt-dlp/yt-dlp/issues/13205)) by [doe1080](https://github.com/doe1080) +- **odnoklassniki**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/148a1eb4c59e127965396c7a6e6acf1979de459e) ([#13361](https://github.com/yt-dlp/yt-dlp/issues/13361)) by [bashonly](https://github.com/bashonly) +- **patreon**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e0d6c0822930f6e63f574d46d946a58b73ecd10c) 
([#13266](https://github.com/yt-dlp/yt-dlp/issues/13266)) by [bashonly](https://github.com/bashonly) (With fixes in [1a8a03e](https://github.com/yt-dlp/yt-dlp/commit/1a8a03ea8d827107319a18076ee3505090667c5a)) +- **podchaser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/538eb305673c26bff6a2b12f1c96375fe02ce41a) ([#13271](https://github.com/yt-dlp/yt-dlp/issues/13271)) by [bashonly](https://github.com/bashonly) +- **sr**: mediathek: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e3c605a61f4cc2de9059f37434fa108c3c20f58e) ([#13294](https://github.com/yt-dlp/yt-dlp/issues/13294)) by [doe1080](https://github.com/doe1080) +- **stacommu**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/5d96527be80dc1ed1702d9cd548ff86de570ad70) ([#13412](https://github.com/yt-dlp/yt-dlp/issues/13412)) by [bashonly](https://github.com/bashonly) +- **startrek**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a8bf0011bde92b3f1324a98bfbd38932fd3ebe18) ([#13188](https://github.com/yt-dlp/yt-dlp/issues/13188)) by [doe1080](https://github.com/doe1080) +- **svt**: play: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e1b6062f8c4a3fa33c65269d48d09ec78de765a2) ([#13329](https://github.com/yt-dlp/yt-dlp/issues/13329)) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly) +- **telecinco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03dba2012d9bd3f402fa8c2f122afba89bbd22a4) ([#13379](https://github.com/yt-dlp/yt-dlp/issues/13379)) by [bashonly](https://github.com/bashonly) +- **theplatform**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/ed108b3ea481c6a4b5215a9302ba92d74baa2425) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **toutiao**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8051e3a61686c5db1de5f5746366ecfbc3ad20c) ([#13246](https://github.com/yt-dlp/yt-dlp/issues/13246)) by 
[doe1080](https://github.com/doe1080) +- **turner**: [Adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/0daddc780d3ac5bebc3a3ec5b884d9243cbc0745) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **twitcasting**: [Fix password-protected livestream support](https://github.com/yt-dlp/yt-dlp/commit/52f9729c9a92ad4656d746ff0b1acecb87b3e96d) ([#13097](https://github.com/yt-dlp/yt-dlp/issues/13097)) by [bashonly](https://github.com/bashonly) +- **twitter**: broadcast: [Support events URLs](https://github.com/yt-dlp/yt-dlp/commit/7794374de8afb20499b023107e2abfd4e6b93ee4) ([#13248](https://github.com/yt-dlp/yt-dlp/issues/13248)) by [doe1080](https://github.com/doe1080) +- **umg**: de: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/4e7c1ea346b510280218b47e8653dbbca3a69870) ([#13373](https://github.com/yt-dlp/yt-dlp/issues/13373)) by [doe1080](https://github.com/doe1080) +- **vice**: [Mark extractors as broken](https://github.com/yt-dlp/yt-dlp/commit/6121559e027a04574690799c1776bc42bb51af31) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **vimeo**: [Extract subtitles from player subdomain](https://github.com/yt-dlp/yt-dlp/commit/c723c4e5e78263df178dbe69844a3d05f3ef9e35) ([#13350](https://github.com/yt-dlp/yt-dlp/issues/13350)) by [bashonly](https://github.com/bashonly) +- **watchespn**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/b094747e93cfb0a2c53007120e37d0d84d41f030) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) +- **weverse**: [Support login with oauth refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/3fe72e9eea38d9a58211cde42cfaa577ce020e2c) ([#13284](https://github.com/yt-dlp/yt-dlp/issues/13284)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add `tv_simply` player 
client](https://github.com/yt-dlp/yt-dlp/commit/1fd0e88b67db53ad163393d6965f68e908fa70e3) ([#13389](https://github.com/yt-dlp/yt-dlp/issues/13389)) by [gamer191](https://github.com/gamer191) + - [Extract srt subtitles](https://github.com/yt-dlp/yt-dlp/commit/231349786e8c42089c2e079ec94c0ea866c37999) ([#13411](https://github.com/yt-dlp/yt-dlp/issues/13411)) by [gamer191](https://github.com/gamer191) + - [Fix `--mark-watched` support](https://github.com/yt-dlp/yt-dlp/commit/b5be29fa58ec98226e11621fd9c58585bcff6879) ([#13222](https://github.com/yt-dlp/yt-dlp/issues/13222)) by [brian6932](https://github.com/brian6932), [iednod55](https://github.com/iednod55) + - [Fix automatic captions for some client combinations](https://github.com/yt-dlp/yt-dlp/commit/53ea743a9c158f8ca2d75a09ca44ba68606042d8) ([#13268](https://github.com/yt-dlp/yt-dlp/issues/13268)) by [bashonly](https://github.com/bashonly) + - [Improve signature extraction debug output](https://github.com/yt-dlp/yt-dlp/commit/d30a49742cfa22e61c47df4ac0e7334d648fb85d) ([#13327](https://github.com/yt-dlp/yt-dlp/issues/13327)) by [bashonly](https://github.com/bashonly) + - [Rework nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/9e38b273b7ac942e7e9fc05a651ed810ab7d30ba) ([#13403](https://github.com/yt-dlp/yt-dlp/issues/13403)) by [Grub4K](https://github.com/Grub4K) + - [nsig code improvements and cleanup](https://github.com/yt-dlp/yt-dlp/commit/f7bbf5a617f9ab54ef51eaef99be36e175b5e9c3) ([#13280](https://github.com/yt-dlp/yt-dlp/issues/13280)) by [bashonly](https://github.com/bashonly) +- **zdf**: [Fix language extraction and format sorting](https://github.com/yt-dlp/yt-dlp/commit/db162b76f6bdece50babe2e0cacfe56888c2e125) ([#13313](https://github.com/yt-dlp/yt-dlp/issues/13313)) by [InvalidUsernameException](https://github.com/InvalidUsernameException) + +#### Misc. 
changes +- **build** + - [Exclude `pkg_resources` from being collected](https://github.com/yt-dlp/yt-dlp/commit/cc749a8a3b8b6e5c05318868c72a403f376a1b38) ([#13320](https://github.com/yt-dlp/yt-dlp/issues/13320)) by [bashonly](https://github.com/bashonly) + - [Fix macOS requirements caching](https://github.com/yt-dlp/yt-dlp/commit/201812100f315c6727a4418698d5b4e8a79863d4) ([#13328](https://github.com/yt-dlp/yt-dlp/issues/13328)) by [bashonly](https://github.com/bashonly) +- **cleanup**: Miscellaneous: [339614a](https://github.com/yt-dlp/yt-dlp/commit/339614a173c74b42d63e858c446a9cae262a13af) by [bashonly](https://github.com/bashonly) +- **test**: postprocessors: [Remove binary thumbnail test data](https://github.com/yt-dlp/yt-dlp/commit/a9b370069838e84d44ac7ad095d657003665885a) ([#13341](https://github.com/yt-dlp/yt-dlp/issues/13341)) by [bashonly](https://github.com/bashonly) + +### 2025.05.22 + +#### Core changes +- **cookies**: [Fix Linux desktop environment detection](https://github.com/yt-dlp/yt-dlp/commit/e491fd4d090db3af52a82863fb0553dd5e17fb85) ([#13197](https://github.com/yt-dlp/yt-dlp/issues/13197)) by [mbway](https://github.com/mbway) +- **jsinterp**: [Fix increment/decrement evaluation](https://github.com/yt-dlp/yt-dlp/commit/167d7a9f0ffd1b4fe600193441bdb7358db2740b) ([#13238](https://github.com/yt-dlp/yt-dlp/issues/13238)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + +#### Extractor changes +- **1tv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/41c0a1fb89628696f8bb88e2b9f3a68f355b8c26) ([#13168](https://github.com/yt-dlp/yt-dlp/issues/13168)) by [bashonly](https://github.com/bashonly) +- **amcnetworks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/464c84fedf78eef822a431361155f108b5df96d7) ([#13147](https://github.com/yt-dlp/yt-dlp/issues/13147)) by [bashonly](https://github.com/bashonly) +- **bitchute**: [Fix 
extractor](https://github.com/yt-dlp/yt-dlp/commit/1d0f6539c47e5d5c68c3c47cdb7075339e2885ac) ([#13081](https://github.com/yt-dlp/yt-dlp/issues/13081)) by [bashonly](https://github.com/bashonly) +- **cartoonnetwork**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/7dbb47f84f0ee1266a3a01f58c9bc4c76d76794a) ([#13148](https://github.com/yt-dlp/yt-dlp/issues/13148)) by [bashonly](https://github.com/bashonly) +- **iprima**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/a7d9a5eb79ceeecb851389f3f2c88597871ca3f2) ([#12937](https://github.com/yt-dlp/yt-dlp/issues/12937)) by [baierjan](https://github.com/baierjan) +- **jiosaavn** + - artist: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/586b557b124f954d3f625360ebe970989022ad97) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima) + - playlist, show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/317f4b8006c2c0f0f64f095b1485163ad97c9053) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima) + - show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6839276496d8814cf16f58b637e45663467928e6) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima) +- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/abf58dcd6a09e14eec4ea82ae12f79a0337cb383) ([#13200](https://github.com/yt-dlp/yt-dlp/issues/13200)) by [Pawka](https://github.com/Pawka) +- **nebula**: [Support `--mark-watched`](https://github.com/yt-dlp/yt-dlp/commit/20f288bdc2173c7cc58d709d25ca193c1f6001e7) ([#13120](https://github.com/yt-dlp/yt-dlp/issues/13120)) by [GeoffreyFrogeye](https://github.com/GeoffreyFrogeye) +- **niconico** + - [Fix error handling](https://github.com/yt-dlp/yt-dlp/commit/f569be4602c2a857087e495d5d7ed6060cd97abe) ([#13236](https://github.com/yt-dlp/yt-dlp/issues/13236)) by [bashonly](https://github.com/bashonly) + - live: [Fix 
extractor](https://github.com/yt-dlp/yt-dlp/commit/7a7b85c9014d96421e18aa7ea5f4c1bee5ceece0) ([#13045](https://github.com/yt-dlp/yt-dlp/issues/13045)) by [doe1080](https://github.com/doe1080) +- **nytimesarticle**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/b26bc32579c00ef579d75a835807ccc87d20ee0a) ([#13104](https://github.com/yt-dlp/yt-dlp/issues/13104)) by [bashonly](https://github.com/bashonly) +- **once**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f475e8b529d18efdad603ffda02a56e707fe0e2c) ([#13164](https://github.com/yt-dlp/yt-dlp/issues/13164)) by [bashonly](https://github.com/bashonly) +- **picarto**: vod: [Support `/profile/` video URLs](https://github.com/yt-dlp/yt-dlp/commit/31e090cb787f3504ec25485adff9a2a51d056734) ([#13227](https://github.com/yt-dlp/yt-dlp/issues/13227)) by [subrat-lima](https://github.com/subrat-lima) +- **playsuisse**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/d880e060803ae8ed5a047e578cca01e1f0e630ce) ([#12466](https://github.com/yt-dlp/yt-dlp/issues/12466)) by [v3DJG6GL](https://github.com/v3DJG6GL) +- **sprout**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/cbcfe6378dde33a650e3852ab17ad4503b8e008d) ([#13149](https://github.com/yt-dlp/yt-dlp/issues/13149)) by [bashonly](https://github.com/bashonly) +- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ea8498ed534642dd7e925961b97b934987142fd3) ([#12957](https://github.com/yt-dlp/yt-dlp/issues/12957)) by [diman8](https://github.com/diman8) +- **twitch**: [Support `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/00b1bec55249cf2ad6271d36492c51b34b6459d1) ([#13202](https://github.com/yt-dlp/yt-dlp/issues/13202)) by [bashonly](https://github.com/bashonly) +- **vimeo**: event: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/545c1a5b6f2fe88722b41aef0e7485bf3be3f3f9) ([#13216](https://github.com/yt-dlp/yt-dlp/issues/13216)) by [bashonly](https://github.com/bashonly) +- **wat.tv**: 
[Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/f123cc83b3aea45053f5fa1d9141048b01fc2774) ([#13111](https://github.com/yt-dlp/yt-dlp/issues/13111)) by [bashonly](https://github.com/bashonly) +- **weverse**: [Fix live extraction](https://github.com/yt-dlp/yt-dlp/commit/5328eda8820cc5f21dcf917684d23fbdca41831d) ([#13084](https://github.com/yt-dlp/yt-dlp/issues/13084)) by [bashonly](https://github.com/bashonly) +- **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/83fabf352489d52843f67e6e9cc752db86d27e6e) ([#13245](https://github.com/yt-dlp/yt-dlp/issues/13245)) by [garret1317](https://github.com/garret1317) +- **youtube** + - [Add PO token support for subtitles](https://github.com/yt-dlp/yt-dlp/commit/32ed5f107c6c641958d1cd2752e130de4db55a13) ([#13234](https://github.com/yt-dlp/yt-dlp/issues/13234)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz) + - [Add `web_embedded` client for age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/0feec6dc131f488428bf881519e7c69766fbb9ae) ([#13089](https://github.com/yt-dlp/yt-dlp/issues/13089)) by [bashonly](https://github.com/bashonly) + - [Add a PO Token Provider Framework](https://github.com/yt-dlp/yt-dlp/commit/2685654a37141cca63eda3a92da0e2706e23ccfd) ([#12840](https://github.com/yt-dlp/yt-dlp/issues/12840)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract `media_type` for all videos](https://github.com/yt-dlp/yt-dlp/commit/ded11ebc9afba6ba33923375103e9be2d7c804e7) ([#13136](https://github.com/yt-dlp/yt-dlp/issues/13136)) by [bashonly](https://github.com/bashonly) + - [Fix `--live-from-start` support for premieres](https://github.com/yt-dlp/yt-dlp/commit/8f303afb43395be360cafd7ad4ce2b6e2eedfb8a) ([#13079](https://github.com/yt-dlp/yt-dlp/issues/13079)) by [arabcoders](https://github.com/arabcoders) + - [Fix geo-restriction error handling](https://github.com/yt-dlp/yt-dlp/commit/c7e575e31608c19c5b26c10a4229db89db5fc9a8) 
([#13217](https://github.com/yt-dlp/yt-dlp/issues/13217)) by [yozel](https://github.com/yozel) + +#### Misc. changes +- **build** + - [Bump PyInstaller to v6.13.0](https://github.com/yt-dlp/yt-dlp/commit/17cf9088d0d535e4a7feffbf02bd49cd9dae5ab9) ([#13082](https://github.com/yt-dlp/yt-dlp/issues/13082)) by [bashonly](https://github.com/bashonly) + - [Bump run-on-arch-action to v3](https://github.com/yt-dlp/yt-dlp/commit/9064d2482d1fe722bbb4a49731fe0711c410d1c8) ([#13088](https://github.com/yt-dlp/yt-dlp/issues/13088)) by [bashonly](https://github.com/bashonly) +- **cleanup**: Miscellaneous: [7977b32](https://github.com/yt-dlp/yt-dlp/commit/7977b329ed97b216e37bd402f4935f28c00eac9e) by [bashonly](https://github.com/bashonly) + ### 2025.04.30 #### Important changes diff --git a/Makefile b/Makefile index 6c72ead1e..273cb3cc0 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ tar pypi-files lazy-extractors install uninstall clean-test: - rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ + rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \ - *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp + *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp \ + test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."* clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe 
yt_dlp.egg-info/ AUTHORS diff --git a/README.md b/README.md index db6504a07..8d7f021c2 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ * [Post-processing Options](#post-processing-options) * [SponsorBlock Options](#sponsorblock-options) * [Extractor Options](#extractor-options) + * [Preset Aliases](#preset-aliases) * [CONFIGURATION](#configuration) * [Configuration file encoding](#configuration-file-encoding) * [Authentication with netrc](#authentication-with-netrc) @@ -348,8 +349,8 @@ ## General Options: --no-flat-playlist Fully extract the videos of a playlist (default) --live-from-start Download livestreams from the start. - Currently only supported for YouTube - (Experimental) + Currently experimental and only supported + for YouTube and Twitch --no-live-from-start Download livestreams from the current time (default) --wait-for-video MIN[-MAX][:RETRIES] @@ -379,12 +380,12 @@ ## General Options: an alias starts with a dash "-", it is prefixed with "--". Arguments are parsed according to the Python string formatting - mini-language. E.g. --alias get-audio,-X - "-S=aext:{0},abr -x --audio-format {0}" - creates options "--get-audio" and "-X" that - takes an argument (ARG0) and expands to - "-S=aext:ARG0,abr -x --audio-format ARG0". - All defined aliases are listed in the --help + mini-language. E.g. --alias get-audio,-X "-S + aext:{0},abr -x --audio-format {0}" creates + options "--get-audio" and "-X" that takes an + argument (ARG0) and expands to "-S + aext:ARG0,abr -x --audio-format ARG0". All + defined aliases are listed in the --help output. Alias options can trigger more aliases; so be careful to avoid defining recursive options. As a safety measure, each @@ -1109,6 +1110,10 @@ ## Extractor Options: arguments for different extractors ## Preset Aliases: +Predefined aliases for convenience and ease of use. 
Note that future + versions of yt-dlp may add or adjust presets, but the existing preset + names will not be changed or removed + -t mp3 -f 'ba[acodec^=mp3]/ba/b' -x --audio-format mp3 @@ -1794,11 +1799,12 @@ # EXTRACTOR ARGUMENTS The following extractors use this feature: #### youtube -* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes +* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. 
Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` +* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. 
See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. +* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual` * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total @@ -1808,8 +1814,12 @@ #### youtube * `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning * `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage` * `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID) -* `po_token`: Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. 
Context can be either `gvs` (Google Video Server URLs) or `player` (Innertube player request) -* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual` +* `po_token`: Proof of Origin (PO) Token(s) to use. Comma separated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player+XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles) +* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default) +* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try to fetch a PO Token regardless of whether the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context) + +#### youtubepot-webpo +* `bind_to_visitor_id`: Whether to use the Visitor ID instead of Visitor Data for caching WebPO tokens. Either `true` (default) or `false` #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. 
This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) diff --git a/bundle/docker/static/entrypoint.sh b/bundle/docker/static/entrypoint.sh index 220275974..8049e6820 100755 --- a/bundle/docker/static/entrypoint.sh +++ b/bundle/docker/static/entrypoint.sh @@ -2,6 +2,7 @@ set -e source ~/.local/share/pipx/venvs/pyinstaller/bin/activate +python -m devscripts.install_deps -o --include build python -m devscripts.install_deps --include secretstorage --include curl-cffi python -m devscripts.make_lazy_extractors python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}" diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index 4184c4bc9..c2f651121 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -36,6 +36,9 @@ def main(): f'--name={name}', '--icon=devscripts/logo.ico', '--upx-exclude=vcruntime140.dll', + # Ref: https://github.com/yt-dlp/yt-dlp/issues/13311 + # https://github.com/pyinstaller/pyinstaller/issues/9149 + '--exclude-module=pkg_resources', '--noconfirm', '--additional-hooks-dir=yt_dlp/__pyinstaller', *opts, diff --git a/pyproject.toml b/pyproject.toml index 7accaeeb9..3775251e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ build = [ "build", "hatchling", "pip", - "setuptools>=71.0.2", # 71.0.0 broke pyinstaller + "setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149 "wheel", ] dev = [ diff --git a/supportedsites.md b/supportedsites.md index 03bd8a7c3..1fe381603 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -5,6 +5,8 @@ # Supported sites Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them. The only reliable way to check if a site is supported is to try it. 
+ - **10play**: [*10play*](## "netrc machine") + - **10play:season** - **17live** - **17live:clip** - **17live:vod** @@ -246,7 +248,6 @@ # Supported sites - **Canalplus**: mycanal.fr and piwiplus.fr - **Canalsurmas** - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine") - - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** - **cbc.ca:​player:playlist** @@ -296,7 +297,7 @@ # Supported sites - **CNNIndonesia** - **ComedyCentral** - **ComedyCentralTV** - - **ConanClassic** + - **ConanClassic**: (**Currently broken**) - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CONtv** - **CookingChannel** @@ -318,7 +319,7 @@ # Supported sites - **CtsNews**: 華視新聞 - **CTV** - **CTVNews** - - **cu.ntv.co.jp**: Nippon Television Network + - **cu.ntv.co.jp**: 日テレ無料TADA! - **CultureUnplugged** - **curiositystream**: [*curiositystream*](## "netrc machine") - **curiositystream:collections**: [*curiositystream*](## "netrc machine") @@ -649,7 +650,10 @@ # Supported sites - **jiocinema**: [*jiocinema*](## "netrc machine") - **jiocinema:series**: [*jiocinema*](## "netrc machine") - **jiosaavn:album** + - **jiosaavn:artist** - **jiosaavn:playlist** + - **jiosaavn:show** + - **jiosaavn:​show:playlist** - **jiosaavn:song** - **Joj** - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. 
(JOQR) @@ -880,19 +884,19 @@ # Supported sites - **Naver** - **Naver:live** - **navernow** - - **nba** - - **nba:channel** - - **nba:embed** - - **nba:watch** - - **nba:​watch:collection** - - **nba:​watch:embed** + - **nba**: (**Currently broken**) + - **nba:channel**: (**Currently broken**) + - **nba:embed**: (**Currently broken**) + - **nba:watch**: (**Currently broken**) + - **nba:​watch:collection**: (**Currently broken**) + - **nba:​watch:embed**: (**Currently broken**) - **NBC** - **NBCNews** - **nbcolympics** - - **nbcolympics:stream** - - **NBCSports** - - **NBCSportsStream** - - **NBCSportsVPlayer** + - **nbcolympics:stream**: (**Currently broken**) + - **NBCSports**: (**Currently broken**) + - **NBCSportsStream**: (**Currently broken**) + - **NBCSportsVPlayer**: (**Currently broken**) - **NBCStations** - **ndr**: NDR.de - Norddeutscher Rundfunk - **ndr:embed** @@ -968,7 +972,7 @@ # Supported sites - **Nitter** - **njoy**: N-JOY - **njoy:embed** - - **NobelPrize**: (**Currently broken**) + - **NobelPrize** - **NoicePodcast** - **NonkTube** - **NoodleMagazine** @@ -1081,8 +1085,8 @@ # Supported sites - **Photobucket** - **PiaLive** - **Piapro**: [*piapro*](## "netrc machine") - - **Picarto** - - **PicartoVod** + - **picarto** + - **picarto:vod** - **Piksel** - **Pinkbike** - **Pinterest** @@ -1390,16 +1394,15 @@ # Supported sites - **Spreaker** - **SpreakerShow** - **SpringboardPlatform** - - **Sprout** - **SproutVideo** - - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**) + - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **StacommuLive**: [*stacommu*](## "netrc machine") - **StacommuVOD**: [*stacommu*](## "netrc machine") - **StagePlusVODConcert**: [*stageplus*](## "netrc machine") - **stanfordoc**: Stanford Open ClassRoom - - **StarTrek**: (**Currently broken**) + - **startrek**: STAR TREK - **startv** - **Steam** - **SteamCommunityBroadcast** @@ 
-1422,12 +1425,11 @@ # Supported sites - **SunPorno** - **sverigesradio:episode** - **sverigesradio:publication** - - **SVT** - - **SVTPage** - - **SVTPlay**: SVT Play and Öppet arkiv - - **SVTSeries** + - **svt:page** + - **svt:play**: SVT Play and Öppet arkiv + - **svt:​play:series** - **SwearnetEpisode** - - **Syfy**: (**Currently broken**) + - **Syfy** - **SYVDK** - **SztvHu** - **t-online.de**: (**Currently broken**) @@ -1471,8 +1473,6 @@ # Supported sites - **Telewebion**: (**Currently broken**) - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - - **TenPlay**: [*10play*](## "netrc machine") - - **TenPlaySeason** - **TF1** - **TFO** - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") @@ -1510,6 +1510,7 @@ # Supported sites - **tokfm:podcast** - **ToonGoggles** - **tou.tv**: [*toutv*](## "netrc machine") + - **toutiao**: 今日头条 - **Toypics**: Toypics video (**Currently broken**) - **ToypicsUser**: Toypics user profile (**Currently broken**) - **TrailerAddict**: (**Currently broken**) @@ -1599,7 +1600,7 @@ # Supported sites - **UKTVPlay** - **UlizaPlayer** - **UlizaPortal**: ulizaportal.jp - - **umg:de**: Universal Music Deutschland (**Currently broken**) + - **umg:de**: Universal Music Deutschland - **Unistra** - **Unity**: (**Currently broken**) - **uol.com.br** @@ -1622,9 +1623,9 @@ # Supported sites - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **vhx:embed**: [*vimeo*](## "netrc machine") - - **vice** - - **vice:article** - - **vice:show** + - **vice**: (**Currently broken**) + - **vice:article**: (**Currently broken**) + - **vice:show**: (**Currently broken**) - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video @@ -1656,6 +1657,7 @@ # Supported sites - **vimeo**: [*vimeo*](## "netrc machine") - **vimeo:album**: [*vimeo*](## "netrc machine") - **vimeo:channel**: [*vimeo*](## "netrc machine") + - **vimeo:event**: [*vimeo*](## "netrc machine") - **vimeo:group**: [*vimeo*](## "netrc machine") - 
**vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes - **vimeo:ondemand**: [*vimeo*](## "netrc machine") diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c6ff6209a..e6c8d574e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -314,6 +314,20 @@ def test_search_json_ld_realworld(self): }, {}, ), + ( + # test thumbnail_url key without URL scheme + r''' +''', + { + 'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}], + }, + {}, + ), ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( @@ -1933,6 +1947,137 @@ def test_search_nextjs_data(self): with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) + def test_search_nuxt_json(self): + HTML_TMPL = '' + VALID_DATA = ''' + ["ShallowReactive",1], + {"data":2,"state":21,"once":25,"_errors":28,"_server_errors":30}, + ["ShallowReactive",3], + {"$abcdef123456":4}, + {"podcast":5,"activeEpisodeData":7}, + {"podcast":6,"seasons":14}, + {"title":10,"id":11}, + ["Reactive",8], + {"episode":9,"creators":18,"empty_list":20}, + {"title":12,"id":13,"refs":34,"empty_refs":35}, + "Series Title", + "podcast-id-01", + "Episode Title", + "episode-id-99", + [15,16,17], + 1, + 2, + 3, + [19], + "Podcast Creator", + [], + {"$ssite-config":22}, + {"env":23,"name":24,"map":26,"numbers":14}, + "production", + "podcast-website", + ["Set"], + ["Reactive",27], + ["Map"], + ["ShallowReactive",29], + {}, + ["NuxtError",31], + {"status":32,"message":33}, + 503, + "Service Unavailable", + [36,37], + [38,39], + ["Ref",40], + ["ShallowRef",41], + ["EmptyRef",42], + ["EmptyShallowRef",43], + "ref", + "shallow_ref", + "{\\"ref\\":1}", + "{\\"shallow_ref\\":2}" + ''' + PAYLOAD = { + 'data': { + '$abcdef123456': { + 'podcast': { + 'podcast': { + 'title': 'Series Title', + 'id': 'podcast-id-01', + }, + 'seasons': [1, 2, 3], + }, + 'activeEpisodeData': { + 'episode': { + 
'title': 'Episode Title', + 'id': 'episode-id-99', + 'refs': ['ref', 'shallow_ref'], + 'empty_refs': [{'ref': 1}, {'shallow_ref': 2}], + }, + 'creators': ['Podcast Creator'], + 'empty_list': [], + }, + }, + }, + 'state': { + '$ssite-config': { + 'env': 'production', + 'name': 'podcast-website', + 'map': [], + 'numbers': [1, 2, 3], + }, + }, + 'once': [], + '_errors': {}, + '_server_errors': { + 'status': 503, + 'message': 'Service Unavailable', + }, + } + PARTIALLY_INVALID = [( + ''' + {"data":1}, + {"invalid_raw_list":2}, + [15,16,17] + ''', + {'data': {'invalid_raw_list': [None, None, None]}}, + ), ( + ''' + {"data":1}, + ["EmptyRef",2], + "not valid JSON" + ''', + {'data': None}, + ), ( + ''' + {"data":1}, + ["EmptyShallowRef",2], + "not valid JSON" + ''', + {'data': None}, + )] + INVALID = [ + ''' + [] + ''', + ''' + ["unsupported",1], + {"data":2}, + {} + ''', + ] + DEFAULT = object() + + self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD) + self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {}) + self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT) + + for data, expected in PARTIALLY_INVALID: + self.assertEqual( + self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected) + + for data in INVALID: + self.assertIs( + self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) + if __name__ == '__main__': unittest.main() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 708a04f92..91312e4e5 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1435,6 +1435,27 @@ def test_load_plugins_compat(self): FakeYDL().close() assert all_plugins_loaded.value + def test_close_hooks(self): + # Should call all registered close hooks on close + close_hook_called = False + close_hook_two_called = False + + def close_hook(): + nonlocal close_hook_called + close_hook_called = True + + def close_hook_two(): + nonlocal 
close_hook_two_called + close_hook_two_called = True + + ydl = FakeYDL() + ydl.add_close_hook(close_hook) + ydl.add_close_hook(close_hook_two) + + ydl.close() + self.assertTrue(close_hook_called, 'Close hook was not called') + self.assertTrue(close_hook_two_called, 'Close hook two was not called') + if __name__ == '__main__': unittest.main() diff --git a/test/test_cookies.py b/test/test_cookies.py index 4b9b9b5a9..f956ab187 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -58,6 +58,14 @@ def test_get_desktop_environment(self): ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3), ({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3), + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE), + + ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'my_custom_de', 'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME), + ({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME), ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3), ({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4), diff --git a/test/test_devalue.py b/test/test_devalue.py new file mode 100644 index 000000000..29eb89e87 --- /dev/null +++ b/test/test_devalue.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import datetime as dt +import json +import math +import re +import unittest + +from yt_dlp.utils.jslib import devalue + + 
+TEST_CASES_EQUALS = [{ + 'name': 'int', + 'unparsed': [-42], + 'parsed': -42, +}, { + 'name': 'str', + 'unparsed': ['woo!!!'], + 'parsed': 'woo!!!', +}, { + 'name': 'Number', + 'unparsed': [['Object', 42]], + 'parsed': 42, +}, { + 'name': 'String', + 'unparsed': [['Object', 'yar']], + 'parsed': 'yar', +}, { + 'name': 'Infinity', + 'unparsed': -4, + 'parsed': math.inf, +}, { + 'name': 'negative Infinity', + 'unparsed': -5, + 'parsed': -math.inf, +}, { + 'name': 'negative zero', + 'unparsed': -6, + 'parsed': -0.0, +}, { + 'name': 'RegExp', + 'unparsed': [['RegExp', 'regexp', 'gim']], # XXX: flags are ignored + 'parsed': re.compile('regexp'), +}, { + 'name': 'Date', + 'unparsed': [['Date', '2001-09-09T01:46:40.000Z']], + 'parsed': dt.datetime.fromtimestamp(1e9, tz=dt.timezone.utc), +}, { + 'name': 'Array', + 'unparsed': [[1, 2, 3], 'a', 'b', 'c'], + 'parsed': ['a', 'b', 'c'], +}, { + 'name': 'Array (empty)', + 'unparsed': [[]], + 'parsed': [], +}, { + 'name': 'Array (sparse)', + 'unparsed': [[-2, 1, -2], 'b'], + 'parsed': [None, 'b', None], +}, { + 'name': 'Object', + 'unparsed': [{'foo': 1, 'x-y': 2}, 'bar', 'z'], + 'parsed': {'foo': 'bar', 'x-y': 'z'}, +}, { + 'name': 'Set', + 'unparsed': [['Set', 1, 2, 3], 1, 2, 3], + 'parsed': [1, 2, 3], +}, { + 'name': 'Map', + 'unparsed': [['Map', 1, 2], 'a', 'b'], + 'parsed': [['a', 'b']], +}, { + 'name': 'BigInt', + 'unparsed': [['BigInt', '1']], + 'parsed': 1, +}, { + 'name': 'Uint8Array', + 'unparsed': [['Uint8Array', 'AQID']], + 'parsed': [1, 2, 3], +}, { + 'name': 'ArrayBuffer', + 'unparsed': [['ArrayBuffer', 'AQID']], + 'parsed': [1, 2, 3], +}, { + 'name': 'str (repetition)', + 'unparsed': [[1, 1], 'a string'], + 'parsed': ['a string', 'a string'], +}, { + 'name': 'None (repetition)', + 'unparsed': [[1, 1], None], + 'parsed': [None, None], +}, { + 'name': 'dict (repetition)', + 'unparsed': [[1, 1], {}], + 'parsed': [{}, {}], +}, { + 'name': 'Object without prototype', + 'unparsed': [['null']], + 'parsed': {}, +}, { + 
'name': 'cross-realm POJO', + 'unparsed': [{}], + 'parsed': {}, +}] + +TEST_CASES_IS = [{ + 'name': 'bool', + 'unparsed': [True], + 'parsed': True, +}, { + 'name': 'Boolean', + 'unparsed': [['Object', False]], + 'parsed': False, +}, { + 'name': 'undefined', + 'unparsed': -1, + 'parsed': None, +}, { + 'name': 'null', + 'unparsed': [None], + 'parsed': None, +}, { + 'name': 'NaN', + 'unparsed': -3, + 'parsed': math.nan, +}] + +TEST_CASES_INVALID = [{ + 'name': 'empty string', + 'unparsed': '', + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'hole', + 'unparsed': -2, + 'error': ValueError, + 'pattern': r'invalid integer input', +}, { + 'name': 'string', + 'unparsed': 'hello', + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'number', + 'unparsed': 42, + 'error': ValueError, + 'pattern': r'invalid integer input', +}, { + 'name': 'boolean', + 'unparsed': True, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'null', + 'unparsed': None, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'object', + 'unparsed': {}, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'empty array', + 'unparsed': [], + 'error': ValueError, + 'pattern': r'expected a non-empty list as input', +}, { + 'name': 'Python negative indexing', + 'unparsed': [[1, 2, 3, 4, 5, 6, 7, -7], 1, 2, 3, 4, 5, 6, 7], + 'error': IndexError, + 'pattern': r'invalid index: -7', +}] + + +class TestDevalue(unittest.TestCase): + def test_devalue_parse_equals(self): + for tc in TEST_CASES_EQUALS: + self.assertEqual(devalue.parse(tc['unparsed']), tc['parsed'], tc['name']) + + def test_devalue_parse_is(self): + for tc in TEST_CASES_IS: + self.assertIs(devalue.parse(tc['unparsed']), tc['parsed'], tc['name']) + + def test_devalue_parse_invalid(self): + for tc in TEST_CASES_INVALID: + with self.assertRaisesRegex(tc['error'], tc['pattern'], 
msg=tc['name']): + devalue.parse(tc['unparsed']) + + def test_devalue_parse_cyclical(self): + name = 'Map (cyclical)' + result = devalue.parse([['Map', 1, 0], 'self']) + self.assertEqual(result[0][0], 'self', name) + self.assertIs(result, result[0][1], name) + + name = 'Set (cyclical)' + result = devalue.parse([['Set', 0, 1], 42]) + self.assertEqual(result[1], 42, name) + self.assertIs(result, result[0], name) + + result = devalue.parse([[0]]) + self.assertIs(result, result[0], 'Array (cyclical)') + + name = 'Object (cyclical)' + result = devalue.parse([{'self': 0}]) + self.assertIs(result, result['self'], name) + + name = 'Object with null prototype (cyclical)' + result = devalue.parse([['null', 'self', 0]]) + self.assertIs(result, result['self'], name) + + name = 'Objects (cyclical)' + result = devalue.parse([[1, 2], {'second': 2}, {'first': 1}]) + self.assertIs(result[0], result[1]['first'], name) + self.assertIs(result[1], result[0]['second'], name) + + def test_devalue_parse_revivers(self): + self.assertEqual( + devalue.parse([['indirect', 1], {'a': 2}, 'b'], revivers={'indirect': lambda x: x}), + {'a': 'b'}, 'revivers (indirect)') + + self.assertEqual( + devalue.parse([['parse', 1], '{"a":0}'], revivers={'parse': lambda x: json.loads(x)}), + {'a': 0}, 'revivers (parse)') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index b14069ccc..2e3cdc2a5 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -478,6 +478,14 @@ def test_extract_function_with_global_stack(self): func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000}) self.assertEqual(func([1]), 1111) + def test_increment_decrement(self): + self._test('function f() { var x = 1; return ++x; }', 2) + self._test('function f() { var x = 1; return x++; }', 1) + self._test('function f() { var x = 1; x--; return x }', 0) + self._test('function f() { var y; var x = 1; x++, --x, x--, x--, y="z", "abc", x++; return --x }', -1) + 
self._test('function f() { var a = "test--"; return a; }', 'test--') + self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--') + if __name__ == '__main__': unittest.main() diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py index 204fe87bd..a2feacba7 100644 --- a/test/test_networking_utils.py +++ b/test/test_networking_utils.py @@ -20,7 +20,6 @@ add_accept_encoding_header, get_redirect_method, make_socks_proxy_opts, - select_proxy, ssl_load_certs, ) from yt_dlp.networking.exceptions import ( @@ -28,7 +27,7 @@ IncompleteRead, ) from yt_dlp.socks import ProxyType -from yt_dlp.utils.networking import HTTPHeaderDict +from yt_dlp.utils.networking import HTTPHeaderDict, select_proxy TEST_DIR = os.path.dirname(os.path.abspath(__file__)) diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 603f85c65..ecc73e39e 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -8,6 +8,8 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import subprocess + from yt_dlp import YoutubeDL from yt_dlp.utils import shell_quote from yt_dlp.postprocessor import ( @@ -47,7 +49,18 @@ def test_escaping(self): print('Skipping: ffmpeg not found') return - file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}' + test_data_dir = 'test/testdata/thumbnails' + generated_file = f'{test_data_dir}/empty.webp' + + subprocess.check_call([ + pp.executable, '-y', '-f', 'lavfi', '-i', 'color=c=black:s=320x320', + '-c:v', 'libwebp', '-pix_fmt', 'yuv420p', '-vframes', '1', generated_file, + ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + file = test_data_dir + '/foo %d bar/foo_%d.{}' + initial_file = file.format('webp') + os.replace(generated_file, initial_file) + tests = (('webp', 'png'), ('png', 'jpg')) for inp, out in tests: @@ -55,11 +68,13 @@ def test_escaping(self): if os.path.exists(out_file): os.remove(out_file) pp.convert_thumbnail(file.format(inp), out) - assert 
os.path.exists(out_file) + self.assertTrue(os.path.exists(out_file)) for _, out in tests: os.remove(file.format(out)) + os.remove(initial_file) + class TestExec(unittest.TestCase): def test_parse_cmd(self): @@ -610,3 +625,7 @@ def test_quote_for_concat_QuotesAtEnd(self): self.assertEqual( r"'special '\'' characters '\'' galore'\'\'\'", self._pp._quote_for_ffmpeg("special ' characters ' galore'''")) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pot/conftest.py b/test/test_pot/conftest.py new file mode 100644 index 000000000..ff0667e92 --- /dev/null +++ b/test/test_pot/conftest.py @@ -0,0 +1,71 @@ +import collections + +import pytest + +from yt_dlp import YoutubeDL +from yt_dlp.cookies import YoutubeDLCookieJar +from yt_dlp.extractor.common import InfoExtractor +from yt_dlp.extractor.youtube.pot._provider import IEContentProviderLogger +from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest, PoTokenContext +from yt_dlp.utils.networking import HTTPHeaderDict + + +class MockLogger(IEContentProviderLogger): + + log_level = IEContentProviderLogger.LogLevel.TRACE + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.messages = collections.defaultdict(list) + + def trace(self, message: str): + self.messages['trace'].append(message) + + def debug(self, message: str): + self.messages['debug'].append(message) + + def info(self, message: str): + self.messages['info'].append(message) + + def warning(self, message: str, *, once=False): + self.messages['warning'].append(message) + + def error(self, message: str): + self.messages['error'].append(message) + + +@pytest.fixture +def ie() -> InfoExtractor: + ydl = YoutubeDL() + return ydl.get_info_extractor('Youtube') + + +@pytest.fixture +def logger() -> MockLogger: + return MockLogger() + + +@pytest.fixture() +def pot_request() -> PoTokenRequest: + return PoTokenRequest( + context=PoTokenContext.GVS, + innertube_context={'client': {'clientName': 'WEB'}}, + 
innertube_host='youtube.com', + session_index=None, + player_url=None, + is_authenticated=False, + video_webpage=None, + + visitor_data='example-visitor-data', + data_sync_id='example-data-sync-id', + video_id='example-video-id', + + request_cookiejar=YoutubeDLCookieJar(), + request_proxy=None, + request_headers=HTTPHeaderDict(), + request_timeout=None, + request_source_address=None, + request_verify_tls=True, + + bypass_cache=False, + ) diff --git a/test/test_pot/test_pot_builtin_memorycache.py b/test/test_pot/test_pot_builtin_memorycache.py new file mode 100644 index 000000000..ea19fbe29 --- /dev/null +++ b/test/test_pot/test_pot_builtin_memorycache.py @@ -0,0 +1,117 @@ +import threading +import time +from collections import OrderedDict +import pytest +from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, BuiltinIEContentProvider +from yt_dlp.utils import bug_reports_message +from yt_dlp.extractor.youtube.pot._builtin.memory_cache import MemoryLRUPCP, memorylru_preference, initialize_global_cache +from yt_dlp.version import __version__ +from yt_dlp.extractor.youtube.pot._registry import _pot_cache_providers, _pot_memory_cache + + +class TestMemoryLRUPCS: + + def test_base_type(self): + assert issubclass(MemoryLRUPCP, IEContentProvider) + assert issubclass(MemoryLRUPCP, BuiltinIEContentProvider) + + @pytest.fixture + def pcp(self, ie, logger) -> MemoryLRUPCP: + return MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), max_size)) + + def test_is_registered(self): + assert _pot_cache_providers.value.get('MemoryLRU') == MemoryLRUPCP + + def test_initialization(self, pcp): + assert pcp.PROVIDER_NAME == 'memory' + assert pcp.PROVIDER_VERSION == __version__ + assert pcp.BUG_REPORT_MESSAGE == bug_reports_message(before='') + assert pcp.is_available() + + def test_store_and_get(self, pcp): + pcp.store('key1', 'value1', int(time.time()) + 60) + assert pcp.get('key1') == 'value1' + assert len(pcp.cache) == 1 + + 
def test_store_ignore_expired(self, pcp): + pcp.store('key1', 'value1', int(time.time()) - 1) + assert len(pcp.cache) == 0 + assert pcp.get('key1') is None + assert len(pcp.cache) == 0 + + def test_store_override_existing_key(self, ie, logger): + MAX_SIZE = 2 + pcp = MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), MAX_SIZE)) + pcp.store('key1', 'value1', int(time.time()) + 60) + pcp.store('key2', 'value2', int(time.time()) + 60) + assert len(pcp.cache) == 2 + pcp.store('key1', 'value2', int(time.time()) + 60) + # Ensure that the override key gets added to the end of the cache instead of in the same position + pcp.store('key3', 'value3', int(time.time()) + 60) + assert pcp.get('key1') == 'value2' + + def test_store_ignore_expired_existing_key(self, pcp): + pcp.store('key1', 'value2', int(time.time()) + 60) + pcp.store('key1', 'value1', int(time.time()) - 1) + assert len(pcp.cache) == 1 + assert pcp.get('key1') == 'value2' + assert len(pcp.cache) == 1 + + def test_get_key_expired(self, pcp): + pcp.store('key1', 'value1', int(time.time()) + 60) + assert pcp.get('key1') == 'value1' + assert len(pcp.cache) == 1 + pcp.cache['key1'] = ('value1', int(time.time()) - 1) + assert pcp.get('key1') is None + assert len(pcp.cache) == 0 + + def test_lru_eviction(self, ie, logger): + MAX_SIZE = 2 + provider = MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), MAX_SIZE)) + provider.store('key1', 'value1', int(time.time()) + 5) + provider.store('key2', 'value2', int(time.time()) + 5) + assert len(provider.cache) == 2 + + assert provider.get('key1') == 'value1' + + provider.store('key3', 'value3', int(time.time()) + 5) + assert len(provider.cache) == 2 + + assert provider.get('key2') is None + + provider.store('key4', 'value4', int(time.time()) + 5) + assert len(provider.cache) == 2 + + assert provider.get('key1') is None + assert provider.get('key3') == 'value3' + assert 
provider.get('key4') == 'value4' + + def test_delete(self, pcp): + pcp.store('key1', 'value1', int(time.time()) + 5) + assert len(pcp.cache) == 1 + assert pcp.get('key1') == 'value1' + pcp.delete('key1') + assert len(pcp.cache) == 0 + assert pcp.get('key1') is None + + def test_use_global_cache_default(self, ie, logger): + pcp = MemoryLRUPCP(ie, logger, {}) + assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25 + assert pcp.cache is _pot_memory_cache.value['cache'] + assert pcp.lock is _pot_memory_cache.value['lock'] + + pcp2 = MemoryLRUPCP(ie, logger, {}) + assert pcp.max_size == pcp2.max_size == _pot_memory_cache.value['max_size'] == 25 + assert pcp.cache is pcp2.cache is _pot_memory_cache.value['cache'] + assert pcp.lock is pcp2.lock is _pot_memory_cache.value['lock'] + + def test_fail_max_size_change_global(self, ie, logger): + pcp = MemoryLRUPCP(ie, logger, {}) + assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25 + with pytest.raises(ValueError, match='Cannot change max_size of initialized global memory cache'): + initialize_global_cache(50) + + assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25 + + def test_memory_lru_preference(self, pcp, ie, pot_request): + assert memorylru_preference(pcp, pot_request) == 10000 diff --git a/test/test_pot/test_pot_builtin_utils.py b/test/test_pot/test_pot_builtin_utils.py new file mode 100644 index 000000000..7645ba601 --- /dev/null +++ b/test/test_pot/test_pot_builtin_utils.py @@ -0,0 +1,47 @@ +import pytest +from yt_dlp.extractor.youtube.pot.provider import ( + PoTokenContext, + +) + +from yt_dlp.extractor.youtube.pot.utils import get_webpo_content_binding, ContentBindingType + + +class TestGetWebPoContentBinding: + + @pytest.mark.parametrize('client_name, context, is_authenticated, expected', [ + *[(client, context, is_authenticated, expected) for client in [ + 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY'] + for 
context, is_authenticated, expected in [ + (PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)), + (PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)), + (PoTokenContext.SUBS, False, ('example-video-id', ContentBindingType.VIDEO_ID)), + (PoTokenContext.GVS, True, ('example-data-sync-id', ContentBindingType.DATASYNC_ID)), + ]], + ('WEB_REMIX', PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)), + ('WEB_REMIX', PoTokenContext.PLAYER, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)), + ('ANDROID', PoTokenContext.GVS, False, (None, None)), + ('IOS', PoTokenContext.GVS, False, (None, None)), + ]) + def test_get_webpo_content_binding(self, pot_request, client_name, context, is_authenticated, expected): + pot_request.innertube_context['client']['clientName'] = client_name + pot_request.context = context + pot_request.is_authenticated = is_authenticated + assert get_webpo_content_binding(pot_request) == expected + + def test_extract_visitor_id(self, pot_request): + pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D' + assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('123abcXYZ_-', ContentBindingType.VISITOR_ID) + + def test_invalid_visitor_id(self, pot_request): + # visitor id not alphanumeric (i.e. 
protobuf extraction failed) + pot_request.visitor_data = 'CggxMjM0NTY3OCiA4s-qBg%3D%3D' + assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA) + + def test_no_visitor_id(self, pot_request): + pot_request.visitor_data = 'KIDiz6oG' + assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA) + + def test_invalid_base64(self, pot_request): + pot_request.visitor_data = 'invalid-base64' + assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA) diff --git a/test/test_pot/test_pot_builtin_webpospec.py b/test/test_pot/test_pot_builtin_webpospec.py new file mode 100644 index 000000000..078008415 --- /dev/null +++ b/test/test_pot/test_pot_builtin_webpospec.py @@ -0,0 +1,92 @@ +import pytest + +from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, BuiltinIEContentProvider +from yt_dlp.extractor.youtube.pot.cache import CacheProviderWritePolicy +from yt_dlp.utils import bug_reports_message +from yt_dlp.extractor.youtube.pot.provider import ( + PoTokenRequest, + PoTokenContext, + +) +from yt_dlp.version import __version__ + +from yt_dlp.extractor.youtube.pot._builtin.webpo_cachespec import WebPoPCSP +from yt_dlp.extractor.youtube.pot._registry import _pot_pcs_providers + + +@pytest.fixture() +def pot_request(pot_request) -> PoTokenRequest: + pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D' # visitor_id=123abcXYZ_- + return pot_request + + +class TestWebPoPCSP: + def test_base_type(self): + assert issubclass(WebPoPCSP, IEContentProvider) + assert issubclass(WebPoPCSP, BuiltinIEContentProvider) + + def test_init(self, ie, logger): + pcs = WebPoPCSP(ie=ie, logger=logger, settings={}) + assert pcs.PROVIDER_NAME == 'webpo' + assert pcs.PROVIDER_VERSION == __version__ + assert pcs.BUG_REPORT_MESSAGE == 
bug_reports_message(before='') + assert pcs.is_available() + + def test_is_registered(self): + assert _pot_pcs_providers.value.get('WebPo') == WebPoPCSP + + @pytest.mark.parametrize('client_name, context, is_authenticated', [ + ('ANDROID', PoTokenContext.GVS, False), + ('IOS', PoTokenContext.GVS, False), + ('IOS', PoTokenContext.PLAYER, False), + ]) + def test_not_supports(self, ie, logger, pot_request, client_name, context, is_authenticated): + pcs = WebPoPCSP(ie=ie, logger=logger, settings={}) + pot_request.innertube_context['client']['clientName'] = client_name + pot_request.context = context + pot_request.is_authenticated = is_authenticated + assert pcs.generate_cache_spec(pot_request) is None + + @pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [ + *[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [ + 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY'] + for context, is_authenticated, remote_host, source_address, request_proxy, expected in [ + (PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}), + (PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}), + (PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}), + ]], + ('WEB_REMIX', PoTokenContext.PLAYER, False, 
'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}), + ('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}), + ('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}), + + ]) + def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected): + pcs = WebPoPCSP(ie=ie, logger=logger, settings={}) + pot_request.innertube_context['client']['clientName'] = client_name + pot_request.context = context + pot_request.is_authenticated = is_authenticated + pot_request.innertube_context['client']['remoteHost'] = remote_host + pot_request.request_source_address = source_address + pot_request.request_proxy = request_proxy + pot_request.video_id = '123abcXYZ_-' # same as visitor id to test type + + assert pcs.generate_cache_spec(pot_request).key_bindings == expected + + def test_no_bind_visitor_id(self, ie, logger, pot_request): + # Should not bind to visitor id if setting is set to False + pcs = WebPoPCSP(ie=ie, logger=logger, settings={'bind_to_visitor_id': ['false']}) + pot_request.innertube_context['client']['clientName'] = 'WEB' + pot_request.context = PoTokenContext.GVS + pot_request.is_authenticated = False + assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D', 'cbt': 'visitor_data'} + + def test_default_ttl(self, ie, logger, pot_request): + pcs = WebPoPCSP(ie=ie, logger=logger, settings={}) + assert pcs.generate_cache_spec(pot_request).default_ttl == 6 * 60 * 60 # should default to 6 hours + + 
def test_write_policy(self, ie, logger, pot_request): + pcs = WebPoPCSP(ie=ie, logger=logger, settings={}) + pot_request.context = PoTokenContext.GVS + assert pcs.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_ALL + pot_request.context = PoTokenContext.PLAYER + assert pcs.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_FIRST diff --git a/test/test_pot/test_pot_director.py b/test/test_pot/test_pot_director.py new file mode 100644 index 000000000..bbfdd0e98 --- /dev/null +++ b/test/test_pot/test_pot_director.py @@ -0,0 +1,1529 @@ +from __future__ import annotations +import abc +import base64 +import dataclasses +import hashlib +import json +import time +import pytest + +from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider, IEContentProvider + +from yt_dlp.extractor.youtube.pot.provider import ( + PoTokenRequest, + PoTokenContext, + PoTokenProviderError, + PoTokenProviderRejectedRequest, +) +from yt_dlp.extractor.youtube.pot._director import ( + PoTokenCache, + validate_cache_spec, + clean_pot, + validate_response, + PoTokenRequestDirector, + provider_display_list, +) + +from yt_dlp.extractor.youtube.pot.cache import ( + PoTokenCacheSpec, + PoTokenCacheSpecProvider, + PoTokenCacheProvider, + CacheProviderWritePolicy, + PoTokenCacheProviderError, +) + + +from yt_dlp.extractor.youtube.pot.provider import ( + PoTokenResponse, + PoTokenProvider, +) + + +class BaseMockPoTokenProvider(PoTokenProvider, abc.ABC): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.available_called_times = 0 + self.request_called_times = 0 + self.close_called = False + + def is_available(self) -> bool: + self.available_called_times += 1 + return True + + def request_pot(self, *args, **kwargs): + self.request_called_times += 1 + return super().request_pot(*args, **kwargs) + + def close(self): + self.close_called = True + super().close() + + +class 
ExamplePTP(BaseMockPoTokenProvider): + PROVIDER_NAME = 'example' + PROVIDER_VERSION = '0.0.1' + BUG_REPORT_LOCATION = 'https://example.com/issues' + + _SUPPORTED_CLIENTS = ('WEB',) + _SUPPORTED_CONTEXTS = (PoTokenContext.GVS, ) + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + if request.data_sync_id == 'example': + return PoTokenResponse(request.video_id) + return PoTokenResponse(EXAMPLE_PO_TOKEN) + + +def success_ptp(response: PoTokenResponse | None = None, key: str | None = None): + class SuccessPTP(BaseMockPoTokenProvider): + PROVIDER_NAME = 'success' + PROVIDER_VERSION = '0.0.1' + BUG_REPORT_LOCATION = 'https://success.example.com/issues' + + _SUPPORTED_CLIENTS = ('WEB',) + _SUPPORTED_CONTEXTS = (PoTokenContext.GVS,) + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + return response or PoTokenResponse(EXAMPLE_PO_TOKEN) + + if key: + SuccessPTP.PROVIDER_KEY = key + return SuccessPTP + + +@pytest.fixture +def pot_provider(ie, logger): + return success_ptp()(ie=ie, logger=logger, settings={}) + + +class UnavailablePTP(BaseMockPoTokenProvider): + PROVIDER_NAME = 'unavailable' + BUG_REPORT_LOCATION = 'https://unavailable.example.com/issues' + _SUPPORTED_CLIENTS = None + _SUPPORTED_CONTEXTS = None + + def is_available(self) -> bool: + super().is_available() + return False + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + raise PoTokenProviderError('something went wrong') + + +class UnsupportedPTP(BaseMockPoTokenProvider): + PROVIDER_NAME = 'unsupported' + BUG_REPORT_LOCATION = 'https://unsupported.example.com/issues' + _SUPPORTED_CLIENTS = None + _SUPPORTED_CONTEXTS = None + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + raise PoTokenProviderRejectedRequest('unsupported request') + + +class ErrorPTP(BaseMockPoTokenProvider): + PROVIDER_NAME = 'error' + BUG_REPORT_LOCATION = 'https://error.example.com/issues' + _SUPPORTED_CLIENTS = None + 
_SUPPORTED_CONTEXTS = None + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + expected = request.video_id == 'expected' + raise PoTokenProviderError('an error occurred', expected=expected) + + +class UnexpectedErrorPTP(BaseMockPoTokenProvider): + PROVIDER_NAME = 'unexpected_error' + BUG_REPORT_LOCATION = 'https://unexpected.example.com/issues' + _SUPPORTED_CLIENTS = None + _SUPPORTED_CONTEXTS = None + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + raise ValueError('an unexpected error occurred') + + +class InvalidPTP(BaseMockPoTokenProvider): + PROVIDER_NAME = 'invalid' + BUG_REPORT_LOCATION = 'https://invalid.example.com/issues' + _SUPPORTED_CLIENTS = None + _SUPPORTED_CONTEXTS = None + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + if request.video_id == 'invalid_type': + return 'invalid-response' + else: + return PoTokenResponse('example-token?', expires_at='123') + + +class BaseMockCacheSpecProvider(PoTokenCacheSpecProvider, abc.ABC): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.generate_called_times = 0 + self.is_available_called_times = 0 + self.close_called = False + + def is_available(self) -> bool: + self.is_available_called_times += 1 + return super().is_available() + + def generate_cache_spec(self, request: PoTokenRequest): + self.generate_called_times += 1 + + def close(self): + self.close_called = True + super().close() + + +class ExampleCacheSpecProviderPCSP(BaseMockCacheSpecProvider): + + PROVIDER_NAME = 'example' + PROVIDER_VERSION = '0.0.1' + BUG_REPORT_LOCATION = 'https://example.com/issues' + + def generate_cache_spec(self, request: PoTokenRequest): + super().generate_cache_spec(request) + return PoTokenCacheSpec( + key_bindings={'v': request.video_id, 'e': None}, + default_ttl=60, + ) + + +class UnavailableCacheSpecProviderPCSP(BaseMockCacheSpecProvider): + + PROVIDER_NAME = 'unavailable' + PROVIDER_VERSION = '0.0.1' + + 
def is_available(self) -> bool: + super().is_available() + return False + + def generate_cache_spec(self, request: PoTokenRequest): + super().generate_cache_spec(request) + return None + + +class UnsupportedCacheSpecProviderPCSP(BaseMockCacheSpecProvider): + + PROVIDER_NAME = 'unsupported' + PROVIDER_VERSION = '0.0.1' + + def generate_cache_spec(self, request: PoTokenRequest): + super().generate_cache_spec(request) + return None + + +class InvalidSpecCacheSpecProviderPCSP(BaseMockCacheSpecProvider): + + PROVIDER_NAME = 'invalid' + PROVIDER_VERSION = '0.0.1' + + def generate_cache_spec(self, request: PoTokenRequest): + super().generate_cache_spec(request) + return 'invalid-spec' + + +class ErrorSpecCacheSpecProviderPCSP(BaseMockCacheSpecProvider): + + PROVIDER_NAME = 'invalid' + PROVIDER_VERSION = '0.0.1' + + def generate_cache_spec(self, request: PoTokenRequest): + super().generate_cache_spec(request) + raise ValueError('something went wrong') + + +class BaseMockCacheProvider(PoTokenCacheProvider, abc.ABC): + BUG_REPORT_MESSAGE = 'example bug report message' + + def __init__(self, *args, available=True, **kwargs): + super().__init__(*args, **kwargs) + self.store_calls = 0 + self.delete_calls = 0 + self.get_calls = 0 + self.available_called_times = 0 + self.available = available + + def is_available(self) -> bool: + self.available_called_times += 1 + return self.available + + def store(self, *args, **kwargs): + self.store_calls += 1 + + def delete(self, *args, **kwargs): + self.delete_calls += 1 + + def get(self, *args, **kwargs): + self.get_calls += 1 + + def close(self): + self.close_called = True + super().close() + + +class ErrorPCP(BaseMockCacheProvider): + PROVIDER_NAME = 'error' + + def store(self, *args, **kwargs): + super().store(*args, **kwargs) + raise PoTokenCacheProviderError('something went wrong') + + def get(self, *args, **kwargs): + super().get(*args, **kwargs) + raise PoTokenCacheProviderError('something went wrong') + + +class 
UnexpectedErrorPCP(BaseMockCacheProvider): + PROVIDER_NAME = 'unexpected_error' + + def store(self, *args, **kwargs): + super().store(*args, **kwargs) + raise ValueError('something went wrong') + + def get(self, *args, **kwargs): + super().get(*args, **kwargs) + raise ValueError('something went wrong') + + +class MockMemoryPCP(BaseMockCacheProvider): + PROVIDER_NAME = 'memory' + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.cache = {} + + def store(self, key, value, expires_at): + super().store(key, value, expires_at) + self.cache[key] = (value, expires_at) + + def delete(self, key): + super().delete(key) + self.cache.pop(key, None) + + def get(self, key): + super().get(key) + return self.cache.get(key, [None])[0] + + +def create_memory_pcp(ie, logger, provider_key='memory', provider_name='memory', available=True): + cache = MockMemoryPCP(ie, logger, {}, available=available) + cache.PROVIDER_KEY = provider_key + cache.PROVIDER_NAME = provider_name + return cache + + +@pytest.fixture +def memorypcp(ie, logger) -> MockMemoryPCP: + return create_memory_pcp(ie, logger) + + +@pytest.fixture +def pot_cache(ie, logger): + class MockPoTokenCache(PoTokenCache): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.get_calls = 0 + self.store_calls = 0 + self.close_called = False + + def get(self, *args, **kwargs): + self.get_calls += 1 + return super().get(*args, **kwargs) + + def store(self, *args, **kwargs): + self.store_calls += 1 + return super().store(*args, **kwargs) + + def close(self): + self.close_called = True + super().close() + + return MockPoTokenCache( + cache_providers=[MockMemoryPCP(ie, logger, {})], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie, logger, settings={})], + logger=logger, + ) + + +EXAMPLE_PO_TOKEN = base64.urlsafe_b64encode(b'example-token').decode() + + +class TestPoTokenCache: + + def test_cache_success(self, memorypcp, pot_request, ie, logger): + cache = PoTokenCache( + 
cache_providers=[memorypcp], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + assert cache.get(pot_request) is None + cache.store(pot_request, response) + + cached_response = cache.get(pot_request) + assert cached_response is not None + assert cached_response.po_token == EXAMPLE_PO_TOKEN + assert cached_response.expires_at is not None + + assert cache.get(dataclasses.replace(pot_request, video_id='another-video-id')) is None + + def test_unsupported_cache_spec_no_fallback(self, memorypcp, pot_request, ie, logger): + unsupported_provider = UnsupportedCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[unsupported_provider], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + assert cache.get(pot_request) is None + assert unsupported_provider.generate_called_times == 1 + cache.store(pot_request, response) + assert len(memorypcp.cache) == 0 + assert unsupported_provider.generate_called_times == 2 + assert cache.get(pot_request) is None + assert unsupported_provider.generate_called_times == 3 + assert len(logger.messages.get('error', [])) == 0 + + def test_unsupported_cache_spec_fallback(self, memorypcp, pot_request, ie, logger): + unsupported_provider = UnsupportedCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + example_provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[unsupported_provider, example_provider], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + assert cache.get(pot_request) is None + assert unsupported_provider.generate_called_times == 1 + assert example_provider.generate_called_times == 1 + + cache.store(pot_request, response) + assert unsupported_provider.generate_called_times == 2 + assert 
example_provider.generate_called_times == 2 + + cached_response = cache.get(pot_request) + assert unsupported_provider.generate_called_times == 3 + assert example_provider.generate_called_times == 3 + assert cached_response is not None + assert cached_response.po_token == EXAMPLE_PO_TOKEN + assert cached_response.expires_at is not None + + assert len(logger.messages.get('error', [])) == 0 + + def test_invalid_cache_spec_no_fallback(self, memorypcp, pot_request, ie, logger): + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[InvalidSpecCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + assert cache.get(pot_request) is None + cache.store(pot_request, response) + + assert cache.get(pot_request) is None + + assert 'PoTokenCacheSpecProvider "InvalidSpecCacheSpecProvider" generate_cache_spec() returned invalid spec invalid-spec; please report this issue to the provider developer at (developer has not provided a bug report location) .' 
in logger.messages['error'] + + def test_invalid_cache_spec_fallback(self, memorypcp, pot_request, ie, logger): + + invalid_provider = InvalidSpecCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + example_provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[invalid_provider, example_provider], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + assert cache.get(pot_request) is None + assert invalid_provider.generate_called_times == example_provider.generate_called_times == 1 + + cache.store(pot_request, response) + assert invalid_provider.generate_called_times == example_provider.generate_called_times == 2 + + cached_response = cache.get(pot_request) + assert invalid_provider.generate_called_times == example_provider.generate_called_times == 3 + assert cached_response is not None + assert cached_response.po_token == EXAMPLE_PO_TOKEN + assert cached_response.expires_at is not None + + assert 'PoTokenCacheSpecProvider "InvalidSpecCacheSpecProvider" generate_cache_spec() returned invalid spec invalid-spec; please report this issue to the provider developer at (developer has not provided a bug report location) .' 
in logger.messages['error'] + + def test_unavailable_cache_spec_no_fallback(self, memorypcp, pot_request, ie, logger): + unavailable_provider = UnavailableCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[unavailable_provider], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + assert cache.get(pot_request) is None + cache.store(pot_request, response) + assert cache.get(pot_request) is None + assert unavailable_provider.generate_called_times == 0 + + def test_unavailable_cache_spec_fallback(self, memorypcp, pot_request, ie, logger): + unavailable_provider = UnavailableCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + example_provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[unavailable_provider, example_provider], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + assert cache.get(pot_request) is None + assert unavailable_provider.generate_called_times == 0 + assert unavailable_provider.is_available_called_times == 1 + assert example_provider.generate_called_times == 1 + + cache.store(pot_request, response) + assert unavailable_provider.generate_called_times == 0 + assert unavailable_provider.is_available_called_times == 2 + assert example_provider.generate_called_times == 2 + + cached_response = cache.get(pot_request) + assert unavailable_provider.generate_called_times == 0 + assert unavailable_provider.is_available_called_times == 3 + assert example_provider.generate_called_times == 3 + assert example_provider.is_available_called_times == 3 + assert cached_response is not None + assert cached_response.po_token == EXAMPLE_PO_TOKEN + assert cached_response.expires_at is not None + + def test_unexpected_error_cache_spec(self, memorypcp, pot_request, ie, logger): + error_provider = ErrorSpecCacheSpecProviderPCSP(ie=ie, 
logger=logger, settings={}) + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[error_provider], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + assert cache.get(pot_request) is None + cache.store(pot_request, response) + assert cache.get(pot_request) is None + assert error_provider.generate_called_times == 3 + assert error_provider.is_available_called_times == 3 + + assert 'Error occurred with "invalid" PO Token cache spec provider: ValueError(\'something went wrong\'); please report this issue to the provider developer at (developer has not provided a bug report location) .' in logger.messages['error'] + + def test_unexpected_error_cache_spec_fallback(self, memorypcp, pot_request, ie, logger): + error_provider = ErrorSpecCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + example_provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[error_provider, example_provider], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + assert cache.get(pot_request) is None + assert error_provider.generate_called_times == 1 + assert error_provider.is_available_called_times == 1 + assert example_provider.generate_called_times == 1 + + cache.store(pot_request, response) + assert error_provider.generate_called_times == 2 + assert error_provider.is_available_called_times == 2 + assert example_provider.generate_called_times == 2 + + cached_response = cache.get(pot_request) + assert error_provider.generate_called_times == 3 + assert error_provider.is_available_called_times == 3 + assert example_provider.generate_called_times == 3 + assert example_provider.is_available_called_times == 3 + assert cached_response is not None + assert cached_response.po_token == EXAMPLE_PO_TOKEN + assert cached_response.expires_at is not None + + assert 'Error occurred with "invalid" PO Token cache spec provider: 
ValueError(\'something went wrong\'); please report this issue to the provider developer at (developer has not provided a bug report location) .' in logger.messages['error'] + + def test_key_bindings_spec_provider(self, memorypcp, pot_request, ie, logger): + + class ExampleProviderPCSP(PoTokenCacheSpecProvider): + PROVIDER_NAME = 'example' + + def generate_cache_spec(self, request: PoTokenRequest): + return PoTokenCacheSpec( + key_bindings={'v': request.video_id}, + default_ttl=60, + ) + + class ExampleProviderTwoPCSP(ExampleProviderPCSP): + pass + + example_provider = ExampleProviderPCSP(ie=ie, logger=logger, settings={}) + example_provider_two = ExampleProviderTwoPCSP(ie=ie, logger=logger, settings={}) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[example_provider], + logger=logger, + ) + + assert cache.get(pot_request) is None + cache.store(pot_request, response) + assert len(memorypcp.cache) == 1 + assert hashlib.sha256( + f"{{'_dlp_cache': 'v1', '_p': 'ExampleProvider', 'v': '{pot_request.video_id}'}}".encode()).hexdigest() in memorypcp.cache + + # The second spec provider returns the exact same key bindings as the first one, + # however the PoTokenCache should use the provider key to differentiate between them + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[example_provider_two], + logger=logger, + ) + + assert cache.get(pot_request) is None + cache.store(pot_request, response) + assert len(memorypcp.cache) == 2 + assert hashlib.sha256( + f"{{'_dlp_cache': 'v1', '_p': 'ExampleProviderTwo', 'v': '{pot_request.video_id}'}}".encode()).hexdigest() in memorypcp.cache + + def test_cache_provider_preferences(self, pot_request, ie, logger): + pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one') + pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two') + + cache = PoTokenCache( + cache_providers=[pcp_one, pcp_two], + 
cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN), write_policy=CacheProviderWritePolicy.WRITE_FIRST) + assert len(pcp_one.cache) == 1 + assert len(pcp_two.cache) == 0 + + assert cache.get(pot_request) + assert pcp_one.get_calls == 1 + assert pcp_two.get_calls == 0 + + standard_preference_called = False + pcp_one_preference_claled = False + + def standard_preference(provider, request, *_, **__): + nonlocal standard_preference_called + standard_preference_called = True + assert isinstance(provider, PoTokenCacheProvider) + assert isinstance(request, PoTokenRequest) + return 1 + + def pcp_one_preference(provider, request, *_, **__): + nonlocal pcp_one_preference_claled + pcp_one_preference_claled = True + assert isinstance(provider, PoTokenCacheProvider) + assert isinstance(request, PoTokenRequest) + if provider.PROVIDER_KEY == pcp_one.PROVIDER_KEY: + return -100 + return 0 + + # test that it can handle multiple preferences + cache.cache_provider_preferences.append(standard_preference) + cache.cache_provider_preferences.append(pcp_one_preference) + + cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN), write_policy=CacheProviderWritePolicy.WRITE_FIRST) + assert cache.get(pot_request) + assert len(pcp_one.cache) == len(pcp_two.cache) == 1 + assert pcp_two.get_calls == pcp_one.get_calls == 1 + assert pcp_one.store_calls == pcp_two.store_calls == 1 + assert standard_preference_called + assert pcp_one_preference_claled + + def test_secondary_cache_provider_hit(self, pot_request, ie, logger): + pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one') + pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two') + + cache = PoTokenCache( + cache_providers=[pcp_two], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + # Given the lower priority provider has the 
cache hit, store the response in the higher priority provider + cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN)) + assert cache.get(pot_request) + + cache.cache_providers[pcp_one.PROVIDER_KEY] = pcp_one + + def pcp_one_pref(provider, *_, **__): + if provider.PROVIDER_KEY == pcp_one.PROVIDER_KEY: + return 1 + return -1 + + cache.cache_provider_preferences.append(pcp_one_pref) + + assert cache.get(pot_request) + assert pcp_one.get_calls == 1 + assert pcp_two.get_calls == 2 + # Should write back to pcp_one (now the highest priority cache provider) + assert pcp_one.store_calls == pcp_two.store_calls == 1 + assert 'Writing PO Token response to highest priority cache provider' in logger.messages['trace'] + + def test_cache_provider_no_hits(self, pot_request, ie, logger): + pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one') + pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two') + + cache = PoTokenCache( + cache_providers=[pcp_one, pcp_two], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + assert cache.get(pot_request) is None + assert pcp_one.get_calls == pcp_two.get_calls == 1 + + def test_get_invalid_po_token_response(self, pot_request, ie, logger): + # Test various scenarios where the po token response stored in the cache provider is invalid + pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one') + pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two') + + cache = PoTokenCache( + cache_providers=[pcp_one, pcp_two], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + valid_response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, valid_response) + assert len(pcp_one.cache) == len(pcp_two.cache) == 1 + # Overwrite the valid response with an invalid one in the cache + pcp_one.store(next(iter(pcp_one.cache.keys())), 
json.dumps(dataclasses.asdict(PoTokenResponse(None))), int(time.time() + 1000)) + assert cache.get(pot_request).po_token == valid_response.po_token + assert pcp_one.get_calls == pcp_two.get_calls == 1 + assert pcp_one.delete_calls == 1 # Invalid response should be deleted from cache + assert pcp_one.store_calls == 3 # Since response was fetched from second cache provider, it should be stored in the first one + assert len(pcp_one.cache) == 1 + assert 'Invalid PO Token response retrieved from cache provider "memory": {"po_token": null, "expires_at": null}; example bug report message' in logger.messages['error'] + + # Overwrite the valid response with an invalid json in the cache + pcp_one.store(next(iter(pcp_one.cache.keys())), 'invalid-json', int(time.time() + 1000)) + assert cache.get(pot_request).po_token == valid_response.po_token + assert pcp_one.get_calls == pcp_two.get_calls == 2 + assert pcp_one.delete_calls == 2 + assert pcp_one.store_calls == 5 # 3 + 1 store we made in the test + 1 store from lower priority cache provider + assert len(pcp_one.cache) == 1 + + assert 'Invalid PO Token response retrieved from cache provider "memory": invalid-json; example bug report message' in logger.messages['error'] + + # Valid json, but missing required fields + pcp_one.store(next(iter(pcp_one.cache.keys())), '{"unknown_param": 0}', int(time.time() + 1000)) + assert cache.get(pot_request).po_token == valid_response.po_token + assert pcp_one.get_calls == pcp_two.get_calls == 3 + assert pcp_one.delete_calls == 3 + assert pcp_one.store_calls == 7 # 5 + 1 store from test + 1 store from lower priority cache provider + assert len(pcp_one.cache) == 1 + + assert 'Invalid PO Token response retrieved from cache provider "memory": {"unknown_param": 0}; example bug report message' in logger.messages['error'] + + def test_store_invalid_po_token_response(self, pot_request, ie, logger): + # Should not store an invalid po token response + pcp_one = create_memory_pcp(ie, logger, 
provider_key='memory_pcp_one') + + cache = PoTokenCache( + cache_providers=[pcp_one], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + cache.store(pot_request, PoTokenResponse(po_token=EXAMPLE_PO_TOKEN, expires_at=80)) + assert cache.get(pot_request) is None + assert pcp_one.store_calls == 0 + assert 'Invalid PO Token response provided to PoTokenCache.store()' in logger.messages['error'][0] + + def test_store_write_policy(self, pot_request, ie, logger): + pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one') + pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two') + + cache = PoTokenCache( + cache_providers=[pcp_one, pcp_two], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN), write_policy=CacheProviderWritePolicy.WRITE_FIRST) + assert pcp_one.store_calls == 1 + assert pcp_two.store_calls == 0 + + cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN), write_policy=CacheProviderWritePolicy.WRITE_ALL) + assert pcp_one.store_calls == 2 + assert pcp_two.store_calls == 1 + + def test_store_write_first_policy_cache_spec(self, pot_request, ie, logger): + pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one') + pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two') + + class WriteFirstPCSP(BaseMockCacheSpecProvider): + def generate_cache_spec(self, request: PoTokenRequest): + super().generate_cache_spec(request) + return PoTokenCacheSpec( + key_bindings={'v': request.video_id, 'e': None}, + default_ttl=60, + write_policy=CacheProviderWritePolicy.WRITE_FIRST, + ) + + cache = PoTokenCache( + cache_providers=[pcp_one, pcp_two], + cache_spec_providers=[WriteFirstPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN)) + assert pcp_one.store_calls == 1 + 
assert pcp_two.store_calls == 0 + + def test_store_write_all_policy_cache_spec(self, pot_request, ie, logger): + pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one') + pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two') + + class WriteAllPCSP(BaseMockCacheSpecProvider): + def generate_cache_spec(self, request: PoTokenRequest): + super().generate_cache_spec(request) + return PoTokenCacheSpec( + key_bindings={'v': request.video_id, 'e': None}, + default_ttl=60, + write_policy=CacheProviderWritePolicy.WRITE_ALL, + ) + + cache = PoTokenCache( + cache_providers=[pcp_one, pcp_two], + cache_spec_providers=[WriteAllPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN)) + assert pcp_one.store_calls == 1 + assert pcp_two.store_calls == 1 + + def test_expires_at_pot_response(self, pot_request, memorypcp, ie, logger): + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=10000000000) + cache.store(pot_request, response) + assert next(iter(memorypcp.cache.values()))[1] == 10000000000 + + def test_expires_at_default_spec(self, pot_request, memorypcp, ie, logger): + + class TtlPCSP(BaseMockCacheSpecProvider): + def generate_cache_spec(self, request: PoTokenRequest): + super().generate_cache_spec(request) + return PoTokenCacheSpec( + key_bindings={'v': request.video_id, 'e': None}, + default_ttl=10000000000, + ) + + cache = PoTokenCache( + cache_providers=[memorypcp], + cache_spec_providers=[TtlPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response) + assert next(iter(memorypcp.cache.values()))[1] >= 10000000000 + + def test_cache_provider_error_no_fallback(self, pot_request, ie, logger): + error_pcp = 
ErrorPCP(ie, logger, {}) + cache = PoTokenCache( + cache_providers=[error_pcp], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response) + assert cache.get(pot_request) is None + assert error_pcp.get_calls == 1 + assert error_pcp.store_calls == 1 + + assert logger.messages['warning'].count("Error from \"error\" PO Token cache provider: PoTokenCacheProviderError('something went wrong'); example bug report message") == 2 + + def test_cache_provider_error_fallback(self, pot_request, ie, logger): + error_pcp = ErrorPCP(ie, logger, {}) + memory_pcp = create_memory_pcp(ie, logger, provider_key='memory') + + cache = PoTokenCache( + cache_providers=[error_pcp, memory_pcp], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response) + + # 1. Store fails for error_pcp, stored in memory_pcp + # 2. Get fails for error_pcp, fetched from memory_pcp + # 3. Since fetched from lower priority, it should be stored in the highest priority cache provider + # 4. Store fails in error_pcp. 
Since write policy is WRITE_FIRST, it should not try to store in memory_pcp regardless of whether the store in error_pcp fails + + assert cache.get(pot_request) + assert error_pcp.get_calls == 1 + assert error_pcp.store_calls == 2 # since highest priority, when fetched from lower priority, it should be stored in the highest priority cache provider + assert memory_pcp.get_calls == 1 + assert memory_pcp.store_calls == 1 + + assert logger.messages['warning'].count("Error from \"error\" PO Token cache provider: PoTokenCacheProviderError('something went wrong'); example bug report message") == 3 + + def test_cache_provider_unexpected_error_no_fallback(self, pot_request, ie, logger): + error_pcp = UnexpectedErrorPCP(ie, logger, {}) + cache = PoTokenCache( + cache_providers=[error_pcp], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response) + assert cache.get(pot_request) is None + assert error_pcp.get_calls == 1 + assert error_pcp.store_calls == 1 + + assert logger.messages['error'].count("Error occurred with \"unexpected_error\" PO Token cache provider: ValueError('something went wrong'); example bug report message") == 2 + + def test_cache_provider_unexpected_error_fallback(self, pot_request, ie, logger): + error_pcp = UnexpectedErrorPCP(ie, logger, {}) + memory_pcp = create_memory_pcp(ie, logger, provider_key='memory') + + cache = PoTokenCache( + cache_providers=[error_pcp, memory_pcp], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response) + + # 1. Store fails for error_pcp, stored in memory_pcp + # 2. Get fails for error_pcp, fetched from memory_pcp + # 3. Since fetched from lower priority, it should be stored in the highest priority cache provider + # 4. Store fails in error_pcp. 
Since write policy is WRITE_FIRST, it should not try to store in memory_pcp regardless of if the store in error_pcp fails + + assert cache.get(pot_request) + assert error_pcp.get_calls == 1 + assert error_pcp.store_calls == 2 # since highest priority, when fetched from lower priority, it should be stored in the highest priority cache provider + assert memory_pcp.get_calls == 1 + assert memory_pcp.store_calls == 1 + + assert logger.messages['error'].count("Error occurred with \"unexpected_error\" PO Token cache provider: ValueError('something went wrong'); example bug report message") == 3 + + def test_cache_provider_unavailable_no_fallback(self, pot_request, ie, logger): + provider = create_memory_pcp(ie, logger, available=False) + + cache = PoTokenCache( + cache_providers=[provider], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response) + assert cache.get(pot_request) is None + assert provider.get_calls == 0 + assert provider.store_calls == 0 + assert provider.available_called_times + + def test_cache_provider_unavailable_fallback(self, pot_request, ie, logger): + provider_unavailable = create_memory_pcp(ie, logger, provider_key='unavailable', provider_name='unavailable', available=False) + provider_available = create_memory_pcp(ie, logger, provider_key='available', provider_name='available') + + cache = PoTokenCache( + cache_providers=[provider_unavailable, provider_available], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response) + assert cache.get(pot_request) is not None + assert provider_unavailable.get_calls == 0 + assert provider_unavailable.store_calls == 0 + assert provider_available.get_calls == 1 + assert provider_available.store_calls == 1 + assert 
provider_unavailable.available_called_times + assert provider_available.available_called_times + + # should not even try to use the provider for the request + assert 'Attempting to fetch a PO Token response from "unavailable" provider' not in logger.messages['trace'] + assert 'Attempting to fetch a PO Token response from "available" provider' not in logger.messages['trace'] + + def test_available_not_called(self, ie, pot_request, logger): + # Test that the available method is not called when provider higher in the list is available + provider_unavailable = create_memory_pcp( + ie, logger, provider_key='unavailable', provider_name='unavailable', available=False) + provider_available = create_memory_pcp(ie, logger, provider_key='available', provider_name='available') + + logger.log_level = logger.LogLevel.INFO + + cache = PoTokenCache( + cache_providers=[provider_available, provider_unavailable], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response, write_policy=CacheProviderWritePolicy.WRITE_FIRST) + assert cache.get(pot_request) is not None + assert provider_unavailable.get_calls == 0 + assert provider_unavailable.store_calls == 0 + assert provider_available.get_calls == 1 + assert provider_available.store_calls == 1 + assert provider_unavailable.available_called_times == 0 + assert provider_available.available_called_times + assert 'PO Token Cache Providers: available-0.0.0 (external), unavailable-0.0.0 (external, unavailable)' not in logger.messages.get('trace', []) + + def test_available_called_trace(self, ie, pot_request, logger): + # But if logging level is trace should call available (as part of debug logging) + provider_unavailable = create_memory_pcp( + ie, logger, provider_key='unavailable', provider_name='unavailable', available=False) + provider_available = create_memory_pcp(ie, logger, provider_key='available', 
provider_name='available') + + logger.log_level = logger.LogLevel.TRACE + + cache = PoTokenCache( + cache_providers=[provider_available, provider_unavailable], + cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})], + logger=logger, + ) + + response = PoTokenResponse(EXAMPLE_PO_TOKEN) + cache.store(pot_request, response, write_policy=CacheProviderWritePolicy.WRITE_FIRST) + assert cache.get(pot_request) is not None + assert provider_unavailable.get_calls == 0 + assert provider_unavailable.store_calls == 0 + assert provider_available.get_calls == 1 + assert provider_available.store_calls == 1 + assert provider_unavailable.available_called_times + assert provider_available.available_called_times + assert 'PO Token Cache Providers: available-0.0.0 (external), unavailable-0.0.0 (external, unavailable)' in logger.messages.get('trace', []) + + def test_close(self, ie, pot_request, logger): + # Should call close on the cache providers and cache specs + memory_pcp = create_memory_pcp(ie, logger, provider_key='memory') + memory2_pcp = create_memory_pcp(ie, logger, provider_key='memory2') + + spec1 = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + spec2 = UnavailableCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + + cache = PoTokenCache( + cache_providers=[memory2_pcp, memory_pcp], + cache_spec_providers=[spec1, spec2], + logger=logger, + ) + + cache.close() + assert memory_pcp.close_called + assert memory2_pcp.close_called + assert spec1.close_called + assert spec2.close_called + + +class TestPoTokenRequestDirector: + + def test_request_pot_success(self, ie, pot_request, pot_cache, pot_provider, logger): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + director.register_provider(pot_provider) + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + + def test_request_and_cache(self, ie, pot_request, pot_cache, pot_provider, logger): + director = 
PoTokenRequestDirector(logger=logger, cache=pot_cache) + director.register_provider(pot_provider) + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert pot_provider.request_called_times == 1 + assert pot_cache.get_calls == 1 + assert pot_cache.store_calls == 1 + + # Second request, should be cached + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert pot_cache.get_calls == 2 + assert pot_cache.store_calls == 1 + assert pot_provider.request_called_times == 1 + + def test_bypass_cache(self, ie, pot_request, pot_cache, logger, pot_provider): + pot_request.bypass_cache = True + + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + director.register_provider(pot_provider) + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert pot_provider.request_called_times == 1 + assert pot_cache.get_calls == 0 + assert pot_cache.store_calls == 1 + + # Second request, should not get from cache + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert pot_provider.request_called_times == 2 + assert pot_cache.get_calls == 0 + assert pot_cache.store_calls == 2 + + # POT is still cached, should get from cache + pot_request.bypass_cache = False + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert pot_provider.request_called_times == 2 + assert pot_cache.get_calls == 1 + assert pot_cache.store_calls == 2 + + def test_clean_pot_generate(self, ie, pot_request, pot_cache, logger): + # Token should be cleaned before returning + base_token = base64.urlsafe_b64encode(b'token').decode() + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = success_ptp(PoTokenResponse(base_token + '?extra=params'))(ie, logger, settings={}) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response == base_token + assert 
provider.request_called_times == 1 + + # Confirm the cleaned version was stored in the cache + cached_token = pot_cache.get(pot_request) + assert cached_token.po_token == base_token + + def test_clean_pot_cache(self, ie, pot_request, pot_cache, logger, pot_provider): + # Token retrieved from cache should be cleaned before returning + base_token = base64.urlsafe_b64encode(b'token').decode() + pot_cache.store(pot_request, PoTokenResponse(base_token + '?extra=params')) + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + director.register_provider(pot_provider) + + response = director.get_po_token(pot_request) + assert response == base_token + assert pot_cache.get_calls == 1 + assert pot_provider.request_called_times == 0 + + def test_cache_expires_at_none(self, ie, pot_request, pot_cache, logger, pot_provider): + # Should cache if expires_at=None in the response + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = success_ptp(PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=None))(ie, logger, settings={}) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert pot_cache.store_calls == 1 + assert pot_cache.get(pot_request).po_token == EXAMPLE_PO_TOKEN + + def test_cache_expires_at_positive(self, ie, pot_request, pot_cache, logger, pot_provider): + # Should cache if expires_at is a positive number in the response + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = success_ptp(PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=99999999999))(ie, logger, settings={}) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert pot_cache.store_calls == 1 + assert pot_cache.get(pot_request).po_token == EXAMPLE_PO_TOKEN + + @pytest.mark.parametrize('expires_at', [0, -1]) + def test_not_cache_expires_at(self, ie, pot_request, pot_cache, logger, pot_provider, 
expires_at): + # Should not cache if expires_at <= 0 in the response + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = success_ptp(PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=expires_at))(ie, logger, settings={}) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert pot_cache.store_calls == 0 + assert pot_cache.get(pot_request) is None + + def test_no_providers(self, ie, pot_request, pot_cache, logger): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + response = director.get_po_token(pot_request) + assert response is None + + def test_try_cache_no_providers(self, ie, pot_request, pot_cache, logger): + # Should still try the cache even if no providers are configured + pot_cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN)) + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + + def test_close(self, ie, pot_request, pot_cache, pot_provider, logger): + # Should call close on the pot cache and any providers + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + + provider2 = UnavailablePTP(ie, logger, {}) + director.register_provider(pot_provider) + director.register_provider(provider2) + + director.close() + assert pot_provider.close_called + assert provider2.close_called + assert pot_cache.close_called + + def test_pot_provider_preferences(self, pot_request, pot_cache, ie, logger): + pot_request.bypass_cache = True + provider_two_pot = base64.urlsafe_b64encode(b'token2').decode() + + example_provider = success_ptp(response=PoTokenResponse(EXAMPLE_PO_TOKEN), key='exampleone')(ie, logger, settings={}) + example_provider_two = success_ptp(response=PoTokenResponse(provider_two_pot), key='exampletwo')(ie, logger, settings={}) + + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + 
director.register_provider(example_provider) + director.register_provider(example_provider_two) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert example_provider.request_called_times == 1 + assert example_provider_two.request_called_times == 0 + + standard_preference_called = False + example_preference_called = False + + # Test that the provider preferences are respected + def standard_preference(provider, request, *_, **__): + nonlocal standard_preference_called + standard_preference_called = True + assert isinstance(provider, PoTokenProvider) + assert isinstance(request, PoTokenRequest) + return 1 + + def example_preference(provider, request, *_, **__): + nonlocal example_preference_called + example_preference_called = True + assert isinstance(provider, PoTokenProvider) + assert isinstance(request, PoTokenRequest) + if provider.PROVIDER_KEY == example_provider.PROVIDER_KEY: + return -100 + return 0 + + # test that it can handle multiple preferences + director.register_preference(example_preference) + director.register_preference(standard_preference) + + response = director.get_po_token(pot_request) + assert response == provider_two_pot + assert example_provider.request_called_times == 1 + assert example_provider_two.request_called_times == 1 + assert standard_preference_called + assert example_preference_called + + def test_unsupported_request_no_fallback(self, ie, logger, pot_cache, pot_request): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = UnsupportedPTP(ie, logger, {}) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response is None + assert provider.request_called_times == 1 + + def test_unsupported_request_fallback(self, ie, logger, pot_cache, pot_request, pot_provider): + # Should fallback to the next provider if the first one does not support the request + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + 
provider = UnsupportedPTP(ie, logger, {}) + director.register_provider(provider) + director.register_provider(pot_provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert provider.request_called_times == 1 + assert pot_provider.request_called_times == 1 + assert 'PO Token Provider "unsupported" rejected this request, trying next available provider. Reason: unsupported request' in logger.messages['trace'] + + def test_unavailable_request_no_fallback(self, ie, logger, pot_cache, pot_request): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = UnavailablePTP(ie, logger, {}) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response is None + assert provider.request_called_times == 0 + assert provider.available_called_times + + def test_unavailable_request_fallback(self, ie, logger, pot_cache, pot_request, pot_provider): + # Should fallback to the next provider if the first one is unavailable + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = UnavailablePTP(ie, logger, {}) + director.register_provider(provider) + director.register_provider(pot_provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert provider.request_called_times == 0 + assert provider.available_called_times + assert pot_provider.request_called_times == 1 + assert pot_provider.available_called_times + # should not even try use the provider for the request + assert 'Attempting to fetch a PO Token from "unavailable" provider' not in logger.messages['trace'] + assert 'Attempting to fetch a PO Token from "success" provider' in logger.messages['trace'] + + def test_available_not_called(self, ie, logger, pot_cache, pot_request, pot_provider): + # Test that the available method is not called when provider higher in the list is available + logger.log_level = logger.LogLevel.INFO + director = 
PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = UnavailablePTP(ie, logger, {}) + director.register_provider(pot_provider) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert provider.request_called_times == 0 + assert provider.available_called_times == 0 + assert pot_provider.request_called_times == 1 + assert pot_provider.available_called_times == 2 + assert 'PO Token Providers: success-0.0.1 (external), unavailable-0.0.0 (external, unavailable)' not in logger.messages.get('trace', []) + + def test_available_called_trace(self, ie, logger, pot_cache, pot_request, pot_provider): + # But if logging level is trace should call available (as part of debug logging) + logger.log_level = logger.LogLevel.TRACE + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = UnavailablePTP(ie, logger, {}) + director.register_provider(pot_provider) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert provider.request_called_times == 0 + assert provider.available_called_times == 1 + assert pot_provider.request_called_times == 1 + assert pot_provider.available_called_times == 3 + assert 'PO Token Providers: success-0.0.1 (external), unavailable-0.0.0 (external, unavailable)' in logger.messages['trace'] + + def test_provider_error_no_fallback_unexpected(self, ie, logger, pot_cache, pot_request): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = ErrorPTP(ie, logger, {}) + director.register_provider(provider) + pot_request.video_id = 'unexpected' + response = director.get_po_token(pot_request) + assert response is None + assert provider.request_called_times == 1 + assert "Error fetching PO Token from \"error\" provider: PoTokenProviderError('an error occurred'); please report this issue to the provider developer at https://error.example.com/issues ." 
in logger.messages['warning'] + + def test_provider_error_no_fallback_expected(self, ie, logger, pot_cache, pot_request): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = ErrorPTP(ie, logger, {}) + director.register_provider(provider) + pot_request.video_id = 'expected' + response = director.get_po_token(pot_request) + assert response is None + assert provider.request_called_times == 1 + assert "Error fetching PO Token from \"error\" provider: PoTokenProviderError('an error occurred')" in logger.messages['warning'] + + def test_provider_error_fallback(self, ie, logger, pot_cache, pot_request, pot_provider): + # Should fallback to the next provider if the first one raises an error + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = ErrorPTP(ie, logger, {}) + director.register_provider(provider) + director.register_provider(pot_provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert provider.request_called_times == 1 + assert pot_provider.request_called_times == 1 + assert "Error fetching PO Token from \"error\" provider: PoTokenProviderError('an error occurred'); please report this issue to the provider developer at https://error.example.com/issues ." in logger.messages['warning'] + + def test_provider_unexpected_error_no_fallback(self, ie, logger, pot_cache, pot_request): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = UnexpectedErrorPTP(ie, logger, {}) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response is None + assert provider.request_called_times == 1 + assert "Unexpected error when fetching PO Token from \"unexpected_error\" provider: ValueError('an unexpected error occurred'); please report this issue to the provider developer at https://unexpected.example.com/issues ." 
in logger.messages['error'] + + def test_provider_unexpected_error_fallback(self, ie, logger, pot_cache, pot_request, pot_provider): + # Should fallback to the next provider if the first one raises an unexpected error + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = UnexpectedErrorPTP(ie, logger, {}) + director.register_provider(provider) + director.register_provider(pot_provider) + + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert provider.request_called_times == 1 + assert pot_provider.request_called_times == 1 + assert "Unexpected error when fetching PO Token from \"unexpected_error\" provider: ValueError('an unexpected error occurred'); please report this issue to the provider developer at https://unexpected.example.com/issues ." in logger.messages['error'] + + def test_invalid_po_token_response_type(self, ie, logger, pot_cache, pot_request, pot_provider): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = InvalidPTP(ie, logger, {}) + director.register_provider(provider) + + pot_request.video_id = 'invalid_type' + + response = director.get_po_token(pot_request) + assert response is None + assert provider.request_called_times == 1 + assert 'Invalid PO Token response received from "invalid" provider: invalid-response; please report this issue to the provider developer at https://invalid.example.com/issues .' 
in logger.messages['error'] + + # Should fallback to next available provider + director.register_provider(pot_provider) + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert provider.request_called_times == 2 + assert pot_provider.request_called_times == 1 + + def test_invalid_po_token_response(self, ie, logger, pot_cache, pot_request, pot_provider): + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + provider = InvalidPTP(ie, logger, {}) + director.register_provider(provider) + + response = director.get_po_token(pot_request) + assert response is None + assert provider.request_called_times == 1 + assert "Invalid PO Token response received from \"invalid\" provider: PoTokenResponse(po_token='example-token?', expires_at='123'); please report this issue to the provider developer at https://invalid.example.com/issues ." in logger.messages['error'] + + # Should fallback to next available provider + director.register_provider(pot_provider) + response = director.get_po_token(pot_request) + assert response == EXAMPLE_PO_TOKEN + assert provider.request_called_times == 2 + assert pot_provider.request_called_times == 1 + + def test_copy_request_provider(self, ie, logger, pot_cache, pot_request): + + class BadProviderPTP(BaseMockPoTokenProvider): + _SUPPORTED_CONTEXTS = None + _SUPPORTED_CLIENTS = None + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + # Providers should not modify the request object, but we should guard against it + request.video_id = 'bad' + raise PoTokenProviderRejectedRequest('bad request') + + class GoodProviderPTP(BaseMockPoTokenProvider): + _SUPPORTED_CONTEXTS = None + _SUPPORTED_CLIENTS = None + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + return PoTokenResponse(base64.urlsafe_b64encode(request.video_id.encode()).decode()) + + director = PoTokenRequestDirector(logger=logger, cache=pot_cache) + + bad_provider = BadProviderPTP(ie, 
logger, {}) + good_provider = GoodProviderPTP(ie, logger, {}) + + director.register_provider(bad_provider) + director.register_provider(good_provider) + + pot_request.video_id = 'good' + response = director.get_po_token(pot_request) + assert response == base64.urlsafe_b64encode(b'good').decode() + assert bad_provider.request_called_times == 1 + assert good_provider.request_called_times == 1 + assert pot_request.video_id == 'good' + + +@pytest.mark.parametrize('spec, expected', [ + (None, False), + (PoTokenCacheSpec(key_bindings={'v': 'video-id'}, default_ttl=60, write_policy=None), False), # type: ignore + (PoTokenCacheSpec(key_bindings={'v': 'video-id'}, default_ttl='invalid'), False), # type: ignore + (PoTokenCacheSpec(key_bindings='invalid', default_ttl=60), False), # type: ignore + (PoTokenCacheSpec(key_bindings={2: 'video-id'}, default_ttl=60), False), # type: ignore + (PoTokenCacheSpec(key_bindings={'v': 2}, default_ttl=60), False), # type: ignore + (PoTokenCacheSpec(key_bindings={'v': None}, default_ttl=60), False), # type: ignore + + (PoTokenCacheSpec(key_bindings={'v': 'video_id', 'e': None}, default_ttl=60), True), + (PoTokenCacheSpec(key_bindings={'v': 'video_id'}, default_ttl=60, write_policy=CacheProviderWritePolicy.WRITE_FIRST), True), +]) +def test_validate_cache_spec(spec, expected): + assert validate_cache_spec(spec) == expected + + +@pytest.mark.parametrize('po_token', [ + 'invalid-token?', + '123', +]) +def test_clean_pot_fail(po_token): + with pytest.raises(ValueError, match='Invalid PO Token'): + clean_pot(po_token) + + +@pytest.mark.parametrize('po_token,expected', [ + ('TwAA/+8=', 'TwAA_-8='), + ('TwAA%5F%2D9VA6Q92v%5FvEQ4==?extra-param=2', 'TwAA_-9VA6Q92v_vEQ4='), +]) +def test_clean_pot(po_token, expected): + assert clean_pot(po_token) == expected + + +@pytest.mark.parametrize( + 'response, expected', + [ + (None, False), + (PoTokenResponse(None), False), + (PoTokenResponse(1), False), + (PoTokenResponse('invalid-token?'), False), + 
(PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at='abc'), False), # type: ignore + (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=100), False), + (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=time.time() + 10000.0), False), # type: ignore + (PoTokenResponse(EXAMPLE_PO_TOKEN), True), + (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=-1), True), + (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=0), True), + (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=int(time.time()) + 10000), True), + ], +) +def test_validate_pot_response(response, expected): + assert validate_response(response) == expected + + +def test_built_in_provider(ie, logger): + class BuiltinProviderDefaultT(BuiltinIEContentProvider, suffix='T'): + def is_available(self): + return True + + class BuiltinProviderCustomNameT(BuiltinIEContentProvider, suffix='T'): + PROVIDER_NAME = 'CustomName' + + def is_available(self): + return True + + class ExternalProviderDefaultT(IEContentProvider, suffix='T'): + def is_available(self): + return True + + class ExternalProviderCustomT(IEContentProvider, suffix='T'): + PROVIDER_NAME = 'custom' + PROVIDER_VERSION = '5.4b2' + + def is_available(self): + return True + + class ExternalProviderUnavailableT(IEContentProvider, suffix='T'): + def is_available(self) -> bool: + return False + + class BuiltinProviderUnavailableT(IEContentProvider, suffix='T'): + def is_available(self) -> bool: + return False + + built_in_default = BuiltinProviderDefaultT(ie=ie, logger=logger, settings={}) + built_in_custom_name = BuiltinProviderCustomNameT(ie=ie, logger=logger, settings={}) + built_in_unavailable = BuiltinProviderUnavailableT(ie=ie, logger=logger, settings={}) + external_default = ExternalProviderDefaultT(ie=ie, logger=logger, settings={}) + external_custom = ExternalProviderCustomT(ie=ie, logger=logger, settings={}) + external_unavailable = ExternalProviderUnavailableT(ie=ie, logger=logger, settings={}) + + assert provider_display_list([]) == 'none' + assert 
provider_display_list([built_in_default]) == 'BuiltinProviderDefault' + assert provider_display_list([external_unavailable]) == 'ExternalProviderUnavailable-0.0.0 (external, unavailable)' + assert provider_display_list([ + built_in_default, + built_in_custom_name, + external_default, + external_custom, + external_unavailable, + built_in_unavailable], + ) == 'BuiltinProviderDefault, CustomName, ExternalProviderDefault-0.0.0 (external), custom-5.4b2 (external), ExternalProviderUnavailable-0.0.0 (external, unavailable), BuiltinProviderUnavailable-0.0.0 (external, unavailable)' diff --git a/test/test_pot/test_pot_framework.py b/test/test_pot/test_pot_framework.py new file mode 100644 index 000000000..bc94653f4 --- /dev/null +++ b/test/test_pot/test_pot_framework.py @@ -0,0 +1,629 @@ +import pytest + +from yt_dlp.extractor.youtube.pot._provider import IEContentProvider +from yt_dlp.cookies import YoutubeDLCookieJar +from yt_dlp.utils.networking import HTTPHeaderDict +from yt_dlp.extractor.youtube.pot.provider import ( + PoTokenRequest, + PoTokenContext, + ExternalRequestFeature, + +) + +from yt_dlp.extractor.youtube.pot.cache import ( + PoTokenCacheProvider, + PoTokenCacheSpec, + PoTokenCacheSpecProvider, + CacheProviderWritePolicy, +) + +import yt_dlp.extractor.youtube.pot.cache as cache + +from yt_dlp.networking import Request +from yt_dlp.extractor.youtube.pot.provider import ( + PoTokenResponse, + PoTokenProvider, + PoTokenProviderRejectedRequest, + provider_bug_report_message, + register_provider, + register_preference, +) + +from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences, _pot_pcs_providers, _pot_cache_providers, _pot_cache_provider_preferences + + +class ExamplePTP(PoTokenProvider): + PROVIDER_NAME = 'example' + PROVIDER_VERSION = '0.0.1' + BUG_REPORT_LOCATION = 'https://example.com/issues' + + _SUPPORTED_CLIENTS = ('WEB',) + _SUPPORTED_CONTEXTS = (PoTokenContext.GVS, ) + + _SUPPORTED_EXTERNAL_REQUEST_FEATURES = ( + 
ExternalRequestFeature.PROXY_SCHEME_HTTP, + ExternalRequestFeature.PROXY_SCHEME_SOCKS5H, + ) + + def is_available(self) -> bool: + return True + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + return PoTokenResponse('example-token', expires_at=123) + + +class ExampleCacheProviderPCP(PoTokenCacheProvider): + + PROVIDER_NAME = 'example' + PROVIDER_VERSION = '0.0.1' + BUG_REPORT_LOCATION = 'https://example.com/issues' + + def is_available(self) -> bool: + return True + + def get(self, key: str): + return 'example-cache' + + def store(self, key: str, value: str, expires_at: int): + pass + + def delete(self, key: str): + pass + + +class ExampleCacheSpecProviderPCSP(PoTokenCacheSpecProvider): + + PROVIDER_NAME = 'example' + PROVIDER_VERSION = '0.0.1' + BUG_REPORT_LOCATION = 'https://example.com/issues' + + def generate_cache_spec(self, request: PoTokenRequest): + return PoTokenCacheSpec( + key_bindings={'field': 'example-key'}, + default_ttl=60, + write_policy=CacheProviderWritePolicy.WRITE_FIRST, + ) + + +class TestPoTokenProvider: + + def test_base_type(self): + assert issubclass(PoTokenProvider, IEContentProvider) + + def test_create_provider_missing_fetch_method(self, ie, logger): + class MissingMethodsPTP(PoTokenProvider): + def is_available(self) -> bool: + return True + + with pytest.raises(TypeError): + MissingMethodsPTP(ie=ie, logger=logger, settings={}) + + def test_create_provider_missing_available_method(self, ie, logger): + class MissingMethodsPTP(PoTokenProvider): + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + raise PoTokenProviderRejectedRequest('Not implemented') + + with pytest.raises(TypeError): + MissingMethodsPTP(ie=ie, logger=logger, settings={}) + + def test_barebones_provider(self, ie, logger): + class BarebonesProviderPTP(PoTokenProvider): + def is_available(self) -> bool: + return True + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + raise 
PoTokenProviderRejectedRequest('Not implemented') + + provider = BarebonesProviderPTP(ie=ie, logger=logger, settings={}) + assert provider.PROVIDER_NAME == 'BarebonesProvider' + assert provider.PROVIDER_KEY == 'BarebonesProvider' + assert provider.PROVIDER_VERSION == '0.0.0' + assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .' + + def test_example_provider_success(self, ie, logger, pot_request): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + assert provider.PROVIDER_NAME == 'example' + assert provider.PROVIDER_KEY == 'Example' + assert provider.PROVIDER_VERSION == '0.0.1' + assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .' + assert provider.is_available() + + response = provider.request_pot(pot_request) + + assert response.po_token == 'example-token' + assert response.expires_at == 123 + + def test_provider_unsupported_context(self, ie, logger, pot_request): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + pot_request.context = PoTokenContext.PLAYER + + with pytest.raises(PoTokenProviderRejectedRequest): + provider.request_pot(pot_request) + + def test_provider_unsupported_client(self, ie, logger, pot_request): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + pot_request.innertube_context['client']['clientName'] = 'ANDROID' + + with pytest.raises(PoTokenProviderRejectedRequest): + provider.request_pot(pot_request) + + def test_provider_unsupported_proxy_scheme(self, ie, logger, pot_request): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + pot_request.request_proxy = 'socks4://example.com' + + with pytest.raises( + PoTokenProviderRejectedRequest, + match='External requests by "example" provider do not support proxy scheme "socks4". 
Supported proxy ' + 'schemes: http, socks5h', + ): + provider.request_pot(pot_request) + + pot_request.request_proxy = 'http://example.com' + + assert provider.request_pot(pot_request) + + def test_provider_ignore_external_request_features(self, ie, logger, pot_request): + class InternalPTP(ExamplePTP): + _SUPPORTED_EXTERNAL_REQUEST_FEATURES = None + + provider = InternalPTP(ie=ie, logger=logger, settings={}) + + pot_request.request_proxy = 'socks5://example.com' + assert provider.request_pot(pot_request) + pot_request.request_source_address = '0.0.0.0' + assert provider.request_pot(pot_request) + + def test_provider_unsupported_external_request_source_address(self, ie, logger, pot_request): + class InternalPTP(ExamplePTP): + _SUPPORTED_EXTERNAL_REQUEST_FEATURES = tuple() + + provider = InternalPTP(ie=ie, logger=logger, settings={}) + + pot_request.request_source_address = None + assert provider.request_pot(pot_request) + + pot_request.request_source_address = '0.0.0.0' + with pytest.raises( + PoTokenProviderRejectedRequest, + match='External requests by "example" provider do not support setting source address', + ): + provider.request_pot(pot_request) + + def test_provider_supported_external_request_source_address(self, ie, logger, pot_request): + class InternalPTP(ExamplePTP): + _SUPPORTED_EXTERNAL_REQUEST_FEATURES = ( + ExternalRequestFeature.SOURCE_ADDRESS, + ) + + provider = InternalPTP(ie=ie, logger=logger, settings={}) + + pot_request.request_source_address = None + assert provider.request_pot(pot_request) + + pot_request.request_source_address = '0.0.0.0' + assert provider.request_pot(pot_request) + + def test_provider_unsupported_external_request_tls_verification(self, ie, logger, pot_request): + class InternalPTP(ExamplePTP): + _SUPPORTED_EXTERNAL_REQUEST_FEATURES = tuple() + + provider = InternalPTP(ie=ie, logger=logger, settings={}) + + pot_request.request_verify_tls = True + assert provider.request_pot(pot_request) + + pot_request.request_verify_tls = 
False + with pytest.raises( + PoTokenProviderRejectedRequest, + match='External requests by "example" provider do not support ignoring TLS certificate failures', + ): + provider.request_pot(pot_request) + + def test_provider_supported_external_request_tls_verification(self, ie, logger, pot_request): + class InternalPTP(ExamplePTP): + _SUPPORTED_EXTERNAL_REQUEST_FEATURES = ( + ExternalRequestFeature.DISABLE_TLS_VERIFICATION, + ) + + provider = InternalPTP(ie=ie, logger=logger, settings={}) + + pot_request.request_verify_tls = True + assert provider.request_pot(pot_request) + + pot_request.request_verify_tls = False + assert provider.request_pot(pot_request) + + def test_provider_request_webpage(self, ie, logger, pot_request): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + + cookiejar = YoutubeDLCookieJar() + pot_request.request_headers = HTTPHeaderDict({'User-Agent': 'example-user-agent'}) + pot_request.request_proxy = 'socks5://example-proxy.com' + pot_request.request_cookiejar = cookiejar + + def mock_urlopen(request): + return request + + ie._downloader.urlopen = mock_urlopen + + sent_request = provider._request_webpage(Request( + 'https://example.com', + ), pot_request=pot_request) + + assert sent_request.url == 'https://example.com' + assert sent_request.headers['User-Agent'] == 'example-user-agent' + assert sent_request.proxies == {'all': 'socks5://example-proxy.com'} + assert sent_request.extensions['cookiejar'] is cookiejar + assert 'Requesting webpage' in logger.messages['info'] + + def test_provider_request_webpage_override(self, ie, logger, pot_request): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + + cookiejar_request = YoutubeDLCookieJar() + pot_request.request_headers = HTTPHeaderDict({'User-Agent': 'example-user-agent'}) + pot_request.request_proxy = 'socks5://example-proxy.com' + pot_request.request_cookiejar = cookiejar_request + + def mock_urlopen(request): + return request + + ie._downloader.urlopen = mock_urlopen + + 
sent_request = provider._request_webpage(Request( + 'https://example.com', + headers={'User-Agent': 'override-user-agent-override'}, + proxies={'http': 'http://example-proxy-override.com'}, + extensions={'cookiejar': YoutubeDLCookieJar()}, + ), pot_request=pot_request, note='Custom requesting webpage') + + assert sent_request.url == 'https://example.com' + assert sent_request.headers['User-Agent'] == 'override-user-agent-override' + assert sent_request.proxies == {'http': 'http://example-proxy-override.com'} + assert sent_request.extensions['cookiejar'] is not cookiejar_request + assert 'Custom requesting webpage' in logger.messages['info'] + + def test_provider_request_webpage_no_log(self, ie, logger, pot_request): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + + def mock_urlopen(request): + return request + + ie._downloader.urlopen = mock_urlopen + + sent_request = provider._request_webpage(Request( + 'https://example.com', + ), note=False) + + assert sent_request.url == 'https://example.com' + assert 'info' not in logger.messages + + def test_provider_request_webpage_no_pot_request(self, ie, logger): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + + def mock_urlopen(request): + return request + + ie._downloader.urlopen = mock_urlopen + + sent_request = provider._request_webpage(Request( + 'https://example.com', + ), pot_request=None) + + assert sent_request.url == 'https://example.com' + + def test_get_config_arg(self, ie, logger): + provider = ExamplePTP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']}) + + assert provider._configuration_arg('abc') == ['123d'] + assert provider._configuration_arg('abc', default=['default']) == ['123d'] + assert provider._configuration_arg('ABC', default=['default']) == ['default'] + assert provider._configuration_arg('abc', casesense=True) == ['123D'] + assert provider._configuration_arg('xyz', casesense=False) == ['456a', '789b'] + + def 
test_require_class_end_with_suffix(self, ie, logger): + class InvalidSuffix(PoTokenProvider): + PROVIDER_NAME = 'invalid-suffix' + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + raise PoTokenProviderRejectedRequest('Not implemented') + + def is_available(self) -> bool: + return True + + provider = InvalidSuffix(ie=ie, logger=logger, settings={}) + + with pytest.raises(AssertionError): + provider.PROVIDER_KEY # noqa: B018 + + +class TestPoTokenCacheProvider: + + def test_base_type(self): + assert issubclass(PoTokenCacheProvider, IEContentProvider) + + def test_create_provider_missing_get_method(self, ie, logger): + class MissingMethodsPCP(PoTokenCacheProvider): + def store(self, key: str, value: str, expires_at: int): + pass + + def delete(self, key: str): + pass + + def is_available(self) -> bool: + return True + + with pytest.raises(TypeError): + MissingMethodsPCP(ie=ie, logger=logger, settings={}) + + def test_create_provider_missing_store_method(self, ie, logger): + class MissingMethodsPCP(PoTokenCacheProvider): + def get(self, key: str): + pass + + def delete(self, key: str): + pass + + def is_available(self) -> bool: + return True + + with pytest.raises(TypeError): + MissingMethodsPCP(ie=ie, logger=logger, settings={}) + + def test_create_provider_missing_delete_method(self, ie, logger): + class MissingMethodsPCP(PoTokenCacheProvider): + def get(self, key: str): + pass + + def store(self, key: str, value: str, expires_at: int): + pass + + def is_available(self) -> bool: + return True + + with pytest.raises(TypeError): + MissingMethodsPCP(ie=ie, logger=logger, settings={}) + + def test_create_provider_missing_is_available_method(self, ie, logger): + class MissingMethodsPCP(PoTokenCacheProvider): + def get(self, key: str): + pass + + def store(self, key: str, value: str, expires_at: int): + pass + + def delete(self, key: str): + pass + + with pytest.raises(TypeError): + MissingMethodsPCP(ie=ie, logger=logger, settings={}) + + def 
test_barebones_provider(self, ie, logger): + class BarebonesProviderPCP(PoTokenCacheProvider): + + def is_available(self) -> bool: + return True + + def get(self, key: str): + return 'example-cache' + + def store(self, key: str, value: str, expires_at: int): + pass + + def delete(self, key: str): + pass + + provider = BarebonesProviderPCP(ie=ie, logger=logger, settings={}) + assert provider.PROVIDER_NAME == 'BarebonesProvider' + assert provider.PROVIDER_KEY == 'BarebonesProvider' + assert provider.PROVIDER_VERSION == '0.0.0' + assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .' + + def test_create_provider_example(self, ie, logger): + provider = ExampleCacheProviderPCP(ie=ie, logger=logger, settings={}) + assert provider.PROVIDER_NAME == 'example' + assert provider.PROVIDER_KEY == 'ExampleCacheProvider' + assert provider.PROVIDER_VERSION == '0.0.1' + assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .' 
+ assert provider.is_available() + + def test_get_config_arg(self, ie, logger): + provider = ExampleCacheProviderPCP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']}) + assert provider._configuration_arg('abc') == ['123d'] + assert provider._configuration_arg('abc', default=['default']) == ['123d'] + assert provider._configuration_arg('ABC', default=['default']) == ['default'] + assert provider._configuration_arg('abc', casesense=True) == ['123D'] + assert provider._configuration_arg('xyz', casesense=False) == ['456a', '789b'] + + def test_require_class_end_with_suffix(self, ie, logger): + class InvalidSuffix(PoTokenCacheProvider): + def get(self, key: str): + return 'example-cache' + + def store(self, key: str, value: str, expires_at: int): + pass + + def delete(self, key: str): + pass + + def is_available(self) -> bool: + return True + + provider = InvalidSuffix(ie=ie, logger=logger, settings={}) + + with pytest.raises(AssertionError): + provider.PROVIDER_KEY # noqa: B018 + + +class TestPoTokenCacheSpecProvider: + + def test_base_type(self): + assert issubclass(PoTokenCacheSpecProvider, IEContentProvider) + + def test_create_provider_missing_supports_method(self, ie, logger): + class MissingMethodsPCS(PoTokenCacheSpecProvider): + pass + + with pytest.raises(TypeError): + MissingMethodsPCS(ie=ie, logger=logger, settings={}) + + def test_create_provider_barebones(self, ie, pot_request, logger): + class BarebonesProviderPCSP(PoTokenCacheSpecProvider): + def generate_cache_spec(self, request: PoTokenRequest): + return PoTokenCacheSpec( + default_ttl=100, + key_bindings={}, + ) + + provider = BarebonesProviderPCSP(ie=ie, logger=logger, settings={}) + assert provider.PROVIDER_NAME == 'BarebonesProvider' + assert provider.PROVIDER_KEY == 'BarebonesProvider' + assert provider.PROVIDER_VERSION == '0.0.0' + assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) 
.' + assert provider.is_available() + assert provider.generate_cache_spec(request=pot_request).default_ttl == 100 + assert provider.generate_cache_spec(request=pot_request).key_bindings == {} + assert provider.generate_cache_spec(request=pot_request).write_policy == CacheProviderWritePolicy.WRITE_ALL + + def test_create_provider_example(self, ie, pot_request, logger): + provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={}) + assert provider.PROVIDER_NAME == 'example' + assert provider.PROVIDER_KEY == 'ExampleCacheSpecProvider' + assert provider.PROVIDER_VERSION == '0.0.1' + assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .' + assert provider.is_available() + assert provider.generate_cache_spec(pot_request) + assert provider.generate_cache_spec(pot_request).key_bindings == {'field': 'example-key'} + assert provider.generate_cache_spec(pot_request).default_ttl == 60 + assert provider.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_FIRST + + def test_get_config_arg(self, ie, logger): + provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']}) + + assert provider._configuration_arg('abc') == ['123d'] + assert provider._configuration_arg('abc', default=['default']) == ['123d'] + assert provider._configuration_arg('ABC', default=['default']) == ['default'] + assert provider._configuration_arg('abc', casesense=True) == ['123D'] + assert provider._configuration_arg('xyz', casesense=False) == ['456a', '789b'] + + def test_require_class_end_with_suffix(self, ie, logger): + class InvalidSuffix(PoTokenCacheSpecProvider): + def generate_cache_spec(self, request: PoTokenRequest): + return None + + provider = InvalidSuffix(ie=ie, logger=logger, settings={}) + + with pytest.raises(AssertionError): + provider.PROVIDER_KEY # noqa: B018 + + +class TestPoTokenRequest: + def test_copy_request(self, 
pot_request): + copied_request = pot_request.copy() + + assert copied_request is not pot_request + assert copied_request.context == pot_request.context + assert copied_request.innertube_context == pot_request.innertube_context + assert copied_request.innertube_context is not pot_request.innertube_context + copied_request.innertube_context['client']['clientName'] = 'ANDROID' + assert pot_request.innertube_context['client']['clientName'] != 'ANDROID' + assert copied_request.innertube_host == pot_request.innertube_host + assert copied_request.session_index == pot_request.session_index + assert copied_request.player_url == pot_request.player_url + assert copied_request.is_authenticated == pot_request.is_authenticated + assert copied_request.visitor_data == pot_request.visitor_data + assert copied_request.data_sync_id == pot_request.data_sync_id + assert copied_request.video_id == pot_request.video_id + assert copied_request.request_cookiejar is pot_request.request_cookiejar + assert copied_request.request_proxy == pot_request.request_proxy + assert copied_request.request_headers == pot_request.request_headers + assert copied_request.request_headers is not pot_request.request_headers + assert copied_request.request_timeout == pot_request.request_timeout + assert copied_request.request_source_address == pot_request.request_source_address + assert copied_request.request_verify_tls == pot_request.request_verify_tls + assert copied_request.bypass_cache == pot_request.bypass_cache + + +def test_provider_bug_report_message(ie, logger): + provider = ExamplePTP(ie=ie, logger=logger, settings={}) + assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .' + + message = provider_bug_report_message(provider) + assert message == '; please report this issue to the provider developer at https://example.com/issues .' 
+ + message_before = provider_bug_report_message(provider, before='custom message!') + assert message_before == 'custom message! Please report this issue to the provider developer at https://example.com/issues .' + + +def test_register_provider(ie): + + @register_provider + class UnavailableProviderPTP(PoTokenProvider): + def is_available(self) -> bool: + return False + + def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse: + raise PoTokenProviderRejectedRequest('Not implemented') + + assert _pot_providers.value.get('UnavailableProvider') == UnavailableProviderPTP + _pot_providers.value.pop('UnavailableProvider') + + +def test_register_pot_preference(ie): + before = len(_ptp_preferences.value) + + @register_preference(ExamplePTP) + def unavailable_preference(provider: PoTokenProvider, request: PoTokenRequest): + return 1 + + assert len(_ptp_preferences.value) == before + 1 + + +def test_register_cache_provider(ie): + + @cache.register_provider + class UnavailableCacheProviderPCP(PoTokenCacheProvider): + def is_available(self) -> bool: + return False + + def get(self, key: str): + return 'example-cache' + + def store(self, key: str, value: str, expires_at: int): + pass + + def delete(self, key: str): + pass + + assert _pot_cache_providers.value.get('UnavailableCacheProvider') == UnavailableCacheProviderPCP + _pot_cache_providers.value.pop('UnavailableCacheProvider') + + +def test_register_cache_provider_spec(ie): + + @cache.register_spec + class UnavailableCacheProviderPCSP(PoTokenCacheSpecProvider): + def is_available(self) -> bool: + return False + + def generate_cache_spec(self, request: PoTokenRequest): + return None + + assert _pot_pcs_providers.value.get('UnavailableCacheProvider') == UnavailableCacheProviderPCSP + _pot_pcs_providers.value.pop('UnavailableCacheProvider') + + +def test_register_cache_provider_preference(ie): + before = len(_pot_cache_provider_preferences.value) + + @cache.register_preference(ExampleCacheProviderPCP) + def 
unavailable_preference(provider: PoTokenCacheProvider, request: PoTokenRequest): + return 1 + + assert len(_pot_cache_provider_preferences.value) == before + 1 + + +def test_logger_log_level(logger): + assert logger.LogLevel('INFO') == logger.LogLevel.INFO + assert logger.LogLevel('debuG') == logger.LogLevel.DEBUG + assert logger.LogLevel(10) == logger.LogLevel.DEBUG + assert logger.LogLevel('UNKNOWN') == logger.LogLevel.INFO diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 0f0885366..3336b6bff 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -316,6 +316,18 @@ 'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js', 'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE', ), + ( + 'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js', + 'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h', + ), + ( + 'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js', + 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u', + ), + ( + 'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js', + 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u', + ), ] diff --git a/test/testdata/thumbnails/foo %d bar/foo_%d.webp b/test/testdata/thumbnails/foo %d bar/foo_%d.webp deleted file mode 100644 index d64d0839f..000000000 Binary files a/test/testdata/thumbnails/foo %d bar/foo_%d.webp and /dev/null differ diff --git a/test/testdata/thumbnails/foo %d bar/placeholder b/test/testdata/thumbnails/foo %d bar/placeholder new file mode 100644 index 000000000..e69de29bb diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 7021c2d89..7c4ff8f33 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -491,7 +491,7 @@ class YoutubeDL: The template is mapped on a dictionary with keys 'progress' and 'info' retry_sleep_functions: Dictionary of functions that takes the number of attempts as argument and returns the time to sleep in seconds. 
- Allowed keys are 'http', 'fragment', 'file_access' + Allowed keys are 'http', 'fragment', 'file_access', 'extractor' download_ranges: A callback function that gets called for every video with the signature (info_dict, ydl) -> Iterable[Section]. Only the returned sections will be downloaded. @@ -641,6 +641,7 @@ def __init__(self, params=None, auto_init=True): self._printed_messages = set() self._first_webpage_request = True self._post_hooks = [] + self._close_hooks = [] self._progress_hooks = [] self._postprocessor_hooks = [] self._download_retcode = 0 @@ -909,6 +910,11 @@ def add_post_hook(self, ph): """Add the post hook""" self._post_hooks.append(ph) + def add_close_hook(self, ch): + """Add a close hook, called when YoutubeDL.close() is called""" + assert callable(ch), 'Close hook must be callable' + self._close_hooks.append(ch) + def add_progress_hook(self, ph): """Add the download progress hook""" self._progress_hooks.append(ph) @@ -1017,6 +1023,9 @@ def close(self): self._request_director.close() del self._request_director + for close_hook in self._close_hooks: + close_hook() + def trouble(self, message=None, tb=None, is_error=True): """Determine action to take when a download problem appears. 
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index fad323c90..5675445ac 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -764,11 +764,11 @@ def _get_linux_desktop_environment(env, logger): GetDesktopEnvironment """ xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) - desktop_session = env.get('DESKTOP_SESSION', None) + desktop_session = env.get('DESKTOP_SESSION', '') if xdg_current_desktop is not None: for part in map(str.strip, xdg_current_desktop.split(':')): if part == 'Unity': - if desktop_session is not None and 'gnome-fallback' in desktop_session: + if 'gnome-fallback' in desktop_session: return _LinuxDesktopEnvironment.GNOME else: return _LinuxDesktopEnvironment.UNITY @@ -797,35 +797,34 @@ def _get_linux_desktop_environment(env, logger): return _LinuxDesktopEnvironment.UKUI elif part == 'LXQt': return _LinuxDesktopEnvironment.LXQT - logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') + logger.debug(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') - elif desktop_session is not None: - if desktop_session == 'deepin': - return _LinuxDesktopEnvironment.DEEPIN - elif desktop_session in ('mate', 'gnome'): - return _LinuxDesktopEnvironment.GNOME - elif desktop_session in ('kde4', 'kde-plasma'): + if desktop_session == 'deepin': + return _LinuxDesktopEnvironment.DEEPIN + elif desktop_session in ('mate', 'gnome'): + return _LinuxDesktopEnvironment.GNOME + elif desktop_session in ('kde4', 'kde-plasma'): + return _LinuxDesktopEnvironment.KDE4 + elif desktop_session == 'kde': + if 'KDE_SESSION_VERSION' in env: return _LinuxDesktopEnvironment.KDE4 - elif desktop_session == 'kde': - if 'KDE_SESSION_VERSION' in env: - return _LinuxDesktopEnvironment.KDE4 - else: - return _LinuxDesktopEnvironment.KDE3 - elif 'xfce' in desktop_session or desktop_session == 'xubuntu': - return _LinuxDesktopEnvironment.XFCE - elif desktop_session == 'ukui': - return _LinuxDesktopEnvironment.UKUI else: - 
logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') - + return _LinuxDesktopEnvironment.KDE3 + elif 'xfce' in desktop_session or desktop_session == 'xubuntu': + return _LinuxDesktopEnvironment.XFCE + elif desktop_session == 'ukui': + return _LinuxDesktopEnvironment.UKUI else: - if 'GNOME_DESKTOP_SESSION_ID' in env: - return _LinuxDesktopEnvironment.GNOME - elif 'KDE_FULL_SESSION' in env: - if 'KDE_SESSION_VERSION' in env: - return _LinuxDesktopEnvironment.KDE4 - else: - return _LinuxDesktopEnvironment.KDE3 + logger.debug(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') + + if 'GNOME_DESKTOP_SESSION_ID' in env: + return _LinuxDesktopEnvironment.GNOME + elif 'KDE_FULL_SESSION' in env: + if 'KDE_SESSION_VERSION' in env: + return _LinuxDesktopEnvironment.KDE4 + else: + return _LinuxDesktopEnvironment.KDE3 + return _LinuxDesktopEnvironment.OTHER diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e7dcb9853..34c98b537 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -300,7 +300,6 @@ BrainPOPIlIE, BrainPOPJrIE, ) -from .bravotv import BravoTVIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, @@ -1262,6 +1261,7 @@ ) from .nbc import ( NBCIE, + BravoTVIE, NBCNewsIE, NBCOlympicsIE, NBCOlympicsStreamIE, @@ -1269,6 +1269,7 @@ NBCSportsStreamIE, NBCSportsVPlayerIE, NBCStationsIE, + SyfyIE, ) from .ndr import ( NDRIE, @@ -2016,13 +2017,11 @@ SverigesRadioPublicationIE, ) from .svt import ( - SVTIE, SVTPageIE, SVTPlayIE, SVTSeriesIE, ) from .swearnet import SwearnetEpisodeIE -from .syfy import SyfyIE from .syvdk import SYVDKIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE @@ -2147,6 +2146,7 @@ from .toggo import ToggoIE from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE +from .toutiao import ToutiaoIE from .toutv import TouTvIE from .toypics import ( ToypicsIE, @@ -2369,6 +2369,7 @@ VHXEmbedIE, 
VimeoAlbumIE, VimeoChannelIE, + VimeoEventIE, VimeoGroupsIE, VimeoIE, VimeoLikesIE, diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index f1b877927..8c2d9d934 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -3,6 +3,7 @@ import re import time import urllib.parse +import uuid import xml.etree.ElementTree as etree from .common import InfoExtractor @@ -10,6 +11,7 @@ from ..utils import ( NO_DEFAULT, ExtractorError, + parse_qs, unescapeHTML, unified_timestamp, urlencode_postdata, @@ -45,6 +47,8 @@ 'name': 'Comcast XFINITY', 'username_field': 'user', 'password_field': 'passwd', + 'login_hostname': 'login.xfinity.com', + 'needs_newer_ua': True, }, 'TWC': { 'name': 'Time Warner Cable | Spectrum', @@ -74,6 +78,12 @@ 'name': 'Verizon FiOS', 'username_field': 'IDToken1', 'password_field': 'IDToken2', + 'login_hostname': 'ssoauth.verizon.com', + }, + 'Fubo': { + 'name': 'Fubo', + 'username_field': 'username', + 'password_field': 'password', }, 'Cablevision': { 'name': 'Optimum/Cablevision', @@ -1338,6 +1348,7 @@ 'name': 'Sling TV', 'username_field': 'username', 'password_field': 'password', + 'login_hostname': 'identity.sling.com', }, 'Suddenlink': { 'name': 'Suddenlink', @@ -1355,7 +1366,6 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' - _MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0' _MVPD_CACHE = 'ap-mvpd' _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' @@ -1367,6 +1377,14 @@ def _download_webpage_handle(self, *args, **kwargs): return super()._download_webpage_handle( *args, **kwargs) + @staticmethod + def _get_mso_headers(mso_info): + # yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO + # See: 
https://github.com/yt-dlp/yt-dlp/issues/10848 + return { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0', + } if mso_info.get('needs_newer_ua') else {} + @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): channel = etree.Element('channel') @@ -1382,7 +1400,13 @@ def _get_mvpd_resource(provider_id, title, guid, rating): resource_rating.text = rating return '' + etree.tostring(channel).decode() + '' - def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): + def _extract_mvpd_auth(self, url, video_id, requestor_id, resource, software_statement): + mso_id = self.get_param('ap_mso') + if mso_id: + mso_info = MSO_INFO[mso_id] + else: + mso_info = {} + def xml_text(xml_str, tag): return self._search_regex( f'<{tag}>(.+?)', xml_str, tag) @@ -1391,15 +1415,27 @@ def is_expired(token, date_ele): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) - def post_form(form_page_res, note, data={}): + def post_form(form_page_res, note, data={}, validate_url=False): form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') if not re.match(r'https?://', post_url): post_url = urllib.parse.urljoin(urlh.url, post_url) + if validate_url: + # This request is submitting credentials so we should validate it when possible + url_parsed = urllib.parse.urlparse(post_url) + expected_hostname = mso_info.get('login_hostname') + if expected_hostname and expected_hostname != url_parsed.hostname: + raise ExtractorError( + f'Unexpected login URL hostname; expected "{expected_hostname}" but got ' + f'"{url_parsed.hostname}". 
Aborting before submitting credentials') + if url_parsed.scheme != 'https': + self.write_debug('Upgrading login URL scheme to https') + post_url = urllib.parse.urlunparse(url_parsed._replace(scheme='https')) form_data = self._hidden_inputs(form_page) form_data.update(data) return self._download_webpage_handle( post_url, video_id, note, data=urlencode_postdata(form_data), headers={ + **self._get_mso_headers(mso_info), 'Content-Type': 'application/x-www-form-urlencoded', }) @@ -1432,40 +1468,72 @@ def extract_redirect_url(html, url=None, fatal=False): } guid = xml_text(resource, 'guid') if '<' in resource else resource - count = 0 - while count < 2: + for _ in range(2): requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None if not authn_token: - mso_id = self.get_param('ap_mso') - if mso_id: - username, password = self._get_login_info('ap_username', 'ap_password', mso_id) - if not username or not password: - raise_mvpd_required() - mso_info = MSO_INFO[mso_id] - - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, - }, headers={ - # yt-dlp's default user-agent is usually too old for Comcast_SSO - # See: https://github.com/yt-dlp/yt-dlp/issues/10848 - 'User-Agent': self._MODERN_USER_AGENT, - } if mso_id == 'Comcast_SSO' else None) - elif not self._cookies_passed: + if not mso_id: + raise_mvpd_required() + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: raise_mvpd_required() - if not mso_id: - pass - elif mso_id == 'Comcast_SSO': + device_info, urlh = self._download_json_handle( + 
'https://sp.auth.adobe.com/indiv/devices', + video_id, 'Registering device with Adobe', + data=json.dumps({'fingerprint': uuid.uuid4().hex}).encode(), + headers={'Content-Type': 'application/json; charset=UTF-8'}) + + device_id = device_info['deviceId'] + mvpd_headers['pass_sfp'] = urlh.get_header('pass_sfp') + mvpd_headers['Ap_21'] = device_id + + registration = self._download_json( + 'https://sp.auth.adobe.com/o/client/register', + video_id, 'Registering client with Adobe', + data=json.dumps({'software_statement': software_statement}).encode(), + headers={'Content-Type': 'application/json; charset=UTF-8'}) + + access_token = self._download_json( + 'https://sp.auth.adobe.com/o/client/token', video_id, + 'Obtaining access token', data=urlencode_postdata({ + 'grant_type': 'client_credentials', + 'client_id': registration['client_id'], + 'client_secret': registration['client_secret'], + }), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + })['access_token'] + mvpd_headers['Authorization'] = f'Bearer {access_token}' + + reg_code = self._download_json( + f'https://sp.auth.adobe.com/reggie/v1/{requestor_id}/regcode', + video_id, 'Obtaining registration code', + data=urlencode_postdata({ + 'requestor': requestor_id, + 'deviceId': device_id, + 'format': 'json', + }), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Authorization': f'Bearer {access_token}', + })['code'] + + provider_redirect_page_res = self._download_webpage_handle( + self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, + 'reg_code': reg_code, + }, headers=self._get_mso_headers(mso_info)) + + if mso_id == 'Comcast_SSO': # Comcast page flow varies by video site and whether you # are on Comcast's network. 
provider_redirect_page, urlh = provider_redirect_page_res @@ -1489,8 +1557,8 @@ def extract_redirect_url(html, url=None, fatal=False): oauth_redirect_url = extract_redirect_url( provider_redirect_page, fatal=True) provider_login_page_res = self._download_webpage_handle( - oauth_redirect_url, video_id, - self._DOWNLOADING_LOGIN_PAGE) + oauth_redirect_url, video_id, self._DOWNLOADING_LOGIN_PAGE, + headers=self._get_mso_headers(mso_info)) else: provider_login_page_res = post_form( provider_redirect_page_res, @@ -1500,24 +1568,35 @@ def extract_redirect_url(html, url=None, fatal=False): provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) mvpd_confirm_page, urlh = mvpd_confirm_page_res if '' in mvpd_confirm_page: post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Philo': # Philo has very unique authentication method - self._download_webpage( - 'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({ + self._request_webpage( + 'https://idp.philo.com/auth/init/login_code', video_id, + 'Requesting Philo auth code', data=json.dumps({ 'ident': username, 'device': 'web', 'send_confirm_link': False, 'send_token': True, - })) + 'device_ident': f'web-{uuid.uuid4().hex}', + 'include_login_link': True, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + philo_code = getpass.getpass('Type auth code you have received [Return]: ') - self._download_webpage( - 'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({ - 'token': philo_code, - })) + self._request_webpage( + 'https://idp.philo.com/auth/update/login_code', video_id, + 'Submitting token', data=json.dumps({'token': philo_code}).encode(), + headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + }) + mvpd_confirm_page_res = 
self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login') post_form(mvpd_confirm_page_res, 'Confirming Login') elif mso_id == 'Verizon': @@ -1539,7 +1618,7 @@ def extract_redirect_url(html, url=None, fatal=False): provider_redirect_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) saml_login_page, urlh = saml_login_page_res if 'Please try again.' in saml_login_page: raise ExtractorError( @@ -1560,7 +1639,7 @@ def extract_redirect_url(html, url=None, fatal=False): [saml_login_page, saml_redirect_url], 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) if 'Please try again.' in saml_login_page: raise ExtractorError( 'Failed to login, incorrect User ID or Password.') @@ -1631,7 +1710,7 @@ def extract_redirect_url(html, url=None, fatal=False): provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) provider_refresh_redirect_url = extract_redirect_url( provider_association_redirect, url=urlh.url) @@ -1682,7 +1761,7 @@ def extract_redirect_url(html, url=None, fatal=False): provider_login_page_res, 'Logging in', { mso_info['username_field']: username, mso_info['password_field']: password, - }) + }, validate_url=True) provider_refresh_redirect_url = extract_redirect_url( provider_association_redirect, url=urlh.url) @@ -1699,6 +1778,27 @@ def extract_redirect_url(html, url=None, fatal=False): query=hidden_data) post_form(mvpd_confirm_page_res, 'Confirming Login') + elif mso_id == 'Fubo': + _, urlh = provider_redirect_page_res + + fubo_response = self._download_json( + 'https://api.fubo.tv/partners/tve/connect', video_id, + 'Authenticating with Fubo', 'Unable to authenticate with Fubo', + query=parse_qs(urlh.url), data=json.dumps({ + 'username': username, + 'password': password, + }).encode(), 
headers={ + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }) + + self._request_webpage( + 'https://sp.auth.adobe.com/adobe-services/oauth2', video_id, + 'Authenticating with Adobe', 'Failed to authenticate with Adobe', + query={ + 'code': fubo_response['code'], + 'state': fubo_response['state'], + }) else: # Some providers (e.g. DIRECTV NOW) have another meta refresh # based redirect that should be followed. @@ -1717,7 +1817,8 @@ def extract_redirect_url(html, url=None, fatal=False): } if mso_id in ('Cablevision', 'AlticeOne'): form_data['_eventId_proceed'] = '' - mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data) + mvpd_confirm_page_res = post_form( + provider_login_page_res, 'Logging in', form_data, validate_url=True) if mso_id != 'Rogers': post_form(mvpd_confirm_page_res, 'Confirming Login') @@ -1727,6 +1828,7 @@ def extract_redirect_url(html, url=None, fatal=False): 'Retrieving Session', data=urlencode_postdata({ '_method': 'GET', 'requestor_id': requestor_id, + 'reg_code': reg_code, }), headers=mvpd_headers) except ExtractorError as e: if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401: @@ -1734,7 +1836,6 @@ def extract_redirect_url(html, url=None, fatal=False): raise if 'bravotv|oxygen)\.com/(?:[^/]+/)+(?P[^/?#]+)' - _TESTS = [{ - 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is', - 'info_dict': { - 'id': '3923059', - 'ext': 'mp4', - 'title': 'The Top Chef Season 16 Winner Is...', - 'description': 'Find out who takes the title of Top Chef!', - 'upload_date': '20190314', - 'timestamp': 1552591860, - 'season_number': 16, - 'episode_number': 15, - 'series': 'Top Chef', - 'episode': 'The Top Chef Season 16 Winner Is...', - 'duration': 190.357, - 'season': 'Season 16', - 'thumbnail': r're:^https://.+\.jpg', - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 
'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling', - 'info_dict': { - 'id': '9000234570', - 'ext': 'mp4', - 'title': 'London Calling', - 'description': 'md5:5af95a8cbac1856bd10e7562f86bb759', - 'upload_date': '20230310', - 'timestamp': 1678410000, - 'season_number': 20, - 'episode_number': 1, - 'series': 'Top Chef', - 'episode': 'London Calling', - 'duration': 3266.03, - 'season': 'Season 20', - 'chapters': 'count:7', - 'thumbnail': r're:^https://.+\.jpg', - 'age_limit': 14, - }, - 'params': {'skip_download': 'm3u8'}, - 'skip': 'This video requires AdobePass MSO credentials', - }, { - 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night', - 'info_dict': { - 'id': '3692045', - 'ext': 'mp4', - 'title': 'Closing Night', - 'description': 'md5:3170065c5c2f19548d72a4cbc254af63', - 'upload_date': '20180401', - 'timestamp': 1522623600, - 'season_number': 1, - 'episode_number': 1, - 'series': 'In Ice Cold Blood', - 'episode': 'Closing Night', - 'duration': 2629.051, - 'season': 'Season 1', - 'chapters': 'count:6', - 'thumbnail': r're:^https://.+\.jpg', - 'age_limit': 14, - }, - 'params': {'skip_download': 'm3u8'}, - 'skip': 'This video requires AdobePass MSO credentials', - }, { - 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2', - 'info_dict': { - 'id': '3974019', - 'ext': 'mp4', - 'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)', - 'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5', - 'upload_date': '20190617', - 'timestamp': 1560790800, - 'season_number': 2, - 'episode_number': 16, - 'series': 'In Ice Cold Blood', - 'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)', - 'duration': 68.235, - 'season': 'Season 2', - 'thumbnail': r're:^https://.+\.jpg', - 'age_limit': 14, - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 
'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', - 'only_matching': True, - }] - - def _real_extract(self, url): - site, display_id = self._match_valid_url(url).group('site', 'id') - webpage = self._download_webpage(url, display_id) - settings = self._search_json( - r']+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id) - tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '') - query = { - 'manifest': 'm3u', - 'formats': 'm3u,mpeg4', - } - - if tve: - account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC' - account_id = tve['data-mpx-media-account-id'] - metadata = self._parse_json( - tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML) - video_id = tve.get('data-guid') or metadata['guid'] - if tve.get('data-entitlement') == 'auth': - auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {} - site = remove_end(site, 'tv') - release_pid = tve['data-release-pid'] - resource = self._get_mvpd_resource( - tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site, - tve['data-title'], release_pid, tve.get('data-rating')) - query.update({ - 'switch': 'HLSServiceSecure', - 'auth': self._extract_mvpd_auth( - url, release_pid, auth.get('adobePassRequestorId') or site, resource), - }) - - else: - ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {} - account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B' - account_id = ls_playlist['mpxMediaAccountId'] - video_id = ls_playlist['defaultGuid'] - metadata = traverse_obj( - ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False) - - tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}' - tp_metadata = self._download_json( - update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False) - - chapters = traverse_obj(tp_metadata, ('chapters', ..., { - 
'start_time': ('startTime', {float_or_none(scale=1000)}), - 'end_time': ('endTime', {float_or_none(scale=1000)}), - })) - # prune pointless single chapters that span the entire duration from short videos - if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')): - chapters = None - - m3u8_url = self._request_webpage(HEADRequest( - update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url - if 'mpeg_cenc' in m3u8_url: - self.report_drm(video_id) - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') - - return { - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - 'chapters': chapters, - **merge_dicts(traverse_obj(tp_metadata, { - 'title': 'title', - 'description': 'description', - 'duration': ('duration', {float_or_none(scale=1000)}), - 'timestamp': ('pubDate', {float_or_none(scale=1000)}), - 'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}), - 'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}), - 'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}), - 'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}), - 'age_limit': ('ratings', ..., 'rating', {parse_age_limit}), - }, get_all=False), traverse_obj(metadata, { - 'title': 'title', - 'description': 'description', - 'duration': ('durationInSeconds', {int_or_none}), - 'timestamp': ('airDate', {unified_timestamp}), - 'thumbnail': ('thumbnailUrl', {url_or_none}), - 'season_number': ('seasonNumber', {int_or_none}), - 'episode_number': ('episodeNumber', {int_or_none}), - 'episode': 'episodeTitle', - 'series': 'show', - })), - } diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 3ada1fd5d..c0f2f8b57 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -495,8 +495,6 @@ def _real_extract(self, url): class BrightcoveNewBaseIE(AdobePassIE): def 
_parse_brightcove_metadata(self, json_data, video_id, headers={}): - title = json_data['name'].strip() - formats, subtitles = [], {} sources = json_data.get('sources') or [] for source in sources: @@ -600,16 +598,18 @@ def build_format_id(kind): return { 'id': video_id, - 'title': title, - 'description': clean_html(json_data.get('description')), 'thumbnails': thumbnails, 'duration': duration, - 'timestamp': parse_iso8601(json_data.get('published_at')), - 'uploader_id': json_data.get('account_id'), 'formats': formats, 'subtitles': subtitles, - 'tags': json_data.get('tags', []), 'is_live': is_live, + **traverse_obj(json_data, { + 'title': ('name', {clean_html}), + 'description': ('description', {clean_html}), + 'tags': ('tags', ..., {str}, filter, all, filter), + 'timestamp': ('published_at', {parse_iso8601}), + 'uploader_id': ('account_id', {str}), + }), } @@ -645,10 +645,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'uploader_id': '4036320279001', 'formats': 'mincount:39', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'skip': '404 Not Found', }, { # playlist stream 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', @@ -709,7 +706,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'ext': 'mp4', 'title': 'TGD_01-032_5', 'thumbnail': r're:^https?://.*\.jpg$', - 'tags': [], 'timestamp': 1646078943, 'uploader_id': '1569565978001', 'upload_date': '20220228', @@ -721,7 +717,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'ext': 'mp4', 'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5', 'thumbnail': r're:^https?://.*\.jpg$', - 'tags': [], 'timestamp': 1651604591, 'uploader_id': '1569565978001', 'upload_date': '20220503', @@ -923,10 +918,18 @@ def extract_policy_key(): errors = json_data.get('errors') if errors and errors[0].get('error_subcode') == 'TVE_AUTH': custom_fields = json_data['custom_fields'] + missing_fields = ', '.join( + key for key in ('source_url', 'software_statement') if not 
smuggled_data.get(key)) + if missing_fields: + raise ExtractorError( + f'Missing fields in smuggled data: {missing_fields}. ' + f'This video can be only extracted from the webpage where it is embedded. ' + f'Pass the URL of the embedding webpage instead of the Brightcove URL', expected=True) tve_token = self._extract_mvpd_auth( smuggled_data['source_url'], video_id, custom_fields['bcadobepassrequestorid'], - custom_fields['bcadobepassresourceid']) + custom_fields['bcadobepassresourceid'], + smuggled_data['software_statement']) json_data = self._download_json( api_url, video_id, headers={ 'Accept': f'application/json;pk={policy_key}', diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d5607296d..6058f66ae 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -101,6 +101,7 @@ xpath_with_ns, ) from ..utils._utils import _request_dump_filename +from ..utils.jslib import devalue class InfoExtractor: @@ -1675,9 +1676,9 @@ def extract_video_object(e): 'ext': mimetype2ext(e.get('encodingFormat')), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnails': [{'url': unescapeHTML(url)} - for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL')) - if url_or_none(url)], + 'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), { + 'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}), + })), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), # author can be an instance of 'Organization' or 'Person' types. 
@@ -1795,6 +1796,63 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal) return traverse_obj(ret, traverse) or {} + def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT): + """Resolves Nuxt rich JSON payload arrays""" + # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57 + # https://github.com/nuxt/nuxt/pull/19205 + if default is not NO_DEFAULT: + fatal = False + + if not isinstance(array, list) or not array: + error_msg = 'Unable to resolve Nuxt JSON data: invalid input' + if fatal: + raise ExtractorError(error_msg, video_id=video_id) + elif default is NO_DEFAULT: + self.report_warning(error_msg, video_id=video_id) + return {} if default is NO_DEFAULT else default + + def indirect_reviver(data): + return data + + def json_reviver(data): + return json.loads(data) + + gen = devalue.parse_iter(array, revivers={ + 'NuxtError': indirect_reviver, + 'EmptyShallowRef': json_reviver, + 'EmptyRef': json_reviver, + 'ShallowRef': indirect_reviver, + 'ShallowReactive': indirect_reviver, + 'Ref': indirect_reviver, + 'Reactive': indirect_reviver, + }) + + while True: + try: + error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}' + if fatal: + raise ExtractorError(error_msg, video_id=video_id) + elif default is NO_DEFAULT: + self.report_warning(error_msg, video_id=video_id, only_once=True) + else: + self.write_debug(f'{video_id}: {error_msg}', only_once=True) + except StopIteration as error: + return error.value or ({} if default is NO_DEFAULT else default) + + def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT): + """Parses metadata from Nuxt rich JSON payloads embedded in HTML""" + passed_default = default is not NO_DEFAULT + + array = self._search_json( + r']+\bid="__NUXT_DATA__"[^>]*>', webpage, + 'Nuxt JSON data', video_id, 
contains_pattern=r'\[(?s:.+)\]', + fatal=fatal, default=NO_DEFAULT if not passed_default else None) + + if not array: + return default if passed_default else {} + + return self._resolve_nuxt_array(array, video_id, fatal=fatal, default=default) + @staticmethod def _hidden_inputs(html): html = re.sub(r'', '', html) diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index e36eac919..68ace240c 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -206,7 +206,7 @@ def _real_extract(self, url): 'is_live': True, **traverse_obj(room, { 'display_id': ('url', {str}, {lambda i: i[1:]}), - 'title': ('room_name', {unescapeHTML}), + 'title': ('room_name', {str}, {unescapeHTML}), 'description': ('show_details', {str}), 'uploader': ('nickname', {str}), 'thumbnail': ('room_src', {url_or_none}), diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py index edd66e46c..fb8a8e87c 100644 --- a/yt_dlp/extractor/dreisat.py +++ b/yt_dlp/extractor/dreisat.py @@ -64,7 +64,7 @@ class DreiSatIE(ZDFBaseIE): 'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1', 'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844', 'duration': 5399.0, - 'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903', + 'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1747256996338', 'chapters': 'count:24', 'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1', 'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb', diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index 552f9af12..ceba024bc 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -5,7 +5,6 @@ from .adobepass import AdobePassIE from .common import InfoExtractor -from .once import OnceIE from ..utils import ( determine_ext, dict_get, @@ -16,7 +15,7 @@ ) -class ESPNIE(OnceIE): +class ESPNIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: @@ -131,9 +130,7 
@@ def extract_source(source_url, source_id=None): return format_urls.add(source_url) ext = determine_ext(source_url) - if OnceIE.suitable(source_url): - formats.extend(self._extract_once_formats(source_url)) - elif ext == 'smil': + if ext == 'smil': formats.extend(self._extract_smil_formats( source_url, video_id, fatal=False)) elif ext == 'f4m': @@ -332,6 +329,7 @@ class WatchESPNIE(AdobePassIE): }] _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c' + _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyZGJmZWM4My03OWE1LTQyNzEtYTVmZC04NTZjYTMxMjRjNjMiLCJuYmYiOjE1NDAyMTI3NjEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQwMjEyNzYxfQ.yaK3r4AI2uLVvsyN1GLzqzgzRlxMPtasSaiYYBV0wIstqih5tvjTmeoLmi8Xy9Kp_U7Md-bOffwiyK3srHkpUkhhwXLH2x6RPjmS1tPmhaG7-3LBcHTf2ySPvXhVf7cN4ngldawK4tdtLtsw6rF_JoZE2yaC6XbS2F51nXSFEDDnOQWIHEQRG3aYAj-38P2CLGf7g-Yfhbp5cKXeksHHQ90u3eOO4WH0EAjc9oO47h33U8KMEXxJbvjV5J8Va2G2fQSgLDZ013NBI3kQnE313qgqQh2feQILkyCENpB7g-TVBreAjOaH1fU471htSoGGYepcAXv-UDtpgitDiLy7CQ' def _call_bamgrid_api(self, path, video_id, payload=None, headers={}): if 'Authorization' not in headers: @@ -408,8 +406,8 @@ def _real_extract(self, url): # TV Provider required else: - resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None) - auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode() + resource = self._get_mvpd_resource('espn1', video_data['name'], video_id, None) + auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource, self._SOFTWARE_STATEMENT).encode() asset = self._download_json( f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb', diff --git a/yt_dlp/extractor/firsttv.py b/yt_dlp/extractor/firsttv.py index ac7697bb3..878732c49 100644 --- a/yt_dlp/extractor/firsttv.py +++ b/yt_dlp/extractor/firsttv.py @@ -2,11 +2,15 @@ from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, - qualities, + join_nonempty, + mimetype2ext, + 
parse_qs, unified_strdate, url_or_none, ) +from ..utils.traversal import traverse_obj class FirstTVIE(InfoExtractor): @@ -15,40 +19,51 @@ class FirstTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:sport)?1tv\.ru/(?:[^/?#]+/)+(?P[^/?#]+)' _TESTS = [{ - # single format - 'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', - 'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', + # single format; has item.id + 'url': 'https://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015', + 'md5': '8011ae8e88ff4150107ab9c5a8f5b659', 'info_dict': { 'id': '40049', 'ext': 'mp4', 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', - 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'upload_date': '20150212', 'duration': 2694, }, + 'params': {'skip_download': 'm3u8'}, }, { - # multiple formats - 'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', + # multiple formats; has item.id + 'url': 'https://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016', 'info_dict': { 'id': '364746', 'ext': 'mp4', 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', - 'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$', + 'thumbnail': r're:https?://.+/.+\.jpg', 'upload_date': '20160407', 'duration': 179, 'formats': 'mincount:3', }, - 'params': { - 'skip_download': True, - }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00', + 'url': 'https://www.1tv.ru/news/issue/2016-12-01/14:00', 'info_dict': { 'id': '14:00', - 'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал', - 'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd', + 'title': 'Выпуск программы «Время» в 20:00 1 декабря 2016 года. Новости. 
Первый канал', + 'thumbnail': 'https://static.1tv.ru/uploads/photo/image/8/big/338448_big_8fc7eb236f.jpg', }, 'playlist_count': 13, + }, { + # has timestamp; has item.uid but not item.id + 'url': 'https://www.1tv.ru/shows/segodnya-vecherom/vypuski/avtory-odnogo-hita-segodnya-vecherom-vypusk-ot-03-05-2025', + 'info_dict': { + 'id': '270411', + 'ext': 'mp4', + 'title': 'Авторы одного хита. Сегодня вечером. Выпуск от 03.05.2025', + 'thumbnail': r're:https?://.+/.+\.jpg', + 'timestamp': 1746286020, + 'upload_date': '20250503', + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016', 'only_matching': True, @@ -57,96 +72,60 @@ class FirstTVIE(InfoExtractor): 'only_matching': True, }] + def _entries(self, items): + for item in items: + video_id = str(item.get('id') or item['uid']) + + formats, subtitles = [], {} + for f in traverse_obj(item, ('sources', lambda _, v: url_or_none(v['src']))): + src = f['src'] + ext = mimetype2ext(f.get('type'), default=determine_ext(src)) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + src, video_id, 'mp4', m3u8_id='hls', fatal=False) + elif ext == 'mpd': + fmts, subs = self._extract_mpd_formats_and_subtitles( + src, video_id, mpd_id='dash', fatal=False) + else: + tbr = self._search_regex(fr'_(\d{{3,}})\.{ext}', src, 'tbr', default=None) + formats.append({ + 'url': src, + 'ext': ext, + 'format_id': join_nonempty('http', ext, tbr), + 'tbr': int_or_none(tbr), + # quality metadata of http formats may be incorrect + 'quality': -10, + }) + continue + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + yield { + **traverse_obj(item, { + 'title': ('title', {str}), + 'thumbnail': ('poster', {url_or_none}), + 'timestamp': ('dvr_begin_at', {int_or_none}), + 'upload_date': ('date_air', {unified_strdate}), + 'duration': ('duration', 
{int_or_none}), + }), + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + } + def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - playlist_url = urllib.parse.urljoin(url, self._search_regex( + playlist_url = urllib.parse.urljoin(url, self._html_search_regex( r'data-playlist-url=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'playlist url', group='url')) - parsed_url = urllib.parse.urlparse(playlist_url) - qs = urllib.parse.parse_qs(parsed_url.query) - item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]') + item_ids = traverse_obj(parse_qs(playlist_url), 'video_id', 'videos_ids[]', 'news_ids[]') + items = traverse_obj( + self._download_json(playlist_url, display_id), + lambda _, v: v['uid'] and (str(v['uid']) in item_ids if item_ids else True)) - items = self._download_json(playlist_url, display_id) - - if item_ids: - items = [ - item for item in items - if item.get('uid') and str(item['uid']) in item_ids] - else: - items = [items[0]] - - entries = [] - QUALITIES = ('ld', 'sd', 'hd') - - for item in items: - title = item['title'] - quality = qualities(QUALITIES) - formats = [] - path = None - for f in item.get('mbr', []): - src = url_or_none(f.get('src')) - if not src: - continue - tbr = int_or_none(self._search_regex( - r'_(\d{3,})\.mp4', src, 'tbr', default=None)) - if not path: - path = self._search_regex( - r'//[^/]+/(.+?)_\d+\.mp4', src, - 'm3u8 path', default=None) - formats.append({ - 'url': src, - 'format_id': f.get('name'), - 'tbr': tbr, - 'source_preference': quality(f.get('name')), - # quality metadata of http formats may be incorrect - 'preference': -10, - }) - # m3u8 URL format is reverse engineered from [1] (search for - # master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru) - # is taken from [2]. - # 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted - # 2. 
http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834 - if not path and len(formats) == 1: - path = self._search_regex( - r'//[^/]+/(.+?$)', formats[0]['url'], - 'm3u8 path', default=None) - if path: - if len(formats) == 1: - m3u8_path = ',' - else: - tbrs = [str(t) for t in sorted(f['tbr'] for f in formats)] - m3u8_path = '_,{},{}'.format(','.join(tbrs), '.mp4') - formats.extend(self._extract_m3u8_formats( - f'http://balancer-vod.1tv.ru/{path}{m3u8_path}.urlset/master.m3u8', - display_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - - thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) - duration = int_or_none(item.get('duration') or self._html_search_meta( - 'video:duration', webpage, 'video duration', fatal=False)) - upload_date = unified_strdate(self._html_search_meta( - 'ya:ovs:upload_date', webpage, 'upload date', default=None)) - - entries.append({ - 'id': str(item.get('id') or item['uid']), - 'thumbnail': thumbnail, - 'title': title, - 'upload_date': upload_date, - 'duration': int_or_none(duration), - 'formats': formats, - }) - - title = self._html_search_regex( - (r'
\s*

([^<]*)', - r"'title'\s*:\s*'([^']+)'"), - webpage, 'title', default=None) or self._og_search_title( - webpage, default=None) - description = self._html_search_regex( - r'
\s*
 
\s*

([^<]*)

', - webpage, 'description', default=None) or self._html_search_meta( - 'description', webpage, 'description', default=None) - - return self.playlist_result(entries, display_id, title, description) + return self.playlist_result( + self._entries(items), display_id, self._og_search_title(webpage, default=None), + thumbnail=self._og_search_thumbnail(webpage, default=None)) diff --git a/yt_dlp/extractor/gamespot.py b/yt_dlp/extractor/gamespot.py index cd3f9655d..2799a27ba 100644 --- a/yt_dlp/extractor/gamespot.py +++ b/yt_dlp/extractor/gamespot.py @@ -1,9 +1,9 @@ import urllib.parse -from .once import OnceIE +from .common import InfoExtractor -class GameSpotIE(OnceIE): +class GameSpotIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P\d+)' _TESTS = [{ 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index 83c1979db..4e138a828 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -7,161 +7,157 @@ int_or_none, join_nonempty, parse_age_limit, - remove_end, - remove_start, - traverse_obj, - try_get, unified_timestamp, urlencode_postdata, ) +from ..utils.traversal import traverse_obj class GoIE(AdobePassIE): _SITE_INFO = { 'abc': { 'brand': '001', - 'requestor_id': 'ABC', + 'requestor_id': 'dtci', + 'provider_id': 'ABC', + 'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI4OTcwMjlkYS0yYjM1LTQyOWUtYWQ0NS02ZjZiZjVkZTdhOTUiLCJuYmYiOjE2MjAxNzM5NjksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNjIwMTczOTY5fQ.SC69DVJWSL8sIe-vVUrP6xS_kzHKqwz9PdKYexs_y-f7Vin6mM-7S-W1TE_-K55O0pyf-TL4xYgvm6LIye8CckG-nZfVwNPV4huduov0jmIcxCQFeUwkHULG2IaA44wfBVUBdaHgkhPweZ2amjycO_IXtez-gBXOLbE3B7Gx9j_5ISCFtyVUblThKfoGyQv6KT6t8Vpmc4ZSKCCQp74KWFFypydb9ucego1taW_nQD06Cdf4yByLd6NaTBceMcIKbug9b9gxFm3XBgJ5q3z7KGo1Kr6XalAV5j4m-fQ91wczlTilX8FM4AljMupyRM9mA_aEADILQ4hS79q4SM0w6w', }, 'freeform': { 'brand': '002', 'requestor_id': 
'ABCFamily', - }, - 'watchdisneychannel': { - 'brand': '004', - 'resource_id': 'Disney', - }, - 'watchdisneyjunior': { - 'brand': '008', - 'resource_id': 'DisneyJunior', - }, - 'watchdisneyxd': { - 'brand': '009', - 'resource_id': 'DisneyXD', + 'provider_id': 'ABCFamily', + 'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZWM2MGYyNC0xYzRjLTQ1NzQtYjc0Zi03ZmM4N2E5YWMzMzgiLCJuYmYiOjE1ODc2NjU5MjMsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NjY1OTIzfQ.flCn3dhvmvPnWmV0JV8Fm0YFyj07yPez9-n1GFEwVIm_S2wQVWbWyJhqsAyLZVFrhOMZYTqmPS3OHxGwTwXkEYn6PD7o_vIVG3oqi-Xn1m5jRt_Gazw5qEtpat6VE7bvKGSD3ZhcidOrsCk8NcYyq75u61NHDvSl81pcedJjVRVUpsqrEwmo0aVbA0C8PX3ri0mEbGvkMKvHn8E60xp-PSE-VK8SDT0plwPu_TwUszkZ6-_I8_2xcv_WBqcXFkAVg7Q-iNJXgQvmNsrpcrYuLvi6hEH4ZLtoDcXU6MhwTQAJTiHSo8x9aHX1_qFP09CzlNOFQbC2ZEJdP9SvA53SLQ', }, 'disneynow': { - 'brand': '011', + 'brand': '011', # also: '004', '008', '009' + 'requestor_id': 'DisneyChannels', + 'provider_id': 'DisneyChannels', + 'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzAzNTRiOS04NDNiLTRkNjAtYTQ3ZS0yNzk1MzlkOTIyNTciLCJuYmYiOjE1NTg5ODc0NDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTU4OTg3NDQ5fQ.Jud6YS6-J2h0h6po0oMheDym0qRTJQGj4kzacrz4DFuEwhcBkkykW6pF5pKuAUJy9HCZ40oDAHe2KcTlDJjCZF5tDaUEfdihakZ9cC_rG7MU-QoRne8qaB_dPDKwGuk-ZyWD8eV3zwTJmbGo8hDxYTEU81YNCxwhyc_BPDr5TYiubbmpP3_pTnXmSpuL58isJ2peSKWlX9BacuXtBY25c_QnPFKk-_EETm7IHkTpDazde1QfHWGu4s4yJpKGk8RVVujVG6h6ELlL-ZeYLilBm7iS7h1TYG1u7fJhyZRL7isaom6NvAzsvN3ngss1fLwt8decP8wzdFHrbYTdTjW8qw', 'resource_id': 'Disney', }, - 'fxnow.fxnetworks': { - 'brand': '025', + 'fxnetworks': { + 'brand': '025', # also: '020' 'requestor_id': 'dtci', + 'provider_id': 'fx', # also 'fxx', 'fxm' + 'software_statement': 
'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIzYWRhYWZiNC02OTAxLTRlYzktOTdmNy1lYWZkZTJkODJkN2EiLCJuYmYiOjE1NjIwMjQwNzYsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDI0MDc2fQ.dhKMpZK50AObbZYrMiYPSfWtzXHUaeMP3jrIY4Cgfvh0GaEgk0Mns_zp78jypFeZgRtPVleQMQDNq2YEloRLcAGqP1aa6WVDglnK77ZWUm4IKai14Rwf3A6YBhSRoO2_lMmUGkuTf6gZY-kMIPqBYKqzTQiQl4HbniPFodIzFRiuI9QJVrkoyTGrJL4oqiX08PoFI3Z-TOti1Heu3EbFC-GveQHhlinYrzU7rbiAqLEz7FImtfBDsnXX1Y3uJDLYM3Bq4Oh0nrzTv1Fd62wNsCNErHHIbELidh1zZF0ujvt7ReuZUwAitm0UhEJ7OxNOUbEQWtae6pVNscvdvTFMpg', + }, + 'nationalgeographic': { + 'brand': '026', # also '023' + 'requestor_id': 'dtci', + 'provider_id': 'ngc', # also 'ngw' + 'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxMzE4YTM1Ni05Mjc4LTQ4NjEtYTFmNi1jMTIzMzg1ZWMzYzMiLCJuYmYiOjE1NjIwMjM4MjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDIzODI4fQ.Le-2OzF9-jrhJ7ZfWtLWk5iSHGVZoxeU1w0_fO--Heli0OwRZsRq2slSmx-oZTzxuWmAgDEiBkWSDcDK6sM25DrCLsdsJa3MBuZ-slBRtH8aq3HpNoqqLkU-vg6gRUEKMtwBUtwCu_9aKUCayYtndWv4b1DjVQeSrteOW5NNudWVYleAe0kxeNJQHo5If9SCzDudKVJktFUjhNks4QPOC_uONPkRRlL9D0fNvtOY-LRFckfcHhf5z9l1iZjeukV0YhdKnuw1wyiaWrQXBUDiBfbkCRd2DM-KnelqPxfiXCaTjGKDURRBO3pz33ebge3IFXSiU5vl4qHQ8xvunzGpFw', }, } - _VALID_URL = r'''(?x) - https?:// - (?P - (?:{}\.)?go|fxnow\.fxnetworks| - (?:www\.)?(?:abc|freeform|disneynow) - )\.com/ - (?: - (?:[^/]+/)*(?P[Vv][Dd][Kk][Aa]\w+)| - (?:[^/]+/)*(?P[^/?\#]+) - ) - '''.format(r'\.|'.join(list(_SITE_INFO.keys()))) + _URL_PATH_RE = r'(?:video|episode|movies-and-specials)/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' + _VALID_URL = [ + fr'https?://(?:www\.)?(?Pabc)\.com/{_URL_PATH_RE}', + fr'https?://(?:www\.)?(?Pfreeform)\.com/{_URL_PATH_RE}', + fr'https?://(?:www\.)?(?Pdisneynow)\.com/{_URL_PATH_RE}', + fr'https?://fxnow\.(?Pfxnetworks)\.com/{_URL_PATH_RE}', + fr'https?://(?:www\.)?(?Pnationalgeographic)\.com/tv/{_URL_PATH_RE}', + ] _TESTS = [{ - 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', + 'url': 
'https://abc.com/episode/4192c0e6-26e5-47a8-817b-ce8272b9e440/playlist/PL551127435', 'info_dict': { - 'id': 'VDKA3807643', + 'id': 'VDKA10805898', 'ext': 'mp4', - 'title': 'The Traitor in the White House', - 'description': 'md5:05b009d2d145a1e85d25111bd37222e8', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'This content is no longer available.', - }, { - 'url': 'https://disneynow.com/shows/big-hero-6-the-series', - 'info_dict': { - 'title': 'Doraemon', - 'id': 'SH55574025', - }, - 'playlist_mincount': 51, - }, { - 'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood', - 'info_dict': { - 'id': 'VDKA3609139', - 'title': 'This Guilty Blood', - 'description': 'md5:f18e79ad1c613798d95fdabfe96cd292', + 'title': 'Switch the Flip', + 'description': 'To help get Brian’s life in order, Stewie and Brian swap bodies using a machine that Stewie invents.', 'age_limit': 14, + 'duration': 1297, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'Family Guy', + 'season': 'Season 16', + 'season_number': 16, + 'episode': 'Episode 17', + 'episode_number': 17, + 'timestamp': 1746082800.0, + 'upload_date': '20250501', + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'This video requires AdobePass MSO credentials', + }, { + 'url': 'https://disneynow.com/episode/21029660-ba06-4406-adb0-a9a78f6e265e/playlist/PL553044961', + 'info_dict': { + 'id': 'VDKA39546942', + 'ext': 'mp4', + 'title': 'Zero Friends Again', + 'description': 'Relationships fray under the pressures of a difficult journey.', + 'age_limit': 0, + 'duration': 1721, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'Star Wars: Skeleton Crew', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 6', + 'episode_number': 6, + 'timestamp': 1746946800.0, + 'upload_date': '20250511', + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'This video requires AdobePass MSO credentials', + }, { + 'url': 
'https://fxnow.fxnetworks.com/episode/09f4fa6f-c293-469e-aebe-32c9ca5842a7/playlist/PL554408064', + 'info_dict': { + 'id': 'VDKA38112033', + 'ext': 'mp4', + 'title': 'The Return of Jerry', + 'description': 'The vampires’ long-lost fifth roommate returns. Written by Paul Simms; directed by Kyle Newacheck.', + 'age_limit': 17, + 'duration': 1493, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'What We Do in the Shadows', + 'season': 'Season 6', + 'season_number': 6, 'episode': 'Episode 1', - 'upload_date': '20170102', - 'season': 'Season 2', - 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abcf/Shadowhunters/video/201/ae5f75608d86bf88aa4f9f4aa76ab1b7/579x325-Q100_ae5f75608d86bf88aa4f9f4aa76ab1b7.jpg', - 'duration': 2544, - 'season_number': 2, - 'series': 'Shadowhunters', 'episode_number': 1, - 'timestamp': 1483387200, - 'ext': 'mp4', - }, - 'params': { - 'geo_bypass_ip_block': '3.244.239.0/24', - # m3u8 download - 'skip_download': True, + 'timestamp': 1729573200.0, + 'upload_date': '20241022', }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'This video requires AdobePass MSO credentials', }, { - 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-04/12-the-knock', + 'url': 'https://www.freeform.com/episode/bda0eaf7-761a-4838-aa44-96f794000844/playlist/PL553044961', 'info_dict': { - 'id': 'VDKA26050359', - 'title': 'The Knock', - 'description': 'md5:0c2947e3ada4c31f28296db7db14aa64', - 'age_limit': 14, + 'id': 'VDKA39007340', 'ext': 'mp4', - 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abc/TheRookie/video/412/daf830d06e83b11eaf5c0a299d993ae3/1556x876-Q75_daf830d06e83b11eaf5c0a299d993ae3.jpg', - 'episode': 'Episode 12', - 'season_number': 4, - 'season': 'Season 4', - 'timestamp': 1642975200, - 'episode_number': 12, - 'upload_date': '20220123', - 'series': 'The Rookie', - 'duration': 2572, - }, - 'params': { - 'geo_bypass_ip_block': '3.244.239.0/24', - # m3u8 download - 'skip_download': True, + 'title': 'Angel\'s Landing', + 'description': 
'md5:91bf084e785c968fab16734df7313446', + 'age_limit': 14, + 'duration': 2523, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'How I Escaped My Cult', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 2', + 'episode_number': 2, + 'timestamp': 1740038400.0, + 'upload_date': '20250220', }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841', + 'url': 'https://www.nationalgeographic.com/tv/episode/ca694661-1186-41ae-8089-82f64d69b16d/playlist/PL554408064', 'info_dict': { - 'id': 'VDKA12782841', - 'title': 'First Look: Better Things - Season 2', - 'description': 'md5:fa73584a95761c605d9d54904e35b407', + 'id': 'VDKA39492078', 'ext': 'mp4', - 'age_limit': 14, - 'upload_date': '20170825', - 'duration': 161, - 'series': 'Better Things', - 'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/fx/BetterThings/video/12782841/b6b05e58264121cc2c98811318e6d507/1556x876-Q75_b6b05e58264121cc2c98811318e6d507.jpg', - 'timestamp': 1503661074, - }, - 'params': { - 'geo_bypass_ip_block': '3.244.239.0/24', - # m3u8 download - 'skip_download': True, + 'title': 'Heart of the Emperors', + 'description': 'md5:4fc50a2878f030bb3a7eac9124dca677', + 'age_limit': 0, + 'duration': 2775, + 'thumbnail': r're:https?://.+/.+\.jpg', + 'series': 'Secrets of the Penguins', + 'season': 'Season 1', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + 'timestamp': 1745204400.0, + 'upload_date': '20250421', }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', + 'url': 'https://www.freeform.com/movies-and-specials/c38281fc-9f8f-47c7-8220-22394f9df2e1', 'only_matching': True, }, { - 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland', - 'only_matching': True, - }, { - # brand 004 - 'url': 
'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915', - 'only_matching': True, - }, { - # brand 008 - 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', - 'only_matching': True, - }, { - 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013', - 'only_matching': True, - }, { - 'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner', + 'url': 'https://abc.com/video/219a454a-172c-41bf-878a-d169e6bc0bdc/playlist/PL5523098420', 'only_matching': True, }] @@ -171,58 +167,29 @@ def _extract_videos(self, brand, video_id='-1', show_id='-1'): f'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/{brand}/001/-1/{show_id}/-1/{video_id}/-1/-1.json', display_id)['video'] + def _extract_global_var(self, name, webpage, video_id): + return self._search_json( + fr'window\[["\']{re.escape(name)}["\']\]\s*=', + webpage, f'{name.strip("_")} JSON', video_id) + def _real_extract(self, url): - mobj = self._match_valid_url(url) - sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.') - video_id, display_id = mobj.group('id', 'display_id') - site_info = self._SITE_INFO.get(sub_domain, {}) - brand = site_info.get('brand') - if not video_id or not site_info: - webpage = self._download_webpage(url, display_id or video_id) - data = self._parse_json( - self._search_regex( - r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage, - 'data', default='{}'), - display_id or video_id, fatal=False) - # https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot - layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict) - video_id = None - if layout: - video_id = try_get( - layout, - (lambda x: x['videoid'], lambda x: x['video']['id']), - str) - if not video_id: - video_id = self._search_regex( - ( - # There may be inner quotes, e.g. 
data-video-id="'VDKA3609139'" - # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood - r'data-video-id=["\']*(VDKA\w+)', - # page.analytics.videoIdCode - r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)', - # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet - r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)', - ), webpage, 'video id', default=video_id) - if not site_info: - brand = self._search_regex( - (r'data-brand=\s*["\']\s*(\d+)', - r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand', - default='004') - site_info = next( - si for _, si in self._SITE_INFO.items() - if si.get('brand') == brand) - if not video_id: - # show extraction works for Disney, DisneyJunior and DisneyXD - # ABC and Freeform has different layout - show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id') - videos = self._extract_videos(brand, show_id=show_id) - show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False) - entries = [] - for video in videos: - entries.append(self.url_result( - video['url'], 'Go', video.get('id'), video.get('title'))) - entries.reverse() - return self.playlist_result(entries, show_id, show_title) + site, display_id = self._match_valid_url(url).group('site', 'id') + webpage = self._download_webpage(url, display_id) + config = self._extract_global_var('__CONFIG__', webpage, display_id) + data = self._extract_global_var(config['globalVar'], webpage, display_id) + video_id = traverse_obj(data, ( + 'page', 'content', 'video', 'layout', (('video', 'id'), 'videoid'), {str}, any)) + if not video_id: + video_id = self._search_regex([ + # data-track-video_id="VDKA39492078" + # data-track-video_id_code="vdka39492078" + # data-video-id="'VDKA3609139'" + r'data-(?:track-)?video[_-]id(?:_code)?=["\']*((?:vdka|VDKA)\d+)', + # page.analytics.videoIdCode + r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\d+)'], webpage, 'video ID') + + site_info = 
self._SITE_INFO[site] + brand = site_info['brand'] video_data = self._extract_videos(brand, video_id)[0] video_id = video_data['id'] title = video_data['title'] @@ -238,26 +205,31 @@ def _real_extract(self, url): if ext == 'm3u8': video_type = video_data.get('type') data = { - 'video_id': video_data['id'], + 'video_id': video_id, 'video_type': video_type, 'brand': brand, 'device': '001', + 'app_name': 'webplayer-abc', } if video_data.get('accesslevel') == '1': - requestor_id = site_info.get('requestor_id', 'DisneyChannels') + provider_id = site_info['provider_id'] + software_statement = traverse_obj(data, ('app', 'config', ( + ('features', 'auth', 'softwareStatement'), + ('tvAuth', 'SOFTWARE_STATEMENTS', 'PRODUCTION'), + ), {str}, any)) or site_info['software_statement'] resource = site_info.get('resource_id') or self._get_mvpd_resource( - requestor_id, title, video_id, None) + provider_id, title, video_id, None) auth = self._extract_mvpd_auth( - url, video_id, requestor_id, resource) + url, video_id, site_info['requestor_id'], resource, software_statement) data.update({ 'token': auth, 'token_type': 'ap', - 'adobe_requestor_id': requestor_id, + 'adobe_requestor_id': provider_id, }) else: self._initialize_geo_bypass({'countries': ['US']}) entitlement = self._download_json( - 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', + 'https://prod.gatekeeper.us-abc.symphony.edgedatg.go.com/vp2/ws-secure/entitlement/2020/playmanifest_secure.json', video_id, data=urlencode_postdata(data)) errors = entitlement.get('errors', {}).get('errors', []) if errors: @@ -267,7 +239,7 @@ def _real_extract(self, url): error['message'], countries=['US']) error_message = ', '.join([error['message'] for error in errors]) raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True) - asset_url += '?' + entitlement['uplynkData']['sessionKey'] + asset_url += '?' 
+ entitlement['entitlement']['uplynkData']['sessionKey'] fmts, subs = self._extract_m3u8_formats_and_subtitles( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False) formats.extend(fmts) diff --git a/yt_dlp/extractor/hypergryph.py b/yt_dlp/extractor/hypergryph.py index 1fb2e9a98..f405d14b5 100644 --- a/yt_dlp/extractor/hypergryph.py +++ b/yt_dlp/extractor/hypergryph.py @@ -1,32 +1,66 @@ from .common import InfoExtractor -from ..utils import js_to_json, traverse_obj +from ..utils import ( + ExtractorError, + clean_html, + url_or_none, +) +from ..utils.traversal import subs_list_to_dict, traverse_obj class MonsterSirenHypergryphMusicIE(InfoExtractor): + IE_NAME = 'monstersiren' + IE_DESC = '塞壬唱片' + _API_BASE = 'https://monster-siren.hypergryph.com/api' _VALID_URL = r'https?://monster-siren\.hypergryph\.com/music/(?P\d+)' _TESTS = [{ 'url': 'https://monster-siren.hypergryph.com/music/514562', 'info_dict': { 'id': '514562', 'ext': 'wav', - 'artists': ['塞壬唱片-MSR'], - 'album': 'Flame Shadow', 'title': 'Flame Shadow', + 'album': 'Flame Shadow', + 'artists': ['塞壬唱片-MSR'], + 'description': 'md5:19e2acfcd1b65b41b29e8079ab948053', + 'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg', + }, + }, { + 'url': 'https://monster-siren.hypergryph.com/music/514518', + 'info_dict': { + 'id': '514518', + 'ext': 'wav', + 'title': 'Heavenly Me (Instrumental)', + 'album': 'Heavenly Me', + 'artists': ['塞壬唱片-MSR', 'AIYUE blessed : 理名'], + 'description': 'md5:ce790b41c932d1ad72eb791d1d8ae598', + 'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg', }, }] def _real_extract(self, url): audio_id = self._match_id(url) - webpage = self._download_webpage(url, audio_id) - json_data = self._search_json( - r'window\.g_initialProps\s*=', webpage, 'data', audio_id, transform_source=js_to_json) + song = self._download_json(f'{self._API_BASE}/song/{audio_id}', audio_id) + if traverse_obj(song, 'code') != 0: + msg = traverse_obj(song, ('msg', {str}, filter)) + raise 
ExtractorError( + msg or 'API returned an error response', expected=bool(msg)) + + album = None + if album_id := traverse_obj(song, ('data', 'albumCid', {str})): + album = self._download_json( + f'{self._API_BASE}/album/{album_id}/detail', album_id, fatal=False) return { 'id': audio_id, - 'title': traverse_obj(json_data, ('player', 'songDetail', 'name')), - 'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')), - 'ext': 'wav', 'vcodec': 'none', - 'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)), - 'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')), + **traverse_obj(song, ('data', { + 'title': ('name', {str}), + 'artists': ('artists', ..., {str}), + 'subtitles': ({'url': 'lyricUrl'}, all, {subs_list_to_dict(lang='en')}), + 'url': ('sourceUrl', {url_or_none}), + })), + **traverse_obj(album, ('data', { + 'album': ('name', {str}), + 'description': ('intro', {clean_html}), + 'thumbnail': ('coverUrl', {url_or_none}), + })), } diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index 9b91a454b..bf2344f72 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -1,3 +1,4 @@ +import json import re import time @@ -6,9 +7,7 @@ ExtractorError, determine_ext, js_to_json, - parse_qs, traverse_obj, - urlencode_postdata, ) @@ -16,7 +15,6 @@ class IPrimaIE(InfoExtractor): _VALID_URL = r'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P[^/?#&]+)' _GEO_BYPASS = False _NETRC_MACHINE = 'iprima' - _AUTH_ROOT = 'https://auth.iprima.cz' access_token = None _TESTS = [{ @@ -86,48 +84,18 @@ def _perform_login(self, username, password): if self.access_token: return - login_page = self._download_webpage( - f'{self._AUTH_ROOT}/oauth2/login', None, note='Downloading login page', - errnote='Downloading login page failed') - - login_form = self._hidden_inputs(login_page) - - login_form.update({ - '_email': username, - '_password': password}) - - profile_select_html, login_handle = 
self._download_webpage_handle( - f'{self._AUTH_ROOT}/oauth2/login', None, data=urlencode_postdata(login_form), - note='Logging in') - - # a profile may need to be selected first, even when there is only a single one - if '/profile-select' in login_handle.url: - profile_id = self._search_regex( - r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id') - - login_handle = self._request_webpage( - f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None, - query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile') - - code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0)) - if not code: - raise ExtractorError('Login failed', expected=True) - - token_request_data = { - 'scope': 'openid+email+profile+phone+address+offline_access', - 'client_id': 'prima_sso', - 'grant_type': 'authorization_code', - 'code': code, - 'redirect_uri': f'{self._AUTH_ROOT}/sso/auth-check'} - token_data = self._download_json( - f'{self._AUTH_ROOT}/oauth2/token', None, - note='Downloading token', errnote='Downloading token failed', - data=urlencode_postdata(token_request_data)) + 'https://ucet.iprima.cz/api/session/create', None, + note='Logging in', errnote='Failed to log in', + data=json.dumps({ + 'email': username, + 'password': password, + 'deviceName': 'Windows Chrome', + }).encode(), headers={'content-type': 'application/json'}) - self.access_token = token_data.get('access_token') - if self.access_token is None: - raise ExtractorError('Getting token failed', expected=True) + self.access_token = token_data['accessToken']['value'] + if not self.access_token: + raise ExtractorError('Failed to fetch access token') def _real_initialize(self): if not self.access_token: diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py index e50194f88..caff9125e 100644 --- a/yt_dlp/extractor/lrt.py +++ b/yt_dlp/extractor/lrt.py @@ -2,7 +2,6 @@ from ..utils import ( clean_html, merge_dicts, - str_or_none, traverse_obj, unified_timestamp, 
url_or_none, @@ -138,13 +137,15 @@ def _real_extract(self, url): 'https://www.lrt.lt/radioteka/api/media', video_id, query={'url': f'/mediateka/irasas/{video_id}/{path}'}) - return traverse_obj(media, { - 'id': ('id', {int}, {str_or_none}), - 'title': ('title', {str}), - 'tags': ('tags', ..., 'name', {str}), - 'categories': ('playlist_item', 'category', {str}, filter, all, filter), - 'description': ('content', {clean_html}, {str}), - 'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}), - 'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}), - 'formats': ('playlist_item', 'file', {lambda x: self._extract_m3u8_formats(x, video_id)}), - }) + return { + 'id': video_id, + 'formats': self._extract_m3u8_formats(media['playlist_item']['file'], video_id), + **traverse_obj(media, { + 'title': ('title', {str}), + 'tags': ('tags', ..., 'name', {str}), + 'categories': ('playlist_item', 'category', {str}, filter, all, filter), + 'description': ('content', {clean_html}, {str}), + 'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}), + 'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}), + }), + } diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index 55fa83b51..0dded38c6 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -1,7 +1,5 @@ from .telecinco import TelecincoBaseIE -from ..networking.exceptions import HTTPError from ..utils import ( - ExtractorError, int_or_none, parse_iso8601, ) @@ -81,17 +79,7 @@ class MiTeleIE(TelecincoBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - - try: # yt-dlp's default user-agents are too old and blocked by akamai - webpage = self._download_webpage(url, display_id, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0', - }) - except ExtractorError as e: - if not isinstance(e.cause, HTTPError) or e.cause.status != 403: - raise - # Retry with 
impersonation if hardcoded UA is insufficient to bypass akamai - webpage = self._download_webpage(url, display_id, impersonate=True) - + webpage = self._download_akamai_webpage(url, display_id) pre_player = self._search_json( r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', webpage, 'Pre Player', display_id)['prePlayer'] diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index 91ae1d14c..c87046d8e 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -19,7 +19,8 @@ class NBACVPBaseIE(TurnerBaseIE): def _extract_nba_cvp_info(self, path, video_id, fatal=False): return self._extract_cvp_info( - f'http://secure.nba.com/{path}', video_id, { + # XXX: The 3rd argument (None) needs to be the AdobePass software_statement + f'http://secure.nba.com/{path}', video_id, None, { 'default': { 'media_src': 'http://nba.cdn.turner.com/nba/big', }, @@ -94,6 +95,7 @@ def _extract_video(self, filter_key, filter_value): class NBAWatchEmbedIE(NBAWatchBaseIE): + _WORKING = False IE_NAME = 'nba:watch:embed' _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P\d+)' _TESTS = [{ @@ -115,6 +117,7 @@ def _real_extract(self, url): class NBAWatchIE(NBAWatchBaseIE): + _WORKING = False IE_NAME = 'nba:watch' _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ @@ -167,6 +170,7 @@ def _real_extract(self, url): class NBAWatchCollectionIE(NBAWatchBaseIE): + _WORKING = False IE_NAME = 'nba:watch:collection' _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P[^/?#&]+)' _TESTS = [{ @@ -336,6 +340,7 @@ def _real_extract(self, url): class NBAEmbedIE(NBABaseIE): + _WORKING = False IE_NAME = 'nba:embed' _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P[^?#&]+)' _TESTS = [{ @@ -358,6 +363,7 @@ def _real_extract(self, url): class NBAIE(NBABaseIE): + _WORKING = False IE_NAME = 'nba' _VALID_URL = NBABaseIE._VALID_URL_BASE + 
f'(?!{NBABaseIE._CHANNEL_PATH_REGEX})video/(?P(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ @@ -385,6 +391,7 @@ def _extract_url_results(self, team, content_id): class NBAChannelIE(NBABaseIE): + _WORKING = False IE_NAME = 'nba:channel' _VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?:{NBABaseIE._CHANNEL_PATH_REGEX})/(?P[^/?#&]+)' _TESTS = [{ diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index d9aded09e..bd4862bde 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -6,7 +6,7 @@ from .adobepass import AdobePassIE from .common import InfoExtractor -from .theplatform import ThePlatformIE, default_ns +from .theplatform import ThePlatformBaseIE, ThePlatformIE, default_ns from ..networking import HEADRequest from ..utils import ( ExtractorError, @@ -14,26 +14,130 @@ UserNotLive, clean_html, determine_ext, + extract_attributes, float_or_none, + get_element_html_by_class, int_or_none, join_nonempty, + make_archive_id, mimetype2ext, parse_age_limit, parse_duration, + parse_iso8601, remove_end, - smuggle_url, - traverse_obj, try_get, unescapeHTML, unified_timestamp, update_url_query, url_basename, + url_or_none, ) +from ..utils.traversal import require, traverse_obj -class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE - _VALID_URL = r'https?(?P://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P(?:NBCE|n)?\d+))' +class NBCUniversalBaseIE(ThePlatformBaseIE): + _GEO_COUNTRIES = ['US'] + _GEO_BYPASS = False + _M3U8_RE = r'https?://[^/?#]+/prod/[\w-]+/(?P[^?#]+/)cmaf/mpeg_(?:cbcs|cenc)\w*/master_cmaf\w*\.m3u8' + def _download_nbcu_smil_and_extract_m3u8_url(self, tp_path, video_id, query): + smil = self._download_xml( + f'https://link.theplatform.com/s/{tp_path}', video_id, + 'Downloading SMIL manifest', 'Failed to download SMIL manifest', query={ + **query, + 'format': 'SMIL', # XXX: Do not confuse "format" with "formats" + 'manifest': 'm3u', + 'switch': 'HLSServiceSecure', # Or else we get broken mp4 http URLs instead of HLS + }, 
headers=self.geo_verification_headers()) + + ns = f'//{{{default_ns}}}' + if url := traverse_obj(smil, (f'{ns}video/@src', lambda _, v: determine_ext(v) == 'm3u8', any)): + return url + + exc = traverse_obj(smil, (f'{ns}param', lambda _, v: v.get('name') == 'exception', '@value', any)) + if exc == 'GeoLocationBlocked': + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + raise ExtractorError(traverse_obj(smil, (f'{ns}ref/@abstract', ..., any)), expected=exc == 'Expired') + + def _extract_nbcu_formats_and_subtitles(self, tp_path, video_id, query): + # formats='mpeg4' will return either a working m3u8 URL or an m3u8 template for non-DRM HLS + # formats='m3u+none,mpeg4' may return DRM HLS but w/the "folders" needed for non-DRM template + query['formats'] = 'm3u+none,mpeg4' + m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query) + + if mobj := re.fullmatch(self._M3U8_RE, m3u8_url): + query['formats'] = 'mpeg4' + m3u8_tmpl = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query) + # Example: https://vod-lf-oneapp-prd.akamaized.net/prod/video/{folders}master_hls.m3u8 + if '{folders}' in m3u8_tmpl: + self.write_debug('Found m3u8 URL template, formatting URL path') + m3u8_url = m3u8_tmpl.format(folders=mobj.group('folders')) + + if '/mpeg_cenc' in m3u8_url or '/mpeg_cbcs' in m3u8_url: + self.report_drm(video_id) + + return self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') + + def _extract_nbcu_video(self, url, display_id, old_ie_key=None): + webpage = self._download_webpage(url, display_id) + settings = self._search_json( + r']+data-drupal-selector="drupal-settings-json"[^>]*>', + webpage, 'settings', display_id) + + query = {} + tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '') + if tve: + account_pid = tve.get('data-mpx-media-account-pid') or tve['data-mpx-account-pid'] + account_id = tve['data-mpx-media-account-id'] + metadata = self._parse_json( 
+ tve.get('data-normalized-video') or '', display_id, fatal=False, transform_source=unescapeHTML) + video_id = tve.get('data-guid') or metadata['guid'] + if tve.get('data-entitlement') == 'auth': + auth = settings['tve_adobe_auth'] + release_pid = tve['data-release-pid'] + resource = self._get_mvpd_resource( + tve.get('data-adobe-pass-resource-id') or auth['adobePassResourceId'], + tve['data-title'], release_pid, tve.get('data-rating')) + query['auth'] = self._extract_mvpd_auth( + url, release_pid, auth['adobePassRequestorId'], + resource, auth['adobePassSoftwareStatement']) + else: + ls_playlist = traverse_obj(settings, ( + 'ls_playlist', lambda _, v: v['defaultGuid'], any, {require('LS playlist')})) + video_id = ls_playlist['defaultGuid'] + account_pid = ls_playlist.get('mpxMediaAccountPid') or ls_playlist['mpxAccountPid'] + account_id = ls_playlist['mpxMediaAccountId'] + metadata = traverse_obj(ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, any)) or {} + + tp_path = f'{account_pid}/media/guid/{account_id}/{video_id}' + formats, subtitles = self._extract_nbcu_formats_and_subtitles(tp_path, video_id, query) + tp_metadata = self._download_theplatform_metadata(tp_path, video_id, fatal=False) + parsed_info = self._parse_theplatform_metadata(tp_metadata) + self._merge_subtitles(parsed_info['subtitles'], target=subtitles) + + return { + **parsed_info, + **traverse_obj(metadata, { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'duration': ('durationInSeconds', {int_or_none}), + 'timestamp': ('airDate', {parse_iso8601}), + 'thumbnail': ('thumbnailUrl', {url_or_none}), + 'season_number': ('seasonNumber', {int_or_none}), + 'episode_number': ('episodeNumber', {int_or_none}), + 'episode': ('episodeTitle', {str}), + 'series': ('show', {str}), + }), + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, + '_old_archive_ids': [make_archive_id(old_ie_key, video_id)] if old_ie_key else None, + } + + 
+class NBCIE(NBCUniversalBaseIE): + _VALID_URL = r'https?(?P://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/?#]+/video/[^/?#]+/(?P\w+))' _TESTS = [ { 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237', @@ -49,47 +153,20 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'episode_number': 86, 'season': 'Season 2', 'season_number': 2, - 'series': 'Tonight Show: Jimmy Fallon', - 'duration': 237.0, - 'chapters': 'count:1', - 'tags': 'count:4', + 'series': 'Tonight', + 'duration': 236.504, + 'tags': 'count:2', 'thumbnail': r're:https?://.+\.jpg', 'categories': ['Series/The Tonight Show Starring Jimmy Fallon'], 'media_type': 'Full Episode', + 'age_limit': 14, + '_old_archive_ids': ['theplatform 2848237'], }, 'params': { 'skip_download': 'm3u8', }, }, { - 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821', - 'info_dict': { - 'id': '2832821', - 'ext': 'mp4', - 'title': 'Star Wars Teaser', - 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442', - 'timestamp': 1417852800, - 'upload_date': '20141206', - 'uploader': 'NBCU-COM', - }, - 'skip': 'page not found', - }, - { - # HLS streams requires the 'hdnea3' cookie - 'url': 'http://www.nbc.com/Kings/video/goliath/n1806', - 'info_dict': { - 'id': '101528f5a9e8127b107e98c5e6ce4638', - 'ext': 'mp4', - 'title': 'Goliath', - 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.', - 'timestamp': 1237100400, - 'upload_date': '20090315', - 'uploader': 'NBCU-COM', - }, - 'skip': 'page not found', - }, - { - # manifest url does not have extension 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439', 'info_dict': { 'id': '3646439', @@ -99,48 +176,47 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'episode_number': 1, 'season': 'Season 75', 
'season_number': 75, - 'series': 'The Golden Globe Awards', + 'series': 'Golden Globes', 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.', 'uploader': 'NBCU-COM', 'upload_date': '20180107', 'timestamp': 1515312000, - 'duration': 570.0, + 'duration': 569.703, 'tags': 'count:8', 'thumbnail': r're:https?://.+\.jpg', - 'chapters': 'count:1', + 'media_type': 'Highlight', + 'age_limit': 0, + 'categories': ['Series/The Golden Globe Awards'], + '_old_archive_ids': ['theplatform 3646439'], }, 'params': { 'skip_download': 'm3u8', }, }, { - # new video_id format - 'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978', + # Needs to be extracted from webpage instead of GraphQL + 'url': 'https://www.nbc.com/paris2024/video/ali-truwit-found-purpose-pool-after-her-life-changed/para24_sww_alitruwittodayshow_240823', 'info_dict': { - 'id': 'NBCE125189978', + 'id': 'para24_sww_alitruwittodayshow_240823', 'ext': 'mp4', - 'title': 'Ben\'s First Leap | NBC\'s Quantum Leap', - 'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e', - 'uploader': 'NBCU-COM', - 'series': 'Quantum Leap', - 'season': 'Season 1', - 'season_number': 1, - 'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap', - 'episode_number': 1, - 'duration': 170.171, - 'chapters': [], - 'timestamp': 1663956155, - 'upload_date': '20220923', - 'tags': 'count:10', - 'age_limit': 0, + 'title': 'Ali Truwit found purpose in the pool after her life changed', + 'description': 'md5:c16d7489e1516593de1cc5d3f39b9bdb', + 'uploader': 'NBCU-SPORTS', + 'duration': 311.077, 'thumbnail': r're:https?://.+\.jpg', - 'categories': ['Series/Quantum Leap 2022'], - 'media_type': 'Highlight', + 'episode': 'Ali Truwit found purpose in the pool after her life changed', + 'timestamp': 1724435902.0, + 'upload_date': '20240823', + '_old_archive_ids': ['theplatform para24_sww_alitruwittodayshow_240823'], }, 'params': { 'skip_download': 'm3u8', }, }, + { + 
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978', + 'only_matching': True, + }, { 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310', 'only_matching': True, @@ -151,6 +227,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'only_matching': True, }, ] + _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1Yzg2YjdkYy04NDI3LTRjNDUtOGQwZi1iNDkzYmE3MmQwYjQiLCJuYmYiOjE1Nzg3MDM2MzEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTc4NzAzNjMxfQ.QQKIsBhAjGQTMdAqRTqhcz2Cddr4Y2hEjnSiOeKKki4nLrkDOsjQMmqeTR0hSRarraxH54wBgLvsxI7LHwKMvr7G8QpynNAxylHlQD3yhN9tFhxt4KR5wW3as02B-W2TznK9bhNWPKIyHND95Uo2Mi6rEQoq8tM9O09WPWaanE5BX_-r6Llr6dPq5F0Lpx2QOn2xYRb1T4nFxdFTNoss8GBds8OvChTiKpXMLHegLTc1OS4H_1a8tO_37jDwSdJuZ8iTyRLV4kZ2cpL6OL5JPMObD4-HQiec_dfcYgMKPiIfP9ZqdXpec2SVaCLsWEk86ZYvD97hLIQrK5rrKd1y-A' def _real_extract(self, url): permalink, video_id = self._match_valid_url(url).groups() @@ -196,62 +273,50 @@ def _real_extract(self, url): 'userId': '0', }), })['data']['bonanzaPage']['metadata'] - query = { - 'mbr': 'true', - 'manifest': 'm3u', - 'switch': 'HLSServiceSecure', - } + + if not video_data: + # Some videos are not available via GraphQL API + webpage = self._download_webpage(url, video_id) + video_data = self._search_json( + r'