diff --git a/.github/workflows/signature-tests.yml b/.github/workflows/signature-tests.yml new file mode 100644 index 000000000..203172e0b --- /dev/null +++ b/.github/workflows/signature-tests.yml @@ -0,0 +1,41 @@ +name: Signature Tests +on: + push: + paths: + - .github/workflows/signature-tests.yml + - test/test_youtube_signature.py + - yt_dlp/jsinterp.py + pull_request: + paths: + - .github/workflows/signature-tests.yml + - test/test_youtube_signature.py + - yt_dlp/jsinterp.py +permissions: + contents: read + +concurrency: + group: signature-tests-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + +jobs: + tests: + name: Signature Tests + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install test requirements + run: python3 ./devscripts/install_deps.py --only-optional --include test + - name: Run tests + timeout-minutes: 15 + run: | + python3 -m yt_dlp -v || true # Print debug head + python3 ./devscripts/run_tests.py test/test_youtube_signature.py diff --git a/.gitignore b/.gitignore index 8fcd0de64..40bb34d2a 100644 --- a/.gitignore +++ b/.gitignore @@ -105,6 +105,8 @@ README.txt *.zsh *.spec test/testdata/sigs/player-*.js +test/testdata/thumbnails/empty.webp +test/testdata/thumbnails/foo\ %d\ bar/foo_%d.* # Binary /youtube-dl diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 6aa52c595..ba23b66dc 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -775,3 +775,12 @@ GeoffreyFrogeye Pawka v3DJG6GL yozel +brian6932 +iednod55 +maxbin123 +nullpos +anlar +eason1478 +ceandreasen +chauhantirth +helpimnotdrowning diff --git a/Changelog.md b/Changelog.md index 80b72da05..5a5c18cf3 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,103 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.06.30 + +#### Core changes +- **jsinterp**: [Fix `extract_object`](https://github.com/yt-dlp/yt-dlp/commit/958153a226214c86879e36211ac191bf78289578) ([#13580](https://github.com/yt-dlp/yt-dlp/issues/13580)) by [seproDev](https://github.com/seproDev) + +#### Extractor changes +- **bilibilispacevideo**: [Extract hidden-mode collections as playlists](https://github.com/yt-dlp/yt-dlp/commit/99b85ac102047446e6adf5b62bfc3c8d80b53778) ([#13533](https://github.com/yt-dlp/yt-dlp/issues/13533)) by [c-basalt](https://github.com/c-basalt) +- **hotstar** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b5bd057fe86550f3aa67f2fc8790d1c6a251c57b) ([#13530](https://github.com/yt-dlp/yt-dlp/issues/13530)) by [bashonly](https://github.com/bashonly), [chauhantirth](https://github.com/chauhantirth) (With fixes in [e9f1576](https://github.com/yt-dlp/yt-dlp/commit/e9f157669e24953a88d15ce22053649db7a8e81e) by [bashonly](https://github.com/bashonly)) + - [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0a6b1044899f452cd10b6c7a6b00fa985a9a8b97) ([#13560](https://github.com/yt-dlp/yt-dlp/issues/13560)) by [bashonly](https://github.com/bashonly) + - [Raise for login required](https://github.com/yt-dlp/yt-dlp/commit/5e292baad62c749b6c340621ab2d0f904165ddfb) ([#10405](https://github.com/yt-dlp/yt-dlp/issues/10405)) by [bashonly](https://github.com/bashonly) + - series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4bd9a7ade7e0508b9795b3e72a69eeb40788b62b) ([#13564](https://github.com/yt-dlp/yt-dlp/issues/13564)) by [bashonly](https://github.com/bashonly) +- **jiocinema**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/7e2504f941a11ea2b0dba00de3f0295cdc253e79) ([#13565](https://github.com/yt-dlp/yt-dlp/issues/13565)) by [bashonly](https://github.com/bashonly) +- **kick**: [Support subscriber-only content](https://github.com/yt-dlp/yt-dlp/commit/b16722ede83377f77ea8352dcd0a6ca8e83b8f0f) ([#13550](https://github.com/yt-dlp/yt-dlp/issues/13550)) by [helpimnotdrowning](https://github.com/helpimnotdrowning) +- **niconico**: live: [Fix extractor and downloader](https://github.com/yt-dlp/yt-dlp/commit/06c1a8cdffe14050206683253726875144192ef5) ([#13158](https://github.com/yt-dlp/yt-dlp/issues/13158)) by [doe1080](https://github.com/doe1080) +- **sauceplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/35fc33fbc51c7f5392fb2300f65abf6cf107ef90) ([#13567](https://github.com/yt-dlp/yt-dlp/issues/13567)) by [bashonly](https://github.com/bashonly), [ceandreasen](https://github.com/ceandreasen) +- **sproutvideo**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/11b9416e10cff7513167d76d6c47774fcdd3e26a) ([#13589](https://github.com/yt-dlp/yt-dlp/issues/13589)) by [bashonly](https://github.com/bashonly) +- **youtube**: [Fix premium formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2ba5391cd68ed4f2415c827d2cecbcbc75ace10b) ([#13586](https://github.com/yt-dlp/yt-dlp/issues/13586)) by [bashonly](https://github.com/bashonly) + +#### Misc. changes +- **ci**: [Add signature tests](https://github.com/yt-dlp/yt-dlp/commit/1b883846347addeab12663fd74317fd544341a1c) ([#13582](https://github.com/yt-dlp/yt-dlp/issues/13582)) by [bashonly](https://github.com/bashonly) +- **cleanup**: Miscellaneous: [b018784](https://github.com/yt-dlp/yt-dlp/commit/b0187844988e557c7e1e6bb1aabd4c1176768d86) by [bashonly](https://github.com/bashonly) + +### 2025.06.25 + +#### Extractor changes +- [Add `_search_nuxt_json` helper](https://github.com/yt-dlp/yt-dlp/commit/51887484e46ab6015c041cb1ab626a55f25a03bd) ([#13386](https://github.com/yt-dlp/yt-dlp/issues/13386)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- **brightcove**: new: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e6bd4a3da295b760ab20b39c18ce8934d312c2bf) ([#13461](https://github.com/yt-dlp/yt-dlp/issues/13461)) by [doe1080](https://github.com/doe1080) +- **huya**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2600849badb0d08c55b58dcc77a13af6ba423da6) ([#13520](https://github.com/yt-dlp/yt-dlp/issues/13520)) by [doe1080](https://github.com/doe1080) +- **hypergryph**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1722c55400ff30bb5aee5dd7a262f0b7e9ce2f0e) ([#13415](https://github.com/yt-dlp/yt-dlp/issues/13415)) by [doe1080](https://github.com/doe1080), [eason1478](https://github.com/eason1478) +- **lsm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c57412d1f9cf0124adc972a47858ac42b740c61d) ([#13126](https://github.com/yt-dlp/yt-dlp/issues/13126)) by [Caesim404](https://github.com/Caesim404) +- **mave**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1838a1ce5d4ade80770ba9162eaffc9a1607dc70) ([#13380](https://github.com/yt-dlp/yt-dlp/issues/13380)) by [anlar](https://github.com/anlar) +- **sportdeutschland**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ce4327c9836691d3b6b00e44a90b6741601ed8) ([#13519](https://github.com/yt-dlp/yt-dlp/issues/13519)) by [DTrombett](https://github.com/DTrombett) +- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5b559d0072b7164daf06bacdc41c6f11283452c8) ([#13544](https://github.com/yt-dlp/yt-dlp/issues/13544)) by [bashonly](https://github.com/bashonly) +- **tv8.it**: [Support slugless URLs](https://github.com/yt-dlp/yt-dlp/commit/3bd30291601c47fa4a257983473884103ecab0c7) ([#13478](https://github.com/yt-dlp/yt-dlp/issues/13478)) by [DTrombett](https://github.com/DTrombett) +- **youtube** + - [Check any `ios` m3u8 formats prior to download](https://github.com/yt-dlp/yt-dlp/commit/8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f) ([#13524](https://github.com/yt-dlp/yt-dlp/issues/13524)) by [bashonly](https://github.com/bashonly) + - [Improve player context payloads](https://github.com/yt-dlp/yt-dlp/commit/ff6f94041aeee19c5559e1c1cd693960a1c1dd14) ([#13539](https://github.com/yt-dlp/yt-dlp/issues/13539)) by [bashonly](https://github.com/bashonly) + +#### Misc. changes +- **test**: `traversal`: [Fix morsel tests for Python 3.14](https://github.com/yt-dlp/yt-dlp/commit/73bf10211668e4a59ccafd790e06ee82d9fea9ea) ([#13471](https://github.com/yt-dlp/yt-dlp/issues/13471)) by [Grub4K](https://github.com/Grub4K) + +### 2025.06.09 + +#### Extractor changes +- [Improve JSON LD thumbnails extraction](https://github.com/yt-dlp/yt-dlp/commit/85c8a405e3651dc041b758f4744d4fb3c4c55e01) ([#13368](https://github.com/yt-dlp/yt-dlp/issues/13368)) by [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080) +- **10play**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6d265388c6e943419ac99e9151cf75a3265f980f) ([#13349](https://github.com/yt-dlp/yt-dlp/issues/13349)) by [bashonly](https://github.com/bashonly) +- **adobepass** + - [Add Fubo MSO](https://github.com/yt-dlp/yt-dlp/commit/eee90acc47d7f8de24afaa8b0271ccaefdf6e88c) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) + - [Always add newer user-agent when required](https://github.com/yt-dlp/yt-dlp/commit/0ee1102268cf31b07f8a8318a47424c66b2f7378) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) + - [Fix Philo MSO authentication](https://github.com/yt-dlp/yt-dlp/commit/943083edcd3df45aaa597a6967bc6c95b720f54c) ([#13335](https://github.com/yt-dlp/yt-dlp/issues/13335)) by [Sipherdrakon](https://github.com/Sipherdrakon) + - [Rework to require software statement](https://github.com/yt-dlp/yt-dlp/commit/711c5d5d098fee2992a1a624b1c4b30364b91426) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123) + - [Validate login URL before sending credentials](https://github.com/yt-dlp/yt-dlp/commit/89c1b349ad81318d9d3bea76c01c891696e58d38) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **aenetworks** + - [Fix playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/f37d599a697e82fe68b423865897d55bae34f373) ([#13408](https://github.com/yt-dlp/yt-dlp/issues/13408)) by [Sipherdrakon](https://github.com/Sipherdrakon) + - [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/6693d6603358ae6beca834dbd822a7917498b813) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) +- **bilibilibangumi**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/13e55162719528d42d2133e16b65ff59a667a6e4) ([#13416](https://github.com/yt-dlp/yt-dlp/issues/13416)) by [c-basalt](https://github.com/c-basalt) +- **brightcove**: new: [Adapt to new AdobePass requirement](https://github.com/yt-dlp/yt-dlp/commit/98f8eec956e3b16cb66a3d49cc71af3807db795e) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **cu.ntv.co.jp**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/aa863ddab9b1d104678e9cf39bb76f5b14fca660) ([#13302](https://github.com/yt-dlp/yt-dlp/issues/13302)) by [doe1080](https://github.com/doe1080), [nullpos](https://github.com/nullpos) +- **go**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5bf002dad16f5ce35aa2023d392c9e518fcd8f) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123) +- **nbc**: [Rework and adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/2d7949d5642bc37d1e71bf00c9a55260e5505d58) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **nobelprize**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/97ddfefeb4faba6e61cd80996c16952b8eab16f3) ([#13205](https://github.com/yt-dlp/yt-dlp/issues/13205)) by [doe1080](https://github.com/doe1080) +- **odnoklassniki**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/148a1eb4c59e127965396c7a6e6acf1979de459e) ([#13361](https://github.com/yt-dlp/yt-dlp/issues/13361)) by [bashonly](https://github.com/bashonly) +- **patreon**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e0d6c0822930f6e63f574d46d946a58b73ecd10c) ([#13266](https://github.com/yt-dlp/yt-dlp/issues/13266)) by [bashonly](https://github.com/bashonly) (With fixes in [1a8a03e](https://github.com/yt-dlp/yt-dlp/commit/1a8a03ea8d827107319a18076ee3505090667c5a)) +- **podchaser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/538eb305673c26bff6a2b12f1c96375fe02ce41a) ([#13271](https://github.com/yt-dlp/yt-dlp/issues/13271)) by [bashonly](https://github.com/bashonly) +- **sr**: mediathek: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e3c605a61f4cc2de9059f37434fa108c3c20f58e) ([#13294](https://github.com/yt-dlp/yt-dlp/issues/13294)) by [doe1080](https://github.com/doe1080) +- **stacommu**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/5d96527be80dc1ed1702d9cd548ff86de570ad70) ([#13412](https://github.com/yt-dlp/yt-dlp/issues/13412)) by [bashonly](https://github.com/bashonly) +- **startrek**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a8bf0011bde92b3f1324a98bfbd38932fd3ebe18) ([#13188](https://github.com/yt-dlp/yt-dlp/issues/13188)) by [doe1080](https://github.com/doe1080) +- **svt**: play: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e1b6062f8c4a3fa33c65269d48d09ec78de765a2) ([#13329](https://github.com/yt-dlp/yt-dlp/issues/13329)) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly) +- **telecinco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03dba2012d9bd3f402fa8c2f122afba89bbd22a4) ([#13379](https://github.com/yt-dlp/yt-dlp/issues/13379)) by [bashonly](https://github.com/bashonly) +- **theplatform**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/ed108b3ea481c6a4b5215a9302ba92d74baa2425) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **toutiao**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8051e3a61686c5db1de5f5746366ecfbc3ad20c) ([#13246](https://github.com/yt-dlp/yt-dlp/issues/13246)) by [doe1080](https://github.com/doe1080) +- **turner**: [Adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/0daddc780d3ac5bebc3a3ec5b884d9243cbc0745) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **twitcasting**: [Fix password-protected livestream support](https://github.com/yt-dlp/yt-dlp/commit/52f9729c9a92ad4656d746ff0b1acecb87b3e96d) ([#13097](https://github.com/yt-dlp/yt-dlp/issues/13097)) by [bashonly](https://github.com/bashonly) +- **twitter**: broadcast: [Support events URLs](https://github.com/yt-dlp/yt-dlp/commit/7794374de8afb20499b023107e2abfd4e6b93ee4) ([#13248](https://github.com/yt-dlp/yt-dlp/issues/13248)) by [doe1080](https://github.com/doe1080) +- **umg**: de: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/4e7c1ea346b510280218b47e8653dbbca3a69870) ([#13373](https://github.com/yt-dlp/yt-dlp/issues/13373)) by [doe1080](https://github.com/doe1080) +- **vice**: [Mark extractors as broken](https://github.com/yt-dlp/yt-dlp/commit/6121559e027a04574690799c1776bc42bb51af31) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly) +- **vimeo**: [Extract subtitles from player subdomain](https://github.com/yt-dlp/yt-dlp/commit/c723c4e5e78263df178dbe69844a3d05f3ef9e35) ([#13350](https://github.com/yt-dlp/yt-dlp/issues/13350)) by [bashonly](https://github.com/bashonly) +- **watchespn**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/b094747e93cfb0a2c53007120e37d0d84d41f030) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123) +- **weverse**: [Support login with oauth refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/3fe72e9eea38d9a58211cde42cfaa577ce020e2c) ([#13284](https://github.com/yt-dlp/yt-dlp/issues/13284)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add `tv_simply` player client](https://github.com/yt-dlp/yt-dlp/commit/1fd0e88b67db53ad163393d6965f68e908fa70e3) ([#13389](https://github.com/yt-dlp/yt-dlp/issues/13389)) by [gamer191](https://github.com/gamer191) + - [Extract srt subtitles](https://github.com/yt-dlp/yt-dlp/commit/231349786e8c42089c2e079ec94c0ea866c37999) ([#13411](https://github.com/yt-dlp/yt-dlp/issues/13411)) by [gamer191](https://github.com/gamer191) + - [Fix `--mark-watched` support](https://github.com/yt-dlp/yt-dlp/commit/b5be29fa58ec98226e11621fd9c58585bcff6879) ([#13222](https://github.com/yt-dlp/yt-dlp/issues/13222)) by [brian6932](https://github.com/brian6932), [iednod55](https://github.com/iednod55) + - [Fix automatic captions for some client combinations](https://github.com/yt-dlp/yt-dlp/commit/53ea743a9c158f8ca2d75a09ca44ba68606042d8) ([#13268](https://github.com/yt-dlp/yt-dlp/issues/13268)) by [bashonly](https://github.com/bashonly) + - [Improve signature extraction debug output](https://github.com/yt-dlp/yt-dlp/commit/d30a49742cfa22e61c47df4ac0e7334d648fb85d) ([#13327](https://github.com/yt-dlp/yt-dlp/issues/13327)) by [bashonly](https://github.com/bashonly) + - [Rework nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/9e38b273b7ac942e7e9fc05a651ed810ab7d30ba) ([#13403](https://github.com/yt-dlp/yt-dlp/issues/13403)) by [Grub4K](https://github.com/Grub4K) + - [nsig code improvements and cleanup](https://github.com/yt-dlp/yt-dlp/commit/f7bbf5a617f9ab54ef51eaef99be36e175b5e9c3) ([#13280](https://github.com/yt-dlp/yt-dlp/issues/13280)) by [bashonly](https://github.com/bashonly) +- **zdf**: [Fix language extraction and format sorting](https://github.com/yt-dlp/yt-dlp/commit/db162b76f6bdece50babe2e0cacfe56888c2e125) ([#13313](https://github.com/yt-dlp/yt-dlp/issues/13313)) by [InvalidUsernameException](https://github.com/InvalidUsernameException) + +#### Misc. changes +- **build** + - [Exclude `pkg_resources` from being collected](https://github.com/yt-dlp/yt-dlp/commit/cc749a8a3b8b6e5c05318868c72a403f376a1b38) ([#13320](https://github.com/yt-dlp/yt-dlp/issues/13320)) by [bashonly](https://github.com/bashonly) + - [Fix macOS requirements caching](https://github.com/yt-dlp/yt-dlp/commit/201812100f315c6727a4418698d5b4e8a79863d4) ([#13328](https://github.com/yt-dlp/yt-dlp/issues/13328)) by [bashonly](https://github.com/bashonly) +- **cleanup**: Miscellaneous: [339614a](https://github.com/yt-dlp/yt-dlp/commit/339614a173c74b42d63e858c446a9cae262a13af) by [bashonly](https://github.com/bashonly) +- **test**: postprocessors: [Remove binary thumbnail test data](https://github.com/yt-dlp/yt-dlp/commit/a9b370069838e84d44ac7ad095d657003665885a) ([#13341](https://github.com/yt-dlp/yt-dlp/issues/13341)) by [bashonly](https://github.com/bashonly) + ### 2025.05.22 #### Core changes diff --git a/Makefile b/Makefile index 6c72ead1e..273cb3cc0 100644 --- a/Makefile +++ b/Makefile @@ -18,10 +18,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \ tar pypi-files lazy-extractors install uninstall clean-test: - rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ + rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \ - *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp + *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp \ + test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."* clean-dist: rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS diff --git a/README.md b/README.md index 6e2dc6243..925ebd8c5 100644 --- a/README.md +++ b/README.md @@ -1156,15 +1156,15 @@ # CONFIGURATION * `/etc/yt-dlp/config` * `/etc/yt-dlp/config.txt` -E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: +E.g. with the following configuration file, yt-dlp will always extract the audio, copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: ``` # Lines starting with # are comments # Always extract audio -x -# Do not copy the mtime ---no-mtime +# Copy the mtime +--mtime # Use this proxy --proxy 127.0.0.1:3128 @@ -1795,10 +1795,11 @@ # EXTRACTOR ARGUMENTS The following extractors use this feature: #### youtube -* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes +* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` +* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details +* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual` * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) @@ -1900,6 +1901,10 @@ #### sonylivseries #### tver * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) +#### vimeo +* `client`: Client to extract video data from. One of `android` (default), `ios` or `web`. The `ios` client only works with previously cached OAuth tokens. The `web` client only works when authenticated with credentials or account cookies +* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability + **Note**: These options may be changed/removed in the future without concern for backward compatibility @@ -2262,6 +2267,7 @@ ### Differences in default behavior * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests. * The sub-modules `swfinterp`, `casefold` are removed. * Passing `--simulate` (or calling `extract_info` with `download=False`) no longer alters the default format selection. See [#9843](https://github.com/yt-dlp/yt-dlp/issues/9843) for details. +* yt-dlp no longer applies the server modified time to downloaded files by default. Use `--mtime` or `--compat-options mtime-by-default` to revert this. For ease of use, a few more compat options are available: @@ -2271,7 +2277,7 @@ ### Differences in default behavior * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization` * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx` * `--compat-options 2023`: Same as `--compat-options 2024,prefer-vp9-sort` -* `--compat-options 2024`: Currently does nothing. Use this to enable all future compat options +* `--compat-options 2024`: Same as `--compat-options mtime-by-default`. Use this to enable all future compat options The following compat options restore vulnerable behavior from before security patches: diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in index 21f52798e..bb66c2095 100644 --- a/devscripts/bash-completion.in +++ b/devscripts/bash-completion.in @@ -10,9 +10,13 @@ __yt_dlp() diropts="--cache-dir" if [[ ${prev} =~ ${fileopts} ]]; then + local IFS=$'\n' + type compopt &>/dev/null && compopt -o filenames COMPREPLY=( $(compgen -f -- ${cur}) ) return 0 elif [[ ${prev} =~ ${diropts} ]]; then + local IFS=$'\n' + type compopt &>/dev/null && compopt -o dirnames COMPREPLY=( $(compgen -d -- ${cur}) ) return 0 fi diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 269de2c68..d7296bf30 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -254,5 +254,13 @@ { "action": "remove", "when": "d596824c2f8428362c072518856065070616e348" + }, + { + "action": "remove", + "when": "7b81634fb1d15999757e7a9883daa6ef09ea785b" + }, + { + "action": "remove", + "when": "500761e41acb96953a5064e951d41d190c287e46" } ] diff --git a/pyproject.toml b/pyproject.toml index 3775251e1..41d5ec3b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,7 @@ dev = [ ] static-analysis = [ "autopep8~=2.0", - "ruff~=0.11.0", + "ruff~=0.12.0", ] test = [ "pytest~=8.1", @@ -210,10 +210,12 @@ ignore = [ "TD001", # invalid-todo-tag "TD002", # missing-todo-author "TD003", # missing-todo-link + "PLC0415", # import-outside-top-level "PLE0604", # invalid-all-object (false positives) "PLE0643", # potential-index-error (false positives) "PLW0603", # global-statement "PLW1510", # subprocess-run-without-check + "PLW1641", # eq-without-hash "PLW2901", # redefined-loop-name "RUF001", # ambiguous-unicode-character-string "RUF012", # mutable-class-default diff --git a/supportedsites.md b/supportedsites.md index c2d7b4555..8e48135d2 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -5,6 +5,8 @@ # Supported sites Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them. The only reliable way to check if a site is supported is to try it. + - **10play**: [*10play*](## "netrc machine") + - **10play:season** - **17live** - **17live:clip** - **17live:vod** @@ -295,7 +297,7 @@ # Supported sites - **CNNIndonesia** - **ComedyCentral** - **ComedyCentralTV** - - **ConanClassic** + - **ConanClassic**: (**Currently broken**) - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CONtv** - **CookingChannel** @@ -317,7 +319,7 @@ # Supported sites - **CtsNews**: 華視新聞 - **CTV** - **CTVNews** - - **cu.ntv.co.jp**: Nippon Television Network + - **cu.ntv.co.jp**: 日テレ無料TADA! - **CultureUnplugged** - **curiositystream**: [*curiositystream*](## "netrc machine") - **curiositystream:collections**: [*curiositystream*](## "netrc machine") @@ -573,9 +575,7 @@ # Supported sites - **HollywoodReporterPlaylist** - **Holodex** - **HotNewHipHop**: (**Currently broken**) - - **hotstar** - - **hotstar:playlist** - - **hotstar:season** + - **hotstar**: JioHotstar - **hotstar:series** - **hrfernsehen** - **HRTi**: [*hrti*](## "netrc machine") @@ -588,7 +588,7 @@ # Supported sites - **Hungama** - **HungamaAlbumPlaylist** - **HungamaSong** - - **huya:live**: huya.com + - **huya:live**: 虎牙直播 - **huya:video**: 虎牙视频 - **Hypem** - **Hytale** @@ -645,8 +645,6 @@ # Supported sites - **Jamendo** - **JamendoAlbum** - **JeuxVideo**: (**Currently broken**) - - **jiocinema**: [*jiocinema*](## "netrc machine") - - **jiocinema:series**: [*jiocinema*](## "netrc machine") - **jiosaavn:album** - **jiosaavn:artist** - **jiosaavn:playlist** @@ -774,6 +772,7 @@ # Supported sites - **massengeschmack.tv** - **Masters** - **MatchTV** + - **Mave** - **MBN**: mbn.co.kr (매일방송) - **MDR**: MDR.DE - **MedalTV** @@ -830,7 +829,7 @@ # Supported sites - **Mojevideo**: mojevideo.sk - **Mojvideo** - **Monstercat** - - **MonsterSirenHypergryphMusic** + - **monstersiren**: 塞壬唱片 - **Motherless** - **MotherlessGallery** - **MotherlessGroup** @@ -882,19 +881,19 @@ # Supported sites - **Naver** - **Naver:live** - **navernow** - - **nba** - - **nba:channel** - - **nba:embed** - - **nba:watch** - - **nba:​watch:collection** - - **nba:​watch:embed** + - **nba**: (**Currently broken**) + - **nba:channel**: (**Currently broken**) + - **nba:embed**: (**Currently broken**) + - **nba:watch**: (**Currently broken**) + - **nba:​watch:collection**: (**Currently broken**) + - **nba:​watch:embed**: (**Currently broken**) - **NBC** - **NBCNews** - **nbcolympics** - - **nbcolympics:stream** - - **NBCSports** - - **NBCSportsStream** - - **NBCSportsVPlayer** + - **nbcolympics:stream**: (**Currently broken**) + - **NBCSports**: (**Currently broken**) + - **NBCSportsStream**: (**Currently broken**) + - **NBCSportsVPlayer**: (**Currently broken**) - **NBCStations** - **ndr**: NDR.de - Norddeutscher Rundfunk - **ndr:embed** @@ -970,7 +969,7 @@ # Supported sites - **Nitter** - **njoy**: N-JOY - **njoy:embed** - - **NobelPrize**: (**Currently broken**) + - **NobelPrize** - **NoicePodcast** - **NonkTube** - **NoodleMagazine** @@ -1296,6 +1295,7 @@ # Supported sites - **SampleFocus** - **Sangiin**: 参議院インターネット審議中継 (archive) - **Sapo**: SAPO Vídeos + - **SaucePlus**: Sauce+ - **SBS**: sbs.com.au - **sbs.co.kr** - **sbs.co.kr:allvod_program** @@ -1393,14 +1393,14 @@ # Supported sites - **SpreakerShow** - **SpringboardPlatform** - **SproutVideo** - - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**) + - **sr:mediathek**: Saarländischer Rundfunk - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **StacommuLive**: [*stacommu*](## "netrc machine") - **StacommuVOD**: [*stacommu*](## "netrc machine") - **StagePlusVODConcert**: [*stageplus*](## "netrc machine") - **stanfordoc**: Stanford Open ClassRoom - - **StarTrek**: (**Currently broken**) + - **startrek**: STAR TREK - **startv** - **Steam** - **SteamCommunityBroadcast** @@ -1423,12 +1423,11 @@ # Supported sites - **SunPorno** - **sverigesradio:episode** - **sverigesradio:publication** - - **SVT** - - **SVTPage** - - **SVTPlay**: SVT Play and Öppet arkiv - - **SVTSeries** + - **svt:page** + - **svt:play**: SVT Play and Öppet arkiv + - **svt:​play:series** - **SwearnetEpisode** - - **Syfy**: (**Currently broken**) + - **Syfy** - **SYVDK** - **SztvHu** - **t-online.de**: (**Currently broken**) @@ -1472,8 +1471,6 @@ # Supported sites - **Telewebion**: (**Currently broken**) - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - - **TenPlay**: [*10play*](## "netrc machine") - - **TenPlaySeason** - **TF1** - **TFO** - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") @@ -1511,6 +1508,7 @@ # Supported sites - **tokfm:podcast** - **ToonGoggles** - **tou.tv**: [*toutv*](## "netrc machine") + - **toutiao**: 今日头条 - **Toypics**: Toypics video (**Currently broken**) - **ToypicsUser**: Toypics user profile (**Currently broken**) - **TrailerAddict**: (**Currently broken**) @@ -1600,7 +1598,7 @@ # Supported sites - **UKTVPlay** - **UlizaPlayer** - **UlizaPortal**: ulizaportal.jp - - **umg:de**: Universal Music Deutschland (**Currently broken**) + - **umg:de**: Universal Music Deutschland - **Unistra** - **Unity**: (**Currently broken**) - **uol.com.br** @@ -1623,9 +1621,9 @@ # Supported sites - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **vhx:embed**: [*vimeo*](## "netrc machine") - - **vice** - - **vice:article** - - **vice:show** + - **vice**: (**Currently broken**) + - **vice:article**: (**Currently broken**) + - **vice:show**: (**Currently broken**) - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c6ff6209a..40dd05e13 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -36,6 +36,18 @@ def do_GET(self): self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + elif self.path == '/fake.m3u8': + self.send_response(200) + self.send_header('Content-Length', '1024') + self.end_headers() + self.wfile.write(1024 * b'\x00') + elif self.path == '/bipbop.m3u8': + with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f: + data = f.read() + self.send_response(200) + self.send_header('Content-Length', str(len(data))) + self.end_headers() + self.wfile.write(data) else: assert False @@ -314,6 +326,20 @@ def test_search_json_ld_realworld(self): }, {}, ), + ( + # test thumbnail_url key without URL scheme + r''' +''', + { + 'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}], + }, + {}, + ), ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( @@ -1933,6 +1959,208 @@ def test_search_nextjs_data(self): with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) + def test_search_nextjs_v13_data(self): + HTML = R''' + + + + + + + + ''' + EXPECTED = { + '18': { + 'foo': 'bar', + }, + '16': { + 'meta': { + 'dateCreated': 1730489700, + 'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907', + }, + }, + '19': { + 'duplicated_field_name': {'x': 1}, + }, + '20': { + 'duplicated_field_name': {'y': 2}, + }, + } + self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED) + self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {}) + self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {}) + + def test_search_nuxt_json(self): + HTML_TMPL = '' + VALID_DATA = ''' + ["ShallowReactive",1], + {"data":2,"state":21,"once":25,"_errors":28,"_server_errors":30}, + ["ShallowReactive",3], + {"$abcdef123456":4}, + {"podcast":5,"activeEpisodeData":7}, + {"podcast":6,"seasons":14}, + {"title":10,"id":11}, + ["Reactive",8], + {"episode":9,"creators":18,"empty_list":20}, + {"title":12,"id":13,"refs":34,"empty_refs":35}, + "Series Title", + "podcast-id-01", + "Episode Title", + "episode-id-99", + [15,16,17], + 1, + 2, + 3, + [19], + "Podcast Creator", + [], + {"$ssite-config":22}, + {"env":23,"name":24,"map":26,"numbers":14}, + "production", + "podcast-website", + ["Set"], + ["Reactive",27], + ["Map"], + ["ShallowReactive",29], + {}, + ["NuxtError",31], + {"status":32,"message":33}, + 503, + "Service Unavailable", + [36,37], + [38,39], + ["Ref",40], + ["ShallowRef",41], + ["EmptyRef",42], + ["EmptyShallowRef",43], + "ref", + "shallow_ref", + "{\\"ref\\":1}", + "{\\"shallow_ref\\":2}" + ''' + PAYLOAD = { + 'data': { + '$abcdef123456': { + 'podcast': { + 'podcast': { + 'title': 'Series Title', + 'id': 'podcast-id-01', + }, + 'seasons': [1, 2, 3], + }, + 'activeEpisodeData': { + 'episode': { + 'title': 'Episode Title', + 'id': 'episode-id-99', + 'refs': ['ref', 'shallow_ref'], + 'empty_refs': [{'ref': 1}, {'shallow_ref': 2}], + }, + 'creators': ['Podcast Creator'], + 'empty_list': [], + }, + }, + }, + 'state': { + '$ssite-config': { + 'env': 'production', + 'name': 'podcast-website', + 'map': [], + 'numbers': [1, 2, 3], + }, + }, + 'once': [], + '_errors': {}, + '_server_errors': { + 'status': 503, + 'message': 'Service Unavailable', + }, + } + PARTIALLY_INVALID = [( + ''' + {"data":1}, + {"invalid_raw_list":2}, + [15,16,17] + ''', + {'data': {'invalid_raw_list': [None, None, None]}}, + ), ( + ''' + {"data":1}, + ["EmptyRef",2], + "not valid JSON" + ''', + {'data': None}, + ), ( + ''' + {"data":1}, + ["EmptyShallowRef",2], + "not valid JSON" + ''', + {'data': None}, + )] + INVALID = [ + ''' + [] + ''', + ''' + ["unsupported",1], + {"data":2}, + {} + ''', + ] + DEFAULT = object() + + self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD) + self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {}) + self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT) + + for data, expected in PARTIALLY_INVALID: + self.assertEqual( + self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected) + + for data in INVALID: + self.assertIs( + self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) + + +class TestInfoExtractorNetwork(unittest.TestCase): + def setUp(self, /): + self.httpd = http.server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + self.port = http_server_port(self.httpd) + + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + self.called = False + + def require_warning(*args, **kwargs): + self.called = True + + self.ydl = FakeYDL() + self.ydl.report_warning = require_warning + self.ie = DummyIE(self.ydl) + + def tearDown(self, /): + self.ydl.close() + self.httpd.shutdown() + self.httpd.server_close() + self.server_thread.join(1) + + def test_extract_m3u8_formats(self): + formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( + f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False) + self.assertFalse(self.called) + self.assertTrue(formats) + self.assertTrue(subtitles) + + def test_extract_m3u8_formats_warning(self): + formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( + f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False) + self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file') + self.assertFalse(formats) + self.assertFalse(subtitles) + if __name__ == '__main__': unittest.main() diff --git a/test/test_devalue.py b/test/test_devalue.py new file mode 100644 index 000000000..29eb89e87 --- /dev/null +++ b/test/test_devalue.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import datetime as dt +import json +import math +import re +import unittest + +from yt_dlp.utils.jslib import devalue + + +TEST_CASES_EQUALS = [{ + 'name': 'int', + 'unparsed': [-42], + 'parsed': -42, +}, { + 'name': 'str', + 'unparsed': ['woo!!!'], + 'parsed': 'woo!!!', +}, { + 'name': 'Number', + 'unparsed': [['Object', 42]], + 'parsed': 42, +}, { + 'name': 'String', + 'unparsed': [['Object', 'yar']], + 'parsed': 'yar', +}, { + 'name': 'Infinity', + 'unparsed': -4, + 'parsed': math.inf, +}, { + 'name': 'negative Infinity', + 'unparsed': -5, + 'parsed': -math.inf, +}, { + 'name': 'negative zero', + 'unparsed': -6, + 'parsed': -0.0, +}, { + 'name': 'RegExp', + 'unparsed': [['RegExp', 'regexp', 'gim']], # XXX: flags are ignored + 'parsed': re.compile('regexp'), +}, { + 'name': 'Date', + 'unparsed': [['Date', '2001-09-09T01:46:40.000Z']], + 'parsed': dt.datetime.fromtimestamp(1e9, tz=dt.timezone.utc), +}, { + 'name': 'Array', + 'unparsed': [[1, 2, 3], 'a', 'b', 'c'], + 'parsed': ['a', 'b', 'c'], +}, { + 'name': 'Array (empty)', + 'unparsed': [[]], + 'parsed': [], +}, { + 'name': 'Array (sparse)', + 'unparsed': [[-2, 1, -2], 'b'], + 'parsed': [None, 'b', None], +}, { + 'name': 'Object', + 'unparsed': [{'foo': 1, 'x-y': 2}, 'bar', 'z'], + 'parsed': {'foo': 'bar', 'x-y': 'z'}, +}, { + 'name': 'Set', + 'unparsed': [['Set', 1, 2, 3], 1, 2, 3], + 'parsed': [1, 2, 3], +}, { + 'name': 'Map', + 'unparsed': [['Map', 1, 2], 'a', 'b'], + 'parsed': [['a', 'b']], +}, { + 'name': 'BigInt', + 'unparsed': [['BigInt', '1']], + 'parsed': 1, +}, { + 'name': 'Uint8Array', + 'unparsed': [['Uint8Array', 'AQID']], + 'parsed': [1, 2, 3], +}, { + 'name': 'ArrayBuffer', + 'unparsed': [['ArrayBuffer', 'AQID']], + 'parsed': [1, 2, 3], +}, { + 'name': 'str (repetition)', + 'unparsed': [[1, 1], 'a string'], + 'parsed': ['a string', 'a string'], +}, { + 'name': 'None (repetition)', + 'unparsed': [[1, 1], None], + 'parsed': [None, None], +}, { + 'name': 'dict (repetition)', + 'unparsed': [[1, 1], {}], + 'parsed': [{}, {}], +}, { + 'name': 'Object without prototype', + 'unparsed': [['null']], + 'parsed': {}, +}, { + 'name': 'cross-realm POJO', + 'unparsed': [{}], + 'parsed': {}, +}] + +TEST_CASES_IS = [{ + 'name': 'bool', + 'unparsed': [True], + 'parsed': True, +}, { + 'name': 'Boolean', + 'unparsed': [['Object', False]], + 'parsed': False, +}, { + 'name': 'undefined', + 'unparsed': -1, + 'parsed': None, +}, { + 'name': 'null', + 'unparsed': [None], + 'parsed': None, +}, { + 'name': 'NaN', + 'unparsed': -3, + 'parsed': math.nan, +}] + +TEST_CASES_INVALID = [{ + 'name': 'empty string', + 'unparsed': '', + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'hole', + 'unparsed': -2, + 'error': ValueError, + 'pattern': r'invalid integer input', +}, { + 'name': 'string', + 'unparsed': 'hello', + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'number', + 'unparsed': 42, + 'error': ValueError, + 'pattern': r'invalid integer input', +}, { + 'name': 'boolean', + 'unparsed': True, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'null', + 'unparsed': None, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'object', + 'unparsed': {}, + 'error': ValueError, + 'pattern': r'expected int or list as input', +}, { + 'name': 'empty array', + 'unparsed': [], + 'error': ValueError, + 'pattern': r'expected a non-empty list as input', +}, { + 'name': 'Python negative indexing', + 'unparsed': [[1, 2, 3, 4, 5, 6, 7, -7], 1, 2, 3, 4, 5, 6, 7], + 'error': IndexError, + 'pattern': r'invalid index: -7', +}] + + +class TestDevalue(unittest.TestCase): + def test_devalue_parse_equals(self): + for tc in TEST_CASES_EQUALS: + self.assertEqual(devalue.parse(tc['unparsed']), tc['parsed'], tc['name']) + + def test_devalue_parse_is(self): + for tc in TEST_CASES_IS: + self.assertIs(devalue.parse(tc['unparsed']), tc['parsed'], tc['name']) + + def test_devalue_parse_invalid(self): + for tc in TEST_CASES_INVALID: + with self.assertRaisesRegex(tc['error'], tc['pattern'], msg=tc['name']): + devalue.parse(tc['unparsed']) + + def test_devalue_parse_cyclical(self): + name = 'Map (cyclical)' + result = devalue.parse([['Map', 1, 0], 'self']) + self.assertEqual(result[0][0], 'self', name) + self.assertIs(result, result[0][1], name) + + name = 'Set (cyclical)' + result = devalue.parse([['Set', 0, 1], 42]) + self.assertEqual(result[1], 42, name) + self.assertIs(result, result[0], name) + + result = devalue.parse([[0]]) + self.assertIs(result, result[0], 'Array (cyclical)') + + name = 'Object (cyclical)' + result = devalue.parse([{'self': 0}]) + self.assertIs(result, result['self'], name) + + name = 'Object with null prototype (cyclical)' + result = devalue.parse([['null', 'self', 0]]) + self.assertIs(result, result['self'], name) + + name = 'Objects (cyclical)' + result = devalue.parse([[1, 2], {'second': 2}, {'first': 1}]) + self.assertIs(result[0], result[1]['first'], name) + self.assertIs(result[1], result[0]['second'], name) + + def test_devalue_parse_revivers(self): + self.assertEqual( + devalue.parse([['indirect', 1], {'a': 2}, 'b'], revivers={'indirect': lambda x: x}), + {'a': 'b'}, 'revivers (indirect)') + + self.assertEqual( + devalue.parse([['parse', 1], '{"a":0}'], revivers={'parse': lambda x: json.loads(x)}), + {'a': 0}, 'revivers (parse)') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_download.py b/test/test_download.py index 3f36869d9..c7842735c 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -14,6 +14,7 @@ from test.helper import ( assertGreaterEqual, + assertLessEqual, expect_info_dict, expect_warnings, get_params, @@ -121,10 +122,13 @@ def print_skipping(reason): params = get_params(test_case.get('params', {})) params['outtmpl'] = tname + '_' + params['outtmpl'] if is_playlist and 'playlist' not in test_case: - params.setdefault('extract_flat', 'in_playlist') - params.setdefault('playlistend', test_case.get( - 'playlist_mincount', test_case.get('playlist_count', -2) + 1)) + params.setdefault('playlistend', max( + test_case.get('playlist_mincount', -1), + test_case.get('playlist_count', -2) + 1, + test_case.get('playlist_maxcount', -2) + 1)) params.setdefault('skip_download', True) + if 'playlist_duration_sum' not in test_case: + params.setdefault('extract_flat', 'in_playlist') ydl = YoutubeDL(params, auto_init=False) ydl.add_default_info_extractors() @@ -159,6 +163,7 @@ def try_rm_tcs_files(tcs=None): try_rm(os.path.splitext(tc_filename)[0] + '.info.json') try_rm_tcs_files() try: + test_url = test_case['url'] try_num = 1 while True: try: @@ -166,7 +171,7 @@ def try_rm_tcs_files(tcs=None): # for outside error handling, and returns the exit code # instead of the result dict. res_dict = ydl.extract_info( - test_case['url'], + test_url, force_generic_extractor=params.get('force_generic_extractor', False)) except (DownloadError, ExtractorError) as err: # Check if the exception is not a network related one @@ -194,23 +199,23 @@ def try_rm_tcs_files(tcs=None): self.assertTrue('entries' in res_dict) expect_info_dict(self, res_dict, test_case.get('info_dict', {})) + num_entries = len(res_dict.get('entries', [])) if 'playlist_mincount' in test_case: + mincount = test_case['playlist_mincount'] assertGreaterEqual( - self, - len(res_dict['entries']), - test_case['playlist_mincount'], - 'Expected at least %d in playlist %s, but got only %d' % ( - test_case['playlist_mincount'], test_case['url'], - len(res_dict['entries']))) + self, num_entries, mincount, + f'Expected at least {mincount} entries in playlist {test_url}, but got only {num_entries}') if 'playlist_count' in test_case: + count = test_case['playlist_count'] + got = num_entries if num_entries <= count else 'more' self.assertEqual( - len(res_dict['entries']), - test_case['playlist_count'], - 'Expected %d entries in playlist %s, but got %d.' % ( - test_case['playlist_count'], - test_case['url'], - len(res_dict['entries']), - )) + num_entries, count, + f'Expected exactly {count} entries in playlist {test_url}, but got {got}') + if 'playlist_maxcount' in test_case: + maxcount = test_case['playlist_maxcount'] + assertLessEqual( + self, num_entries, maxcount, + f'Expected at most {maxcount} entries in playlist {test_url}, but got more') if 'playlist_duration_sum' in test_case: got_duration = sum(e['duration'] for e in res_dict['entries']) self.assertEqual( diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 2e3cdc2a5..43b1d0fde 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -478,6 +478,10 @@ def test_extract_function_with_global_stack(self): func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000}) self.assertEqual(func([1]), 1111) + def test_extract_object(self): + jsi = JSInterpreter('var a={};a.xy={};var xy;var zxy={};xy={z:function(){return "abc"}};') + self.assertTrue('z' in jsi.extract_object('xy', None)) + def test_increment_decrement(self): self._test('function f() { var x = 1; return ++x; }', 2) self._test('function f() { var x = 1; return x++; }', 1) @@ -486,6 +490,57 @@ def test_increment_decrement(self): self._test('function f() { var a = "test--"; return a; }', 'test--') self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--') + def test_nested_function_scoping(self): + self._test(R''' + function f() { + var g = function() { + var P = 2; + return P; + }; + var P = 1; + g(); + return P; + } + ''', 1) + self._test(R''' + function f() { + var x = function() { + for (var w = 1, M = []; w < 2; w++) switch (w) { + case 1: + M.push("a"); + case 2: + M.push("b"); + } + return M + }; + var w = "c"; + var M = "d"; + var y = x(); + y.push(w); + y.push(M); + return y; + } + ''', ['a', 'b', 'c', 'd']) + self._test(R''' + function f() { + var P, Q; + var z = 100; + var g = function() { + var P, Q; P = 2; Q = 15; + z = 0; + return P+Q; + }; + P = 1; Q = 10; + var x = g(), y = 3; + return P+Q+x+y+z; + } + ''', 31) + + def test_undefined_varnames(self): + jsi = JSInterpreter('function f(){ var a; return [a, b]; }') + self._test(jsi, [JS_Undefined, JS_Undefined]) + self.assertEqual(jsi._undefined_varnames, {'b'}) + if __name__ == '__main__': unittest.main() diff --git a/test/test_networking.py b/test/test_networking.py index 2f441fced..afdd0c7aa 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -22,7 +22,6 @@ import tempfile import threading import time -import urllib.error import urllib.request import warnings import zlib @@ -223,10 +222,7 @@ def do_GET(self): if encoding == 'br' and brotli: payload = brotli.compress(payload) elif encoding == 'gzip': - buf = io.BytesIO() - with gzip.GzipFile(fileobj=buf, mode='wb') as f: - f.write(payload) - payload = buf.getvalue() + payload = gzip.compress(payload, mtime=0) elif encoding == 'deflate': payload = zlib.compress(payload) elif encoding == 'unsupported': @@ -729,6 +725,17 @@ def test_keep_header_casing(self, handler): assert 'X-test-heaDer: test' in res + def test_partial_read_then_full_read(self, handler): + with handler() as rh: + for encoding in ('', 'gzip', 'deflate'): + res = validate_and_send(rh, Request( + f'http://127.0.0.1:{self.http_port}/content-encoding', + headers={'ytdl-encoding': encoding})) + assert res.headers.get('Content-Encoding') == encoding + assert res.read(6) == b'' + assert res.read(0) == b'' + assert res.read() == b'