mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
Merge branch 'master' of https://github.com/yt-dlp/yt-dlp into fix/ie/EuroParlWebstream
This commit is contained in:
commit
2d64537b60
6
.github/workflows/build.yml
vendored
6
.github/workflows/build.yml
vendored
@ -192,7 +192,7 @@ jobs:
|
||||
with:
|
||||
path: ./repo
|
||||
- name: Virtualized Install, Prepare & Build
|
||||
uses: yt-dlp/run-on-arch-action@v2
|
||||
uses: yt-dlp/run-on-arch-action@v3
|
||||
with:
|
||||
# Ref: https://github.com/uraimo/run-on-arch-action/issues/55
|
||||
env: |
|
||||
@ -411,7 +411,7 @@ jobs:
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --include curl-cffi
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.13.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@ -460,7 +460,7 @@ jobs:
|
||||
run: |
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.13.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
|
4
.github/workflows/core.yml
vendored
4
.github/workflows/core.yml
vendored
@ -6,7 +6,7 @@ on:
|
||||
- devscripts/**
|
||||
- test/**
|
||||
- yt_dlp/**.py
|
||||
- '!yt_dlp/extractor/*.py'
|
||||
- '!yt_dlp/extractor/**.py'
|
||||
- yt_dlp/extractor/__init__.py
|
||||
- yt_dlp/extractor/common.py
|
||||
- yt_dlp/extractor/extractors.py
|
||||
@ -16,7 +16,7 @@ on:
|
||||
- devscripts/**
|
||||
- test/**
|
||||
- yt_dlp/**.py
|
||||
- '!yt_dlp/extractor/*.py'
|
||||
- '!yt_dlp/extractor/**.py'
|
||||
- yt_dlp/extractor/__init__.py
|
||||
- yt_dlp/extractor/common.py
|
||||
- yt_dlp/extractor/extractors.py
|
||||
|
2
.github/workflows/quick-test.yml
vendored
2
.github/workflows/quick-test.yml
vendored
@ -38,3 +38,5 @@ jobs:
|
||||
run: ruff check --output-format github .
|
||||
- name: Run autopep8
|
||||
run: autopep8 --diff .
|
||||
- name: Check file mode
|
||||
run: git ls-files --format="%(objectmode) %(path)" yt_dlp/ | ( ! grep -v "^100644" )
|
||||
|
15
CONTRIBUTORS
15
CONTRIBUTORS
@ -760,3 +760,18 @@ vallovic
|
||||
arabcoders
|
||||
mireq
|
||||
mlabeeb03
|
||||
1271
|
||||
CasperMcFadden95
|
||||
Kicer86
|
||||
Kiritomo
|
||||
leeblackc
|
||||
meGAmeS1
|
||||
NeonMan
|
||||
pj47x
|
||||
troex
|
||||
WouterGordts
|
||||
baierjan
|
||||
GeoffreyFrogeye
|
||||
Pawka
|
||||
v3DJG6GL
|
||||
yozel
|
||||
|
125
Changelog.md
125
Changelog.md
@ -4,6 +4,131 @@ # Changelog
|
||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2025.05.22
|
||||
|
||||
#### Core changes
|
||||
- **cookies**: [Fix Linux desktop environment detection](https://github.com/yt-dlp/yt-dlp/commit/e491fd4d090db3af52a82863fb0553dd5e17fb85) ([#13197](https://github.com/yt-dlp/yt-dlp/issues/13197)) by [mbway](https://github.com/mbway)
|
||||
- **jsinterp**: [Fix increment/decrement evaluation](https://github.com/yt-dlp/yt-dlp/commit/167d7a9f0ffd1b4fe600193441bdb7358db2740b) ([#13238](https://github.com/yt-dlp/yt-dlp/issues/13238)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
|
||||
#### Extractor changes
|
||||
- **1tv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/41c0a1fb89628696f8bb88e2b9f3a68f355b8c26) ([#13168](https://github.com/yt-dlp/yt-dlp/issues/13168)) by [bashonly](https://github.com/bashonly)
|
||||
- **amcnetworks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/464c84fedf78eef822a431361155f108b5df96d7) ([#13147](https://github.com/yt-dlp/yt-dlp/issues/13147)) by [bashonly](https://github.com/bashonly)
|
||||
- **bitchute**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1d0f6539c47e5d5c68c3c47cdb7075339e2885ac) ([#13081](https://github.com/yt-dlp/yt-dlp/issues/13081)) by [bashonly](https://github.com/bashonly)
|
||||
- **cartoonnetwork**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/7dbb47f84f0ee1266a3a01f58c9bc4c76d76794a) ([#13148](https://github.com/yt-dlp/yt-dlp/issues/13148)) by [bashonly](https://github.com/bashonly)
|
||||
- **iprima**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/a7d9a5eb79ceeecb851389f3f2c88597871ca3f2) ([#12937](https://github.com/yt-dlp/yt-dlp/issues/12937)) by [baierjan](https://github.com/baierjan)
|
||||
- **jiosaavn**
|
||||
- artist: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/586b557b124f954d3f625360ebe970989022ad97) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- playlist, show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/317f4b8006c2c0f0f64f095b1485163ad97c9053) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6839276496d8814cf16f58b637e45663467928e6) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/abf58dcd6a09e14eec4ea82ae12f79a0337cb383) ([#13200](https://github.com/yt-dlp/yt-dlp/issues/13200)) by [Pawka](https://github.com/Pawka)
|
||||
- **nebula**: [Support `--mark-watched`](https://github.com/yt-dlp/yt-dlp/commit/20f288bdc2173c7cc58d709d25ca193c1f6001e7) ([#13120](https://github.com/yt-dlp/yt-dlp/issues/13120)) by [GeoffreyFrogeye](https://github.com/GeoffreyFrogeye)
|
||||
- **niconico**
|
||||
- [Fix error handling](https://github.com/yt-dlp/yt-dlp/commit/f569be4602c2a857087e495d5d7ed6060cd97abe) ([#13236](https://github.com/yt-dlp/yt-dlp/issues/13236)) by [bashonly](https://github.com/bashonly)
|
||||
- live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7a7b85c9014d96421e18aa7ea5f4c1bee5ceece0) ([#13045](https://github.com/yt-dlp/yt-dlp/issues/13045)) by [doe1080](https://github.com/doe1080)
|
||||
- **nytimesarticle**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/b26bc32579c00ef579d75a835807ccc87d20ee0a) ([#13104](https://github.com/yt-dlp/yt-dlp/issues/13104)) by [bashonly](https://github.com/bashonly)
|
||||
- **once**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f475e8b529d18efdad603ffda02a56e707fe0e2c) ([#13164](https://github.com/yt-dlp/yt-dlp/issues/13164)) by [bashonly](https://github.com/bashonly)
|
||||
- **picarto**: vod: [Support `/profile/` video URLs](https://github.com/yt-dlp/yt-dlp/commit/31e090cb787f3504ec25485adff9a2a51d056734) ([#13227](https://github.com/yt-dlp/yt-dlp/issues/13227)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- **playsuisse**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/d880e060803ae8ed5a047e578cca01e1f0e630ce) ([#12466](https://github.com/yt-dlp/yt-dlp/issues/12466)) by [v3DJG6GL](https://github.com/v3DJG6GL)
|
||||
- **sprout**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/cbcfe6378dde33a650e3852ab17ad4503b8e008d) ([#13149](https://github.com/yt-dlp/yt-dlp/issues/13149)) by [bashonly](https://github.com/bashonly)
|
||||
- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ea8498ed534642dd7e925961b97b934987142fd3) ([#12957](https://github.com/yt-dlp/yt-dlp/issues/12957)) by [diman8](https://github.com/diman8)
|
||||
- **twitch**: [Support `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/00b1bec55249cf2ad6271d36492c51b34b6459d1) ([#13202](https://github.com/yt-dlp/yt-dlp/issues/13202)) by [bashonly](https://github.com/bashonly)
|
||||
- **vimeo**: event: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/545c1a5b6f2fe88722b41aef0e7485bf3be3f3f9) ([#13216](https://github.com/yt-dlp/yt-dlp/issues/13216)) by [bashonly](https://github.com/bashonly)
|
||||
- **wat.tv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/f123cc83b3aea45053f5fa1d9141048b01fc2774) ([#13111](https://github.com/yt-dlp/yt-dlp/issues/13111)) by [bashonly](https://github.com/bashonly)
|
||||
- **weverse**: [Fix live extraction](https://github.com/yt-dlp/yt-dlp/commit/5328eda8820cc5f21dcf917684d23fbdca41831d) ([#13084](https://github.com/yt-dlp/yt-dlp/issues/13084)) by [bashonly](https://github.com/bashonly)
|
||||
- **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/83fabf352489d52843f67e6e9cc752db86d27e6e) ([#13245](https://github.com/yt-dlp/yt-dlp/issues/13245)) by [garret1317](https://github.com/garret1317)
|
||||
- **youtube**
|
||||
- [Add PO token support for subtitles](https://github.com/yt-dlp/yt-dlp/commit/32ed5f107c6c641958d1cd2752e130de4db55a13) ([#13234](https://github.com/yt-dlp/yt-dlp/issues/13234)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Add `web_embedded` client for age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/0feec6dc131f488428bf881519e7c69766fbb9ae) ([#13089](https://github.com/yt-dlp/yt-dlp/issues/13089)) by [bashonly](https://github.com/bashonly)
|
||||
- [Add a PO Token Provider Framework](https://github.com/yt-dlp/yt-dlp/commit/2685654a37141cca63eda3a92da0e2706e23ccfd) ([#12840](https://github.com/yt-dlp/yt-dlp/issues/12840)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Extract `media_type` for all videos](https://github.com/yt-dlp/yt-dlp/commit/ded11ebc9afba6ba33923375103e9be2d7c804e7) ([#13136](https://github.com/yt-dlp/yt-dlp/issues/13136)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix `--live-from-start` support for premieres](https://github.com/yt-dlp/yt-dlp/commit/8f303afb43395be360cafd7ad4ce2b6e2eedfb8a) ([#13079](https://github.com/yt-dlp/yt-dlp/issues/13079)) by [arabcoders](https://github.com/arabcoders)
|
||||
- [Fix geo-restriction error handling](https://github.com/yt-dlp/yt-dlp/commit/c7e575e31608c19c5b26c10a4229db89db5fc9a8) ([#13217](https://github.com/yt-dlp/yt-dlp/issues/13217)) by [yozel](https://github.com/yozel)
|
||||
|
||||
#### Misc. changes
|
||||
- **build**
|
||||
- [Bump PyInstaller to v6.13.0](https://github.com/yt-dlp/yt-dlp/commit/17cf9088d0d535e4a7feffbf02bd49cd9dae5ab9) ([#13082](https://github.com/yt-dlp/yt-dlp/issues/13082)) by [bashonly](https://github.com/bashonly)
|
||||
- [Bump run-on-arch-action to v3](https://github.com/yt-dlp/yt-dlp/commit/9064d2482d1fe722bbb4a49731fe0711c410d1c8) ([#13088](https://github.com/yt-dlp/yt-dlp/issues/13088)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**: Miscellaneous: [7977b32](https://github.com/yt-dlp/yt-dlp/commit/7977b329ed97b216e37bd402f4935f28c00eac9e) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2025.04.30
|
||||
|
||||
#### Important changes
|
||||
- **New option `--preset-alias`/`-t` has been added**
|
||||
This provides convenient predefined aliases for common use cases. Available presets include `mp4`, `mp3`, `mkv`, `aac`, and `sleep`. See [the README](https://github.com/yt-dlp/yt-dlp/blob/master/README.md#preset-aliases) for more details.
|
||||
|
||||
#### Core changes
|
||||
- [Add `--preset-alias` option](https://github.com/yt-dlp/yt-dlp/commit/88eb1e7a9a2720ac89d653c0d0e40292388823bb) ([#12839](https://github.com/yt-dlp/yt-dlp/issues/12839)) by [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
|
||||
- **utils**
|
||||
- `_yield_json_ld`: [Make function less fatal](https://github.com/yt-dlp/yt-dlp/commit/45f01de00e1bc076b7f676a669736326178647b1) ([#12855](https://github.com/yt-dlp/yt-dlp/issues/12855)) by [seproDev](https://github.com/seproDev)
|
||||
- `url_or_none`: [Support WebSocket URLs](https://github.com/yt-dlp/yt-dlp/commit/a473e592337edb8ca40cde52c1fcaee261c54df9) ([#12848](https://github.com/yt-dlp/yt-dlp/issues/12848)) by [doe1080](https://github.com/doe1080)
|
||||
|
||||
#### Extractor changes
|
||||
- **abematv**: [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/f5736bb35bde62348caebf7b188668655e316deb) ([#12859](https://github.com/yt-dlp/yt-dlp/issues/12859)) by [Kiritomo](https://github.com/Kiritomo)
|
||||
- **atresplayer**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/839d64325356310e6de6cd9cad28fb546619ca63) ([#11424](https://github.com/yt-dlp/yt-dlp/issues/11424)) by [meGAmeS1](https://github.com/meGAmeS1), [seproDev](https://github.com/seproDev)
|
||||
- **bpb**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/80736b9c90818adee933a155079b8535bc06819f) ([#13015](https://github.com/yt-dlp/yt-dlp/issues/13015)) by [bashonly](https://github.com/bashonly)
|
||||
- **cda**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/9032f981362ea0be90626fab51ec37934feded6d) ([#12975](https://github.com/yt-dlp/yt-dlp/issues/12975)) by [bashonly](https://github.com/bashonly)
|
||||
- **cdafolder**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cb271d445bc2d866c9a3404b1d8f59bcb77447df) ([#12919](https://github.com/yt-dlp/yt-dlp/issues/12919)) by [fireattack](https://github.com/fireattack), [Kicer86](https://github.com/Kicer86)
|
||||
- **crowdbunker**: [Make format extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/4ebf41309d04a6e196944f1c0f5f0154cff0055a) ([#12836](https://github.com/yt-dlp/yt-dlp/issues/12836)) by [seproDev](https://github.com/seproDev)
|
||||
- **dacast**: [Support tokenized URLs](https://github.com/yt-dlp/yt-dlp/commit/e7e3b7a55c456da4a5a812b4fefce4dce8e6a616) ([#12979](https://github.com/yt-dlp/yt-dlp/issues/12979)) by [bashonly](https://github.com/bashonly)
|
||||
- **dzen.ru**: [Rework extractors](https://github.com/yt-dlp/yt-dlp/commit/a3f2b54c2535d862de6efa9cfaa6ca9a2b2f7dd6) ([#12852](https://github.com/yt-dlp/yt-dlp/issues/12852)) by [seproDev](https://github.com/seproDev)
|
||||
- **generic**: [Fix MPD extraction for `file://` URLs](https://github.com/yt-dlp/yt-dlp/commit/34a061a295d156934417c67ee98070b94943006b) ([#12978](https://github.com/yt-dlp/yt-dlp/issues/12978)) by [bashonly](https://github.com/bashonly)
|
||||
- **getcourseru**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/741fd809bc4d301c19b53877692ae510334a6750) ([#12943](https://github.com/yt-dlp/yt-dlp/issues/12943)) by [troex](https://github.com/troex)
|
||||
- **ivoox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7faa18b83dcfc74a1a1e2034e6b0369c495ca645) ([#12768](https://github.com/yt-dlp/yt-dlp/issues/12768)) by [NeonMan](https://github.com/NeonMan), [seproDev](https://github.com/seproDev)
|
||||
- **kika**: [Add playlist extractor](https://github.com/yt-dlp/yt-dlp/commit/3c1c75ecb8ab352f422b59af46fff2be992e4115) ([#12832](https://github.com/yt-dlp/yt-dlp/issues/12832)) by [1100101](https://github.com/1100101)
|
||||
- **linkedin**
|
||||
- [Support feed URLs](https://github.com/yt-dlp/yt-dlp/commit/73a26f9ee68610e33c0b4407b77355f2ab7afd0e) ([#12927](https://github.com/yt-dlp/yt-dlp/issues/12927)) by [seproDev](https://github.com/seproDev)
|
||||
- events: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b37ff4de5baf4e4e70c6a0ec34e136a279ad20af) ([#12926](https://github.com/yt-dlp/yt-dlp/issues/12926)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
- **loco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f5a37ea40e20865b976ffeeff13eeae60292eb23) ([#12934](https://github.com/yt-dlp/yt-dlp/issues/12934)) by [seproDev](https://github.com/seproDev)
|
||||
- **lrtradio**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/74e90dd9b8f9c1a5c48a2515126654f4d398d687) ([#12801](https://github.com/yt-dlp/yt-dlp/issues/12801)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- **manyvids**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/77aa15e98f34c4ad425aabf39dd1ee37b48f772c) ([#10907](https://github.com/yt-dlp/yt-dlp/issues/10907)) by [pj47x](https://github.com/pj47x)
|
||||
- **mixcloud**: [Refactor extractor](https://github.com/yt-dlp/yt-dlp/commit/db6d1f145ad583e0220637726029f8f2fa6200a0) ([#12830](https://github.com/yt-dlp/yt-dlp/issues/12830)) by [seproDev](https://github.com/seproDev), [WouterGordts](https://github.com/WouterGordts)
|
||||
- **mlbtv**: [Fix device ID caching](https://github.com/yt-dlp/yt-dlp/commit/36da6360e130197df927ee93409519ce3f4075f5) ([#12980](https://github.com/yt-dlp/yt-dlp/issues/12980)) by [bashonly](https://github.com/bashonly)
|
||||
- **niconico**
|
||||
- [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/25cd7c1ecbb6cbf21dd3a6e59608e4af94715ecc) ([#13008](https://github.com/yt-dlp/yt-dlp/issues/13008)) by [doe1080](https://github.com/doe1080)
|
||||
- [Remove DMC formats support](https://github.com/yt-dlp/yt-dlp/commit/7d05aa99c65352feae1cd9a3ff8784b64bfe382a) ([#12916](https://github.com/yt-dlp/yt-dlp/issues/12916)) by [doe1080](https://github.com/doe1080)
|
||||
- live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1d45e30537bf83e069184a440703e4c43b2e0198) ([#12809](https://github.com/yt-dlp/yt-dlp/issues/12809)) by [Snack-X](https://github.com/Snack-X)
|
||||
- **panopto**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/9d26daa04ad5108257bc5e30f7f040c7f1fe7a5a) ([#12925](https://github.com/yt-dlp/yt-dlp/issues/12925)) by [seproDev](https://github.com/seproDev)
|
||||
- **parti**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/425017531fbc3369becb5a44013e26f26efabf45) ([#12769](https://github.com/yt-dlp/yt-dlp/issues/12769)) by [benfaerber](https://github.com/benfaerber)
|
||||
- **raiplay**: [Fix DRM detection](https://github.com/yt-dlp/yt-dlp/commit/dce82346245e35a46fda836ca2089805d2347935) ([#12971](https://github.com/yt-dlp/yt-dlp/issues/12971)) by [DTrombett](https://github.com/DTrombett)
|
||||
- **reddit**: [Support `--ignore-no-formats-error`](https://github.com/yt-dlp/yt-dlp/commit/28f04e8a5e383ff531db646190b4be45554610d6) ([#12993](https://github.com/yt-dlp/yt-dlp/issues/12993)) by [bashonly](https://github.com/bashonly)
|
||||
- **royalive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e1847535e28788414a25546a45bebcada2f34558) ([#12817](https://github.com/yt-dlp/yt-dlp/issues/12817)) by [CasperMcFadden95](https://github.com/CasperMcFadden95)
|
||||
- **rtve**: [Rework extractors](https://github.com/yt-dlp/yt-dlp/commit/f07ee91c71920ab1187a7ea756720e81aa406a9d) ([#10388](https://github.com/yt-dlp/yt-dlp/issues/10388)) by [meGAmeS1](https://github.com/meGAmeS1), [seproDev](https://github.com/seproDev)
|
||||
- **rumble**: [Improve format extraction](https://github.com/yt-dlp/yt-dlp/commit/58d0c83457b93b3c9a81eb6bc5a4c65f25e949df) ([#12838](https://github.com/yt-dlp/yt-dlp/issues/12838)) by [seproDev](https://github.com/seproDev)
|
||||
- **tokfmpodcast**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/91832111a12d87499294a0f430829b8c2254c339) ([#12842](https://github.com/yt-dlp/yt-dlp/issues/12842)) by [selfisekai](https://github.com/selfisekai)
|
||||
- **tv2dk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a3e91df30a45943f40759d2c1e0b6c2ca4b2a263) ([#12945](https://github.com/yt-dlp/yt-dlp/issues/12945)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
- **tvp**: vod: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/4e69a626cce51428bc1d66dc606a56d9498b03a5) ([#12923](https://github.com/yt-dlp/yt-dlp/issues/12923)) by [seproDev](https://github.com/seproDev)
|
||||
- **tvw**: tvchannels: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed8ad1b4d6b9d7a1426ff5192ff924f3371e4721) ([#12721](https://github.com/yt-dlp/yt-dlp/issues/12721)) by [fries1234](https://github.com/fries1234)
|
||||
- **twitcasting**: [Fix livestream extraction](https://github.com/yt-dlp/yt-dlp/commit/de271a06fd6d20d4f55597ff7f90e4d913de0a52) ([#12977](https://github.com/yt-dlp/yt-dlp/issues/12977)) by [bashonly](https://github.com/bashonly)
|
||||
- **twitch**: clips: [Fix uploader metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1ae6bff564a65af41e94f1a4727892471ecdd05a) ([#13022](https://github.com/yt-dlp/yt-dlp/issues/13022)) by [1271](https://github.com/1271)
|
||||
- **twitter**
|
||||
- [Fix extraction when logged-in](https://github.com/yt-dlp/yt-dlp/commit/1cf39ddf3d10b6512daa7dd139e5f6c0dc548bbc) ([#13024](https://github.com/yt-dlp/yt-dlp/issues/13024)) by [bashonly](https://github.com/bashonly)
|
||||
- spaces: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/70599e53b736bb75922b737e6e0d4f76e419bb20) ([#12911](https://github.com/yt-dlp/yt-dlp/issues/12911)) by [doe1080](https://github.com/doe1080)
|
||||
- **vimeo**: [Extract from mobile API](https://github.com/yt-dlp/yt-dlp/commit/22ac81a0692019ac833cf282e4ef99718e9ef3fa) ([#13034](https://github.com/yt-dlp/yt-dlp/issues/13034)) by [bashonly](https://github.com/bashonly)
|
||||
- **vk**
|
||||
- [Fix chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/5361a7c6e2933c919716e0cb1e3116c28c40419f) ([#12821](https://github.com/yt-dlp/yt-dlp/issues/12821)) by [seproDev](https://github.com/seproDev)
|
||||
- [Fix uploader extraction](https://github.com/yt-dlp/yt-dlp/commit/2381881fe58a723853350a6ab750a5efc9f10c85) ([#12985](https://github.com/yt-dlp/yt-dlp/issues/12985)) by [seproDev](https://github.com/seproDev)
|
||||
- **youtube**
|
||||
- [Add context to video request rate limit error](https://github.com/yt-dlp/yt-dlp/commit/26feac3dd142536ad08ad1ed731378cb88e63602) ([#12958](https://github.com/yt-dlp/yt-dlp/issues/12958)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Add extractor arg to skip "initial_data" request](https://github.com/yt-dlp/yt-dlp/commit/ed6c6d7eefbc78fa72e4e60ad6edaa3ee2acc715) ([#12865](https://github.com/yt-dlp/yt-dlp/issues/12865)) by [leeblackc](https://github.com/leeblackc)
|
||||
- [Add warning on video captcha challenge](https://github.com/yt-dlp/yt-dlp/commit/f484c51599a6cd01eb078ea7dc9bbba942967774) ([#12939](https://github.com/yt-dlp/yt-dlp/issues/12939)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Cache signature timestamps](https://github.com/yt-dlp/yt-dlp/commit/61c9a938b390b8334ee3a879fe2d93f714e30138) ([#13047](https://github.com/yt-dlp/yt-dlp/issues/13047)) by [bashonly](https://github.com/bashonly)
|
||||
- [Detect and warn when account cookies are rotated](https://github.com/yt-dlp/yt-dlp/commit/8cb08028f5be2acb9835ce1670b196b9b077052f) ([#13014](https://github.com/yt-dlp/yt-dlp/issues/13014)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Detect player JS variants for any locale](https://github.com/yt-dlp/yt-dlp/commit/c2d6659d1069f8cff97e1fd61d1c59e949e1e63d) ([#13003](https://github.com/yt-dlp/yt-dlp/issues/13003)) by [bashonly](https://github.com/bashonly)
|
||||
- [Do not strictly deprioritize `missing_pot` formats](https://github.com/yt-dlp/yt-dlp/commit/74fc2ae12c24eb6b4e02c6360c89bd05f3c8f740) ([#13061](https://github.com/yt-dlp/yt-dlp/issues/13061)) by [bashonly](https://github.com/bashonly)
|
||||
- [Improve warning for SABR-only/SSAP player responses](https://github.com/yt-dlp/yt-dlp/commit/fd8394bc50301ac5e930aa65aa71ab1b8372b8ab) ([#13049](https://github.com/yt-dlp/yt-dlp/issues/13049)) by [bashonly](https://github.com/bashonly)
|
||||
- tab: [Extract continuation from empty page](https://github.com/yt-dlp/yt-dlp/commit/72ba4879304c2082fecbb472e6cc05ee2d154a3b) ([#12938](https://github.com/yt-dlp/yt-dlp/issues/12938)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- **zdf**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/7be14109a6bd493a2e881da4f9e30adaf3e7e5d5) ([#12779](https://github.com/yt-dlp/yt-dlp/issues/12779)) by [bashonly](https://github.com/bashonly), [InvalidUsernameException](https://github.com/InvalidUsernameException)
|
||||
|
||||
#### Downloader changes
|
||||
- **niconicodmc**: [Remove downloader](https://github.com/yt-dlp/yt-dlp/commit/8d127b18f81131453eaba05d3bb810d9b73adb75) ([#12916](https://github.com/yt-dlp/yt-dlp/issues/12916)) by [doe1080](https://github.com/doe1080)
|
||||
|
||||
#### Networking changes
|
||||
- [Add PATCH request shortcut](https://github.com/yt-dlp/yt-dlp/commit/ceab4d5ed63a1f135a1816fe967c9d9a1ec7e6e8) ([#12884](https://github.com/yt-dlp/yt-dlp/issues/12884)) by [doe1080](https://github.com/doe1080)
|
||||
|
||||
#### Misc. changes
|
||||
- **ci**: [Add file mode test to code check](https://github.com/yt-dlp/yt-dlp/commit/3690e91265d1d0bbeffaf6a9b8cc9baded1367bd) ([#13036](https://github.com/yt-dlp/yt-dlp/issues/13036)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **cleanup**: Miscellaneous: [505b400](https://github.com/yt-dlp/yt-dlp/commit/505b400795af557bdcfd9d4fa7e9133b26ef431c) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
|
||||
### 2025.03.31
|
||||
|
||||
#### Core changes
|
||||
|
62
README.md
62
README.md
@ -44,6 +44,7 @@
|
||||
* [Post-processing Options](#post-processing-options)
|
||||
* [SponsorBlock Options](#sponsorblock-options)
|
||||
* [Extractor Options](#extractor-options)
|
||||
* [Preset Aliases](#preset-aliases)
|
||||
* [CONFIGURATION](#configuration)
|
||||
* [Configuration file encoding](#configuration-file-encoding)
|
||||
* [Authentication with netrc](#authentication-with-netrc)
|
||||
@ -348,8 +349,8 @@ ## General Options:
|
||||
--no-flat-playlist Fully extract the videos of a playlist
|
||||
(default)
|
||||
--live-from-start Download livestreams from the start.
|
||||
Currently only supported for YouTube
|
||||
(Experimental)
|
||||
Currently experimental and only supported
|
||||
for YouTube and Twitch
|
||||
--no-live-from-start Download livestreams from the current time
|
||||
(default)
|
||||
--wait-for-video MIN[-MAX] Wait for scheduled streams to become
|
||||
@ -375,17 +376,23 @@ ## General Options:
|
||||
an alias starts with a dash "-", it is
|
||||
prefixed with "--". Arguments are parsed
|
||||
according to the Python string formatting
|
||||
mini-language. E.g. --alias get-audio,-X
|
||||
"-S=aext:{0},abr -x --audio-format {0}"
|
||||
creates options "--get-audio" and "-X" that
|
||||
takes an argument (ARG0) and expands to
|
||||
"-S=aext:ARG0,abr -x --audio-format ARG0".
|
||||
All defined aliases are listed in the --help
|
||||
mini-language. E.g. --alias get-audio,-X "-S
|
||||
aext:{0},abr -x --audio-format {0}" creates
|
||||
options "--get-audio" and "-X" that take an
|
||||
argument (ARG0) and expands to "-S
|
||||
aext:ARG0,abr -x --audio-format ARG0". All
|
||||
defined aliases are listed in the --help
|
||||
output. Alias options can trigger more
|
||||
aliases; so be careful to avoid defining
|
||||
recursive options. As a safety measure, each
|
||||
alias may be triggered a maximum of 100
|
||||
times. This option can be used multiple times
|
||||
-t, --preset-alias PRESET Applies a predefined set of options. E.g.
|
||||
--preset-alias mp3. The following presets
|
||||
are available: mp3, aac, mp4, mkv, sleep.
|
||||
See the "Preset Aliases" section at the end
|
||||
for more info. This option can be used
|
||||
multiple times
|
||||
|
||||
## Network Options:
|
||||
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. To
|
||||
@ -1098,6 +1105,27 @@ ## Extractor Options:
|
||||
can use this option multiple times to give
|
||||
arguments for different extractors
|
||||
|
||||
## Preset Aliases:
|
||||
Predefined aliases for convenience and ease of use. Note that future
|
||||
versions of yt-dlp may add or adjust presets, but the existing preset
|
||||
names will not be changed or removed
|
||||
|
||||
-t mp3 -f 'ba[acodec^=mp3]/ba/b' -x --audio-format
|
||||
mp3
|
||||
|
||||
-t aac -f
|
||||
'ba[acodec^=aac]/ba[acodec^=mp4a.40.]/ba/b'
|
||||
-x --audio-format aac
|
||||
|
||||
-t mp4 --merge-output-format mp4 --remux-video mp4
|
||||
-S vcodec:h264,lang,quality,res,fps,hdr:12,a
|
||||
codec:aac
|
||||
|
||||
-t mkv --merge-output-format mkv --remux-video mkv
|
||||
|
||||
-t sleep --sleep-subtitles 5 --sleep-requests 0.75
|
||||
--sleep-interval 10 --max-sleep-interval 20
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations:
|
||||
@ -1769,9 +1797,10 @@ # EXTRACTOR ARGUMENTS
|
||||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for the list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
||||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||
@ -1781,8 +1810,12 @@ #### youtube
|
||||
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
||||
* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
|
||||
* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
|
||||
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma-separated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player+XXX,web_safari.gvs+YYY`. Context can be either `gvs` (Google Video Server URLs) or `player` (Innertube player request)
|
||||
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
|
||||
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma-separated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player+XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
|
||||
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
|
||||
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try to fetch a PO Token regardless of whether the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
|
||||
|
||||
#### youtubepot-webpo
|
||||
* `bind_to_visitor_id`: Whether to use the Visitor ID instead of Visitor Data for caching WebPO tokens. Either `true` (default) or `false`
|
||||
|
||||
#### youtubetab (YouTube playlists, channels, feeds, etc.)
|
||||
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
|
||||
@ -1799,9 +1832,6 @@ #### generic
|
||||
#### vikichannel
|
||||
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
|
||||
|
||||
#### niconico
|
||||
* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.**
|
||||
|
||||
#### youtubewebarchive
|
||||
* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`
|
||||
|
||||
@ -2153,7 +2183,7 @@ ### New features
|
||||
|
||||
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
|
||||
|
||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
|
||||
|
||||
* **YouTube improvements**:
|
||||
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
|
||||
|
@ -245,5 +245,14 @@
|
||||
"when": "76ac023ff02f06e8c003d104f02a03deeddebdcd",
|
||||
"short": "[ie/youtube:tab] Improve shorts title extraction (#11997)",
|
||||
"authors": ["bashonly", "d3d9"]
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "88eb1e7a9a2720ac89d653c0d0e40292388823bb",
|
||||
"short": "[priority] **New option `--preset-alias`/`-t` has been added**\nThis provides convenient predefined aliases for common use cases. Available presets include `mp4`, `mp3`, `mkv`, `aac`, and `sleep`. See [the README](https://github.com/yt-dlp/yt-dlp/blob/master/README.md#preset-aliases) for more details."
|
||||
},
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "d596824c2f8428362c072518856065070616e348"
|
||||
}
|
||||
]
|
||||
|
@ -82,7 +82,7 @@ test = [
|
||||
"pytest-rerunfailures~=14.0",
|
||||
]
|
||||
pyinstaller = [
|
||||
"pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
|
||||
"pyinstaller>=6.13.0", # Windows temp cleanup fixed in 6.13.0
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
@ -246,7 +246,6 @@ # Supported sites
|
||||
- **Canalplus**: mycanal.fr and piwiplus.fr
|
||||
- **Canalsurmas**
|
||||
- **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:player:playlist**
|
||||
@ -394,6 +393,8 @@ # Supported sites
|
||||
- **dvtv**: http://video.aktualne.cz/
|
||||
- **dw**: (**Currently broken**)
|
||||
- **dw:article**: (**Currently broken**)
|
||||
- **dzen.ru**: Дзен (dzen) formerly Яндекс.Дзен (Yandex Zen)
|
||||
- **dzen.ru:channel**
|
||||
- **EaglePlatform**
|
||||
- **EbaumsWorld**
|
||||
- **Ebay**
|
||||
@ -634,6 +635,7 @@ # Supported sites
|
||||
- **ivi**: ivi.ru
|
||||
- **ivi:compilation**: ivi.ru compilations
|
||||
- **ivideon**: Ivideon TV
|
||||
- **Ivoox**
|
||||
- **IVXPlayer**
|
||||
- **iwara**: [*iwara*](## "netrc machine")
|
||||
- **iwara:playlist**: [*iwara*](## "netrc machine")
|
||||
@ -646,7 +648,10 @@ # Supported sites
|
||||
- **jiocinema**: [*jiocinema*](## "netrc machine")
|
||||
- **jiocinema:series**: [*jiocinema*](## "netrc machine")
|
||||
- **jiosaavn:album**
|
||||
- **jiosaavn:artist**
|
||||
- **jiosaavn:playlist**
|
||||
- **jiosaavn:show**
|
||||
- **jiosaavn:show:playlist**
|
||||
- **jiosaavn:song**
|
||||
- **Joj**
|
||||
- **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
|
||||
@ -671,6 +676,7 @@ # Supported sites
|
||||
- **Kicker**
|
||||
- **KickStarter**
|
||||
- **Kika**: KiKA.de
|
||||
- **KikaPlaylist**
|
||||
- **kinja:embed**
|
||||
- **KinoPoisk**
|
||||
- **Kommunetv**
|
||||
@ -723,6 +729,7 @@ # Supported sites
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LinkedIn**: [*linkedin*](## "netrc machine")
|
||||
- **linkedin:events**: [*linkedin*](## "netrc machine")
|
||||
- **linkedin:learning**: [*linkedin*](## "netrc machine")
|
||||
- **linkedin:learning:course**: [*linkedin*](## "netrc machine")
|
||||
- **Liputan6**
|
||||
@ -738,6 +745,7 @@ # Supported sites
|
||||
- **loom**
|
||||
- **loom:folder**
|
||||
- **LoveHomePorn**
|
||||
- **LRTRadio**
|
||||
- **LRTStream**
|
||||
- **LRTVOD**
|
||||
- **LSMLREmbed**
|
||||
@ -759,7 +767,7 @@ # Supported sites
|
||||
- **ManotoTV**: Manoto TV (Episode)
|
||||
- **ManotoTVLive**: Manoto TV (Live)
|
||||
- **ManotoTVShow**: Manoto TV (Show)
|
||||
- **ManyVids**: (**Currently broken**)
|
||||
- **ManyVids**
|
||||
- **MaoriTV**
|
||||
- **Markiza**: (**Currently broken**)
|
||||
- **MarkizaPage**: (**Currently broken**)
|
||||
@ -946,7 +954,7 @@ # Supported sites
|
||||
- **nickelodeonru**
|
||||
- **niconico**: [*niconico*](## "netrc machine") ニコニコ動画
|
||||
- **niconico:history**: NicoNico user history or likes. Requires cookies.
|
||||
- **niconico:live**: ニコニコ生放送
|
||||
- **niconico:live**: [*niconico*](## "netrc machine") ニコニコ生放送
|
||||
- **niconico:playlist**
|
||||
- **niconico:series**
|
||||
- **niconico:tag**: NicoNico video tag URLs
|
||||
@ -1053,6 +1061,8 @@ # Supported sites
|
||||
- **Parler**: Posts on parler.com
|
||||
- **parliamentlive.tv**: UK parliament videos
|
||||
- **Parlview**: (**Currently broken**)
|
||||
- **parti:livestream**
|
||||
- **parti:video**
|
||||
- **patreon**
|
||||
- **patreon:campaign**
|
||||
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
|
||||
@ -1073,8 +1083,8 @@ # Supported sites
|
||||
- **Photobucket**
|
||||
- **PiaLive**
|
||||
- **Piapro**: [*piapro*](## "netrc machine")
|
||||
- **Picarto**
|
||||
- **PicartoVod**
|
||||
- **picarto**
|
||||
- **picarto:vod**
|
||||
- **Piksel**
|
||||
- **Pinkbike**
|
||||
- **Pinterest**
|
||||
@ -1227,6 +1237,7 @@ # Supported sites
|
||||
- **RoosterTeeth**: [*roosterteeth*](## "netrc machine")
|
||||
- **RoosterTeethSeries**: [*roosterteeth*](## "netrc machine")
|
||||
- **RottenTomatoes**
|
||||
- **RoyaLive**
|
||||
- **Rozhlas**
|
||||
- **RozhlasVltava**
|
||||
- **RTBF**: [*rtbf*](## "netrc machine") (**Currently broken**)
|
||||
@ -1247,9 +1258,8 @@ # Supported sites
|
||||
- **RTVCKaltura**
|
||||
- **RTVCPlay**
|
||||
- **RTVCPlayEmbed**
|
||||
- **rtve.es:alacarta**: RTVE a la carta
|
||||
- **rtve.es:alacarta**: RTVE a la carta and Play
|
||||
- **rtve.es:audio**: RTVE audio
|
||||
- **rtve.es:infantil**: RTVE infantil
|
||||
- **rtve.es:live**: RTVE.es live streams
|
||||
- **rtve.es:television**
|
||||
- **rtvslo.si**
|
||||
@ -1382,7 +1392,6 @@ # Supported sites
|
||||
- **Spreaker**
|
||||
- **SpreakerShow**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
- **SproutVideo**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
|
||||
- **SRGSSR**
|
||||
@ -1562,7 +1571,8 @@ # Supported sites
|
||||
- **tvp:vod:series**
|
||||
- **TVPlayer**
|
||||
- **TVPlayHome**
|
||||
- **Tvw**
|
||||
- **tvw**
|
||||
- **tvw:tvchannels**
|
||||
- **Tweakers**
|
||||
- **TwitCasting**
|
||||
- **TwitCastingLive**
|
||||
@ -1647,6 +1657,7 @@ # Supported sites
|
||||
- **vimeo**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:album**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:channel**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:event**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:group**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes
|
||||
- **vimeo:ondemand**: [*vimeo*](## "netrc machine")
|
||||
@ -1821,14 +1832,12 @@ # Supported sites
|
||||
- **ZattooLive**: [*zattoo*](## "netrc machine")
|
||||
- **ZattooMovies**: [*zattoo*](## "netrc machine")
|
||||
- **ZattooRecordings**: [*zattoo*](## "netrc machine")
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zdf**
|
||||
- **zdf:channel**
|
||||
- **Zee5**: [*zee5*](## "netrc machine")
|
||||
- **zee5:series**
|
||||
- **ZeeNews**: (**Currently broken**)
|
||||
- **ZenPorn**
|
||||
- **ZenYandex**
|
||||
- **ZenYandexChannel**
|
||||
- **ZetlandDKArticle**
|
||||
- **Zhihu**
|
||||
- **zingmp3**: zingmp3.vn
|
||||
|
@ -136,7 +136,7 @@ def _iter_differences(got, expected, field):
|
||||
return
|
||||
|
||||
if op == 'startswith':
|
||||
if not val.startswith(got):
|
||||
if not got.startswith(val):
|
||||
yield field, f'should start with {val!r}, got {got!r}'
|
||||
return
|
||||
|
||||
|
@ -1435,6 +1435,27 @@ def test_load_plugins_compat(self):
|
||||
FakeYDL().close()
|
||||
assert all_plugins_loaded.value
|
||||
|
||||
def test_close_hooks(self):
    """Closing a YoutubeDL instance must invoke every registered close hook."""
    fired = set()

    def close_hook():
        fired.add('first')

    def close_hook_two():
        fired.add('second')

    ydl = FakeYDL()
    for hook in (close_hook, close_hook_two):
        ydl.add_close_hook(hook)

    ydl.close()
    self.assertTrue('first' in fired, 'Close hook was not called')
    self.assertTrue('second' in fired, 'Close hook two was not called')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -58,6 +58,14 @@ def test_get_desktop_environment(self):
|
||||
({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
|
||||
({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
|
||||
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME),
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME),
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
|
||||
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'my_custom_de', 'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
|
||||
|
||||
({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
|
||||
({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3),
|
||||
({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
|
||||
|
@ -478,6 +478,14 @@ def test_extract_function_with_global_stack(self):
|
||||
func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000})
|
||||
self.assertEqual(func([1]), 1111)
|
||||
|
||||
def test_increment_decrement(self):
    """Check prefix/postfix ``++``/``--``, including operator text inside string literals."""
    # (JS source, expected return value of f()) pairs, run in this order
    cases = (
        ('function f() { var x = 1; return ++x; }', 2),
        ('function f() { var x = 1; return x++; }', 1),
        ('function f() { var x = 1; x--; return x }', 0),
        ('function f() { var y; var x = 1; x++, --x, x--, x--, y="z", "abc", x++; return --x }', -1),
        # `--` inside a string literal must be left alone
        ('function f() { var a = "test--"; return a; }', 'test--'),
        ('function f() { var b = 1; var a = "b--"; return a; }', 'b--'),
    )
    for code, expected in cases:
        self._test(code, expected)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -39,6 +39,7 @@
|
||||
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
|
||||
from yt_dlp.networking import (
|
||||
HEADRequest,
|
||||
PATCHRequest,
|
||||
PUTRequest,
|
||||
Request,
|
||||
RequestDirector,
|
||||
@ -1856,6 +1857,7 @@ def test_method(self):
|
||||
|
||||
def test_request_helpers(self):
    """Each request helper class must preset the HTTP method it is named after."""
    helpers = (
        (HEADRequest, 'HEAD'),
        (PATCHRequest, 'PATCH'),
        (PUTRequest, 'PUT'),
    )
    for request_cls, expected_method in helpers:
        assert request_cls('http://example.com').method == expected_method
|
||||
|
||||
def test_headers(self):
|
||||
|
@ -20,7 +20,6 @@
|
||||
add_accept_encoding_header,
|
||||
get_redirect_method,
|
||||
make_socks_proxy_opts,
|
||||
select_proxy,
|
||||
ssl_load_certs,
|
||||
)
|
||||
from yt_dlp.networking.exceptions import (
|
||||
@ -28,7 +27,7 @@
|
||||
IncompleteRead,
|
||||
)
|
||||
from yt_dlp.socks import ProxyType
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict, select_proxy
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
71
test/test_pot/conftest.py
Normal file
71
test/test_pot/conftest.py
Normal file
@ -0,0 +1,71 @@
|
||||
import collections
|
||||
|
||||
import pytest
|
||||
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
from yt_dlp.extractor.youtube.pot._provider import IEContentProviderLogger
|
||||
from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest, PoTokenContext
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
|
||||
|
||||
class MockLogger(IEContentProviderLogger):
    """Logger test double that stores messages per level instead of emitting them."""

    log_level = IEContentProviderLogger.LogLevel.TRACE

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # level name -> messages, in the order they were logged
        self.messages = collections.defaultdict(list)

    def _remember(self, level, message):
        # Single place where a message is filed under its level
        self.messages[level].append(message)

    def trace(self, message: str):
        self._remember('trace', message)

    def debug(self, message: str):
        self._remember('debug', message)

    def info(self, message: str):
        self._remember('info', message)

    def warning(self, message: str, *, once=False):
        # `once` is accepted but not acted upon: every warning is recorded
        self._remember('warning', message)

    def error(self, message: str):
        self._remember('error', message)
|
||||
|
||||
|
||||
@pytest.fixture
def ie() -> InfoExtractor:
    """Provide the 'Youtube' info extractor obtained from a default YoutubeDL instance."""
    return YoutubeDL().get_info_extractor('Youtube')
|
||||
|
||||
|
||||
@pytest.fixture
def logger() -> MockLogger:
    """Provide a fresh MockLogger so tests can inspect the messages it recorded."""
    return MockLogger()
|
||||
|
||||
|
||||
@pytest.fixture()
def pot_request() -> PoTokenRequest:
    """Provide a baseline PoTokenRequest: GVS context, WEB client, not authenticated.

    Tests adjust individual attributes of the returned request to build their scenario.
    """
    return PoTokenRequest(
        # Innertube / session description
        context=PoTokenContext.GVS,
        innertube_context={'client': {'clientName': 'WEB'}},
        innertube_host='youtube.com',
        session_index=None,
        player_url=None,
        is_authenticated=False,
        video_webpage=None,

        # Placeholder identifiers
        visitor_data='example-visitor-data',
        data_sync_id='example-data-sync-id',
        video_id='example-video-id',

        # Networking settings
        request_cookiejar=YoutubeDLCookieJar(),
        request_proxy=None,
        request_headers=HTTPHeaderDict(),
        request_timeout=None,
        request_source_address=None,
        request_verify_tls=True,

        bypass_cache=False,
    )
|
117
test/test_pot/test_pot_builtin_memorycache.py
Normal file
117
test/test_pot/test_pot_builtin_memorycache.py
Normal file
@ -0,0 +1,117 @@
|
||||
import threading
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
import pytest
|
||||
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, BuiltinIEContentProvider
|
||||
from yt_dlp.utils import bug_reports_message
|
||||
from yt_dlp.extractor.youtube.pot._builtin.memory_cache import MemoryLRUPCP, memorylru_preference, initialize_global_cache
|
||||
from yt_dlp.version import __version__
|
||||
from yt_dlp.extractor.youtube.pot._registry import _pot_cache_providers, _pot_memory_cache
|
||||
|
||||
|
||||
class TestMemoryLRUPCS:
    """Tests for the built-in in-memory PO Token cache provider (MemoryLRUPCP)."""

    def test_base_type(self):
        """MemoryLRUPCP must derive from both content provider base classes."""
        assert issubclass(MemoryLRUPCP, IEContentProvider)
        assert issubclass(MemoryLRUPCP, BuiltinIEContentProvider)

    @pytest.fixture
    def pcp(self, ie, logger) -> MemoryLRUPCP:
        """Provider backed by its own OrderedDict/Lock rather than the global cache."""
        return MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), max_size))

    def test_is_registered(self):
        """The provider is registered under the 'MemoryLRU' key."""
        assert _pot_cache_providers.value.get('MemoryLRU') == MemoryLRUPCP

    def test_initialization(self, pcp):
        """Provider metadata matches the built-in defaults and the provider is available."""
        assert pcp.PROVIDER_NAME == 'memory'
        assert pcp.PROVIDER_VERSION == __version__
        assert pcp.BUG_REPORT_MESSAGE == bug_reports_message(before='')
        assert pcp.is_available()

    def test_store_and_get(self, pcp):
        """A stored, unexpired entry can be read back."""
        pcp.store('key1', 'value1', int(time.time()) + 60)
        assert pcp.get('key1') == 'value1'
        assert len(pcp.cache) == 1

    def test_store_ignore_expired(self, pcp):
        """Storing an entry whose expiry is already in the past leaves the cache empty."""
        pcp.store('key1', 'value1', int(time.time()) - 1)
        assert len(pcp.cache) == 0
        assert pcp.get('key1') is None
        assert len(pcp.cache) == 0

    def test_store_override_existing_key(self, ie, logger):
        """Re-storing an existing key replaces its value and the key survives a later insert."""
        MAX_SIZE = 2
        # The lambda ignores the requested max_size so the cache is capped at MAX_SIZE
        pcp = MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), MAX_SIZE))
        pcp.store('key1', 'value1', int(time.time()) + 60)
        pcp.store('key2', 'value2', int(time.time()) + 60)
        assert len(pcp.cache) == 2
        pcp.store('key1', 'value2', int(time.time()) + 60)
        # Ensure that the override key gets added to the end of the cache instead of in the same position
        pcp.store('key3', 'value3', int(time.time()) + 60)
        assert pcp.get('key1') == 'value2'

    def test_store_ignore_expired_existing_key(self, pcp):
        """An already-expired store for an existing key must not replace the valid entry."""
        pcp.store('key1', 'value2', int(time.time()) + 60)
        pcp.store('key1', 'value1', int(time.time()) - 1)
        assert len(pcp.cache) == 1
        assert pcp.get('key1') == 'value2'
        assert len(pcp.cache) == 1

    def test_get_key_expired(self, pcp):
        """Reading an entry that has expired returns None and removes it from the cache."""
        pcp.store('key1', 'value1', int(time.time()) + 60)
        assert pcp.get('key1') == 'value1'
        assert len(pcp.cache) == 1
        # Expire the entry by writing to the backing cache directly
        pcp.cache['key1'] = ('value1', int(time.time()) - 1)
        assert pcp.get('key1') is None
        assert len(pcp.cache) == 0

    def test_lru_eviction(self, ie, logger):
        """With room for two entries, the least recently used one is dropped on insert."""
        MAX_SIZE = 2
        provider = MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), MAX_SIZE))
        provider.store('key1', 'value1', int(time.time()) + 5)
        provider.store('key2', 'value2', int(time.time()) + 5)
        assert len(provider.cache) == 2

        # Reading key1 here is what makes key2 the eviction candidate below
        assert provider.get('key1') == 'value1'

        provider.store('key3', 'value3', int(time.time()) + 5)
        assert len(provider.cache) == 2

        assert provider.get('key2') is None

        provider.store('key4', 'value4', int(time.time()) + 5)
        assert len(provider.cache) == 2

        assert provider.get('key1') is None
        assert provider.get('key3') == 'value3'
        assert provider.get('key4') == 'value4'

    def test_delete(self, pcp):
        """Deleting a key removes it from the cache."""
        pcp.store('key1', 'value1', int(time.time()) + 5)
        assert len(pcp.cache) == 1
        assert pcp.get('key1') == 'value1'
        pcp.delete('key1')
        assert len(pcp.cache) == 0
        assert pcp.get('key1') is None

    def test_use_global_cache_default(self, ie, logger):
        """Without initialize_cache, every provider shares the global cache, lock and max_size."""
        pcp = MemoryLRUPCP(ie, logger, {})
        assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25
        assert pcp.cache is _pot_memory_cache.value['cache']
        assert pcp.lock is _pot_memory_cache.value['lock']

        pcp2 = MemoryLRUPCP(ie, logger, {})
        assert pcp.max_size == pcp2.max_size == _pot_memory_cache.value['max_size'] == 25
        assert pcp.cache is pcp2.cache is _pot_memory_cache.value['cache']
        assert pcp.lock is pcp2.lock is _pot_memory_cache.value['lock']

    def test_fail_max_size_change_global(self, ie, logger):
        """Re-initializing the global cache with a different max_size raises ValueError."""
        pcp = MemoryLRUPCP(ie, logger, {})
        assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25
        with pytest.raises(ValueError, match='Cannot change max_size of initialized global memory cache'):
            initialize_global_cache(50)

        # The failed call must leave the existing size untouched
        assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25

    def test_memory_lru_preference(self, pcp, ie, pot_request):
        """The provider's preference function returns 10000 for the sample request."""
        assert memorylru_preference(pcp, pot_request) == 10000
|
47
test/test_pot/test_pot_builtin_utils.py
Normal file
47
test/test_pot/test_pot_builtin_utils.py
Normal file
@ -0,0 +1,47 @@
|
||||
import pytest
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
PoTokenContext,
|
||||
|
||||
)
|
||||
|
||||
from yt_dlp.extractor.youtube.pot.utils import get_webpo_content_binding, ContentBindingType
|
||||
|
||||
|
||||
class TestGetWebPoContentBinding:
    """Tests for get_webpo_content_binding, which returns a (binding value, ContentBindingType) pair."""

    # The first six clients share one expectation table; WEB_REMIX, ANDROID and IOS are listed separately
    @pytest.mark.parametrize('client_name, context, is_authenticated, expected', [
        *[(client, context, is_authenticated, expected) for client in [
            'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
          for context, is_authenticated, expected in [
            (PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
            (PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
            (PoTokenContext.SUBS, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
            (PoTokenContext.GVS, True, ('example-data-sync-id', ContentBindingType.DATASYNC_ID)),
        ]],
        ('WEB_REMIX', PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
        ('WEB_REMIX', PoTokenContext.PLAYER, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
        ('ANDROID', PoTokenContext.GVS, False, (None, None)),
        ('IOS', PoTokenContext.GVS, False, (None, None)),
    ])
    def test_get_webpo_content_binding(self, pot_request, client_name, context, is_authenticated, expected):
        """The binding returned for each client/context/auth combination matches the table."""
        pot_request.innertube_context['client']['clientName'] = client_name
        pot_request.context = context
        pot_request.is_authenticated = is_authenticated
        assert get_webpo_content_binding(pot_request) == expected

    def test_extract_visitor_id(self, pot_request):
        """With bind_to_visitor_id=True the visitor ID is extracted from the visitor data."""
        pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D'
        assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('123abcXYZ_-', ContentBindingType.VISITOR_ID)

    def test_invalid_visitor_id(self, pot_request):
        """An unusable extracted visitor ID falls back to binding on the raw visitor data."""
        # visitor id not alphanumeric (i.e. protobuf extraction failed)
        pot_request.visitor_data = 'CggxMjM0NTY3OCiA4s-qBg%3D%3D'
        assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA)

    def test_no_visitor_id(self, pot_request):
        """This visitor data yields no visitor ID, so the raw visitor data is used as the binding."""
        pot_request.visitor_data = 'KIDiz6oG'
        assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA)

    def test_invalid_base64(self, pot_request):
        """Visitor data that is not valid base64 is used unchanged as the binding."""
        pot_request.visitor_data = 'invalid-base64'
        assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA)
|
92
test/test_pot/test_pot_builtin_webpospec.py
Normal file
92
test/test_pot/test_pot_builtin_webpospec.py
Normal file
@ -0,0 +1,92 @@
|
||||
import pytest
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, BuiltinIEContentProvider
|
||||
from yt_dlp.extractor.youtube.pot.cache import CacheProviderWritePolicy
|
||||
from yt_dlp.utils import bug_reports_message
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
PoTokenRequest,
|
||||
PoTokenContext,
|
||||
|
||||
)
|
||||
from yt_dlp.version import __version__
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._builtin.webpo_cachespec import WebPoPCSP
|
||||
from yt_dlp.extractor.youtube.pot._registry import _pot_pcs_providers
|
||||
|
||||
|
||||
@pytest.fixture()
def pot_request(pot_request) -> PoTokenRequest:
    """Wrap the incoming `pot_request` fixture, replacing its visitor data with a decodable value."""
    pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D'  # visitor_id=123abcXYZ_-
    return pot_request
|
||||
|
||||
|
||||
class TestWebPoPCSP:
|
||||
def test_base_type(self):
    """WebPoPCSP must derive from both content provider base classes."""
    for base in (IEContentProvider, BuiltinIEContentProvider):
        assert issubclass(WebPoPCSP, base)
|
||||
|
||||
def test_init(self, ie, logger):
    """A freshly constructed WebPoPCSP exposes the expected provider metadata and is available."""
    pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
    assert pcs.PROVIDER_NAME == 'webpo'
    assert pcs.PROVIDER_VERSION == __version__
    assert pcs.BUG_REPORT_MESSAGE == bug_reports_message(before='')
    assert pcs.is_available()
|
||||
|
||||
def test_is_registered(self):
    """The provider is registered under the 'WebPo' key."""
    assert _pot_pcs_providers.value.get('WebPo') == WebPoPCSP
|
||||
|
||||
@pytest.mark.parametrize('client_name, context, is_authenticated', [
    ('ANDROID', PoTokenContext.GVS, False),
    ('IOS', PoTokenContext.GVS, False),
    ('IOS', PoTokenContext.PLAYER, False),
])
def test_not_supports(self, ie, logger, pot_request, client_name, context, is_authenticated):
    """generate_cache_spec returns None for each of the listed client/context combinations."""
    pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
    pot_request.innertube_context['client']['clientName'] = client_name
    pot_request.context = context
    pot_request.is_authenticated = is_authenticated
    assert pcs.generate_cache_spec(pot_request) is None
|
||||
|
||||
@pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [
|
||||
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
||||
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
||||
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
|
||||
(PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}),
|
||||
]],
|
||||
('WEB_REMIX', PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||
('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}),
|
||||
('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}),
|
||||
|
||||
])
|
||||
def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected):
|
||||
pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
|
||||
pot_request.innertube_context['client']['clientName'] = client_name
|
||||
pot_request.context = context
|
||||
pot_request.is_authenticated = is_authenticated
|
||||
pot_request.innertube_context['client']['remoteHost'] = remote_host
|
||||
pot_request.request_source_address = source_address
|
||||
pot_request.request_proxy = request_proxy
|
||||
pot_request.video_id = '123abcXYZ_-' # same as visitor id to test type
|
||||
|
||||
assert pcs.generate_cache_spec(pot_request).key_bindings == expected
|
||||
|
||||
def test_no_bind_visitor_id(self, ie, logger, pot_request):
|
||||
# Should not bind to visitor id if setting is set to False
|
||||
pcs = WebPoPCSP(ie=ie, logger=logger, settings={'bind_to_visitor_id': ['false']})
|
||||
pot_request.innertube_context['client']['clientName'] = 'WEB'
|
||||
pot_request.context = PoTokenContext.GVS
|
||||
pot_request.is_authenticated = False
|
||||
assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D', 'cbt': 'visitor_data'}
|
||||
|
||||
def test_default_ttl(self, ie, logger, pot_request):
|
||||
pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
|
||||
assert pcs.generate_cache_spec(pot_request).default_ttl == 6 * 60 * 60 # should default to 6 hours
|
||||
|
||||
def test_write_policy(self, ie, logger, pot_request):
|
||||
pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
|
||||
pot_request.context = PoTokenContext.GVS
|
||||
assert pcs.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_ALL
|
||||
pot_request.context = PoTokenContext.PLAYER
|
||||
assert pcs.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_FIRST
|
1529
test/test_pot/test_pot_director.py
Normal file
1529
test/test_pot/test_pot_director.py
Normal file
File diff suppressed because it is too large
Load Diff
629
test/test_pot/test_pot_framework.py
Normal file
629
test/test_pot/test_pot_framework.py
Normal file
@ -0,0 +1,629 @@
|
||||
import pytest
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._provider import IEContentProvider
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
PoTokenRequest,
|
||||
PoTokenContext,
|
||||
ExternalRequestFeature,
|
||||
|
||||
)
|
||||
|
||||
from yt_dlp.extractor.youtube.pot.cache import (
|
||||
PoTokenCacheProvider,
|
||||
PoTokenCacheSpec,
|
||||
PoTokenCacheSpecProvider,
|
||||
CacheProviderWritePolicy,
|
||||
)
|
||||
|
||||
import yt_dlp.extractor.youtube.pot.cache as cache
|
||||
|
||||
from yt_dlp.networking import Request
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
PoTokenResponse,
|
||||
PoTokenProvider,
|
||||
PoTokenProviderRejectedRequest,
|
||||
provider_bug_report_message,
|
||||
register_provider,
|
||||
register_preference,
|
||||
)
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences, _pot_pcs_providers, _pot_cache_providers, _pot_cache_provider_preferences
|
||||
|
||||
|
||||
class ExamplePTP(PoTokenProvider):
    """Always-available sample provider that returns a fixed token.

    Declares support for the ``WEB`` client, the GVS context, and external
    requests through HTTP and SOCKS5H proxies only.
    """

    PROVIDER_NAME = 'example'
    PROVIDER_VERSION = '0.0.1'
    BUG_REPORT_LOCATION = 'https://example.com/issues'

    _SUPPORTED_CLIENTS = ('WEB',)
    _SUPPORTED_CONTEXTS = (PoTokenContext.GVS, )

    _SUPPORTED_EXTERNAL_REQUEST_FEATURES = (
        ExternalRequestFeature.PROXY_SCHEME_HTTP,
        ExternalRequestFeature.PROXY_SCHEME_SOCKS5H,
    )

    def is_available(self) -> bool:
        """Report the provider as usable unconditionally."""
        return True

    def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
        """Return the same canned response regardless of ``request``."""
        return PoTokenResponse('example-token', expires_at=123)
|
||||
|
||||
|
||||
class ExampleCacheProviderPCP(PoTokenCacheProvider):
    """Sample cache provider: reads return a fixed value, writes are no-ops."""

    PROVIDER_NAME = 'example'
    PROVIDER_VERSION = '0.0.1'
    BUG_REPORT_LOCATION = 'https://example.com/issues'

    def is_available(self) -> bool:
        """Report the provider as usable unconditionally."""
        return True

    def get(self, key: str):
        """Return the same cached value for every ``key``."""
        return 'example-cache'

    def store(self, key: str, value: str, expires_at: int):
        """Accept the entry and discard it."""

    def delete(self, key: str):
        """Accept the deletion request and do nothing."""
|
||||
|
||||
|
||||
class ExampleCacheSpecProviderPCSP(PoTokenCacheSpecProvider):
    """Sample cache spec provider that returns one fixed spec for any request."""

    PROVIDER_NAME = 'example'
    PROVIDER_VERSION = '0.0.1'
    BUG_REPORT_LOCATION = 'https://example.com/issues'

    def generate_cache_spec(self, request: PoTokenRequest):
        """Build a spec with a single key binding, a 60 TTL and WRITE_FIRST policy."""
        spec = PoTokenCacheSpec(
            key_bindings={'field': 'example-key'},
            default_ttl=60,
            write_policy=CacheProviderWritePolicy.WRITE_FIRST,
        )
        return spec
|
||||
|
||||
|
||||
class TestPoTokenProvider:
    """Tests for the ``PoTokenProvider`` base class, mostly via ``ExamplePTP``."""

    def test_base_type(self):
        """PoTokenProvider is a kind of IEContentProvider."""
        assert issubclass(PoTokenProvider, IEContentProvider)

    def test_create_provider_missing_fetch_method(self, ie, logger):
        """Instantiation raises TypeError when ``_real_request_pot`` is not defined."""
        class MissingMethodsPTP(PoTokenProvider):
            def is_available(self) -> bool:
                return True

        with pytest.raises(TypeError):
            MissingMethodsPTP(ie=ie, logger=logger, settings={})

    def test_create_provider_missing_available_method(self, ie, logger):
        """Instantiation raises TypeError when ``is_available`` is not defined."""
        class MissingMethodsPTP(PoTokenProvider):
            def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
                raise PoTokenProviderRejectedRequest('Not implemented')

        with pytest.raises(TypeError):
            MissingMethodsPTP(ie=ie, logger=logger, settings={})

    def test_barebones_provider(self, ie, logger):
        """A provider defining only the required methods gets default metadata."""
        class BarebonesProviderPTP(PoTokenProvider):
            def is_available(self) -> bool:
                return True

            def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
                raise PoTokenProviderRejectedRequest('Not implemented')

        barebones = BarebonesProviderPTP(ie=ie, logger=logger, settings={})

        assert barebones.PROVIDER_NAME == 'BarebonesProvider'
        assert barebones.PROVIDER_KEY == 'BarebonesProvider'
        assert barebones.PROVIDER_VERSION == '0.0.0'
        assert barebones.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'

    def test_example_provider_success(self, ie, logger, pot_request):
        """The example provider reports its metadata and serves a token."""
        example = ExamplePTP(ie=ie, logger=logger, settings={})

        assert example.PROVIDER_NAME == 'example'
        assert example.PROVIDER_KEY == 'Example'
        assert example.PROVIDER_VERSION == '0.0.1'
        assert example.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .'
        assert example.is_available()

        pot_response = example.request_pot(pot_request)

        assert pot_response.po_token == 'example-token'
        assert pot_response.expires_at == 123

    def test_provider_unsupported_context(self, ie, logger, pot_request):
        """A context outside ``_SUPPORTED_CONTEXTS`` is rejected."""
        example = ExamplePTP(ie=ie, logger=logger, settings={})
        pot_request.context = PoTokenContext.PLAYER

        with pytest.raises(PoTokenProviderRejectedRequest):
            example.request_pot(pot_request)

    def test_provider_unsupported_client(self, ie, logger, pot_request):
        """A client outside ``_SUPPORTED_CLIENTS`` is rejected."""
        example = ExamplePTP(ie=ie, logger=logger, settings={})
        pot_request.innertube_context['client']['clientName'] = 'ANDROID'

        with pytest.raises(PoTokenProviderRejectedRequest):
            example.request_pot(pot_request)

    def test_provider_unsupported_proxy_scheme(self, ie, logger, pot_request):
        """An undeclared proxy scheme is rejected; a declared one is accepted."""
        example = ExamplePTP(ie=ie, logger=logger, settings={})

        rejection_pattern = (
            'External requests by "example" provider do not support proxy scheme "socks4". Supported proxy '
            'schemes: http, socks5h'
        )
        pot_request.request_proxy = 'socks4://example.com'
        with pytest.raises(PoTokenProviderRejectedRequest, match=rejection_pattern):
            example.request_pot(pot_request)

        pot_request.request_proxy = 'http://example.com'
        assert example.request_pot(pot_request)

    def test_provider_ignore_external_request_features(self, ie, logger, pot_request):
        """Setting the supported features to None disables the feature checks."""
        class InternalPTP(ExamplePTP):
            _SUPPORTED_EXTERNAL_REQUEST_FEATURES = None

        internal = InternalPTP(ie=ie, logger=logger, settings={})

        pot_request.request_proxy = 'socks5://example.com'
        assert internal.request_pot(pot_request)

        pot_request.request_source_address = '0.0.0.0'
        assert internal.request_pot(pot_request)

    def test_provider_unsupported_external_request_source_address(self, ie, logger, pot_request):
        """With no features declared, a set source address is rejected."""
        class InternalPTP(ExamplePTP):
            _SUPPORTED_EXTERNAL_REQUEST_FEATURES = ()

        internal = InternalPTP(ie=ie, logger=logger, settings={})

        pot_request.request_source_address = None
        assert internal.request_pot(pot_request)

        pot_request.request_source_address = '0.0.0.0'
        with pytest.raises(
            PoTokenProviderRejectedRequest,
            match='External requests by "example" provider do not support setting source address',
        ):
            internal.request_pot(pot_request)

    def test_provider_supported_external_request_source_address(self, ie, logger, pot_request):
        """Declaring SOURCE_ADDRESS allows requests with or without one."""
        class InternalPTP(ExamplePTP):
            _SUPPORTED_EXTERNAL_REQUEST_FEATURES = (
                ExternalRequestFeature.SOURCE_ADDRESS,
            )

        internal = InternalPTP(ie=ie, logger=logger, settings={})

        for source_address in (None, '0.0.0.0'):
            pot_request.request_source_address = source_address
            assert internal.request_pot(pot_request)

    def test_provider_unsupported_external_request_tls_verification(self, ie, logger, pot_request):
        """With no features declared, disabling TLS verification is rejected."""
        class InternalPTP(ExamplePTP):
            _SUPPORTED_EXTERNAL_REQUEST_FEATURES = ()

        internal = InternalPTP(ie=ie, logger=logger, settings={})

        pot_request.request_verify_tls = True
        assert internal.request_pot(pot_request)

        pot_request.request_verify_tls = False
        with pytest.raises(
            PoTokenProviderRejectedRequest,
            match='External requests by "example" provider do not support ignoring TLS certificate failures',
        ):
            internal.request_pot(pot_request)

    def test_provider_supported_external_request_tls_verification(self, ie, logger, pot_request):
        """Declaring DISABLE_TLS_VERIFICATION allows both verification settings."""
        class InternalPTP(ExamplePTP):
            _SUPPORTED_EXTERNAL_REQUEST_FEATURES = (
                ExternalRequestFeature.DISABLE_TLS_VERIFICATION,
            )

        internal = InternalPTP(ie=ie, logger=logger, settings={})

        for verify_tls in (True, False):
            pot_request.request_verify_tls = verify_tls
            assert internal.request_pot(pot_request)

    def test_provider_request_webpage(self, ie, logger, pot_request):
        """Headers, proxy and cookiejar of the PO Token request reach the sent request."""
        example = ExamplePTP(ie=ie, logger=logger, settings={})

        jar = YoutubeDLCookieJar()
        pot_request.request_headers = HTTPHeaderDict({'User-Agent': 'example-user-agent'})
        pot_request.request_proxy = 'socks5://example-proxy.com'
        pot_request.request_cookiejar = jar

        # Echo the outgoing request back so it can be inspected
        ie._downloader.urlopen = lambda request: request

        outgoing = example._request_webpage(Request('https://example.com'), pot_request=pot_request)

        assert outgoing.url == 'https://example.com'
        assert outgoing.headers['User-Agent'] == 'example-user-agent'
        assert outgoing.proxies == {'all': 'socks5://example-proxy.com'}
        assert outgoing.extensions['cookiejar'] is jar
        assert 'Requesting webpage' in logger.messages['info']

    def test_provider_request_webpage_override(self, ie, logger, pot_request):
        """Values set explicitly on the Request win over those of the PO Token request."""
        example = ExamplePTP(ie=ie, logger=logger, settings={})

        request_jar = YoutubeDLCookieJar()
        pot_request.request_headers = HTTPHeaderDict({'User-Agent': 'example-user-agent'})
        pot_request.request_proxy = 'socks5://example-proxy.com'
        pot_request.request_cookiejar = request_jar

        # Echo the outgoing request back so it can be inspected
        ie._downloader.urlopen = lambda request: request

        overriding_request = Request(
            'https://example.com',
            headers={'User-Agent': 'override-user-agent-override'},
            proxies={'http': 'http://example-proxy-override.com'},
            extensions={'cookiejar': YoutubeDLCookieJar()},
        )
        outgoing = example._request_webpage(
            overriding_request, pot_request=pot_request, note='Custom requesting webpage')

        assert outgoing.url == 'https://example.com'
        assert outgoing.headers['User-Agent'] == 'override-user-agent-override'
        assert outgoing.proxies == {'http': 'http://example-proxy-override.com'}
        assert outgoing.extensions['cookiejar'] is not request_jar
        assert 'Custom requesting webpage' in logger.messages['info']

    def test_provider_request_webpage_no_log(self, ie, logger, pot_request):
        """Passing ``note=False`` produces no info-level log message."""
        example = ExamplePTP(ie=ie, logger=logger, settings={})

        # Echo the outgoing request back so it can be inspected
        ie._downloader.urlopen = lambda request: request

        outgoing = example._request_webpage(Request('https://example.com'), note=False)

        assert outgoing.url == 'https://example.com'
        assert 'info' not in logger.messages

    def test_provider_request_webpage_no_pot_request(self, ie, logger):
        """The request is still sent when no PO Token request is supplied."""
        example = ExamplePTP(ie=ie, logger=logger, settings={})

        # Echo the outgoing request back so it can be inspected
        ie._downloader.urlopen = lambda request: request

        outgoing = example._request_webpage(Request('https://example.com'), pot_request=None)

        assert outgoing.url == 'https://example.com'

    def test_get_config_arg(self, ie, logger):
        """Settings are looked up by exact key; values are lowercased unless ``casesense``."""
        example = ExamplePTP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})
        config_arg = example._configuration_arg

        assert config_arg('abc') == ['123d']
        assert config_arg('abc', default=['default']) == ['123d']
        assert config_arg('ABC', default=['default']) == ['default']
        assert config_arg('abc', casesense=True) == ['123D']
        assert config_arg('xyz', casesense=False) == ['456a', '789b']

    def test_require_class_end_with_suffix(self, ie, logger):
        """Reading PROVIDER_KEY on a class without the expected suffix asserts."""
        class InvalidSuffix(PoTokenProvider):
            PROVIDER_NAME = 'invalid-suffix'

            def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
                raise PoTokenProviderRejectedRequest('Not implemented')

            def is_available(self) -> bool:
                return True

        unsuffixed = InvalidSuffix(ie=ie, logger=logger, settings={})

        with pytest.raises(AssertionError):
            unsuffixed.PROVIDER_KEY  # noqa: B018
|
||||
|
||||
|
||||
class TestPoTokenCacheProvider:
    """Tests for the ``PoTokenCacheProvider`` base class."""

    def test_base_type(self):
        """PoTokenCacheProvider is a kind of IEContentProvider."""
        assert issubclass(PoTokenCacheProvider, IEContentProvider)

    def test_create_provider_missing_get_method(self, ie, logger):
        """Instantiation raises TypeError when ``get`` is not defined."""
        class MissingMethodsPCP(PoTokenCacheProvider):
            def store(self, key: str, value: str, expires_at: int):
                pass

            def delete(self, key: str):
                pass

            def is_available(self) -> bool:
                return True

        with pytest.raises(TypeError):
            MissingMethodsPCP(ie=ie, logger=logger, settings={})

    def test_create_provider_missing_store_method(self, ie, logger):
        """Instantiation raises TypeError when ``store`` is not defined."""
        class MissingMethodsPCP(PoTokenCacheProvider):
            def get(self, key: str):
                pass

            def delete(self, key: str):
                pass

            def is_available(self) -> bool:
                return True

        with pytest.raises(TypeError):
            MissingMethodsPCP(ie=ie, logger=logger, settings={})

    def test_create_provider_missing_delete_method(self, ie, logger):
        """Instantiation raises TypeError when ``delete`` is not defined."""
        class MissingMethodsPCP(PoTokenCacheProvider):
            def get(self, key: str):
                pass

            def store(self, key: str, value: str, expires_at: int):
                pass

            def is_available(self) -> bool:
                return True

        with pytest.raises(TypeError):
            MissingMethodsPCP(ie=ie, logger=logger, settings={})

    def test_create_provider_missing_is_available_method(self, ie, logger):
        """Instantiation raises TypeError when ``is_available`` is not defined."""
        class MissingMethodsPCP(PoTokenCacheProvider):
            def get(self, key: str):
                pass

            def store(self, key: str, value: str, expires_at: int):
                pass

            def delete(self, key: str):
                pass

        with pytest.raises(TypeError):
            MissingMethodsPCP(ie=ie, logger=logger, settings={})

    def test_barebones_provider(self, ie, logger):
        """A cache provider defining only the required methods gets default metadata."""
        class BarebonesProviderPCP(PoTokenCacheProvider):
            def is_available(self) -> bool:
                return True

            def get(self, key: str):
                return 'example-cache'

            def store(self, key: str, value: str, expires_at: int):
                pass

            def delete(self, key: str):
                pass

        barebones = BarebonesProviderPCP(ie=ie, logger=logger, settings={})

        assert barebones.PROVIDER_NAME == 'BarebonesProvider'
        assert barebones.PROVIDER_KEY == 'BarebonesProvider'
        assert barebones.PROVIDER_VERSION == '0.0.0'
        assert barebones.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'

    def test_create_provider_example(self, ie, logger):
        """The example cache provider reports its declared metadata."""
        example = ExampleCacheProviderPCP(ie=ie, logger=logger, settings={})

        assert example.PROVIDER_NAME == 'example'
        assert example.PROVIDER_KEY == 'ExampleCacheProvider'
        assert example.PROVIDER_VERSION == '0.0.1'
        assert example.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .'
        assert example.is_available()

    def test_get_config_arg(self, ie, logger):
        """Settings are looked up by exact key; values are lowercased unless ``casesense``."""
        example = ExampleCacheProviderPCP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})
        config_arg = example._configuration_arg

        assert config_arg('abc') == ['123d']
        assert config_arg('abc', default=['default']) == ['123d']
        assert config_arg('ABC', default=['default']) == ['default']
        assert config_arg('abc', casesense=True) == ['123D']
        assert config_arg('xyz', casesense=False) == ['456a', '789b']

    def test_require_class_end_with_suffix(self, ie, logger):
        """Reading PROVIDER_KEY on a class without the expected suffix asserts."""
        class InvalidSuffix(PoTokenCacheProvider):
            def get(self, key: str):
                return 'example-cache'

            def store(self, key: str, value: str, expires_at: int):
                pass

            def delete(self, key: str):
                pass

            def is_available(self) -> bool:
                return True

        unsuffixed = InvalidSuffix(ie=ie, logger=logger, settings={})

        with pytest.raises(AssertionError):
            unsuffixed.PROVIDER_KEY  # noqa: B018
|
||||
|
||||
|
||||
class TestPoTokenCacheSpecProvider:
    """Tests for the ``PoTokenCacheSpecProvider`` base class."""

    def test_base_type(self):
        """PoTokenCacheSpecProvider is a kind of IEContentProvider."""
        assert issubclass(PoTokenCacheSpecProvider, IEContentProvider)

    def test_create_provider_missing_supports_method(self, ie, logger):
        """Instantiation raises TypeError when the subclass defines no methods."""
        class MissingMethodsPCS(PoTokenCacheSpecProvider):
            pass

        with pytest.raises(TypeError):
            MissingMethodsPCS(ie=ie, logger=logger, settings={})

    def test_create_provider_barebones(self, ie, pot_request, logger):
        """A spec provider defining only ``generate_cache_spec`` gets default metadata.

        The spec it returns omits ``write_policy``, which is then expected
        to be WRITE_ALL.
        """
        class BarebonesProviderPCSP(PoTokenCacheSpecProvider):
            def generate_cache_spec(self, request: PoTokenRequest):
                return PoTokenCacheSpec(
                    default_ttl=100,
                    key_bindings={},
                )

        barebones = BarebonesProviderPCSP(ie=ie, logger=logger, settings={})

        assert barebones.PROVIDER_NAME == 'BarebonesProvider'
        assert barebones.PROVIDER_KEY == 'BarebonesProvider'
        assert barebones.PROVIDER_VERSION == '0.0.0'
        assert barebones.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'
        assert barebones.is_available()

        assert barebones.generate_cache_spec(request=pot_request).default_ttl == 100
        assert barebones.generate_cache_spec(request=pot_request).key_bindings == {}
        assert barebones.generate_cache_spec(request=pot_request).write_policy == CacheProviderWritePolicy.WRITE_ALL

    def test_create_provider_example(self, ie, pot_request, logger):
        """The example spec provider reports its metadata and its fixed spec."""
        example = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})

        assert example.PROVIDER_NAME == 'example'
        assert example.PROVIDER_KEY == 'ExampleCacheSpecProvider'
        assert example.PROVIDER_VERSION == '0.0.1'
        assert example.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .'
        assert example.is_available()

        assert example.generate_cache_spec(pot_request)
        assert example.generate_cache_spec(pot_request).key_bindings == {'field': 'example-key'}
        assert example.generate_cache_spec(pot_request).default_ttl == 60
        assert example.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_FIRST

    def test_get_config_arg(self, ie, logger):
        """Settings are looked up by exact key; values are lowercased unless ``casesense``."""
        example = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})
        config_arg = example._configuration_arg

        assert config_arg('abc') == ['123d']
        assert config_arg('abc', default=['default']) == ['123d']
        assert config_arg('ABC', default=['default']) == ['default']
        assert config_arg('abc', casesense=True) == ['123D']
        assert config_arg('xyz', casesense=False) == ['456a', '789b']

    def test_require_class_end_with_suffix(self, ie, logger):
        """Reading PROVIDER_KEY on a class without the expected suffix asserts."""
        class InvalidSuffix(PoTokenCacheSpecProvider):
            def generate_cache_spec(self, request: PoTokenRequest):
                return None

        unsuffixed = InvalidSuffix(ie=ie, logger=logger, settings={})

        with pytest.raises(AssertionError):
            unsuffixed.PROVIDER_KEY  # noqa: B018
|
||||
|
||||
|
||||
class TestPoTokenRequest:
    """Tests for ``PoTokenRequest``."""

    def test_copy_request(self, pot_request):
        """``copy()`` yields an equal but distinct request.

        The innertube context and request headers must be new objects,
        while the cookiejar is expected to be shared with the original.
        """
        copied_request = pot_request.copy()

        assert copied_request is not pot_request
        assert copied_request.context == pot_request.context

        # A nested edit on the copy's innertube context must not leak back into the original
        assert copied_request.innertube_context == pot_request.innertube_context
        assert copied_request.innertube_context is not pot_request.innertube_context
        copied_request.innertube_context['client']['clientName'] = 'ANDROID'
        assert pot_request.innertube_context['client']['clientName'] != 'ANDROID'

        equal_fields = (
            'innertube_host',
            'session_index',
            'player_url',
            'is_authenticated',
            'visitor_data',
            'data_sync_id',
            'video_id',
            'request_proxy',
            'request_headers',
            'request_timeout',
            'request_source_address',
            'request_verify_tls',
            'bypass_cache',
        )
        for field in equal_fields:
            assert getattr(copied_request, field) == getattr(pot_request, field)

        assert copied_request.request_cookiejar is pot_request.request_cookiejar
        assert copied_request.request_headers is not pot_request.request_headers
|
||||
|
||||
|
||||
def test_provider_bug_report_message(ie, logger):
    """``provider_bug_report_message`` formats the provider's bug report text.

    Without ``before`` the message is prefixed with ``'; '``; with ``before``
    the custom text comes first and the message is capitalised.
    """
    example = ExamplePTP(ie=ie, logger=logger, settings={})
    assert example.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .'

    default_message = provider_bug_report_message(example)
    assert default_message == '; please report this issue to the provider developer at https://example.com/issues .'

    prefixed_message = provider_bug_report_message(example, before='custom message!')
    assert prefixed_message == 'custom message! Please report this issue to the provider developer at https://example.com/issues .'
|
||||
|
||||
|
||||
def test_register_provider(ie):
    """``register_provider`` adds the class to the PO Token provider registry.

    The class is expected under the key ``UnavailableProvider``. The entry is
    removed in a ``finally`` block so that a failing assertion cannot leave
    the throwaway class in the module-level registry for later tests.
    """
    try:
        @register_provider
        class UnavailableProviderPTP(PoTokenProvider):
            def is_available(self) -> bool:
                return False

            def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
                raise PoTokenProviderRejectedRequest('Not implemented')

        assert _pot_providers.value.get('UnavailableProvider') == UnavailableProviderPTP
    finally:
        # Default of None: do not mask the real failure with a KeyError if nothing was registered
        _pot_providers.value.pop('UnavailableProvider', None)
|
||||
|
||||
|
||||
def test_register_pot_preference(ie):
    """``register_preference`` adds exactly one entry to the preference registry.

    The registry is restored afterwards so the throwaway preference does not
    stay registered for later tests, matching the cleanup done by the
    provider registration tests in this module.
    """
    # Snapshot by value so entries added during the test can be identified later
    previously_registered = list(_ptp_preferences.value)

    try:
        @register_preference(ExamplePTP)
        def unavailable_preference(provider: PoTokenProvider, request: PoTokenRequest):
            return 1

        assert len(_ptp_preferences.value) == len(previously_registered) + 1
    finally:
        # The stored entry is not necessarily the decorated function itself, so remove
        # whatever was not present before. Assumes the registry supports .remove()
        # (set or list) - TODO confirm against the registry definition.
        registry = _ptp_preferences.value
        for added in [entry for entry in registry if entry not in previously_registered]:
            registry.remove(added)
|
||||
|
||||
|
||||
def test_register_cache_provider(ie):
    """``cache.register_provider`` adds the class to the cache provider registry.

    The class is expected under the key ``UnavailableCacheProvider``. The
    entry is removed in a ``finally`` block so that a failing assertion cannot
    leave the throwaway class in the module-level registry for later tests.
    """
    try:
        @cache.register_provider
        class UnavailableCacheProviderPCP(PoTokenCacheProvider):
            def is_available(self) -> bool:
                return False

            def get(self, key: str):
                return 'example-cache'

            def store(self, key: str, value: str, expires_at: int):
                pass

            def delete(self, key: str):
                pass

        assert _pot_cache_providers.value.get('UnavailableCacheProvider') == UnavailableCacheProviderPCP
    finally:
        # Default of None: do not mask the real failure with a KeyError if nothing was registered
        _pot_cache_providers.value.pop('UnavailableCacheProvider', None)
|
||||
|
||||
|
||||
def test_register_cache_provider_spec(ie):
    """``cache.register_spec`` adds the class to the cache spec provider registry.

    The class is expected under the key ``UnavailableCacheProvider``. The
    entry is removed in a ``finally`` block so that a failing assertion cannot
    leave the throwaway class in the module-level registry for later tests.
    """
    try:
        @cache.register_spec
        class UnavailableCacheProviderPCSP(PoTokenCacheSpecProvider):
            def is_available(self) -> bool:
                return False

            def generate_cache_spec(self, request: PoTokenRequest):
                return None

        assert _pot_pcs_providers.value.get('UnavailableCacheProvider') == UnavailableCacheProviderPCSP
    finally:
        # Default of None: do not mask the real failure with a KeyError if nothing was registered
        _pot_pcs_providers.value.pop('UnavailableCacheProvider', None)
|
||||
|
||||
|
||||
def test_register_cache_provider_preference(ie):
    """``cache.register_preference`` adds exactly one entry to its registry.

    The registry is restored afterwards so the throwaway preference does not
    stay registered for later tests, matching the cleanup done by the
    provider registration tests in this module.
    """
    # Snapshot by value so entries added during the test can be identified later
    previously_registered = list(_pot_cache_provider_preferences.value)

    try:
        @cache.register_preference(ExampleCacheProviderPCP)
        def unavailable_preference(provider: PoTokenCacheProvider, request: PoTokenRequest):
            return 1

        assert len(_pot_cache_provider_preferences.value) == len(previously_registered) + 1
    finally:
        # The stored entry is not necessarily the decorated function itself, so remove
        # whatever was not present before. Assumes the registry supports .remove()
        # (set or list) - TODO confirm against the registry definition.
        registry = _pot_cache_provider_preferences.value
        for added in [entry for entry in registry if entry not in previously_registered]:
            registry.remove(added)
|
||||
|
||||
|
||||
def test_logger_log_level(logger):
    """``LogLevel`` accepts names in any case and numeric values.

    An unrecognised name is expected to resolve to ``INFO``.
    """
    log_level = logger.LogLevel

    assert log_level('INFO') == log_level.INFO
    assert log_level('debuG') == log_level.DEBUG
    assert log_level(10) == log_level.DEBUG
    assert log_level('UNKNOWN') == log_level.INFO
|
@ -316,6 +316,10 @@
|
||||
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js',
|
||||
'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
@ -640,6 +640,7 @@ def __init__(self, params=None, auto_init=True):
|
||||
self._printed_messages = set()
|
||||
self._first_webpage_request = True
|
||||
self._post_hooks = []
|
||||
self._close_hooks = []
|
||||
self._progress_hooks = []
|
||||
self._postprocessor_hooks = []
|
||||
self._download_retcode = 0
|
||||
@ -908,6 +909,11 @@ def add_post_hook(self, ph):
|
||||
"""Add the post hook"""
|
||||
self._post_hooks.append(ph)
|
||||
|
||||
def add_close_hook(self, ch):
|
||||
"""Add a close hook, called when YoutubeDL.close() is called"""
|
||||
assert callable(ch), 'Close hook must be callable'
|
||||
self._close_hooks.append(ch)
|
||||
|
||||
def add_progress_hook(self, ph):
|
||||
"""Add the download progress hook"""
|
||||
self._progress_hooks.append(ph)
|
||||
@ -1016,6 +1022,9 @@ def close(self):
|
||||
self._request_director.close()
|
||||
del self._request_director
|
||||
|
||||
for close_hook in self._close_hooks:
|
||||
close_hook()
|
||||
|
||||
def trouble(self, message=None, tb=None, is_error=True):
|
||||
"""Determine action to take when a download problem appears.
|
||||
|
||||
|
@ -764,11 +764,11 @@ def _get_linux_desktop_environment(env, logger):
|
||||
GetDesktopEnvironment
|
||||
"""
|
||||
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
|
||||
desktop_session = env.get('DESKTOP_SESSION', None)
|
||||
desktop_session = env.get('DESKTOP_SESSION', '')
|
||||
if xdg_current_desktop is not None:
|
||||
for part in map(str.strip, xdg_current_desktop.split(':')):
|
||||
if part == 'Unity':
|
||||
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
||||
if 'gnome-fallback' in desktop_session:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.UNITY
|
||||
@ -797,35 +797,34 @@ def _get_linux_desktop_environment(env, logger):
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
elif part == 'LXQt':
|
||||
return _LinuxDesktopEnvironment.LXQT
|
||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
logger.debug(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
|
||||
elif desktop_session is not None:
|
||||
if desktop_session == 'deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif desktop_session in ('mate', 'gnome'):
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif desktop_session in ('kde4', 'kde-plasma'):
|
||||
if desktop_session == 'deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif desktop_session in ('mate', 'gnome'):
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif desktop_session in ('kde4', 'kde-plasma'):
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif desktop_session == 'kde':
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif desktop_session == 'kde':
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif desktop_session == 'ukui':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
else:
|
||||
logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
|
||||
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif desktop_session == 'ukui':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
else:
|
||||
if 'GNOME_DESKTOP_SESSION_ID' in env:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif 'KDE_FULL_SESSION' in env:
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
logger.debug(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
|
||||
|
||||
if 'GNOME_DESKTOP_SESSION_ID' in env:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif 'KDE_FULL_SESSION' in env:
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
|
||||
return _LinuxDesktopEnvironment.OTHER
|
||||
|
||||
|
||||
|
@ -30,7 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
|
||||
from .http import HttpFD
|
||||
from .ism import IsmFD
|
||||
from .mhtml import MhtmlFD
|
||||
from .niconico import NiconicoDmcFD, NiconicoLiveFD
|
||||
from .niconico import NiconicoLiveFD
|
||||
from .rtmp import RtmpFD
|
||||
from .rtsp import RtspFD
|
||||
from .websocket import WebSocketFragmentFD
|
||||
@ -50,7 +50,6 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
|
||||
'http_dash_segments_generator': DashSegmentsFD,
|
||||
'ism': IsmFD,
|
||||
'mhtml': MhtmlFD,
|
||||
'niconico_dmc': NiconicoDmcFD,
|
||||
'niconico_live': NiconicoLiveFD,
|
||||
'fc2_live': FC2LiveFD,
|
||||
'websocket_frag': WebSocketFragmentFD,
|
||||
@ -67,7 +66,6 @@ def shorten_protocol_name(proto, simplify=False):
|
||||
'rtmp_ffmpeg': 'rtmpF',
|
||||
'http_dash_segments': 'dash',
|
||||
'http_dash_segments_generator': 'dashG',
|
||||
'niconico_dmc': 'dmc',
|
||||
'websocket_frag': 'WSfrag',
|
||||
}
|
||||
if simplify:
|
||||
|
@ -2,60 +2,12 @@
|
||||
import threading
|
||||
import time
|
||||
|
||||
from . import get_suitable_downloader
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
from ..networking import Request
|
||||
from ..utils import DownloadError, str_or_none, try_get
|
||||
|
||||
|
||||
class NiconicoDmcFD(FileDownloader):
|
||||
""" Downloading niconico douga from DMC with heartbeat """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
from ..extractor.niconico import NiconicoIE
|
||||
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading from DMC')
|
||||
ie = NiconicoIE(self.ydl)
|
||||
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
||||
|
||||
fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
|
||||
|
||||
success = download_complete = False
|
||||
timer = [None]
|
||||
heartbeat_lock = threading.Lock()
|
||||
heartbeat_url = heartbeat_info_dict['url']
|
||||
heartbeat_data = heartbeat_info_dict['data'].encode()
|
||||
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
||||
|
||||
request = Request(heartbeat_url, heartbeat_data)
|
||||
|
||||
def heartbeat():
|
||||
try:
|
||||
self.ydl.urlopen(request).read()
|
||||
except Exception:
|
||||
self.to_screen(f'[{self.FD_NAME}] Heartbeat failed')
|
||||
|
||||
with heartbeat_lock:
|
||||
if not download_complete:
|
||||
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
|
||||
timer[0].start()
|
||||
|
||||
heartbeat_info_dict['ping']()
|
||||
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
|
||||
try:
|
||||
heartbeat()
|
||||
if type(fd).__name__ == 'HlsFD':
|
||||
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
|
||||
success = fd.real_download(filename, info_dict)
|
||||
finally:
|
||||
if heartbeat_lock:
|
||||
with heartbeat_lock:
|
||||
timer[0].cancel()
|
||||
download_complete = True
|
||||
return success
|
||||
|
||||
|
||||
class NiconicoLiveFD(FileDownloader):
|
||||
""" Downloads niconico live without being stopped """
|
||||
|
||||
|
@ -338,7 +338,6 @@
|
||||
from .canalplus import CanalplusIE
|
||||
from .canalsurmas import CanalsurmasIE
|
||||
from .caracoltv import CaracolTvPlayIE
|
||||
from .cartoonnetwork import CartoonNetworkIE
|
||||
from .cbc import (
|
||||
CBCIE,
|
||||
CBCGemIE,
|
||||
@ -929,7 +928,10 @@
|
||||
)
|
||||
from .jiosaavn import (
|
||||
JioSaavnAlbumIE,
|
||||
JioSaavnArtistIE,
|
||||
JioSaavnPlaylistIE,
|
||||
JioSaavnShowIE,
|
||||
JioSaavnShowPlaylistIE,
|
||||
JioSaavnSongIE,
|
||||
)
|
||||
from .joj import JojIE
|
||||
@ -1042,6 +1044,7 @@
|
||||
LimelightMediaIE,
|
||||
)
|
||||
from .linkedin import (
|
||||
LinkedInEventsIE,
|
||||
LinkedInIE,
|
||||
LinkedInLearningCourseIE,
|
||||
LinkedInLearningIE,
|
||||
@ -1783,7 +1786,6 @@
|
||||
from .rtve import (
|
||||
RTVEALaCartaIE,
|
||||
RTVEAudioIE,
|
||||
RTVEInfantilIE,
|
||||
RTVELiveIE,
|
||||
RTVETelevisionIE,
|
||||
)
|
||||
@ -1964,7 +1966,6 @@
|
||||
SpreakerShowIE,
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
from .sproutvideo import (
|
||||
SproutVideoIE,
|
||||
VidsIoIE,
|
||||
@ -2146,6 +2147,7 @@
|
||||
from .toggo import ToggoIE
|
||||
from .tonline import TOnlineIE
|
||||
from .toongoggles import ToonGogglesIE
|
||||
from .toutiao import ToutiaoIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import (
|
||||
ToypicsIE,
|
||||
@ -2237,7 +2239,10 @@
|
||||
TVPlayIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tvw import TvwIE
|
||||
from .tvw import (
|
||||
TvwIE,
|
||||
TvwTvChannelsIE,
|
||||
)
|
||||
from .tweakers import TweakersIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
@ -2365,6 +2370,7 @@
|
||||
VHXEmbedIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoChannelIE,
|
||||
VimeoEventIE,
|
||||
VimeoGroupsIE,
|
||||
VimeoIE,
|
||||
VimeoLikesIE,
|
||||
|
@ -21,6 +21,7 @@
|
||||
int_or_none,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
@ -417,6 +418,10 @@ def _real_extract(self, url):
|
||||
'is_live': is_live,
|
||||
'availability': availability,
|
||||
})
|
||||
|
||||
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
|
||||
info['thumbnails'] = [{'url': thumbnail}]
|
||||
|
||||
return info
|
||||
|
||||
|
||||
|
@ -1,32 +1,24 @@
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
try_get,
|
||||
update_url_query,
|
||||
)
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
|
||||
class AMCNetworksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/?#]+)+)/[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
|
||||
'url': 'https://www.amc.com/shows/dark-winds/videos/dark-winds-a-look-at-season-3--1072027',
|
||||
'info_dict': {
|
||||
'id': '4Lq1dzOnZGt0',
|
||||
'id': '6369261343112',
|
||||
'ext': 'mp4',
|
||||
'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
|
||||
'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
|
||||
'upload_date': '20201120',
|
||||
'timestamp': 1605904350,
|
||||
'uploader': 'AMCN',
|
||||
'title': 'Dark Winds: A Look at Season 3',
|
||||
'uploader_id': '6240731308001',
|
||||
'duration': 176.427,
|
||||
'thumbnail': r're:https://[^/]+\.boltdns\.net/.+/image\.jpg',
|
||||
'tags': [],
|
||||
'timestamp': 1740414792,
|
||||
'upload_date': '20250224',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||
'only_matching': True,
|
||||
@ -52,96 +44,18 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_REQUESTOR_ID_MAP = {
|
||||
'amc': 'AMC',
|
||||
'bbcamerica': 'BBCA',
|
||||
'ifc': 'IFC',
|
||||
'sundancetv': 'SUNDANCE',
|
||||
'wetv': 'WETV',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, display_id = self._match_valid_url(url).groups()
|
||||
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||
page_data = self._download_json(
|
||||
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
|
||||
display_id)['data']
|
||||
properties = page_data.get('properties') or {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
initial_data = self._search_json(
|
||||
r'window\.initialData\s*=\s*JSON\.parse\(String\.raw`', webpage, 'initial data', display_id)
|
||||
video_id = traverse_obj(initial_data, ('initialData', 'properties', 'videoId', {str}))
|
||||
if not video_id: # All locked videos are now DRM-protected
|
||||
self.report_drm(display_id)
|
||||
account_id = initial_data['config']['brightcove']['accountId']
|
||||
player_id = initial_data['config']['brightcove']['playerId']
|
||||
|
||||
video_player_count = 0
|
||||
try:
|
||||
for v in page_data['children']:
|
||||
if v.get('type') == 'video-player':
|
||||
release_pid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + release_pid
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
video_player_count += 1
|
||||
except KeyError:
|
||||
pass
|
||||
if video_player_count > 1:
|
||||
self.report_warning(
|
||||
f'The JSON data has {video_player_count} video players. Only one will be extracted')
|
||||
|
||||
# Fall back to videoPid if releasePid not found.
|
||||
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
|
||||
if not video_player_count:
|
||||
tp_path = 'M_UwQC/media/' + properties['videoPid']
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
|
||||
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
|
||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||
video_id = theplatform_metadata['pid']
|
||||
title = theplatform_metadata['title']
|
||||
rating = try_get(
|
||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||
video_category = properties.get('videoCategory')
|
||||
if video_category and video_category.endswith('-Auth'):
|
||||
resource = self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, rating)
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
media_url = update_url_query(media_url, query)
|
||||
formats, subtitles = self._extract_theplatform_smil(
|
||||
media_url, video_id)
|
||||
|
||||
thumbnails = []
|
||||
thumbnail_urls = [properties.get('imageDesktop')]
|
||||
if 'thumbnail' in info:
|
||||
thumbnail_urls.append(info.pop('thumbnail'))
|
||||
for thumbnail_url in thumbnail_urls:
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
|
||||
thumbnails.append({
|
||||
'url': thumbnail_url,
|
||||
'width': int(mobj.group(1)) if mobj else None,
|
||||
'height': int(mobj.group(2)) if mobj else None,
|
||||
})
|
||||
|
||||
info.update({
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'formats': formats,
|
||||
'id': video_id,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': thumbnails,
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = next(iter(ns_keys))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
season_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$season'))
|
||||
series = theplatform_metadata.get(ns + '$show') or None
|
||||
info.update({
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'season_number': season_number,
|
||||
'series': series,
|
||||
})
|
||||
return info
|
||||
return self.url_result(
|
||||
f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
|
||||
BrightcoveNewIE, video_id)
|
||||
|
@ -1,64 +1,105 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class AtresPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
|
||||
_NETRC_MACHINE = 'atresplayer'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
|
||||
'info_dict': {
|
||||
'id': '5d4aa2c57ed1a88fc715a615',
|
||||
'ext': 'mp4',
|
||||
'title': 'Capítulo 7: Asuntos pendientes',
|
||||
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||
'duration': 3413,
|
||||
},
|
||||
'skip': 'This video is only available for registered users',
|
||||
_TESTS = [{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67f2dfb2fb6ab0e4c7203849',
|
||||
'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
|
||||
'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
|
||||
'channel': 'laSexta',
|
||||
'duration': 31,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
|
||||
'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
|
||||
'series': 'El Objetivo',
|
||||
'season': 'Temporada 12',
|
||||
'timestamp': 1743970079,
|
||||
'upload_date': '20250406',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67f836baa4a5b0e4147ca59a',
|
||||
'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
|
||||
'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
|
||||
'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
|
||||
'channel': 'Antena 3',
|
||||
'duration': 2556,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
|
||||
'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
|
||||
'series': 'El Hormiguero ',
|
||||
'season': 'Temporada 14',
|
||||
'timestamp': 1744320111,
|
||||
'upload_date': '20250410',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
|
||||
'info_dict': {
|
||||
'ext': 'mp4',
|
||||
'id': '67a6038b64ceca00070f4f69',
|
||||
'display_id': 'capitulo-3-supervivientes',
|
||||
'title': 'Capítulo 3: Supervivientes',
|
||||
'description': 'md5:65b231f20302f776c2b0dd24594599a1',
|
||||
'channel': 'Flooxer',
|
||||
'duration': 1196,
|
||||
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
|
||||
'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
|
||||
'series': 'BIARA: Proyecto Lázarus',
|
||||
'season': 'Temporada 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 3',
|
||||
'episode_number': 3,
|
||||
'timestamp': 1743095191,
|
||||
'upload_date': '20250327',
|
||||
},
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_BASE = 'https://api.atresplayer.com/'
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self._request_webpage(
|
||||
self._API_BASE + 'login', None, 'Downloading login page')
|
||||
|
||||
try:
|
||||
target_url = self._download_json(
|
||||
'https://account.atresmedia.com/api/login', None,
|
||||
'Logging in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=urlencode_postdata({
|
||||
self._download_webpage(
|
||||
'https://account.atresplayer.com/auth/v1/login', None,
|
||||
'Logging in', 'Failed to log in', data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}))['targetUrl']
|
||||
}))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
self._request_webpage(target_url, None, 'Following Target URL')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
metadata_url = self._download_json(
|
||||
self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
|
||||
query={'href': urllib.parse.urlparse(url).path})['href']
|
||||
metadata = self._download_json(metadata_url, video_id)
|
||||
|
||||
try:
|
||||
episode = self._download_json(
|
||||
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
|
||||
video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read(), None)
|
||||
@ -67,37 +108,45 @@ def _real_extract(self, url):
|
||||
raise ExtractorError(error['error_description'], expected=True)
|
||||
raise
|
||||
|
||||
title = episode['titulo']
|
||||
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for source in episode.get('sources', []):
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
|
||||
src_url = source['src']
|
||||
src_type = source.get('type')
|
||||
if src_type == 'application/vnd.apple.mpegurl':
|
||||
formats, subtitles = self._extract_m3u8_formats(
|
||||
src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
elif src_type == 'application/dash+xml':
|
||||
formats, subtitles = self._extract_mpd_formats(
|
||||
src, video_id, mpd_id='dash', fatal=False)
|
||||
|
||||
heartbeat = episode.get('heartbeat') or {}
|
||||
omniture = episode.get('omniture') or {}
|
||||
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
|
||||
if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
elif src_type in ('application/dash+xml', 'application/dash+hevc'):
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'display_id': display_id,
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': episode.get('descripcion'),
|
||||
'thumbnail': episode.get('imgPoster'),
|
||||
'duration': int_or_none(episode.get('duration')),
|
||||
'formats': formats,
|
||||
'channel': get_meta('channel'),
|
||||
'season': get_meta('season'),
|
||||
'episode_number': int_or_none(get_meta('episodeNumber')),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('titulo', {str}),
|
||||
'description': ('descripcion', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
|
||||
'age_limit': ('ageRating', {parse_age_limit}),
|
||||
}),
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'tags': ('tags', ..., 'title', {str}),
|
||||
'age_limit': ('ageRating', {parse_age_limit}),
|
||||
'series': ('format', 'title', {str}),
|
||||
'season': ('currentSeason', 'title', {str}),
|
||||
'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
|
||||
'episode_number': ('numberOfEpisode', {int_or_none}),
|
||||
'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
}),
|
||||
}
|
||||
|
@ -1,30 +1,32 @@
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
determine_ext,
|
||||
format_field,
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
get_element_html_by_class,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
unified_strdate,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BitChuteIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/?#]+)/(?P<id>[^/?#&]+)'
|
||||
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
|
||||
@ -34,12 +36,17 @@ class BitChuteIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'This is the first video on #BitChute !',
|
||||
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg$',
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20170103',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'uploader_id': 'I5NgtHZn9vPj',
|
||||
'channel_id': '1VBwRfyNcKdX',
|
||||
'view_count': int,
|
||||
'duration': 16.0,
|
||||
'timestamp': 1483425443,
|
||||
},
|
||||
}, {
|
||||
# test case: video with different channel and uploader
|
||||
@ -49,13 +56,18 @@ class BitChuteIE(InfoExtractor):
|
||||
'id': 'Yti_j9A-UZ4',
|
||||
'ext': 'mp4',
|
||||
'title': 'Israel at War | Full Measure',
|
||||
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:e60198b89971966d6030d22b3268f08f',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg$',
|
||||
'uploader': 'sharylattkisson',
|
||||
'upload_date': '20231106',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
|
||||
'channel': 'Full Measure with Sharyl Attkisson',
|
||||
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
|
||||
'uploader_id': '9K0kUWA9zmd9',
|
||||
'channel_id': 'NpdxoCRv3ZLb',
|
||||
'view_count': int,
|
||||
'duration': 554.0,
|
||||
'timestamp': 1699296106,
|
||||
},
|
||||
}, {
|
||||
# video not downloadable in browser, but we can recover it
|
||||
@ -66,25 +78,21 @@ class BitChuteIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'filesize': 71537926,
|
||||
'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
|
||||
'description': 'md5:228ee93bd840a24938f536aeac9cf749',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'description': 'md5:2029c7c212ccd4b040f52bb2d036ef4e',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg$',
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20181113',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'uploader_id': 'I5NgtHZn9vPj',
|
||||
'channel_id': '1VBwRfyNcKdX',
|
||||
'view_count': int,
|
||||
'duration': 1701.0,
|
||||
'tags': ['bitchute'],
|
||||
'timestamp': 1542130287,
|
||||
},
|
||||
'params': {'check_formats': None},
|
||||
}, {
|
||||
# restricted video
|
||||
'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
|
||||
'info_dict': {
|
||||
'id': 'WEnQU7XGcTdl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'Georestricted in DE',
|
||||
}, {
|
||||
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
|
||||
'only_matching': True,
|
||||
@ -96,11 +104,8 @@ class BitChuteIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_HEADERS = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||
'Referer': 'https://www.bitchute.com/',
|
||||
}
|
||||
_UPLOADER_URL_TMPL = 'https://www.bitchute.com/profile/%s/'
|
||||
_CHANNEL_URL_TMPL = 'https://www.bitchute.com/channel/%s/'
|
||||
|
||||
def _check_format(self, video_url, video_id):
|
||||
urls = orderedSet(
|
||||
@ -112,7 +117,7 @@ def _check_format(self, video_url, video_id):
|
||||
for url in urls:
|
||||
try:
|
||||
response = self._request_webpage(
|
||||
HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
|
||||
HEADRequest(url), video_id=video_id, note=f'Checking {url}')
|
||||
except ExtractorError as e:
|
||||
self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
|
||||
continue
|
||||
@ -121,54 +126,79 @@ def _check_format(self, video_url, video_id):
|
||||
'filesize': int_or_none(response.headers.get('Content-Length')),
|
||||
}
|
||||
|
||||
def _raise_if_restricted(self, webpage):
|
||||
page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
|
||||
if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
|
||||
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
|
||||
self.raise_geo_restricted(reason)
|
||||
|
||||
@staticmethod
|
||||
def _make_url(html):
|
||||
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
|
||||
return urljoin('https://www.bitchute.com', path)
|
||||
def _call_api(self, endpoint, data, display_id, fatal=True):
|
||||
note = endpoint.rpartition('/')[2]
|
||||
try:
|
||||
return self._download_json(
|
||||
f'https://api.bitchute.com/api/beta/{endpoint}', display_id,
|
||||
f'Downloading {note} API JSON', f'Unable to download {note} API JSON',
|
||||
data=json.dumps(data).encode(),
|
||||
headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
errors = '. '.join(traverse_obj(e.cause.response.read().decode(), (
|
||||
{json.loads}, 'errors', lambda _, v: v['context'] == 'reason', 'message', {str})))
|
||||
if errors and 'location' in errors:
|
||||
# Can always be fatal since the video/media call will reach this code first
|
||||
self.raise_geo_restricted(errors)
|
||||
if fatal:
|
||||
raise
|
||||
self.report_warning(e.msg)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
|
||||
|
||||
self._raise_if_restricted(webpage)
|
||||
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||
data = {'video_id': video_id}
|
||||
media_url = self._call_api('video/media', data, video_id)['media_url']
|
||||
|
||||
formats = []
|
||||
for format_ in traverse_obj(entries, (0, 'formats', ...)):
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
formats.extend(
|
||||
self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls', live=True))
|
||||
else:
|
||||
if self.get_param('check_formats') is not False:
|
||||
format_.update(self._check_format(format_.pop('url'), video_id) or {})
|
||||
if 'url' not in format_:
|
||||
continue
|
||||
formats.append(format_)
|
||||
if fmt := self._check_format(media_url, video_id):
|
||||
formats.append(fmt)
|
||||
else:
|
||||
formats.append({'url': media_url})
|
||||
|
||||
if not formats:
|
||||
self.raise_no_formats(
|
||||
'Video is unavailable. Please make sure this video is playable in the browser '
|
||||
'before reporting this issue.', expected=True, video_id=video_id)
|
||||
|
||||
details = get_element_by_class('details', webpage) or ''
|
||||
uploader_html = get_element_html_by_class('creator', details) or ''
|
||||
channel_html = get_element_html_by_class('name', details) or ''
|
||||
video = self._call_api('video', data, video_id, fatal=False)
|
||||
channel = None
|
||||
if channel_id := traverse_obj(video, ('channel', 'channel_id', {str})):
|
||||
channel = self._call_api('channel', {'channel_id': channel_id}, video_id, fatal=False)
|
||||
|
||||
return {
|
||||
**traverse_obj(video, {
|
||||
'title': ('video_name', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||
'channel': ('channel', 'channel_name', {str}),
|
||||
'channel_id': ('channel', 'channel_id', {str}),
|
||||
'channel_url': ('channel', 'channel_url', {urljoin('https://www.bitchute.com/')}),
|
||||
'uploader_id': ('profile_id', {str}),
|
||||
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
|
||||
'timestamp': ('date_published', {parse_iso8601}),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
'tags': ('hashtags', ..., {str}, filter, all, filter),
|
||||
'view_count': ('view_count', {int_or_none}),
|
||||
'is_live': ('state_id', {lambda x: x == 'live'}),
|
||||
}),
|
||||
**traverse_obj(channel, {
|
||||
'channel': ('channel_name', {str}),
|
||||
'channel_id': ('channel_id', {str}),
|
||||
'channel_url': ('url_slug', {format_field(template=self._CHANNEL_URL_TMPL)}, filter),
|
||||
'uploader': ('profile_name', {str}),
|
||||
'uploader_id': ('profile_id', {str}),
|
||||
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
|
||||
}),
|
||||
'id': video_id,
|
||||
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'uploader': clean_html(uploader_html),
|
||||
'uploader_url': self._make_url(uploader_html),
|
||||
'channel': clean_html(channel_html),
|
||||
'channel_url': self._make_url(channel_html),
|
||||
'upload_date': unified_strdate(self._search_regex(
|
||||
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@ -190,7 +220,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'This is the first video on #BitChute !',
|
||||
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg$',
|
||||
'uploader': 'BitChute',
|
||||
'upload_date': '20170103',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
@ -198,6 +228,9 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
'duration': 16,
|
||||
'view_count': int,
|
||||
'uploader_id': 'I5NgtHZn9vPj',
|
||||
'channel_id': '1VBwRfyNcKdX',
|
||||
'timestamp': 1483425443,
|
||||
},
|
||||
},
|
||||
],
|
||||
@ -213,6 +246,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
|
||||
'only_matching': True,
|
||||
|
@ -7,6 +7,7 @@
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
parse_resolution,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
@ -110,24 +111,23 @@ def _parse_vue_attributes(self, name, string, video_id):
|
||||
|
||||
return attributes
|
||||
|
||||
@staticmethod
|
||||
def _process_source(source):
|
||||
def _process_source(self, source):
|
||||
url = url_or_none(source['src'])
|
||||
if not url:
|
||||
return None
|
||||
|
||||
source_type = source.get('type', '')
|
||||
extension = mimetype2ext(source_type)
|
||||
is_video = source_type.startswith('video')
|
||||
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
|
||||
note = self._search_regex(r'[_-]([a-z]+)\.[\da-z]+(?:$|\?)', url, 'note', default=None)
|
||||
|
||||
return {
|
||||
'url': url,
|
||||
'ext': extension,
|
||||
'vcodec': None if is_video else 'none',
|
||||
'vcodec': None if source_type.startswith('video') else 'none',
|
||||
'quality': 10 if note == 'high' else 0,
|
||||
'format_note': note,
|
||||
'format_id': join_nonempty(extension, note),
|
||||
**parse_resolution(source.get('label')),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,59 +0,0 @@
|
||||
from .turner import TurnerBaseIE
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class CartoonNetworkIE(TurnerBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
|
||||
_TEST = {
|
||||
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
|
||||
'info_dict': {
|
||||
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
|
||||
'ext': 'mp4',
|
||||
'title': 'How to Draw Upgrade',
|
||||
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
|
||||
metadata_re = ''
|
||||
if content_re:
|
||||
metadata_re = r'|video_metadata\.content_' + content_re
|
||||
return self._search_regex(
|
||||
rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";',
|
||||
webpage, name, fatal=fatal)
|
||||
|
||||
media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
|
||||
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
|
||||
|
||||
info = self._extract_ngtv_info(
|
||||
media_id, {'networkId': 'cartoonnetwork'}, {
|
||||
'url': url,
|
||||
'site_name': 'CartoonNetwork',
|
||||
'auth_required': find_field('authType', 'auth type') != 'unauth',
|
||||
})
|
||||
|
||||
series = find_field(
|
||||
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
|
||||
info.update({
|
||||
'id': media_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': self._html_search_meta('description', webpage),
|
||||
'series': series,
|
||||
'episode': title,
|
||||
})
|
||||
|
||||
for field in ('season', 'episode'):
|
||||
field_name = field + 'Number'
|
||||
info[field + '_number'] = int_or_none(find_field(
|
||||
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
|
||||
|
||||
return info
|
@ -13,16 +13,17 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
multipart_encode,
|
||||
parse_duration,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CDAIE(InfoExtractor):
|
||||
@ -290,34 +291,47 @@ def extract_format(page, version):
|
||||
if not video or 'file' not in video:
|
||||
self.report_warning(f'Unable to extract {version} version information')
|
||||
return
|
||||
if video['file'].startswith('uggc'):
|
||||
video['file'] = codecs.decode(video['file'], 'rot_13')
|
||||
if video['file'].endswith('adc.mp4'):
|
||||
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
||||
elif not video['file'].startswith('http'):
|
||||
video['file'] = decrypt_file(video['file'])
|
||||
video_quality = video.get('quality')
|
||||
qualities = video.get('qualities', {})
|
||||
video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
|
||||
info_dict['formats'].append({
|
||||
'url': video['file'],
|
||||
'format_id': video_quality,
|
||||
'height': int_or_none(video_quality[:-1]),
|
||||
})
|
||||
if video.get('file'):
|
||||
if video['file'].startswith('uggc'):
|
||||
video['file'] = codecs.decode(video['file'], 'rot_13')
|
||||
if video['file'].endswith('adc.mp4'):
|
||||
video['file'] = video['file'].replace('adc.mp4', '.mp4')
|
||||
elif not video['file'].startswith('http'):
|
||||
video['file'] = decrypt_file(video['file'])
|
||||
info_dict['formats'].append({
|
||||
'url': video['file'],
|
||||
'format_id': video_quality,
|
||||
'height': int_or_none(video_quality[:-1]),
|
||||
})
|
||||
for quality, cda_quality in qualities.items():
|
||||
if quality == video_quality:
|
||||
continue
|
||||
data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
|
||||
'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
|
||||
data = json.dumps(data).encode()
|
||||
video_url = self._download_json(
|
||||
response = self._download_json(
|
||||
f'https://www.cda.pl/video/{video_id}', video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, data=data, note=f'Fetching {quality} url',
|
||||
errnote=f'Failed to fetch {quality} url', fatal=False)
|
||||
if try_get(video_url, lambda x: x['result']['status']) == 'ok':
|
||||
video_url = try_get(video_url, lambda x: x['result']['resp'])
|
||||
if (
|
||||
traverse_obj(response, ('result', 'status')) != 'ok'
|
||||
or not traverse_obj(response, ('result', 'resp', {url_or_none}))
|
||||
):
|
||||
continue
|
||||
video_url = response['result']['resp']
|
||||
ext = determine_ext(video_url)
|
||||
if ext == 'mpd':
|
||||
info_dict['formats'].extend(self._extract_mpd_formats(
|
||||
video_url, video_id, mpd_id='dash', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
info_dict['formats'].extend(self._extract_m3u8_formats(
|
||||
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
info_dict['formats'].append({
|
||||
'url': video_url,
|
||||
'format_id': quality,
|
||||
@ -353,7 +367,7 @@ def extract_format(page, version):
|
||||
|
||||
class CDAFolderIE(InfoExtractor):
|
||||
_MAX_PAGE_SIZE = 36
|
||||
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.cda.pl/domino264/folder/31188385',
|
||||
@ -378,6 +392,9 @@ class CDAFolderIE(InfoExtractor):
|
||||
'title': 'TESTY KOSMETYKÓW',
|
||||
},
|
||||
'playlist_mincount': 139,
|
||||
}, {
|
||||
'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -9,6 +9,7 @@
|
||||
ExtractorError,
|
||||
classproperty,
|
||||
float_or_none,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
@ -91,11 +92,15 @@ def _usp_signing_secret(self):
|
||||
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
|
||||
return self._search_regex(
|
||||
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
|
||||
'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
|
||||
'usp signing secret', group='secret', fatal=False) or 'hGDtqMKYVeFdofrAfFmBcrsakaZELajI'
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
||||
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
|
||||
query = {
|
||||
'contentId': f'{user_id}-vod-{video_id}',
|
||||
'provider': 'universe',
|
||||
**traverse_obj(url, ({parse_qs}, 'uss_token', {'signedKey': -1})),
|
||||
}
|
||||
info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
|
||||
access = self._download_json(
|
||||
'https://playback.dacast.com/content/access', video_id,
|
||||
|
@ -1,9 +1,15 @@
|
||||
from .zdf import ZDFBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class DreiSatIE(ZDFBaseIE):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/?#]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.3sat.de/dokumentation/reise/traumziele-suedostasiens-die-philippinen-und-vietnam-102.html',
|
||||
'info_dict': {
|
||||
@ -12,40 +18,59 @@ class DreiSatIE(ZDFBaseIE):
|
||||
'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
|
||||
'description': 'md5:26329ce5197775b596773b939354079d',
|
||||
'duration': 2625.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148',
|
||||
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~original?cb=1699870351148',
|
||||
'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
|
||||
'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95',
|
||||
'timestamp': 1738593000,
|
||||
'upload_date': '20250203',
|
||||
'timestamp': 1747920900,
|
||||
'upload_date': '20250522',
|
||||
},
|
||||
}, {
|
||||
# Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
|
||||
'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
|
||||
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
|
||||
'url': 'https://www.3sat.de/film/ab-18/ab-18---mein-fremdes-ich-100.html',
|
||||
'md5': 'f92638413a11d759bdae95c9d8ec165c',
|
||||
'info_dict': {
|
||||
'id': '141007_ab18_10wochensommer_film',
|
||||
'id': '221128_mein_fremdes_ich2_ab18',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ab 18! - 10 Wochen Sommer',
|
||||
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
|
||||
'duration': 2660,
|
||||
'timestamp': 1608604200,
|
||||
'upload_date': '20201222',
|
||||
'title': 'Ab 18! - Mein fremdes Ich',
|
||||
'description': 'md5:cae0c0b27b7426d62ca0dda181738bf0',
|
||||
'duration': 2625.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/ab-18---mein-fremdes-ich-106~original?cb=1666081865812',
|
||||
'episode': 'Ab 18! - Mein fremdes Ich',
|
||||
'episode_id': 'POS_6225d1ca-a0d5-45e3-870b-e783ee6c8a3f',
|
||||
'timestamp': 1695081600,
|
||||
'upload_date': '20230919',
|
||||
},
|
||||
'skip': '410 Gone',
|
||||
}, {
|
||||
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
|
||||
'url': 'https://www.3sat.de/gesellschaft/37-grad-leben/aus-dem-leben-gerissen-102.html',
|
||||
'md5': 'a903eaf8d1fd635bd3317cd2ad87ec84',
|
||||
'info_dict': {
|
||||
'id': '140913_sendung_schweizweit',
|
||||
'id': '250323_0903_sendung_sgl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'timestamp': 1410623100,
|
||||
'upload_date': '20140913',
|
||||
'title': 'Plötzlich ohne dich',
|
||||
'description': 'md5:380cc10659289dd91510ad8fa717c66b',
|
||||
'duration': 1620.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/37-grad-leben-106~original?cb=1645537156810',
|
||||
'episode': 'Plötzlich ohne dich',
|
||||
'episode_id': 'POS_faa7a93c-c0f2-4d51-823f-ce2ac3ee191b',
|
||||
'timestamp': 1743162540,
|
||||
'upload_date': '20250328',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}, {
|
||||
# Video with chapters
|
||||
'url': 'https://www.3sat.de/kultur/buchmesse/dein-buch-das-beste-von-der-leipziger-buchmesse-2025-teil-1-100.html',
|
||||
'md5': '6b95790ce52e75f0d050adcdd2711ee6',
|
||||
'info_dict': {
|
||||
'id': '250330_dein_buch1_bum',
|
||||
'ext': 'mp4',
|
||||
'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
|
||||
'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844',
|
||||
'duration': 5399.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903',
|
||||
'chapters': 'count:24',
|
||||
'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
|
||||
'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb',
|
||||
'timestamp': 1743327000,
|
||||
'upload_date': '20250330',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
|
||||
'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
|
||||
@ -58,11 +83,42 @@ class DreiSatIE(ZDFBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player = self._search_json(
|
||||
r'data-zdfplayer-jsb=(["\'])', webpage, 'player JSON', video_id)
|
||||
player_url = player['content']
|
||||
api_token = f'Bearer {player["apiToken"]}'
|
||||
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
if webpage:
|
||||
player = self._extract_player(webpage, url, fatal=False)
|
||||
if player:
|
||||
return self._extract_regular(url, player, video_id)
|
||||
content = self._call_api(player_url, video_id, 'video metadata', api_token)
|
||||
|
||||
return self._extract_mobile(video_id)
|
||||
video_target = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||
ptmd_path = traverse_obj(video_target, (
|
||||
(('streams', 'default'), None),
|
||||
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'),
|
||||
{str}, any, {require('ptmd path')}))
|
||||
ptmd_url = self._expand_ptmd_template(player_url, ptmd_path)
|
||||
aspect_ratio = self._parse_aspect_ratio(video_target.get('aspectRatio'))
|
||||
info = self._extract_ptmd(ptmd_url, video_id, api_token, aspect_ratio)
|
||||
|
||||
return merge_dicts(info, {
|
||||
**traverse_obj(content, {
|
||||
'title': (('title', 'teaserHeadline'), {str}, any),
|
||||
'episode': (('title', 'teaserHeadline'), {str}, any),
|
||||
'description': (('leadParagraph', 'teasertext'), {str}, any),
|
||||
'timestamp': ('editorialDate', {parse_iso8601}),
|
||||
}),
|
||||
**traverse_obj(video_target, {
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'chapters': ('streamAnchorTag', {self._extract_chapters}),
|
||||
}),
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(content, ('teaserImageRef', 'layouts', {dict}))),
|
||||
**traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', {
|
||||
'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}),
|
||||
'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}),
|
||||
'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}),
|
||||
'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}),
|
||||
'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode_id': ('contentId', {str}),
|
||||
})),
|
||||
})
|
||||
|
@ -5,7 +5,6 @@
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .common import InfoExtractor
|
||||
from .once import OnceIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
@ -16,7 +15,7 @@
|
||||
)
|
||||
|
||||
|
||||
class ESPNIE(OnceIE):
|
||||
class ESPNIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
@ -131,9 +130,7 @@ def extract_source(source_url, source_id=None):
|
||||
return
|
||||
format_urls.add(source_url)
|
||||
ext = determine_ext(source_url)
|
||||
if OnceIE.suitable(source_url):
|
||||
formats.extend(self._extract_once_formats(source_url))
|
||||
elif ext == 'smil':
|
||||
if ext == 'smil':
|
||||
formats.extend(self._extract_smil_formats(
|
||||
source_url, video_id, fatal=False))
|
||||
elif ext == 'f4m':
|
||||
|
@ -2,11 +2,15 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
qualities,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
parse_qs,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FirstTVIE(InfoExtractor):
|
||||
@ -15,40 +19,51 @@ class FirstTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sport)?1tv\.ru/(?:[^/?#]+/)+(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
# single format
|
||||
'url': 'http://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
|
||||
'md5': 'a1b6b60d530ebcf8daacf4565762bbaf',
|
||||
# single format; has item.id
|
||||
'url': 'https://www.1tv.ru/shows/naedine-so-vsemi/vypuski/gost-lyudmila-senchina-naedine-so-vsemi-vypusk-ot-12-02-2015',
|
||||
'md5': '8011ae8e88ff4150107ab9c5a8f5b659',
|
||||
'info_dict': {
|
||||
'id': '40049',
|
||||
'ext': 'mp4',
|
||||
'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'upload_date': '20150212',
|
||||
'duration': 2694,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'http://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
|
||||
# multiple formats; has item.id
|
||||
'url': 'https://www.1tv.ru/shows/dobroe-utro/pro-zdorove/vesennyaya-allergiya-dobroe-utro-fragment-vypuska-ot-07042016',
|
||||
'info_dict': {
|
||||
'id': '364746',
|
||||
'ext': 'mp4',
|
||||
'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|JPG)$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'upload_date': '20160407',
|
||||
'duration': 179,
|
||||
'formats': 'mincount:3',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00',
|
||||
'url': 'https://www.1tv.ru/news/issue/2016-12-01/14:00',
|
||||
'info_dict': {
|
||||
'id': '14:00',
|
||||
'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал',
|
||||
'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd',
|
||||
'title': 'Выпуск программы «Время» в 20:00 1 декабря 2016 года. Новости. Первый канал',
|
||||
'thumbnail': 'https://static.1tv.ru/uploads/photo/image/8/big/338448_big_8fc7eb236f.jpg',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
# has timestamp; has item.uid but not item.id
|
||||
'url': 'https://www.1tv.ru/shows/segodnya-vecherom/vypuski/avtory-odnogo-hita-segodnya-vecherom-vypusk-ot-03-05-2025',
|
||||
'info_dict': {
|
||||
'id': '270411',
|
||||
'ext': 'mp4',
|
||||
'title': 'Авторы одного хита. Сегодня вечером. Выпуск от 03.05.2025',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1746286020,
|
||||
'upload_date': '20250503',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
|
||||
'only_matching': True,
|
||||
@ -57,96 +72,60 @@ class FirstTVIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _entries(self, items):
|
||||
for item in items:
|
||||
video_id = str(item.get('id') or item['uid'])
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for f in traverse_obj(item, ('sources', lambda _, v: url_or_none(v['src']))):
|
||||
src = f['src']
|
||||
ext = mimetype2ext(f.get('type'), default=determine_ext(src))
|
||||
if ext == 'm3u8':
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
src, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||
src, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
tbr = self._search_regex(fr'_(\d{{3,}})\.{ext}', src, 'tbr', default=None)
|
||||
formats.append({
|
||||
'url': src,
|
||||
'ext': ext,
|
||||
'format_id': join_nonempty('http', ext, tbr),
|
||||
'tbr': int_or_none(tbr),
|
||||
# quality metadata of http formats may be incorrect
|
||||
'quality': -10,
|
||||
})
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
yield {
|
||||
**traverse_obj(item, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('poster', {url_or_none}),
|
||||
'timestamp': ('dvr_begin_at', {int_or_none}),
|
||||
'upload_date': ('date_air', {unified_strdate}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
playlist_url = urllib.parse.urljoin(url, self._search_regex(
|
||||
playlist_url = urllib.parse.urljoin(url, self._html_search_regex(
|
||||
r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'playlist url', group='url'))
|
||||
|
||||
parsed_url = urllib.parse.urlparse(playlist_url)
|
||||
qs = urllib.parse.parse_qs(parsed_url.query)
|
||||
item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]')
|
||||
item_ids = traverse_obj(parse_qs(playlist_url), 'video_id', 'videos_ids[]', 'news_ids[]')
|
||||
items = traverse_obj(
|
||||
self._download_json(playlist_url, display_id),
|
||||
lambda _, v: v['uid'] and (str(v['uid']) in item_ids if item_ids else True))
|
||||
|
||||
items = self._download_json(playlist_url, display_id)
|
||||
|
||||
if item_ids:
|
||||
items = [
|
||||
item for item in items
|
||||
if item.get('uid') and str(item['uid']) in item_ids]
|
||||
else:
|
||||
items = [items[0]]
|
||||
|
||||
entries = []
|
||||
QUALITIES = ('ld', 'sd', 'hd')
|
||||
|
||||
for item in items:
|
||||
title = item['title']
|
||||
quality = qualities(QUALITIES)
|
||||
formats = []
|
||||
path = None
|
||||
for f in item.get('mbr', []):
|
||||
src = url_or_none(f.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
tbr = int_or_none(self._search_regex(
|
||||
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
|
||||
if not path:
|
||||
path = self._search_regex(
|
||||
r'//[^/]+/(.+?)_\d+\.mp4', src,
|
||||
'm3u8 path', default=None)
|
||||
formats.append({
|
||||
'url': src,
|
||||
'format_id': f.get('name'),
|
||||
'tbr': tbr,
|
||||
'source_preference': quality(f.get('name')),
|
||||
# quality metadata of http formats may be incorrect
|
||||
'preference': -10,
|
||||
})
|
||||
# m3u8 URL format is reverse engineered from [1] (search for
|
||||
# master.m3u8). dashEdges (that is currently balancer-vod.1tv.ru)
|
||||
# is taken from [2].
|
||||
# 1. http://static.1tv.ru/player/eump1tv-current/eump-1tv.all.min.js?rnd=9097422834:formatted
|
||||
# 2. http://static.1tv.ru/player/eump1tv-config/config-main.js?rnd=9097422834
|
||||
if not path and len(formats) == 1:
|
||||
path = self._search_regex(
|
||||
r'//[^/]+/(.+?$)', formats[0]['url'],
|
||||
'm3u8 path', default=None)
|
||||
if path:
|
||||
if len(formats) == 1:
|
||||
m3u8_path = ','
|
||||
else:
|
||||
tbrs = [str(t) for t in sorted(f['tbr'] for f in formats)]
|
||||
m3u8_path = '_,{},{}'.format(','.join(tbrs), '.mp4')
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
f'http://balancer-vod.1tv.ru/{path}{m3u8_path}.urlset/master.m3u8',
|
||||
display_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
|
||||
duration = int_or_none(item.get('duration') or self._html_search_meta(
|
||||
'video:duration', webpage, 'video duration', fatal=False))
|
||||
upload_date = unified_strdate(self._html_search_meta(
|
||||
'ya:ovs:upload_date', webpage, 'upload date', default=None))
|
||||
|
||||
entries.append({
|
||||
'id': str(item.get('id') or item['uid']),
|
||||
'thumbnail': thumbnail,
|
||||
'title': title,
|
||||
'upload_date': upload_date,
|
||||
'duration': int_or_none(duration),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
title = self._html_search_regex(
|
||||
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
|
||||
r"'title'\s*:\s*'([^']+)'"),
|
||||
webpage, 'title', default=None) or self._og_search_title(
|
||||
webpage, default=None)
|
||||
description = self._html_search_regex(
|
||||
r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>',
|
||||
webpage, 'description', default=None) or self._html_search_meta(
|
||||
'description', webpage, 'description', default=None)
|
||||
|
||||
return self.playlist_result(entries, display_id, title, description)
|
||||
return self.playlist_result(
|
||||
self._entries(items), display_id, self._og_search_title(webpage, default=None),
|
||||
thumbnail=self._og_search_thumbnail(webpage, default=None))
|
||||
|
@ -1,9 +1,9 @@
|
||||
import urllib.parse
|
||||
|
||||
from .once import OnceIE
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class GameSpotIE(OnceIE):
|
||||
class GameSpotIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||
|
@ -16,7 +16,6 @@
|
||||
MEDIA_EXTENSIONS,
|
||||
ExtractorError,
|
||||
UnsupportedError,
|
||||
base_url,
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
dict_get,
|
||||
@ -38,6 +37,7 @@
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
@ -2538,12 +2538,13 @@ def _real_extract(self, url):
|
||||
return self.playlist_result(
|
||||
self._parse_xspf(
|
||||
doc, video_id, xspf_url=url,
|
||||
xspf_base_url=full_response.url),
|
||||
xspf_base_url=new_url),
|
||||
video_id)
|
||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
|
||||
doc,
|
||||
mpd_base_url=base_url(full_response.url),
|
||||
# Do not use yt_dlp.utils.base_url here since it will raise on file:// URLs
|
||||
mpd_base_url=update_url(new_url, query=None, fragment=None).rpartition('/')[0],
|
||||
mpd_url=url)
|
||||
info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None
|
||||
self._extra_manifest_info(info_dict, url)
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
|
||||
class GetCourseRuPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
|
||||
_VALID_URL = r'https?://(?:player02\.getcourse\.ru|cf-api-2\.vhcdn\.com)/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
|
||||
_TESTS = [{
|
||||
'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
|
||||
@ -20,6 +20,16 @@ class GetCourseRuPlayerIE(InfoExtractor):
|
||||
'duration': 1693,
|
||||
},
|
||||
'skip': 'JWT expired',
|
||||
}, {
|
||||
'url': 'https://cf-api-2.vhcdn.com/sign-player/?json=example',
|
||||
'info_dict': {
|
||||
'id': '435735291',
|
||||
'title': '8afd7c489952108e00f019590f3711f3',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://preview-htz.vhcdn.com/preview/8afd7c489952108e00f019590f3711f3/preview.jpg?version=1682170973&host=vh-72',
|
||||
'duration': 777,
|
||||
},
|
||||
'skip': 'JWT expired',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -168,7 +178,7 @@ def _real_extract(self, url):
|
||||
|
||||
playlist_id = self._search_regex(
|
||||
r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
|
||||
title = self._og_search_title(webpage) or self._html_extract_title(webpage)
|
||||
title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
|
||||
|
@ -1,3 +1,4 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
@ -6,9 +7,7 @@
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
js_to_json,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@ -16,7 +15,6 @@ class IPrimaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_GEO_BYPASS = False
|
||||
_NETRC_MACHINE = 'iprima'
|
||||
_AUTH_ROOT = 'https://auth.iprima.cz'
|
||||
access_token = None
|
||||
|
||||
_TESTS = [{
|
||||
@ -86,48 +84,18 @@ def _perform_login(self, username, password):
|
||||
if self.access_token:
|
||||
return
|
||||
|
||||
login_page = self._download_webpage(
|
||||
f'{self._AUTH_ROOT}/oauth2/login', None, note='Downloading login page',
|
||||
errnote='Downloading login page failed')
|
||||
|
||||
login_form = self._hidden_inputs(login_page)
|
||||
|
||||
login_form.update({
|
||||
'_email': username,
|
||||
'_password': password})
|
||||
|
||||
profile_select_html, login_handle = self._download_webpage_handle(
|
||||
f'{self._AUTH_ROOT}/oauth2/login', None, data=urlencode_postdata(login_form),
|
||||
note='Logging in')
|
||||
|
||||
# a profile may need to be selected first, even when there is only a single one
|
||||
if '/profile-select' in login_handle.url:
|
||||
profile_id = self._search_regex(
|
||||
r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')
|
||||
|
||||
login_handle = self._request_webpage(
|
||||
f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
|
||||
query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')
|
||||
|
||||
code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
|
||||
if not code:
|
||||
raise ExtractorError('Login failed', expected=True)
|
||||
|
||||
token_request_data = {
|
||||
'scope': 'openid+email+profile+phone+address+offline_access',
|
||||
'client_id': 'prima_sso',
|
||||
'grant_type': 'authorization_code',
|
||||
'code': code,
|
||||
'redirect_uri': f'{self._AUTH_ROOT}/sso/auth-check'}
|
||||
|
||||
token_data = self._download_json(
|
||||
f'{self._AUTH_ROOT}/oauth2/token', None,
|
||||
note='Downloading token', errnote='Downloading token failed',
|
||||
data=urlencode_postdata(token_request_data))
|
||||
'https://ucet.iprima.cz/api/session/create', None,
|
||||
note='Logging in', errnote='Failed to log in',
|
||||
data=json.dumps({
|
||||
'email': username,
|
||||
'password': password,
|
||||
'deviceName': 'Windows Chrome',
|
||||
}).encode(), headers={'content-type': 'application/json'})
|
||||
|
||||
self.access_token = token_data.get('access_token')
|
||||
if self.access_token is None:
|
||||
raise ExtractorError('Getting token failed', expected=True)
|
||||
self.access_token = token_data['accessToken']['value']
|
||||
if not self.access_token:
|
||||
raise ExtractorError('Failed to fetch access token')
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self.access_token:
|
||||
|
@ -1,23 +1,33 @@
|
||||
import functools
|
||||
import itertools
|
||||
import math
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
make_archive_id,
|
||||
orderedSet,
|
||||
smuggle_url,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class JioSaavnBaseIE(InfoExtractor):
|
||||
_URL_BASE_RE = r'https?://(?:www\.)?(?:jio)?saavn\.com'
|
||||
_API_URL = 'https://www.jiosaavn.com/api.php'
|
||||
_VALID_BITRATES = {'16', '32', '64', '128', '320'}
|
||||
|
||||
@ -30,16 +40,20 @@ def requested_bitrates(self):
|
||||
f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
|
||||
return requested_bitrates
|
||||
|
||||
def _extract_formats(self, song_data):
|
||||
def _extract_formats(self, item_data):
|
||||
# Show/episode JSON data has a slightly different structure than song JSON data
|
||||
if media_url := traverse_obj(item_data, ('more_info', 'encrypted_media_url', {str})):
|
||||
item_data.setdefault('encrypted_media_url', media_url)
|
||||
|
||||
for bitrate in self.requested_bitrates:
|
||||
media_data = self._download_json(
|
||||
self._API_URL, song_data['id'],
|
||||
self._API_URL, item_data['id'],
|
||||
f'Downloading format info for {bitrate}',
|
||||
fatal=False, data=urlencode_postdata({
|
||||
'__call': 'song.generateAuthToken',
|
||||
'_format': 'json',
|
||||
'bitrate': bitrate,
|
||||
'url': song_data['encrypted_media_url'],
|
||||
'url': item_data['encrypted_media_url'],
|
||||
}))
|
||||
if not traverse_obj(media_data, ('auth_url', {url_or_none})):
|
||||
self.report_warning(f'Unable to extract format info for {bitrate}')
|
||||
@ -53,24 +67,6 @@ def _extract_formats(self, song_data):
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
def _extract_song(self, song_data, url=None):
|
||||
info = traverse_obj(song_data, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('song', {clean_html}),
|
||||
'album': ('album', {clean_html}),
|
||||
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'view_count': ('play_count', {int_or_none}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
|
||||
'webpage_url': ('perma_url', {url_or_none}),
|
||||
})
|
||||
if webpage_url := info.get('webpage_url') or url:
|
||||
info['display_id'] = url_basename(webpage_url)
|
||||
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
|
||||
|
||||
return info
|
||||
|
||||
def _call_api(self, type_, token, note='API', params={}):
|
||||
return self._download_json(
|
||||
self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
|
||||
@ -84,19 +80,89 @@ def _call_api(self, type_, token, note='API', params={}):
|
||||
**params,
|
||||
})
|
||||
|
||||
def _yield_songs(self, playlist_data):
|
||||
for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
|
||||
song_info = self._extract_song(song_data)
|
||||
url = smuggle_url(song_info['webpage_url'], {
|
||||
'id': song_data['id'],
|
||||
'encrypted_media_url': song_data['encrypted_media_url'],
|
||||
})
|
||||
yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
|
||||
@staticmethod
|
||||
def _extract_song(song_data, url=None):
|
||||
info = traverse_obj(song_data, {
|
||||
'id': ('id', {str}),
|
||||
'title': (('song', 'title'), {clean_html}, any),
|
||||
'album': ((None, 'more_info'), 'album', {clean_html}, any),
|
||||
'duration': ((None, 'more_info'), 'duration', {int_or_none}, any),
|
||||
'channel': ((None, 'more_info'), 'label', {str}, any),
|
||||
'channel_id': ((None, 'more_info'), 'label_id', {str}, any),
|
||||
'channel_url': ((None, 'more_info'), 'label_url', {urljoin('https://www.jiosaavn.com/')}, any),
|
||||
'release_date': ((None, 'more_info'), 'release_date', {unified_strdate}, any),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
|
||||
'view_count': ('play_count', {int_or_none}),
|
||||
'language': ('language', {lambda x: ISO639Utils.short2long(x.casefold()) or 'und'}),
|
||||
'webpage_url': ('perma_url', {url_or_none}),
|
||||
'artists': ('more_info', 'artistMap', 'primary_artists', ..., 'name', {str}, filter, all),
|
||||
})
|
||||
if webpage_url := info.get('webpage_url') or url:
|
||||
info['display_id'] = url_basename(webpage_url)
|
||||
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
|
||||
|
||||
if primary_artists := traverse_obj(song_data, ('primary_artists', {lambda x: x.split(', ') if x else None})):
|
||||
info['artists'].extend(primary_artists)
|
||||
if featured_artists := traverse_obj(song_data, ('featured_artists', {str}, filter)):
|
||||
info['artists'].extend(featured_artists.split(', '))
|
||||
info['artists'] = orderedSet(info['artists']) or None
|
||||
|
||||
return info
|
||||
|
||||
@staticmethod
|
||||
def _extract_episode(episode_data, url=None):
|
||||
info = JioSaavnBaseIE._extract_song(episode_data, url)
|
||||
info.pop('_old_archive_ids', None)
|
||||
info.update(traverse_obj(episode_data, {
|
||||
'description': ('more_info', 'description', {str}),
|
||||
'timestamp': ('more_info', 'release_time', {unified_timestamp}),
|
||||
'series': ('more_info', 'show_title', {str}),
|
||||
'series_id': ('more_info', 'show_id', {str}),
|
||||
'season': ('more_info', 'season_title', {str}),
|
||||
'season_number': ('more_info', 'season_no', {int_or_none}),
|
||||
'season_id': ('more_info', 'season_id', {str}),
|
||||
'episode_number': ('more_info', 'episode_number', {int_or_none}),
|
||||
'cast': ('starring', {lambda x: x.split(', ') if x else None}),
|
||||
}))
|
||||
return info
|
||||
|
||||
def _extract_jiosaavn_result(self, url, endpoint, response_key, parse_func):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
data = traverse_obj(smuggled_data, ({
|
||||
'id': ('id', {str}),
|
||||
'encrypted_media_url': ('encrypted_media_url', {str}),
|
||||
}))
|
||||
|
||||
if 'id' in data and 'encrypted_media_url' in data:
|
||||
result = {'id': data['id']}
|
||||
else:
|
||||
# only extract metadata if this is not a url_transparent result
|
||||
data = self._call_api(endpoint, self._match_id(url))[response_key][0]
|
||||
result = parse_func(data, url)
|
||||
|
||||
result['formats'] = list(self._extract_formats(data))
|
||||
return result
|
||||
|
||||
def _yield_items(self, playlist_data, keys=None, parse_func=None):
|
||||
"""Subclasses using this method must set _ENTRY_IE"""
|
||||
if parse_func is None:
|
||||
parse_func = self._extract_song
|
||||
|
||||
for item_data in traverse_obj(playlist_data, (
|
||||
*variadic(keys, (str, bytes, dict, set)), lambda _, v: v['id'] and v['perma_url'],
|
||||
)):
|
||||
info = parse_func(item_data)
|
||||
url = smuggle_url(info['webpage_url'], traverse_obj(item_data, {
|
||||
'id': ('id', {str}),
|
||||
'encrypted_media_url': ((None, 'more_info'), 'encrypted_media_url', {str}, any),
|
||||
}))
|
||||
yield self.url_result(url, self._ENTRY_IE, url_transparent=True, **info)
|
||||
|
||||
|
||||
class JioSaavnSongIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:song'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'(?:/song/[^/?#]+/|/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
|
||||
'md5': '3b84396d15ed9e083c3106f1fa589c04',
|
||||
@ -106,12 +172,38 @@ class JioSaavnSongIE(JioSaavnBaseIE):
|
||||
'ext': 'm4a',
|
||||
'title': 'Leja Re',
|
||||
'album': 'Leja Re',
|
||||
'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 205,
|
||||
'view_count': int,
|
||||
'release_year': 2018,
|
||||
'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
|
||||
'_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
|
||||
'channel': 'T-Series',
|
||||
'language': 'hin',
|
||||
'channel_id': '34297',
|
||||
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
|
||||
'release_date': '20181124',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/song/chuttamalle/P1FfWjZkQ0Q',
|
||||
'md5': '96296c58d6ce488a417ef0728fd2d680',
|
||||
'info_dict': {
|
||||
'id': 'O94kBTtw',
|
||||
'display_id': 'P1FfWjZkQ0Q',
|
||||
'ext': 'm4a',
|
||||
'title': 'Chuttamalle',
|
||||
'album': 'Devara Part 1 - Telugu',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 222,
|
||||
'view_count': int,
|
||||
'release_year': 2024,
|
||||
'artists': 'count:3',
|
||||
'_old_archive_ids': ['jiosaavnsong P1FfWjZkQ0Q'],
|
||||
'channel': 'T-Series',
|
||||
'language': 'tel',
|
||||
'channel_id': '34297',
|
||||
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
|
||||
'release_date': '20240926',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
|
||||
@ -119,26 +211,51 @@ class JioSaavnSongIE(JioSaavnBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
song_data = traverse_obj(smuggled_data, ({
|
||||
'id': ('id', {str}),
|
||||
'encrypted_media_url': ('encrypted_media_url', {str}),
|
||||
}))
|
||||
return self._extract_jiosaavn_result(url, 'song', 'songs', self._extract_song)
|
||||
|
||||
if 'id' in song_data and 'encrypted_media_url' in song_data:
|
||||
result = {'id': song_data['id']}
|
||||
else:
|
||||
# only extract metadata if this is not a url_transparent result
|
||||
song_data = self._call_api('song', self._match_id(url))['songs'][0]
|
||||
result = self._extract_song(song_data, url)
|
||||
|
||||
result['formats'] = list(self._extract_formats(song_data))
|
||||
return result
|
||||
class JioSaavnShowIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:show'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/[^/?#]+/(?P<id>[^/?#]{11,})/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/shows/non-food-ways-to-boost-your-energy/XFMcKICOCgc_',
|
||||
'md5': '0733cd254cfe74ef88bea1eaedcf1f4f',
|
||||
'info_dict': {
|
||||
'id': 'qqzh3RKZ',
|
||||
'display_id': 'XFMcKICOCgc_',
|
||||
'ext': 'mp3',
|
||||
'title': 'Non-Food Ways To Boost Your Energy',
|
||||
'description': 'md5:26e7129644b5c6aada32b8851c3997c8',
|
||||
'episode': 'Episode 1',
|
||||
'timestamp': 1640563200,
|
||||
'series': 'Holistic Lifestyle With Neha Ranglani',
|
||||
'series_id': '52397',
|
||||
'season': 'Holistic Lifestyle With Neha Ranglani',
|
||||
'season_number': 1,
|
||||
'season_id': '61273',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 311,
|
||||
'view_count': int,
|
||||
'release_year': 2021,
|
||||
'language': 'eng',
|
||||
'channel': 'Saavn OG',
|
||||
'channel_id': '1953876',
|
||||
'episode_number': 1,
|
||||
'upload_date': '20211227',
|
||||
'release_date': '20211227',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/shows/himesh-reshammiya/Kr8fmfSN4vo_',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_jiosaavn_result(url, 'episode', 'episodes', self._extract_episode)
|
||||
|
||||
|
||||
class JioSaavnAlbumIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:album'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/album/[^/?#]+/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
|
||||
'info_dict': {
|
||||
@ -147,18 +264,19 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}]
|
||||
_ENTRY_IE = JioSaavnSongIE
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
album_data = self._call_api('album', display_id)
|
||||
|
||||
return self.playlist_result(
|
||||
self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
|
||||
self._yield_items(album_data, 'songs'), display_id, traverse_obj(album_data, ('title', {str})))
|
||||
|
||||
|
||||
class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
|
||||
'info_dict': {
|
||||
@ -172,15 +290,16 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
'id': 'DVR,pFUOwyXqIp77B1JF,A__',
|
||||
'title': 'Mood Hindi',
|
||||
},
|
||||
'playlist_mincount': 801,
|
||||
'playlist_mincount': 750,
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
|
||||
'info_dict': {
|
||||
'id': 'Me5RridRfDk_',
|
||||
'title': 'Taaza Tunes',
|
||||
},
|
||||
'playlist_mincount': 301,
|
||||
'playlist_mincount': 50,
|
||||
}]
|
||||
_ENTRY_IE = JioSaavnSongIE
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, token, page):
|
||||
@ -189,7 +308,7 @@ def _fetch_page(self, token, page):
|
||||
|
||||
def _entries(self, token, first_page_data, page):
|
||||
page_data = first_page_data if not page else self._fetch_page(token, page + 1)
|
||||
yield from self._yield_songs(page_data)
|
||||
yield from self._yield_items(page_data, 'songs')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@ -199,3 +318,95 @@ def _real_extract(self, url):
|
||||
return self.playlist_result(InAdvancePagedList(
|
||||
functools.partial(self._entries, display_id, playlist_data),
|
||||
total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
|
||||
|
||||
|
||||
class JioSaavnShowPlaylistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:show:playlist'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/(?P<show>[^#/?]+)/(?P<season>\d+)/[^/?#]+'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/shows/talking-music/1/PjReFP-Sguk_',
|
||||
'info_dict': {
|
||||
'id': 'talking-music-1',
|
||||
'title': 'Talking Music',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}]
|
||||
_ENTRY_IE = JioSaavnShowIE
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
def _fetch_page(self, show_id, season_id, page):
|
||||
return self._call_api('show', show_id, f'show page {page}', {
|
||||
'p': page,
|
||||
'__call': 'show.getAllEpisodes',
|
||||
'show_id': show_id,
|
||||
'season_number': season_id,
|
||||
'api_version': '4',
|
||||
'sort_order': 'desc',
|
||||
})
|
||||
|
||||
def _entries(self, show_id, season_id, page):
|
||||
page_data = self._fetch_page(show_id, season_id, page + 1)
|
||||
yield from self._yield_items(page_data, keys=None, parse_func=self._extract_episode)
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_slug, season_id = self._match_valid_url(url).group('show', 'season')
|
||||
playlist_id = f'{show_slug}-{season_id}'
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
show_info = self._search_json(
|
||||
r'window\.__INITIAL_DATA__\s*=', webpage, 'initial data',
|
||||
playlist_id, transform_source=js_to_json)['showView']
|
||||
show_id = show_info['current_id']
|
||||
|
||||
entries = OnDemandPagedList(functools.partial(self._entries, show_id, season_id), self._PAGE_SIZE)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, traverse_obj(show_info, ('show', 'title', 'text', {str})))
|
||||
|
||||
|
||||
class JioSaavnArtistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:artist'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/artist/[^/?#]+/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/artist/krsna-songs/rYLBEve2z3U_',
|
||||
'info_dict': {
|
||||
'id': 'rYLBEve2z3U_',
|
||||
'title': 'KR$NA',
|
||||
},
|
||||
'playlist_mincount': 38,
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/artist/sanam-puri-songs/SkNEv3qRhDE_',
|
||||
'info_dict': {
|
||||
'id': 'SkNEv3qRhDE_',
|
||||
'title': 'Sanam Puri',
|
||||
},
|
||||
'playlist_mincount': 51,
|
||||
}]
|
||||
_ENTRY_IE = JioSaavnSongIE
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, artist_id, page):
|
||||
return self._call_api('artist', artist_id, f'artist page {page + 1}', {
|
||||
'p': page,
|
||||
'n_song': self._PAGE_SIZE,
|
||||
'n_album': self._PAGE_SIZE,
|
||||
'sub_type': '',
|
||||
'includeMetaTags': '',
|
||||
'api_version': '4',
|
||||
'category': 'alphabetical',
|
||||
'sort_order': 'asc',
|
||||
})
|
||||
|
||||
def _entries(self, artist_id, first_page):
|
||||
for page in itertools.count():
|
||||
playlist_data = first_page if not page else self._fetch_page(artist_id, page)
|
||||
if not traverse_obj(playlist_data, ('topSongs', ..., {dict})):
|
||||
break
|
||||
yield from self._yield_items(playlist_data, 'topSongs')
|
||||
|
||||
def _real_extract(self, url):
|
||||
artist_id = self._match_id(url)
|
||||
first_page = self._fetch_page(artist_id, 0)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(artist_id, first_page), artist_id,
|
||||
traverse_obj(first_page, ('name', {str})))
|
||||
|
@ -1,4 +1,5 @@
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -9,12 +10,12 @@
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
srt_subtitles_timecode,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_elements, require, traverse_obj
|
||||
|
||||
|
||||
class LinkedInBaseIE(InfoExtractor):
|
||||
@ -82,7 +83,10 @@ def _get_video_id(self, video_data, course_slug, video_slug):
|
||||
|
||||
|
||||
class LinkedInIE(LinkedInBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)'
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)',
|
||||
r'https?://(?:www\.)?linkedin\.com/feed/update/urn:li:activity:(?P<id>\d+)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
|
||||
'info_dict': {
|
||||
@ -106,6 +110,9 @@ class LinkedInIE(LinkedInBaseIE):
|
||||
'like_count': int,
|
||||
'subtitles': 'mincount:1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.linkedin.com/feed/update/urn:li:activity:7016901149999955968/?utm_source=share&utm_medium=member_desktop',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -271,3 +278,110 @@ def _real_extract(self, url):
|
||||
entries, course_slug,
|
||||
course_data.get('title'),
|
||||
course_data.get('description'))
|
||||
|
||||
|
||||
class LinkedInEventsIE(LinkedInBaseIE):
|
||||
IE_NAME = 'linkedin:events'
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/events/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.linkedin.com/events/7084656651378536448/comments/',
|
||||
'info_dict': {
|
||||
'id': '7084656651378536448',
|
||||
'ext': 'mp4',
|
||||
'title': '#37 Aprende a hacer una entrevista en inglés para tu próximo trabajo remoto',
|
||||
'description': '¡Agarra para anotar que se viene tremendo evento!',
|
||||
'duration': 1765,
|
||||
'timestamp': 1689113772,
|
||||
'upload_date': '20230711',
|
||||
'release_timestamp': 1689174012,
|
||||
'release_date': '20230712',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.linkedin.com/events/27-02energyfreedombyenergyclub7295762520814874625/comments/',
|
||||
'info_dict': {
|
||||
'id': '27-02energyfreedombyenergyclub7295762520814874625',
|
||||
'ext': 'mp4',
|
||||
'title': '27.02 Energy Freedom by Energy Club',
|
||||
'description': 'md5:1292e6f31df998914c293787a02c3b91',
|
||||
'duration': 6420,
|
||||
'timestamp': 1739445333,
|
||||
'upload_date': '20250213',
|
||||
'release_timestamp': 1740657620,
|
||||
'release_date': '20250227',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://www.linkedin.com/').get('li_at'):
|
||||
self.raise_login_required()
|
||||
|
||||
def _real_extract(self, url):
|
||||
event_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, event_id)
|
||||
|
||||
base_data = traverse_obj(webpage, (
|
||||
{find_elements(tag='code', attr='style', value='display: none')}, ..., {json.loads}, 'included', ...))
|
||||
meta_data = traverse_obj(base_data, (
|
||||
lambda _, v: v['$type'] == 'com.linkedin.voyager.dash.events.ProfessionalEvent', any)) or {}
|
||||
|
||||
live_status = {
|
||||
'PAST': 'was_live',
|
||||
'ONGOING': 'is_live',
|
||||
'FUTURE': 'is_upcoming',
|
||||
}.get(meta_data.get('lifecycleState'))
|
||||
|
||||
if live_status == 'is_upcoming':
|
||||
player_data = {}
|
||||
if event_time := traverse_obj(meta_data, ('displayEventTime', {str})):
|
||||
message = f'This live event is scheduled for {event_time}'
|
||||
else:
|
||||
message = 'This live event has not yet started'
|
||||
self.raise_no_formats(message, expected=True, video_id=event_id)
|
||||
else:
|
||||
# TODO: Add support for audio-only live events
|
||||
player_data = traverse_obj(base_data, (
|
||||
lambda _, v: v['$type'] == 'com.linkedin.videocontent.VideoPlayMetadata',
|
||||
any, {require('video player data')}))
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for prog_fmts in traverse_obj(player_data, ('progressiveStreams', ..., {dict})):
|
||||
for fmt_url in traverse_obj(prog_fmts, ('streamingLocations', ..., 'url', {url_or_none})):
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
**traverse_obj(prog_fmts, {
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'tbr': ('bitRate', {int_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'ext': ('mediaType', {mimetype2ext}),
|
||||
}),
|
||||
})
|
||||
|
||||
for m3u8_url in traverse_obj(player_data, (
|
||||
'adaptiveStreams', lambda _, v: v['protocol'] == 'HLS', 'masterPlaylists', ..., 'url', {url_or_none},
|
||||
)):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, event_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': event_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(meta_data, {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', 'text', {str}),
|
||||
'timestamp': ('createdAt', {int_or_none(scale=1000)}),
|
||||
# timeRange.start is available when the stream is_upcoming
|
||||
'release_timestamp': ('timeRange', 'start', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
**traverse_obj(player_data, {
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
# liveStreamCreatedAt is only available when the stream is_live or was_live
|
||||
'release_timestamp': ('liveStreamCreatedAt', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
@ -1,5 +1,9 @@
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, url_or_none
|
||||
from ..utils import int_or_none, jwt_decode_hs256, try_call, url_or_none
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
@ -55,13 +59,81 @@ class LocoIE(InfoExtractor):
|
||||
'upload_date': '20250226',
|
||||
'modified_date': '20250226',
|
||||
},
|
||||
}, {
|
||||
# Requires video authorization
|
||||
'url': 'https://loco.com/stream/ac854641-ae0f-497c-a8ea-4195f6d8cc53',
|
||||
'md5': '0513edf85c1e65c9521f555f665387d5',
|
||||
'info_dict': {
|
||||
'id': 'ac854641-ae0f-497c-a8ea-4195f6d8cc53',
|
||||
'ext': 'mp4',
|
||||
'title': 'DUAS CONTAS DESAFIANTE, RUSH TOP 1 NO BRASIL!',
|
||||
'description': 'md5:aa77818edd6fe00dd4b6be75cba5f826',
|
||||
'uploader_id': '7Y9JNAZC3Q',
|
||||
'channel': 'ayellol',
|
||||
'channel_follower_count': int,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 1229,
|
||||
'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/f5aa678b-6d04-45d9-a89a-859af0a8028f.jpg',
|
||||
'tags': ['Gameplay', 'Carry'],
|
||||
'series': 'League of Legends',
|
||||
'timestamp': 1741182253,
|
||||
'upload_date': '20250305',
|
||||
'modified_timestamp': 1741182419,
|
||||
'modified_date': '20250305',
|
||||
},
|
||||
}]
|
||||
|
||||
# From _app.js
|
||||
_CLIENT_ID = 'TlwKp1zmF6eKFpcisn3FyR18WkhcPkZtzwPVEEC3'
|
||||
_CLIENT_SECRET = 'Kp7tYlUN7LXvtcSpwYvIitgYcLparbtsQSe5AdyyCdiEJBP53Vt9J8eB4AsLdChIpcO2BM19RA3HsGtqDJFjWmwoonvMSG3ZQmnS8x1YIM8yl82xMXZGbE3NKiqmgBVU'
|
||||
|
||||
def _is_jwt_expired(self, token):
|
||||
return jwt_decode_hs256(token)['exp'] - time.time() < 300
|
||||
|
||||
def _get_access_token(self, video_id):
|
||||
access_token = try_call(lambda: self._get_cookies('https://loco.com')['access_token'].value)
|
||||
if access_token and not self._is_jwt_expired(access_token):
|
||||
return access_token
|
||||
access_token = traverse_obj(self._download_json(
|
||||
'https://api.getloconow.com/v3/user/device_profile/', video_id,
|
||||
'Downloading access token', fatal=False, data=json.dumps({
|
||||
'platform': 7,
|
||||
'client_id': self._CLIENT_ID,
|
||||
'client_secret': self._CLIENT_SECRET,
|
||||
'model': 'Mozilla',
|
||||
'os_name': 'Win32',
|
||||
'os_ver': '5.0 (Windows)',
|
||||
'app_ver': '5.0 (Windows)',
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json;charset=utf-8',
|
||||
'DEVICE-ID': ''.join(random.choices('0123456789abcdef', k=32)) + 'live',
|
||||
'X-APP-LANG': 'en',
|
||||
'X-APP-LOCALE': 'en-US',
|
||||
'X-CLIENT-ID': self._CLIENT_ID,
|
||||
'X-CLIENT-SECRET': self._CLIENT_SECRET,
|
||||
'X-PLATFORM': '7',
|
||||
}), 'access_token')
|
||||
if access_token and not self._is_jwt_expired(access_token):
|
||||
self._set_cookie('.loco.com', 'access_token', access_token)
|
||||
return access_token
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_type, video_id = self._match_valid_url(url).group('type', 'id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
stream = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'pageProps', ('liveStreamData', 'stream'), {dict}, any, {require('stream info')}))
|
||||
'props', 'pageProps', ('liveStreamData', 'stream', 'liveStream'), {dict}, any, {require('stream info')}))
|
||||
|
||||
if access_token := self._get_access_token(video_id):
|
||||
self._request_webpage(
|
||||
'https://drm.loco.com/v1/streams/playback/', video_id,
|
||||
'Downloading video authorization', fatal=False, headers={
|
||||
'authorization': access_token,
|
||||
}, query={
|
||||
'stream_uid': stream['uid'],
|
||||
})
|
||||
|
||||
return {
|
||||
'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id),
|
||||
|
@ -2,7 +2,6 @@
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
merge_dicts,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
@ -138,13 +137,15 @@ def _real_extract(self, url):
|
||||
'https://www.lrt.lt/radioteka/api/media', video_id,
|
||||
query={'url': f'/mediateka/irasas/{video_id}/{path}'})
|
||||
|
||||
return traverse_obj(media, {
|
||||
'id': ('id', {int}, {str_or_none}),
|
||||
'title': ('title', {str}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'categories': ('playlist_item', 'category', {str}, filter, all, filter),
|
||||
'description': ('content', {clean_html}, {str}),
|
||||
'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}),
|
||||
'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
|
||||
'formats': ('playlist_item', 'file', {lambda x: self._extract_m3u8_formats(x, video_id)}),
|
||||
})
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(media['playlist_item']['file'], video_id),
|
||||
**traverse_obj(media, {
|
||||
'title': ('title', {str}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'categories': ('playlist_item', 'category', {str}, filter, all, filter),
|
||||
'description': ('content', {clean_html}, {str}),
|
||||
'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}),
|
||||
'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
|
||||
}),
|
||||
}
|
||||
|
@ -1,31 +1,38 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
str_to_int,
|
||||
join_nonempty,
|
||||
parse_count,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ManyVidsIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# preview video
|
||||
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
|
||||
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
|
||||
'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks',
|
||||
'md5': '738dc723f7735ee9602f7ea352a6d058',
|
||||
'info_dict': {
|
||||
'id': '133957',
|
||||
'id': '530341-preview',
|
||||
'ext': 'mp4',
|
||||
'title': 'everthing about me (Preview)',
|
||||
'uploader': 'ellyxxix',
|
||||
'title': 'MV Tips & Tricks (Preview)',
|
||||
'description': r're:I will take you on a tour around .{1313}$',
|
||||
'thumbnail': r're:https://cdn5\.manyvids\.com/php_uploads/video_images/DestinyDiaz/.+\.jpg',
|
||||
'uploader': 'DestinyDiaz',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'release_timestamp': 1508419904,
|
||||
'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'],
|
||||
'release_date': '20171019',
|
||||
'duration': 3167.0,
|
||||
},
|
||||
'expected_warnings': ['Only extracting preview'],
|
||||
}, {
|
||||
# full video
|
||||
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
||||
@ -34,129 +41,68 @@ class ManyVidsIE(InfoExtractor):
|
||||
'id': '935718',
|
||||
'ext': 'mp4',
|
||||
'title': 'MY FACE REVEAL',
|
||||
'description': 'md5:ec5901d41808b3746fed90face161612',
|
||||
'description': r're:Today is the day!! I am finally taking off my mask .{445}$',
|
||||
'thumbnail': r're:https://ods\.manyvids\.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/.+\.jpg',
|
||||
'uploader': 'Sarah Calanthe',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'release_date': '20181110',
|
||||
'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'],
|
||||
'release_timestamp': 1541851200,
|
||||
'duration': 224.0,
|
||||
},
|
||||
}]
|
||||
_API_BASE = 'https://www.manyvids.com/bff/store/video'
|
||||
|
||||
def _real_extract(self, url):
    """Extract a ManyVids video through the store JSON API.

    Formats are read from the '<_API_BASE>/<id>/private' response (teaser,
    transcoded and original file paths); descriptive metadata comes from
    '<_API_BASE>/<id>', which is fetched non-fatally. When only the teaser
    could be found, the title gets a '(Preview)' suffix, the ID gets a
    '-preview' suffix and a warning is reported.

    Returns the info dict for the video.
    """
    video_id = self._match_id(url)
    video_data = self._download_json(f'{self._API_BASE}/{video_id}/private', video_id)['data']
    formats, preview_only = [], True

    for format_id, path in [
        ('preview', ['teaser', 'filepath']),
        ('transcoded', ['transcodedFilepath']),
        ('filepath', ['filepath']),
    ]:
        format_url = traverse_obj(video_data, (*path, {url_or_none}))
        if not format_url:
            continue
        if determine_ext(format_url) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id=format_id))
        else:
            formats.append({
                'url': format_url,
                'format_id': format_id,
                # Rank the teaser below everything else and the original upload highest
                'preference': -10 if format_id == 'preview' else None,
                'quality': 10 if format_id == 'filepath' else None,
                # The height is taken from an even 3-4 digit number wrapped in underscores in the URL
                'height': int_or_none(
                    self._search_regex(r'_(\d{2,3}[02468])_', format_url, 'height', default=None)),
            })
        if format_id != 'preview':
            preview_only = False

    metadata = traverse_obj(
        self._download_json(f'{self._API_BASE}/{video_id}', video_id, fatal=False), 'data')
    title = traverse_obj(metadata, ('title', {clean_html}))

    if preview_only:
        title = join_nonempty(title, '(Preview)', delim=' ')
        video_id += '-preview'
        self.report_warning(
            f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}')

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        **traverse_obj(metadata, {
            'description': ('description', {clean_html}),
            'uploader': ('model', 'displayName', {clean_html}),
            'thumbnail': (('screenshot', 'thumbnail'), {url_or_none}, any),
            'view_count': ('views', {parse_count}),
            'like_count': ('likes', {parse_count}),
            'release_timestamp': ('launchDate', {parse_iso8601}),
            'duration': ('videoDuration', {parse_duration}),
            'tags': ('tagList', ..., 'label', {str}, filter, all, filter),
        }),
    }
|
||||
|
@ -365,13 +365,15 @@ def _real_initialize(self):
|
||||
'All videos are only available to registered users', method='password')
|
||||
|
||||
def _set_device_id(self, username):
    """Make sure ``self._device_id`` is set for *username*.

    An ID already present on the instance is kept. Otherwise the mapping
    cached under 'device_ids' for ``self._NETRC_MACHINE`` is consulted, and
    only when it has no entry for *username* is a new UUID4 string generated
    and written back to the cache.
    """
    if self._device_id:
        return

    device_id_cache = self.cache.load(self._NETRC_MACHINE, 'device_ids', default={})
    self._device_id = device_id_cache.get(username)
    if self._device_id:
        return

    self._device_id = str(uuid.uuid4())
    # Add to the loaded mapping and store it once; storing a fresh
    # one-entry dict would drop the IDs cached for every other username
    device_id_cache[username] = self._device_id
    self.cache.store(self._NETRC_MACHINE, 'device_ids', device_id_cache)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
try:
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
from .art19 import Art19IE
|
||||
from .common import InfoExtractor
|
||||
from ..networking import PATCHRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -74,7 +75,7 @@ def _extract_formats(self, content_id, slug):
|
||||
'app_version': '23.10.0',
|
||||
'platform': 'ios',
|
||||
})
|
||||
return {'formats': fmts, 'subtitles': subs}
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
self.raise_login_required()
|
||||
@ -84,6 +85,9 @@ def _extract_formats(self, content_id, slug):
|
||||
continue
|
||||
raise
|
||||
|
||||
self.mark_watched(content_id, slug)
|
||||
return {'formats': fmts, 'subtitles': subs}
|
||||
|
||||
def _extract_video_metadata(self, episode):
|
||||
channel_url = traverse_obj(
|
||||
episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
|
||||
@ -111,6 +115,13 @@ def _extract_video_metadata(self, episode):
|
||||
'uploader_url': channel_url,
|
||||
}
|
||||
|
||||
def _mark_watched(self, content_id, slug):
    """Send a non-fatal PATCH marking *content_id* as completed on the Nebula content API."""
    # The text before the first ':' in the ID, with an 's' appended, forms the URL's collection segment
    collection = content_id.split(':')[0]
    progress_url = f'https://content.api.nebula.app/{collection}s/{content_id}/progress/'
    request = PATCHRequest(progress_url)
    payload = json.dumps({'completed': True}).encode()
    self._call_api(
        request, slug, 'Marking watched', 'Unable to mark watched',
        fatal=False, data=payload, headers={'content-type': 'application/json'})
|
||||
|
||||
|
||||
class NebulaIE(NebulaBaseIE):
|
||||
IE_NAME = 'nebula:video'
|
||||
@ -322,6 +333,7 @@ def _real_extract(self, url):
|
||||
if not episode_url and metadata.get('premium'):
|
||||
self.raise_login_required()
|
||||
|
||||
self.mark_watched(metadata['id'], slug)
|
||||
if Art19IE.suitable(episode_url):
|
||||
return self.url_result(episode_url, Art19IE)
|
||||
return traverse_obj(metadata, {
|
||||
|
@ -16,7 +16,7 @@
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_bitrate,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
@ -24,8 +24,6 @@
|
||||
qualities,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
@ -34,13 +32,70 @@
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, require, traverse_obj
|
||||
|
||||
|
||||
class NiconicoIE(InfoExtractor):
|
||||
class NiconicoBaseIE(InfoExtractor):
    """Base class carrying the geo settings and account login flow for Niconico extractors."""

    _GEO_BYPASS = False
    _GEO_COUNTRIES = ['JP']
    _LOGIN_BASE = 'https://account.nicovideo.jp'
    _NETRC_MACHINE = 'niconico'

    @property
    def is_logged_in(self):
        """True when a 'user_session' cookie exists for https://www.nicovideo.jp."""
        session_cookie = self._get_cookies('https://www.nicovideo.jp').get('user_session')
        return bool(session_cookie)

    def _raise_login_error(self, message, expected=True):
        """Raise ExtractorError with *message* prefixed by 'Unable to login: '."""
        raise ExtractorError(f'Unable to login: {message}', expected=expected)

    def _perform_login(self, username, password):
        """Log in with *username* and *password*, completing an OTP challenge if one is shown.

        Does nothing when a session cookie is already present. Every failure
        path ends in ``_raise_login_error``; a failure that matches none of
        the recognised page markers is raised with ``expected=False``.
        """
        if self.is_logged_in:
            return

        login_page_url = f'{self._LOGIN_BASE}/login'
        form_headers = {'Content-Type': 'application/x-www-form-urlencoded'}

        self._request_webpage(login_page_url, None, 'Requesting session cookies')
        webpage = self._download_webpage(
            f'{self._LOGIN_BASE}/login/redirector', None,
            'Logging in', 'Unable to log in',
            headers={**form_headers, 'Referer': login_page_url},
            data=urlencode_postdata({
                'mail_tel': username,
                'password': password,
            }))

        if self.is_logged_in:
            return

        notice_text = traverse_obj(webpage, (
            {find_element(cls='notice error')}, {find_element(cls='notice__text')}, {clean_html},
        ))
        if notice_text:
            self._raise_login_error(notice_text or 'Invalid username or password')
        elif 'oneTimePw' in webpage:
            # The OTP form's action attribute gives the (possibly relative) submit URL
            otp_action = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage, 'post url', group='url')
            otp_code = self._get_tfa_info('6 digit number shown on app')
            mfa_page, mfa_handle = self._download_webpage_handle(
                urljoin(self._LOGIN_BASE, otp_action), None,
                'Performing MFA', 'Unable to complete MFA',
                headers=form_headers, data=urlencode_postdata({'otp': otp_code}))

            if self.is_logged_in:
                return
            elif 'error-code' in parse_qs(mfa_handle.url):
                mfa_error = traverse_obj(mfa_page, ({find_element(cls='pageMainMsg')}, {clean_html}))
                self._raise_login_error(mfa_error or 'MFA session expired')
            elif 'formError' in mfa_page:
                mfa_error = traverse_obj(mfa_page, (
                    {find_element(cls='formError')}, {find_element(tag='div')}, {clean_html}))
                self._raise_login_error(mfa_error or 'MFA challenge failed')

        self._raise_login_error('Unexpected login error', expected=False)
|
||||
|
||||
|
||||
class NiconicoIE(NiconicoBaseIE):
|
||||
IE_NAME = 'niconico'
|
||||
IE_DESC = 'ニコニコ動画'
|
||||
_GEO_COUNTRIES = ['JP']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
@ -180,229 +235,6 @@ class NiconicoIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0',
|
||||
'X-Niconico-Language': 'en-us',
|
||||
'Referer': 'https://www.nicovideo.jp/',
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
    """Log in to account.nicovideo.jp, answering a one-time-password form if one is returned.

    Failures are reported through ``report_warning`` instead of being raised.
    Returns True when no error marker is found in the final page, False otherwise.
    """
    account_base = 'https://account.nicovideo.jp'
    form_content_type = 'application/x-www-form-urlencoded'

    self._request_webpage(f'{account_base}/login', None, note='Acquiring Login session')
    page = self._download_webpage(
        f'{account_base}/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
        note='Logging in', errnote='Unable to log in',
        data=urlencode_postdata({
            'mail_tel': username,
            'password': password,
        }),
        headers={
            'Referer': f'{account_base}/login',
            'Content-Type': form_content_type,
        })

    if 'oneTimePw' in page:
        # The OTP form's action attribute gives the (possibly relative) submit URL
        otp_action = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
        page = self._download_webpage(
            urljoin(account_base, otp_action), None,
            note='Performing MFA', errnote='Unable to complete MFA',
            data=urlencode_postdata({'otp': self._get_tfa_info('6 digits code')}),
            headers={'Content-Type': form_content_type})

        # Seeing the OTP form again, or a form error, means the code was rejected
        if 'oneTimePw' in page or 'formError' in page:
            failure_text = self._html_search_regex(
                r'formError["\']+>(.*?)</div>', page, 'form_error',
                default='There\'s an error but the message can\'t be parsed.',
                flags=re.DOTALL)
            self.report_warning(f'Unable to log in: MFA challenge failed, "{failure_text}"')
            return False

    if 'class="notice error"' in page:
        self.report_warning('Unable to log in: bad username or password')
        return False
    return True
|
||||
|
||||
def _get_heartbeat_info(self, info_dict):
    """Create a DMC session for the format in *info_dict* and build its heartbeat settings.

    *info_dict* is modified in place: 'url' is replaced by the session's
    content URI and 'protocol' by the resolved download protocol.

    Returns a tuple ``(info_dict, heartbeat_info_dict)``, where the second item
    holds the heartbeat 'url', its JSON 'data', the 'interval' and a 'ping' callable.

    Raises ExtractorError when the expected protocol is neither 'http' nor 'hls'.
    """
    # The format URL is split as '<prefix>:<video id>/<video source id>/<audio source id>'
    video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
    dmc_protocol = info_dict['expected_protocol']

    api_data = info_dict.get('_api_data')
    if not api_data:
        # No API data was handed over with the format, so read it from the watch page
        watch_page = self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id)
        api_data = self._parse_json(
            self._html_search_regex('data-api-data="([^"]+)"', watch_page, 'API data', default='{}'),
            video_id)

    session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
    session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])

    def ping():
        tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
        if not tracking_id:
            return
        tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
        watch_response = self._download_json(
            tracking_url, video_id,
            note='Acquiring permission for downloading video', fatal=False,
            headers=self._API_HEADERS)
        if traverse_obj(watch_response, ('meta', 'status')) != 200:
            self.report_warning('Failed to acquire permission for playing video. Video download may fail.')

    def yesno(flag):
        return 'yes' if flag else 'no'

    def transport_flags():
        first_url = session_api_data['urls'][0]
        return {
            'use_ssl': yesno(first_url['isSsl']),
            'use_well_known_port': yesno(first_url['isWellKnownPort']),
        }

    if dmc_protocol == 'http':
        protocol = 'http'
        protocol_parameters = {
            'http_output_download_parameters': transport_flags(),
        }
    elif dmc_protocol == 'hls':
        protocol = 'm3u8'
        segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
        parsed_token = self._parse_json(session_api_data['token'], video_id)
        encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
        protocol_parameters = {
            'hls_parameters': {
                'segment_duration': segment_duration,
                'transfer_preset': '',
                **transport_flags(),
            },
        }
        if 'hls_encryption' in parsed_token and encryption:
            protocol_parameters['hls_parameters']['encryption'] = {
                parsed_token['hls_encryption']: {
                    'encrypted_key': encryption['encryptedKey'],
                    'key_uri': encryption['keyUri'],
                },
            }
        else:
            # Without encryption details the stream is handed to the native HLS protocol
            protocol = 'm3u8_native'
    else:
        raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')

    content_auth = {
        'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
        'content_key_timeout': session_api_data.get('contentKeyTimeout'),
        'service_id': 'nicovideo',
        'service_user_id': session_api_data.get('serviceUserId'),
    }
    source_id_sets = [{
        'content_src_ids': [{
            'src_id_to_mux': {
                'audio_src_ids': [audio_src_id],
                'video_src_ids': [video_src_id],
            },
        }],
    }]
    session_request = {
        'session': {
            'client_info': {
                'player_id': session_api_data.get('playerId'),
            },
            'content_auth': content_auth,
            'content_id': session_api_data.get('contentId'),
            'content_src_id_sets': source_id_sets,
            'content_type': 'movie',
            'content_uri': '',
            'keep_method': {
                'heartbeat': {
                    'lifetime': session_api_data.get('heartbeatLifetime'),
                },
            },
            'priority': session_api_data['priority'],
            'protocol': {
                'name': 'http',
                'parameters': {
                    'http_parameters': {
                        'parameters': protocol_parameters,
                    },
                },
            },
            'recipe_id': session_api_data.get('recipeId'),
            'session_operation_auth': {
                'session_operation_auth_by_signature': {
                    'signature': session_api_data.get('signature'),
                    'token': session_api_data.get('token'),
                },
            },
            'timing_constraint': 'unlimited',
        },
    }

    session_response = self._download_json(
        session_api_endpoint['url'], video_id,
        query={'_format': 'json'},
        headers={'Content-Type': 'application/json'},
        note=f'Downloading JSON metadata for {info_dict["format_id"]}',
        data=json.dumps(session_request).encode())
    session_data = session_response['data']

    info_dict['url'] = session_data['session']['content_uri']
    info_dict['protocol'] = protocol

    heartbeat_info_dict = {
        'url': session_api_endpoint['url'] + '/' + session_data['session']['id'] + '?_format=json&_method=PUT',
        'data': json.dumps(session_data),
        # heartbeatLifetime divided by 3000: presumably milliseconds, so this
        # is a third of the lifetime expressed in seconds
        'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
        'ping': ping,
    }

    return info_dict, heartbeat_info_dict
|
||||
|
||||
def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
    """Build the format dict for one audio/video quality pair served over *dmc_protocol*.

    Returns None unless both qualities have a truthy 'isAvailable'. The
    resulting 'url' is a 'niconico_dmc:' pseudo-URL made of the video ID and
    the two quality IDs.
    """
    if not (audio_quality.get('isAvailable') and video_quality.get('isAvailable')):
        return None

    # Quality IDs are joined video-first, without their 'archive_' prefix, then the protocol
    id_parts = [remove_start(quality['id'], 'archive_') for quality in (video_quality, audio_quality)]
    id_parts.append(dmc_protocol)
    format_id = '-'.join(id_parts)

    video_label = traverse_obj(video_quality, ('metadata', 'label'))
    audio_fields = traverse_obj(audio_quality, ('metadata', {
        'abr': ('bitrate', {float_or_none(scale=1000)}),
        'asr': ('samplingRate', {int_or_none}),
    }))
    video_fields = traverse_obj(video_quality, ('metadata', {
        'vbr': ('bitrate', {float_or_none(scale=1000)}),
        'height': ('resolution', 'height', {int_or_none}),
        'width': ('resolution', 'width', {int_or_none}),
    }))

    return {
        'url': f'niconico_dmc:{video_id}/{video_quality["id"]}/{audio_quality["id"]}',
        'format_id': format_id,
        'format_note': join_nonempty('DMC', video_label, dmc_protocol.upper(), delim=' '),
        'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
        'acodec': 'aac',
        'vcodec': 'h264',
        **audio_fields,
        **video_fields,
        'quality': -2 if 'low' in video_quality['id'] else None,
        'protocol': 'niconico_dmc',
        'expected_protocol': dmc_protocol,  # XXX: This is not a documented field
        'http_headers': {
            'Origin': 'https://www.nicovideo.jp',
            'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
        },
    }
|
||||
|
||||
def _yield_dmc_formats(self, api_data, video_id):
    """Yield a format for every available audio/video/protocol combination in *api_data*.

    Yields nothing when the audio list, the video list or the protocol list is empty.
    """
    movie = traverse_obj(api_data, ('media', 'delivery', 'movie'))
    audios = traverse_obj(movie, ('audios', ..., {dict}))
    videos = traverse_obj(movie, ('videos', ..., {dict}))
    protocols = traverse_obj(movie, ('session', 'protocols', ..., {str}))
    if not audios or not videos or not protocols:
        return

    # Audio varies slowest and protocol fastest
    for audio_quality in audios:
        for video_quality in videos:
            for protocol in protocols:
                fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol)
                if fmt:
                    yield fmt
|
||||
|
||||
def _yield_dms_formats(self, api_data, video_id):
|
||||
fmt_filter = lambda _, v: v['isAvailable'] and v['id']
|
||||
@ -451,42 +283,61 @@ def _yield_dms_formats(self, api_data, video_id):
|
||||
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
|
||||
yield video_fmt
|
||||
|
||||
def _extract_server_response(self, webpage, video_id, fatal=True):
    """Return the ('data', 'response') dict parsed from the 'server-response' meta tag of *webpage*.

    If an ExtractorError occurs, it is re-raised when *fatal* is true;
    otherwise an empty dict is returned.
    """
    try:
        meta_content = self._html_search_meta('server-response', webpage) or ''
        server_data = self._parse_json(meta_content, video_id)
        response = traverse_obj(
            server_data, ('data', 'response', {dict}, {require('server response')}))
    except ExtractorError:
        if fatal:
            raise
        return {}
    return response
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
webpage, handle = self._download_webpage_handle(
|
||||
'https://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||
f'https://www.nicovideo.jp/watch/{video_id}', video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
if video_id.startswith('so'):
|
||||
video_id = self._match_id(handle.url)
|
||||
|
||||
api_data = traverse_obj(
|
||||
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
|
||||
('data', 'response', {dict}))
|
||||
if not api_data:
|
||||
raise ExtractorError('Server response data not found')
|
||||
api_data = self._extract_server_response(webpage, video_id)
|
||||
except ExtractorError as e:
|
||||
try:
|
||||
api_data = self._download_json(
|
||||
f'https://www.nicovideo.jp/api/watch/v3/{video_id}?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_{round(time.time() * 1000)}', video_id,
|
||||
note='Downloading API JSON', errnote='Unable to fetch data')['data']
|
||||
f'https://www.nicovideo.jp/api/watch/v3/{video_id}', video_id,
|
||||
'Downloading API JSON', 'Unable to fetch data', query={
|
||||
'_frontendId': '6',
|
||||
'_frontendVersion': '0',
|
||||
'actionTrackId': f'AAAAAAAAAA_{round(time.time() * 1000)}',
|
||||
}, headers=self.geo_verification_headers())['data']
|
||||
except ExtractorError:
|
||||
if not isinstance(e.cause, HTTPError):
|
||||
# Raise if original exception was from _parse_json or utils.traversal.require
|
||||
raise
|
||||
# The webpage server response has more detailed error info than the API response
|
||||
webpage = e.cause.response.read().decode('utf-8', 'replace')
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
|
||||
webpage, 'error reason', default=None)
|
||||
if not error_msg:
|
||||
reason_code = self._extract_server_response(
|
||||
webpage, video_id, fatal=False).get('reasonCode')
|
||||
if not reason_code:
|
||||
raise
|
||||
raise ExtractorError(clean_html(error_msg), expected=True)
|
||||
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
elif reason_code == 'HIDDEN_VIDEO':
|
||||
raise ExtractorError(
|
||||
'The viewing period of this video has expired', expected=True)
|
||||
elif reason_code == 'DELETED_VIDEO':
|
||||
raise ExtractorError('This video has been deleted', expected=True)
|
||||
raise ExtractorError(f'Niconico says: {reason_code}')
|
||||
|
||||
availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
|
||||
'needs_premium': ('isPremium', {bool}),
|
||||
'needs_subscription': ('isAdmission', {bool}),
|
||||
})) or {'needs_auth': True}))
|
||||
formats = [*self._yield_dmc_formats(api_data, video_id),
|
||||
*self._yield_dms_formats(api_data, video_id)]
|
||||
|
||||
formats = list(self._yield_dms_formats(api_data, video_id))
|
||||
if not formats:
|
||||
fail_msg = clean_html(self._html_search_regex(
|
||||
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
|
||||
@ -921,7 +772,7 @@ def _real_extract(self, url):
|
||||
return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
||||
|
||||
class NiconicoLiveIE(InfoExtractor):
|
||||
class NiconicoLiveIE(NiconicoBaseIE):
|
||||
IE_NAME = 'niconico:live'
|
||||
IE_DESC = 'ニコニコ生放送'
|
||||
_VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
|
||||
@ -953,8 +804,6 @@ class NiconicoLiveIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_KNOWN_LATENCY = ('high', 'low')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
|
||||
@ -970,22 +819,19 @@ def _real_extract(self, url):
|
||||
})
|
||||
|
||||
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
|
||||
latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
|
||||
if latency not in self._KNOWN_LATENCY:
|
||||
latency = 'high'
|
||||
|
||||
ws = self._request_webpage(
|
||||
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
|
||||
video_id=video_id, note='Connecting to WebSocket server')
|
||||
|
||||
self.write_debug('[debug] Sending HLS server request')
|
||||
self.write_debug('Sending HLS server request')
|
||||
ws.send(json.dumps({
|
||||
'type': 'startWatching',
|
||||
'data': {
|
||||
'stream': {
|
||||
'quality': 'abr',
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': latency,
|
||||
'protocol': 'hls',
|
||||
'latency': 'high',
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
},
|
||||
@ -1049,18 +895,29 @@ def _real_extract(self, url):
|
||||
for cookie in cookies:
|
||||
self._set_cookie(
|
||||
cookie['domain'], cookie['name'], cookie['value'],
|
||||
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
|
||||
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
|
||||
|
||||
fmt_common = {
|
||||
'live_latency': 'high',
|
||||
'origin': hostname,
|
||||
'protocol': 'niconico_live',
|
||||
'video_id': video_id,
|
||||
'ws': ws,
|
||||
}
|
||||
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
|
||||
a_map = {96: 'audio_low', 192: 'audio_high'}
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
||||
for fmt, q in zip(formats, reversed(qualities[1:])):
|
||||
fmt.update({
|
||||
'format_id': q,
|
||||
'protocol': 'niconico_live',
|
||||
'ws': ws,
|
||||
'video_id': video_id,
|
||||
'live_latency': latency,
|
||||
'origin': hostname,
|
||||
})
|
||||
for fmt in formats:
|
||||
if fmt.get('acodec') == 'none':
|
||||
fmt['format_id'] = next(q_iter, fmt['format_id'])
|
||||
elif fmt.get('vcodec') == 'none':
|
||||
abr = parse_bitrate(fmt['url'].lower())
|
||||
fmt.update({
|
||||
'abr': abr,
|
||||
'format_id': a_map.get(abr, fmt['format_id']),
|
||||
})
|
||||
fmt.update(fmt_common)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -181,6 +181,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 119.0,
|
||||
},
|
||||
'skip': 'HTTP Error 500: Internal Server Error',
|
||||
}, {
|
||||
# article with audio and no video
|
||||
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
|
||||
@ -190,13 +191,14 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
'ext': 'mp3',
|
||||
'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
|
||||
'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
|
||||
'timestamp': 1695960700,
|
||||
'timestamp': 1696008129,
|
||||
'upload_date': '20230929',
|
||||
'creator': 'Stephanie Nolen, Natalija Gormalova',
|
||||
'creators': ['Stephanie Nolen', 'Natalija Gormalova'],
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 1322,
|
||||
},
|
||||
}, {
|
||||
# lede_media_block already has sourceId
|
||||
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
|
||||
'md5': '3eb5ddb1d6f86254fe4f233826778737',
|
||||
'info_dict': {
|
||||
@ -207,7 +209,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
'timestamp': 1701290997,
|
||||
'upload_date': '20231129',
|
||||
'uploader': 'By The New York Times',
|
||||
'creator': 'Katie Rogers',
|
||||
'creators': ['Katie Rogers'],
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 97.631,
|
||||
},
|
||||
@ -222,10 +224,22 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
|
||||
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
|
||||
'upload_date': '20231202',
|
||||
'creator': 'Emily Steel, Sydney Ember',
|
||||
'creators': ['Emily Steel', 'Sydney Ember'],
|
||||
'timestamp': 1701511264,
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# lede_media_block does not have sourceId
|
||||
'url': 'https://www.nytimes.com/2025/04/30/well/move/hip-mobility-routine.html',
|
||||
'info_dict': {
|
||||
'id': 'hip-mobility-routine',
|
||||
'title': 'Tight Hips? These Moves Can Help.',
|
||||
'description': 'Sitting all day is hard on your hips. Try this simple routine for better mobility.',
|
||||
'creators': ['Alyssa Ages', 'Theodore Tae'],
|
||||
'timestamp': 1746003629,
|
||||
'upload_date': '20250430',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
}, {
|
||||
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
|
||||
'only_matching': True,
|
||||
@ -256,14 +270,18 @@ def _extract_content_from_block(self, block):
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
webpage = self._download_webpage(url, page_id, impersonate=True)
|
||||
art_json = self._search_json(
|
||||
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
|
||||
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
|
||||
content = art_json['sprinkledBody']['content']
|
||||
|
||||
blocks = traverse_obj(art_json, (
|
||||
'sprinkledBody', 'content', ..., ('ledeMedia', None),
|
||||
lambda _, v: v['__typename'] in ('Video', 'Audio')))
|
||||
blocks = []
|
||||
block_filter = lambda k, v: k == 'media' and v['__typename'] in ('Video', 'Audio')
|
||||
if lede_media_block := traverse_obj(content, (..., 'ledeMedia', block_filter, any)):
|
||||
lede_media_block.setdefault('sourceId', art_json.get('sourceId'))
|
||||
blocks.append(lede_media_block)
|
||||
blocks.extend(traverse_obj(content, (..., block_filter)))
|
||||
if not blocks:
|
||||
raise ExtractorError('Unable to extract any media blocks from webpage')
|
||||
|
||||
@ -273,8 +291,7 @@ def _real_extract(self, url):
|
||||
'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
|
||||
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
|
||||
'creator': ', '.join(
|
||||
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
|
||||
'creators': traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName', {str})),
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
|
||||
}
|
||||
|
@ -1,40 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class OnceIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
    # Shared helper for media served through once.unicornmedia.com.
    # _VALID_URL captures, in order: domain_id, application_id, media_item_id.
    _VALID_URL = r'https?://.+?\.unicornmedia\.com/now/(?:ads/vmap/)?[^/]+/[^/]+/(?P<domain_id>[^/]+)/(?P<application_id>[^/]+)/(?:[^/]+/)?(?P<media_item_id>[^/]+)/content\.(?:once|m3u8|mp4)'
    # Filled with (domain_id, application_id, media_item_id)
    ADAPTIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/master/playlist/%s/%s/%s/content.m3u8'
    # Filled with (domain_id, application_id, rendition_id, media_item_id)
    PROGRESSIVE_URL_TEMPLATE = 'http://once.unicornmedia.com/now/media/progressive/%s/%s/%s/%s/content.mp4'

    def _extract_once_formats(self, url, http_formats_preference=None):
        """Return HLS formats for a Once URL plus derived progressive HTTP formats.

        The three IDs are taken from ``url`` using ``OnceIE._VALID_URL``
        (referenced through the class name, not ``self``). Every HLS format
        whose URL exposes a rendition ID also gets an ``http`` twin built
        from PROGRESSIVE_URL_TEMPLATE, with ``http_formats_preference`` as
        its ``preference``. The twins are passed to ``_check_formats`` before
        being appended to the returned list.
        """
        domain_id, application_id, media_item_id = re.match(OnceIE._VALID_URL, url).groups()
        playlist_url = self.ADAPTIVE_URL_TEMPLATE % (domain_id, application_id, media_item_id)
        formats = self._extract_m3u8_formats(
            playlist_url, media_item_id, 'mp4', m3u8_id='hls', fatal=False)

        http_formats = []
        for hls_format in formats:
            # Prevent advertisement from embedding into m3u8 playlist (see
            # https://github.com/ytdl-org/youtube-dl/issues/8893#issuecomment-199912684)
            hls_format['url'] = re.sub(
                r'\badsegmentlength=\d+', r'adsegmentlength=0', hls_format['url'])
            rendition_id = self._search_regex(
                r'/now/media/playlist/[^/]+/[^/]+/([^/]+)',
                hls_format['url'], 'redition id', default=None)
            if not rendition_id:
                continue
            # Same metadata as the HLS format, pointed at the progressive file
            http_formats.append({
                **hls_format,
                'url': self.PROGRESSIVE_URL_TEMPLATE % (
                    domain_id, application_id, rendition_id, media_item_id),
                'format_id': hls_format['format_id'].replace('hls', 'http'),
                'protocol': 'http',
                'preference': http_formats_preference,
            })

        self._check_formats(http_formats, media_item_id)
        formats.extend(http_formats)
        return formats
|
@ -14,8 +14,9 @@
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
srt_subtitles_timecode,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PanoptoBaseIE(InfoExtractor):
|
||||
@ -345,21 +346,16 @@ def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs
|
||||
subtitles = {}
|
||||
for stream in streams or []:
|
||||
stream_formats = []
|
||||
http_stream_url = stream.get('StreamHttpUrl')
|
||||
stream_url = stream.get('StreamUrl')
|
||||
|
||||
if http_stream_url:
|
||||
stream_formats.append({'url': http_stream_url})
|
||||
|
||||
if stream_url:
|
||||
for stream_url in set(traverse_obj(stream, (('StreamHttpUrl', 'StreamUrl'), {url_or_none}))):
|
||||
media_type = stream.get('ViewerMediaFileTypeName')
|
||||
if media_type in ('hls', ):
|
||||
m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
|
||||
stream_formats.extend(m3u8_formats)
|
||||
subtitles = self._merge_subtitles(subtitles, stream_subtitles)
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, m3u8_id='hls', fatal=False)
|
||||
stream_formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
else:
|
||||
stream_formats.append({
|
||||
'url': stream_url,
|
||||
'ext': media_type,
|
||||
})
|
||||
for fmt in stream_formats:
|
||||
fmt.update({
|
||||
|
@ -340,8 +340,9 @@ def _real_extract(self, url):
|
||||
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
||||
}))
|
||||
|
||||
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
|
||||
headers = {'referer': 'https://patreon.com/'}
|
||||
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
|
||||
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
|
||||
headers = {'referer': 'https://www.patreon.com/'}
|
||||
|
||||
# handle Vimeo embeds
|
||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||
@ -352,7 +353,7 @@ def _real_extract(self, url):
|
||||
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
|
||||
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
|
||||
entries.append(self.url_result(
|
||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||
VimeoIE._smuggle_referrer(v_url, headers['referer']),
|
||||
VimeoIE, url_transparent=True))
|
||||
|
||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||
@ -379,11 +380,13 @@ def _real_extract(self, url):
|
||||
'url': post_file['url'],
|
||||
})
|
||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
post_file['url'], video_id, headers=headers)
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
})
|
||||
|
||||
can_view_post = traverse_obj(attributes, 'current_user_can_view')
|
||||
|
@ -1,5 +1,3 @@
|
||||
import re
|
||||
|
||||
from .youtube import YoutubeIE
|
||||
from .zdf import ZDFBaseIE
|
||||
from ..utils import (
|
||||
@ -7,44 +5,27 @@
|
||||
merge_dicts,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PhoenixIE(ZDFBaseIE):
|
||||
IE_NAME = 'phoenix.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/?#]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
# Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
|
||||
'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
|
||||
'md5': '34ec321e7eb34231fd88616c65c92db0',
|
||||
'url': 'https://www.phoenix.de/sendungen/dokumentationen/spitzbergen-a-893349.html',
|
||||
'md5': 'a79e86d9774d0b3f2102aff988a0bd32',
|
||||
'info_dict': {
|
||||
'id': '210222_phx_nachgehakt_corona_protest',
|
||||
'id': '221215_phx_spitzbergen',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wohin führt der Protest in der Pandemie?',
|
||||
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
|
||||
'duration': 1691,
|
||||
'timestamp': 1613902500,
|
||||
'upload_date': '20210221',
|
||||
'title': 'Spitzbergen',
|
||||
'description': 'Film von Tilmann Bünz',
|
||||
'duration': 728.0,
|
||||
'timestamp': 1555600500,
|
||||
'upload_date': '20190418',
|
||||
'uploader': 'Phoenix',
|
||||
'series': 'corona nachgehakt',
|
||||
'episode': 'Wohin führt der Protest in der Pandemie?',
|
||||
},
|
||||
}, {
|
||||
# Youtube embed
|
||||
'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
|
||||
'info_dict': {
|
||||
'id': 'hMQtqFYjomk',
|
||||
'ext': 'mp4',
|
||||
'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
|
||||
'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
|
||||
'duration': 3509,
|
||||
'upload_date': '20201219',
|
||||
'uploader': 'phoenix',
|
||||
'uploader_id': 'phoenix',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'thumbnail': 'https://www.phoenix.de/sixcms/media.php/21/Bergspitzen1.png',
|
||||
'series': 'Dokumentationen',
|
||||
'episode': 'Spitzbergen',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
|
||||
@ -90,8 +71,8 @@ def _real_extract(self, url):
|
||||
content_id = details['tracking']['nielsen']['content']['assetid']
|
||||
|
||||
info = self._extract_ptmd(
|
||||
f'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/{content_id}',
|
||||
content_id, None, url)
|
||||
f'https://tmd.phoenix.de/tmd/2/android_native_6/vod/ptmd/phoenix/{content_id}',
|
||||
content_id)
|
||||
|
||||
duration = int_or_none(try_get(
|
||||
details, lambda x: x['tracking']['nielsen']['content']['length']))
|
||||
@ -101,20 +82,8 @@ def _real_extract(self, url):
|
||||
str)
|
||||
episode = title if details.get('contentType') == 'episode' else None
|
||||
|
||||
thumbnails = []
|
||||
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
|
||||
for thumbnail_key, thumbnail_url in teaser_images.items():
|
||||
thumbnail_url = urljoin(url, thumbnail_url)
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||
if m:
|
||||
thumbnail['width'] = int(m.group(1))
|
||||
thumbnail['height'] = int(m.group(2))
|
||||
thumbnails.append(thumbnail)
|
||||
thumbnails = self._extract_thumbnails(teaser_images)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': content_id,
|
||||
|
@ -10,7 +10,8 @@
|
||||
|
||||
|
||||
class PicartoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
IE_NAME = 'picarto'
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[^/#?]+)/?(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'https://picarto.tv/Setz',
|
||||
'info_dict': {
|
||||
@ -89,7 +90,8 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class PicartoVodIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P<id>[^/?#&]+)'
|
||||
IE_NAME = 'picarto:vod'
|
||||
_VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+(?:/profile)?/videos)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
|
||||
'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
|
||||
@ -111,6 +113,18 @@ class PicartoVodIE(InfoExtractor):
|
||||
'channel': 'ArtofZod',
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://picarto.tv/DrechuArt/profile/videos/400347',
|
||||
'md5': 'f9ea54868b1d9dec40eb554b484cc7bf',
|
||||
'info_dict': {
|
||||
'id': '400347',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to the Show',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'channel': 'DrechuArt',
|
||||
'age_limit': 0,
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'https://picarto.tv/videopopout/Plague',
|
||||
'only_matching': True,
|
||||
|
@ -7,11 +7,12 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj, unpack
|
||||
|
||||
|
||||
class PlaySuisseIE(InfoExtractor):
|
||||
@ -26,12 +27,12 @@ class PlaySuisseIE(InfoExtractor):
|
||||
{
|
||||
# episode in a series
|
||||
'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
|
||||
'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
|
||||
'md5': 'e20d1ede6872a03b41905ca1060a1ef2',
|
||||
'info_dict': {
|
||||
'id': '763211',
|
||||
'ext': 'mp4',
|
||||
'title': 'Knochen',
|
||||
'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
|
||||
'description': 'md5:3bdd80e2ce20227c47aab1df2a79a519',
|
||||
'duration': 3344,
|
||||
'series': 'Wilder',
|
||||
'season': 'Season 1',
|
||||
@ -42,24 +43,33 @@ class PlaySuisseIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# film
|
||||
'url': 'https://www.playsuisse.ch/watch/808675',
|
||||
'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
|
||||
'url': 'https://www.playsuisse.ch/detail/2573198',
|
||||
'md5': '1f115bb0a5191477b1a5771643a4283d',
|
||||
'info_dict': {
|
||||
'id': '808675',
|
||||
'id': '2573198',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der Läufer',
|
||||
'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
|
||||
'duration': 5280,
|
||||
'title': 'Azor',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'genres': ['Fiction'],
|
||||
'creators': ['Andreas Fontana'],
|
||||
'cast': ['Fabrizio Rongione', 'Stéphanie Cléau', 'Gilles Privat', 'Alexandre Trocki'],
|
||||
'location': 'France; Argentine',
|
||||
'release_year': 2021,
|
||||
'duration': 5981,
|
||||
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
|
||||
},
|
||||
}, {
|
||||
# series (treated as a playlist)
|
||||
'url': 'https://www.playsuisse.ch/detail/1115687',
|
||||
'info_dict': {
|
||||
'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
|
||||
'id': '1115687',
|
||||
'series': 'They all came out to Montreux',
|
||||
'title': 'They all came out to Montreux',
|
||||
'description': 'md5:0fefd8c5b4468a0bb35e916887681520',
|
||||
'genres': ['Documentary'],
|
||||
'creators': ['Oliver Murray'],
|
||||
'location': 'Switzerland',
|
||||
'release_year': 2021,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
@ -120,6 +130,12 @@ class PlaySuisseIE(InfoExtractor):
|
||||
id
|
||||
name
|
||||
description
|
||||
descriptionLong
|
||||
year
|
||||
contentTypes
|
||||
directors
|
||||
mainCast
|
||||
productionCountries
|
||||
duration
|
||||
episodeNumber
|
||||
seasonNumber
|
||||
@ -215,9 +231,7 @@ def _perform_login(self, username, password):
|
||||
if not self._ID_TOKEN:
|
||||
raise ExtractorError('Login failed')
|
||||
|
||||
def _get_media_data(self, media_id):
|
||||
# NOTE In the web app, the "locale" header is used to switch between languages,
|
||||
# However this doesn't seem to take effect when passing the header here.
|
||||
def _get_media_data(self, media_id, locale=None):
|
||||
response = self._download_json(
|
||||
'https://www.playsuisse.ch/api/graphql',
|
||||
media_id, data=json.dumps({
|
||||
@ -225,7 +239,7 @@ def _get_media_data(self, media_id):
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
'variables': {'assetId': media_id},
|
||||
}).encode(),
|
||||
headers={'Content-Type': 'application/json', 'locale': 'de'})
|
||||
headers={'Content-Type': 'application/json', 'locale': locale or 'de'})
|
||||
|
||||
return response['data']['assetV2']
|
||||
|
||||
@ -234,7 +248,7 @@ def _real_extract(self, url):
|
||||
self.raise_login_required(method='password')
|
||||
|
||||
media_id = self._match_id(url)
|
||||
media_data = self._get_media_data(media_id)
|
||||
media_data = self._get_media_data(media_id, traverse_obj(parse_qs(url), ('locale', 0)))
|
||||
info = self._extract_single(media_data)
|
||||
if media_data.get('episodes'):
|
||||
info.update({
|
||||
@ -257,15 +271,22 @@ def _extract_single(self, media_data):
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': media_data['id'],
|
||||
'title': media_data.get('name'),
|
||||
'description': media_data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': int_or_none(media_data.get('duration')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series': media_data.get('seriesName'),
|
||||
'season_number': int_or_none(media_data.get('seasonNumber')),
|
||||
'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
|
||||
'episode_number': int_or_none(media_data.get('episodeNumber')),
|
||||
**traverse_obj(media_data, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('name', {str}),
|
||||
'description': (('descriptionLong', 'description'), {str}, any),
|
||||
'genres': ('contentTypes', ..., {str}),
|
||||
'creators': ('directors', ..., {str}),
|
||||
'cast': ('mainCast', ..., {str}),
|
||||
'location': ('productionCountries', ..., {str}, all, {unpack(join_nonempty, delim='; ')}, filter),
|
||||
'release_year': ('year', {str}, {lambda x: x[:4]}, {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'series': ('seriesName', {str}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode': ('name', {str}, {lambda x: x if media_data['episodeNumber'] is not None else None}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
@ -5,11 +5,13 @@
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class PodchaserIE(InfoExtractor):
|
||||
@ -21,24 +23,25 @@ class PodchaserIE(InfoExtractor):
|
||||
'id': '104365585',
|
||||
'title': 'Ep. 285 – freeze me off',
|
||||
'description': 'cam ahn',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'ext': 'mp3',
|
||||
'categories': ['Comedy'],
|
||||
'categories': ['Comedy', 'News', 'Politics', 'Arts'],
|
||||
'tags': ['comedy', 'dark humor'],
|
||||
'series': 'Cum Town',
|
||||
'series': 'The Adam Friedland Show Podcast',
|
||||
'duration': 3708,
|
||||
'timestamp': 1636531259,
|
||||
'upload_date': '20211110',
|
||||
'average_rating': 4.0,
|
||||
'series_id': '36924',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
|
||||
'info_dict': {
|
||||
'id': '28853',
|
||||
'title': 'The Bone Zone',
|
||||
'description': 'Podcast by The Bone Zone',
|
||||
'description': r're:The official home of the Bone Zone podcast.+',
|
||||
},
|
||||
'playlist_count': 275,
|
||||
'playlist_mincount': 275,
|
||||
}, {
|
||||
'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
|
||||
'info_dict': {
|
||||
@ -51,19 +54,33 @@ class PodchaserIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _parse_episode(episode, podcast):
|
||||
return {
|
||||
'id': str(episode.get('id')),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'url': episode.get('audio_url'),
|
||||
'thumbnail': episode.get('image_url'),
|
||||
'duration': str_to_int(episode.get('length')),
|
||||
'timestamp': unified_timestamp(episode.get('air_date')),
|
||||
'average_rating': float_or_none(episode.get('rating')),
|
||||
'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
|
||||
'tags': traverse_obj(podcast, ('tags', ..., 'text')),
|
||||
'series': podcast.get('title'),
|
||||
}
|
||||
info = traverse_obj(episode, {
|
||||
'id': ('id', {int}, {str_or_none}, {require('episode ID')}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'url': ('audio_url', {url_or_none}),
|
||||
'thumbnail': ('image_url', {url_or_none}),
|
||||
'duration': ('length', {int_or_none}),
|
||||
'timestamp': ('air_date', {unified_timestamp}),
|
||||
'average_rating': ('rating', {float_or_none}),
|
||||
})
|
||||
info.update(traverse_obj(podcast, {
|
||||
'series': ('title', {str}),
|
||||
'series_id': ('id', {int}, {str_or_none}),
|
||||
'categories': (('summary', None), 'categories', ..., 'text', {str}, filter, all, {orderedSet}),
|
||||
'tags': ('tags', ..., 'text', {str}),
|
||||
}))
|
||||
info['vcodec'] = 'none'
|
||||
|
||||
if info.get('series_id'):
|
||||
podcast_slug = traverse_obj(podcast, ('slug', {str})) or 'podcast'
|
||||
episode_slug = traverse_obj(episode, ('slug', {str})) or 'episode'
|
||||
info['webpage_url'] = '/'.join((
|
||||
'https://www.podchaser.com/podcasts',
|
||||
'-'.join((podcast_slug[:30].rstrip('-'), info['series_id'])),
|
||||
'-'.join((episode_slug[:30].rstrip('-'), info['id']))))
|
||||
|
||||
return info
|
||||
|
||||
def _call_api(self, path, *args, **kwargs):
    """Download JSON from ``https://api.podchaser.com/<path>``.

    Any extra positional and keyword arguments are forwarded unchanged to
    ``self._download_json``.
    """
    api_url = f'https://api.podchaser.com/{path}'
    return self._download_json(api_url, *args, **kwargs)
|
||||
@ -93,5 +110,5 @@ def _real_extract(self, url):
|
||||
OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
|
||||
str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))
|
||||
|
||||
episode = self._call_api(f'episodes/{episode_id}', episode_id)
|
||||
episode = self._call_api(f'podcasts/{podcast_id}/episodes/{episode_id}/player_ids', episode_id)
|
||||
return self._parse_episode(episode, podcast)
|
||||
|
@ -321,6 +321,27 @@ class RaiPlayIE(RaiBaseIE):
|
||||
'timestamp': 1348495020,
|
||||
'upload_date': '20120924',
|
||||
},
|
||||
}, {
|
||||
# checking program_info gives false positive for DRM
|
||||
'url': 'https://www.raiplay.it/video/2022/10/Ad-ogni-costo---Un-giorno-in-Pretura---Puntata-del-15102022-1dfd1295-ea38-4bac-b51e-f87e2881693b.html',
|
||||
'md5': '572c6f711b7c5f2d670ba419b4ae3b08',
|
||||
'info_dict': {
|
||||
'id': '1dfd1295-ea38-4bac-b51e-f87e2881693b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ad ogni costo - Un giorno in Pretura - Puntata del 15/10/2022',
|
||||
'alt_title': 'St 2022/23 - Un giorno in pretura - Ad ogni costo',
|
||||
'description': 'md5:4046d97b2687f74f06a8b8270ba5599f',
|
||||
'uploader': 'Rai 3',
|
||||
'duration': 3773.0,
|
||||
'thumbnail': 'https://www.raiplay.it/dl/img/2022/10/12/1665586539957_2048x2048.png',
|
||||
'creators': ['Rai 3'],
|
||||
'series': 'Un giorno in pretura',
|
||||
'season': '2022/23',
|
||||
'episode': 'Ad ogni costo',
|
||||
'timestamp': 1665507240,
|
||||
'upload_date': '20221011',
|
||||
'release_year': 2025,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||
'only_matching': True,
|
||||
@ -340,9 +361,8 @@ def _real_extract(self, url):
|
||||
media = self._download_json(
|
||||
f'{base}.json', video_id, 'Downloading video JSON')
|
||||
|
||||
if not self.get_param('allow_unplayable_formats'):
|
||||
if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')):
|
||||
self.report_drm(video_id)
|
||||
if traverse_obj(media, ('rights_management', 'rights', 'drm')):
|
||||
self.report_drm(video_id)
|
||||
|
||||
video = media['video']
|
||||
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
|
||||
|
@ -388,7 +388,8 @@ def add_thumbnail(src):
|
||||
})
|
||||
if entries:
|
||||
return self.playlist_result(entries, video_id, **info)
|
||||
raise ExtractorError('No media found', expected=True)
|
||||
self.raise_no_formats('No media found', expected=True, video_id=video_id)
|
||||
return {**info, 'id': video_id}
|
||||
|
||||
# Check if media is hosted on reddit:
|
||||
reddit_video = traverse_obj(data, (
|
||||
|
@ -1,35 +1,142 @@
|
||||
import base64
|
||||
import io
|
||||
import struct
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
make_archive_id,
|
||||
parse_iso8601,
|
||||
qualities,
|
||||
remove_end,
|
||||
remove_start,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class RTVEALaCartaIE(InfoExtractor):
|
||||
class RTVEBaseIE(InfoExtractor):
|
||||
# Reimplementation of https://js2.rtve.es/pages/app-player/3.5.1/js/pf_video.js
|
||||
@staticmethod
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
|
||||
while True:
|
||||
length_data = encrypted_data.read(4)
|
||||
length = struct.unpack('!I', length_data)[0]
|
||||
chunk_type = encrypted_data.read(4)
|
||||
if chunk_type == b'IEND':
|
||||
break
|
||||
data = encrypted_data.read(length)
|
||||
if chunk_type == b'tEXt':
|
||||
data = bytes(filter(None, data))
|
||||
alphabet_data, _, url_data = data.partition(b'#')
|
||||
quality_str, _, url_data = url_data.rpartition(b'%%')
|
||||
quality_str = quality_str.decode() or ''
|
||||
alphabet = RTVEBaseIE._get_alphabet(alphabet_data)
|
||||
url = RTVEBaseIE._get_url(alphabet, url_data)
|
||||
yield quality_str, url
|
||||
encrypted_data.read(4) # CRC
|
||||
|
||||
@staticmethod
|
||||
def _get_url(alphabet, url_data):
|
||||
url = ''
|
||||
f = 0
|
||||
e = 3
|
||||
b = 1
|
||||
for char in url_data.decode('iso-8859-1'):
|
||||
if f == 0:
|
||||
l = int(char) * 10
|
||||
f = 1
|
||||
else:
|
||||
if e == 0:
|
||||
l += int(char)
|
||||
url += alphabet[l]
|
||||
e = (b + 3) % 4
|
||||
f = 0
|
||||
b += 1
|
||||
else:
|
||||
e -= 1
|
||||
return url
|
||||
|
||||
@staticmethod
|
||||
def _get_alphabet(alphabet_data):
|
||||
alphabet = []
|
||||
e = 0
|
||||
d = 0
|
||||
for char in alphabet_data.decode('iso-8859-1'):
|
||||
if d == 0:
|
||||
alphabet.append(char)
|
||||
d = e = (e + 1) % 4
|
||||
else:
|
||||
d -= 1
|
||||
return alphabet
|
||||
|
||||
def _extract_png_formats_and_subtitles(self, video_id, media_type='videos'):
    """Collect formats and subtitles from the thumbnail PNGs for ``video_id``.

    One PNG is requested per manager ('rtveplayw', then 'default'); a
    failed download is skipped. Every URL decoded from a PNG is expanded
    as an HLS or DASH manifest when its extension is m3u8 or mpd, and is
    otherwise added as a single format ranked by its quality label.

    Returns a ``(formats, subtitles)`` tuple.
    """
    quality_rank = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
    formats = []
    subtitles = {}
    for manager in ('rtveplayw', 'default'):
        png = self._download_webpage(
            f'http://www.rtve.es/ztnr/movil/thumbnail/{manager}/{media_type}/{video_id}.png',
            video_id, 'Downloading url information', query={'q': 'v2'}, fatal=False)
        if not png:
            continue

        for quality, video_url in self._decrypt_url(png):
            ext = determine_ext(video_url)
            if ext not in ('m3u8', 'mpd'):
                # Direct media URL
                formats.append({
                    'format_id': quality,
                    'quality': quality_rank(quality),
                    'url': video_url,
                })
                continue

            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
            else:
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    video_url, video_id, 'dash', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
    return formats, subtitles
|
||||
|
||||
def _parse_metadata(self, metadata):
    """Build info-dict fields from an RTVE ``metadata`` mapping.

    Each output key is looked up in ``metadata`` through ``traverse_obj``
    and passed through the converter shown next to it.
    NOTE(review): presumably keys whose lookup or conversion yields nothing
    are left out of the result - confirm against traverse_obj's semantics.
    """
    return traverse_obj(metadata, {
        'title': ('title', {str.strip}),
        'alt_title': ('alt', {str.strip}),
        'description': ('description', {clean_html}),
        # The three date fields are parsed with a space as the date/time delimiter
        'timestamp': ('dateOfEmission', {parse_iso8601(delimiter=' ')}),
        'release_timestamp': ('publicationDate', {parse_iso8601(delimiter=' ')}),
        'modified_timestamp': ('modificationDate', {parse_iso8601(delimiter=' ')}),
        # Candidate keys in order of preference; `any` picks one result
        'thumbnail': (('thumbnail', 'image', 'imageSEO'), {url_or_none}, any),
        # presumably milliseconds converted to seconds - TODO confirm
        'duration': ('duration', {float_or_none(scale=1000)}),
        'is_live': ('live', {bool}),
        # Either the top-level 'programTitle' or the nested programInfo -> title
        'series': (('programTitle', ('programInfo', 'title')), {clean_html}, any),
    })
|
||||
|
||||
|
||||
class RTVEALaCartaIE(RTVEBaseIE):
|
||||
IE_NAME = 'rtve.es:alacarta'
|
||||
IE_DESC = 'RTVE a la carta'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
|
||||
IE_DESC = 'RTVE a la carta and Play'
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?rtve\.es/(?:m/)?(?:(?:alacarta|play)/videos|filmoteca)/(?!directo)(?:[^/?#]+/){2}(?P<id>\d+)',
|
||||
r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/?#]+/video/[^/?#]+/(?P<id>\d+)',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
|
||||
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
|
||||
'url': 'http://www.rtve.es/alacarta/videos/la-aventura-del-saber/aventuraentornosilla/3088905/',
|
||||
'md5': 'a964547824359a5753aef09d79fe984b',
|
||||
'info_dict': {
|
||||
'id': '2491869',
|
||||
'id': '3088905',
|
||||
'ext': 'mp4',
|
||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||
'duration': 5024.566,
|
||||
'series': 'Balonmano',
|
||||
'title': 'En torno a la silla',
|
||||
'duration': 1216.981,
|
||||
'series': 'La aventura del Saber',
|
||||
'thumbnail': 'https://img2.rtve.es/v/aventuraentornosilla_3088905.png',
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}, {
|
||||
'note': 'Live stream',
|
||||
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||
@ -38,140 +145,88 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'is_live': True,
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
|
||||
'md5': 'f3cf0d1902d008c48c793e736706c174',
|
||||
'info_dict': {
|
||||
'id': '4236788',
|
||||
'ext': 'mp4',
|
||||
'title': 'Servir y proteger - Capítulo 104',
|
||||
'duration': 3222.0,
|
||||
'title': 'Episodio 104',
|
||||
'duration': 3222.8,
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
'series': 'Servir y proteger',
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/play/videos/saber-vivir/07-07-24/16177116/',
|
||||
'md5': 'a5b24fcdfa3ff5cb7908aba53d22d4b6',
|
||||
'info_dict': {
|
||||
'id': '16177116',
|
||||
'ext': 'mp4',
|
||||
'title': 'Saber vivir - 07/07/24',
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
'duration': 2162.68,
|
||||
'series': 'Saber vivir',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/infantil/serie/agus-lui-churros-crafts/video/gusano/7048976/',
|
||||
'info_dict': {
|
||||
'id': '7048976',
|
||||
'ext': 'mp4',
|
||||
'title': 'Gusano',
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
'duration': 292.86,
|
||||
'series': 'Agus & Lui: Churros y Crafts',
|
||||
'_old_archive_ids': ['rtveinfantil 7048976'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
    """Fetch the 'manager' name that later PNG thumbnail URLs are built with.

    The configured User-Agent header is base64-encoded and appended to the
    lookup URL; the 'manager' field of the JSON response is stored on
    `self._manager`.
    """
    user_agent = self.get_param('http_headers')['User-Agent']
    user_agent_b64 = base64.b64encode(user_agent.encode()).decode('utf-8')
    self._manager = self._download_json(
        'http://www.rtve.es/odin/loki/' + user_agent_b64,
        None, 'Fetching manager info')['manager']
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_url(png):
|
||||
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
|
||||
while True:
|
||||
length = struct.unpack('!I', encrypted_data.read(4))[0]
|
||||
chunk_type = encrypted_data.read(4)
|
||||
if chunk_type == b'IEND':
|
||||
break
|
||||
data = encrypted_data.read(length)
|
||||
if chunk_type == b'tEXt':
|
||||
alphabet_data, text = data.split(b'\0')
|
||||
quality, url_data = text.split(b'%%')
|
||||
alphabet = []
|
||||
e = 0
|
||||
d = 0
|
||||
for l in alphabet_data.decode('iso-8859-1'):
|
||||
if d == 0:
|
||||
alphabet.append(l)
|
||||
d = e = (e + 1) % 4
|
||||
else:
|
||||
d -= 1
|
||||
url = ''
|
||||
f = 0
|
||||
e = 3
|
||||
b = 1
|
||||
for letter in url_data.decode('iso-8859-1'):
|
||||
if f == 0:
|
||||
l = int(letter) * 10
|
||||
f = 1
|
||||
else:
|
||||
if e == 0:
|
||||
l += int(letter)
|
||||
url += alphabet[l]
|
||||
e = (b + 3) % 4
|
||||
f = 0
|
||||
b += 1
|
||||
else:
|
||||
e -= 1
|
||||
|
||||
yield quality.decode(), url
|
||||
encrypted_data.read(4) # CRC
|
||||
|
||||
def _extract_png_formats(self, video_id):
    """Build the format list for a video from its thumbnail PNG.

    The PNG is downloaded for the manager stored in `self._manager` and
    passed to `self._decrypt_url`; each yielded (quality, url) pair is
    expanded as HLS or DASH according to the URL extension, or added as a
    direct format ranked by its quality label.

    @param video_id  ID used in the PNG URL and for progress messages
    @returns         List of format dicts
    """
    png = self._download_webpage(
        f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
        video_id, 'Downloading url information', query={'q': 'v2'})
    q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
    formats = []
    for quality, video_url in self._decrypt_url(png):
        ext = determine_ext(video_url)
        if ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                video_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        elif ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                video_url, video_id, 'dash', fatal=False))
        else:
            formats.append({
                'format_id': quality,
                'quality': q(quality),
                'url': video_url,
            })
    return formats
|
||||
def _get_subtitles(self, video_id):
    """Download the subtitle listing for a video and convert it to a subtitles dict.

    Each entry under page -> items contributes its 'lang' as the subtitle
    ID and its 'src' as the URL; the collected list is converted with
    `subs_list_to_dict(lang='es')`.

    @param video_id  ID used in the API URL and for progress messages
    """
    subtitle_data = self._download_json(
        f'https://api2.rtve.es/api/videos/{video_id}/subtitulos.json', video_id,
        'Downloading subtitles info')
    return traverse_obj(subtitle_data, ('page', 'items', ..., {
        'id': ('lang', {str}),
        'url': ('src', {url_or_none}),
    }, all, {subs_list_to_dict(lang='es')}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
metadata = self._download_json(
|
||||
f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
|
||||
video_id)['page']['items'][0]
|
||||
if info['state'] == 'DESPU':
|
||||
if metadata['state'] == 'DESPU':
|
||||
raise ExtractorError('The video is no longer available', expected=True)
|
||||
title = info['title'].strip()
|
||||
formats = self._extract_png_formats(video_id)
|
||||
formats, subtitles = self._extract_png_formats_and_subtitles(video_id)
|
||||
|
||||
subtitles = None
|
||||
sbt_file = info.get('sbtFile')
|
||||
if sbt_file:
|
||||
subtitles = self.extract_subtitles(video_id, sbt_file)
|
||||
self._merge_subtitles(self.extract_subtitles(video_id), target=subtitles)
|
||||
|
||||
is_live = info.get('live') is True
|
||||
is_infantil = urllib.parse.urlparse(url).path.startswith('/infantil/')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': info.get('image'),
|
||||
'subtitles': subtitles,
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
'is_live': is_live,
|
||||
'series': info.get('programTitle'),
|
||||
**self._parse_metadata(metadata),
|
||||
'_old_archive_ids': [make_archive_id('rtveinfantil', video_id)] if is_infantil else None,
|
||||
}
|
||||
|
||||
def _get_subtitles(self, video_id, sub_file):
    """Download `<sub_file>.json` and map each language to one VTT subtitle.

    @param video_id  ID used for progress messages
    @param sub_file  Subtitle file URL without the '.json' suffix
    @returns         Dict of language -> [{'ext': 'vtt', 'url': ...}];
                     a language listed more than once keeps its last entry
    """
    subs = self._download_json(
        sub_file + '.json', video_id,
        'Downloading subtitles info')['page']['items']
    return {
        s['lang']: [{'ext': 'vtt', 'url': s['src']}]
        for s in subs
    }
|
||||
|
||||
|
||||
class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
class RTVEAudioIE(RTVEBaseIE):
|
||||
IE_NAME = 'rtve.es:audio'
|
||||
IE_DESC = 'RTVE audio'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/(?:[^/?#]+/){2}(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
|
||||
@ -180,9 +235,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
'id': '5889192',
|
||||
'ext': 'mp3',
|
||||
'title': 'Códigos informáticos',
|
||||
'thumbnail': r're:https?://.+/1598856591583.jpg',
|
||||
'alt_title': 'Códigos informáticos - Escuchar ahora',
|
||||
'duration': 349.440,
|
||||
'series': 'A hombros de gigantes',
|
||||
'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb',
|
||||
'thumbnail': 'https://img2.rtve.es/a/palabra-ingeniero-codigos-informaticos-270421_5889192.png',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
|
||||
@ -191,9 +248,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
'id': '5791165',
|
||||
'ext': 'mp3',
|
||||
'title': 'Ignatius Farray',
|
||||
'alt_title': 'En Radio 3 - Ignatius Farray - 13/02/21 - escuchar ahora',
|
||||
'thumbnail': r're:https?://.+/1613243011863.jpg',
|
||||
'duration': 3559.559,
|
||||
'series': 'En Radio 3',
|
||||
'description': 'md5:124aa60b461e0b1724a380bad3bc4040',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
|
||||
@ -202,126 +261,101 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
'id': '6082623',
|
||||
'ext': 'mp3',
|
||||
'title': 'Capítulo 26 y último: La muerte de Victor',
|
||||
'alt_title': 'Frankenstein o el moderno Prometeo - Capítulo 26 y último: La muerte de Victor',
|
||||
'thumbnail': r're:https?://.+/1632147445707.jpg',
|
||||
'duration': 3174.086,
|
||||
'series': 'Frankenstein o el moderno Prometeo',
|
||||
'description': 'md5:4ee6fcb82ebe2e46d267e1d1c1a8f7b5',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_png_formats(self, audio_id):
    """
    Retrieve the media-related PNG thumbnail, which obfuscates the
    information needed to play the media, and turn it into formats.
    The PNG is decoded by the `_decrypt_url` function, which provides
    the media quality and the media URL.

    @param audio_id  ID used in the PNG URL and for progress messages
    @returns         List of format dicts
    """
    png = self._download_webpage(
        f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
        audio_id, 'Downloading url information', query={'q': 'v2'})
    q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
    formats = []
    for quality, audio_url in self._decrypt_url(png):
        ext = determine_ext(audio_url)
        if ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                audio_url, audio_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        elif ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                audio_url, audio_id, 'dash', fatal=False))
        else:
            formats.append({
                'format_id': quality,
                'quality': q(quality),
                'url': audio_url,
            })
    return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
info = self._download_json(
|
||||
f'https://www.rtve.es/api/audios/{audio_id}.json',
|
||||
audio_id)['page']['items'][0]
|
||||
metadata = self._download_json(
|
||||
f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0]
|
||||
|
||||
formats, subtitles = self._extract_png_formats_and_subtitles(audio_id, media_type='audios')
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'title': info['title'].strip(),
|
||||
'thumbnail': info.get('thumbnail'),
|
||||
'duration': float_or_none(info.get('duration'), 1000),
|
||||
'series': try_get(info, lambda x: x['programInfo']['title']),
|
||||
'formats': self._extract_png_formats(audio_id),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**self._parse_metadata(metadata),
|
||||
}
|
||||
|
||||
|
||||
class RTVEInfantilIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
    # Matches rtve.es/infantil/serie/<serie>/video/<slug>/<id>/ pages.
    # Defines no methods of its own; everything is inherited from RTVEALaCartaIE.
    IE_NAME = 'rtve.es:infantil'
    IE_DESC = 'RTVE infantil'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'

    _TESTS = [{
        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
        'md5': '5747454717aedf9f9fdf212d1bcfc48d',
        'info_dict': {
            'id': '3040283',
            'ext': 'mp4',
            'title': 'Maneras de vivir',
            'thumbnail': r're:https?://.+/1426182947956\.JPG',
            'duration': 357.958,
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }]
|
||||
|
||||
|
||||
class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
|
||||
class RTVELiveIE(RTVEBaseIE):
|
||||
IE_NAME = 'rtve.es:live'
|
||||
IE_DESC = 'RTVE.es live streams'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)',
|
||||
r'https?://(?:www\.)?rtve\.es/play/videos/directo/[^/?#]+/(?P<id>[a-zA-Z0-9-]+)',
|
||||
]
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.rtve.es/directo/la-1/',
|
||||
'info_dict': {
|
||||
'id': 'la-1',
|
||||
'ext': 'mp4',
|
||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||
'live_status': 'is_live',
|
||||
'title': str,
|
||||
'description': str,
|
||||
'thumbnail': r're:https://img\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'live stream',
|
||||
'params': {'skip_download': 'live stream'},
|
||||
}, {
|
||||
'url': 'https://www.rtve.es/play/videos/directo/deportes/tdp/',
|
||||
'info_dict': {
|
||||
'id': 'tdp',
|
||||
'ext': 'mp4',
|
||||
'live_status': 'is_live',
|
||||
'title': str,
|
||||
'description': str,
|
||||
'thumbnail': r're:https://img2\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
},
|
||||
'params': {'skip_download': 'live stream'},
|
||||
}, {
|
||||
'url': 'http://www.rtve.es/play/videos/directo/canales-lineales/la-1/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
||||
title = remove_start(title, 'Estoy viendo ')
|
||||
|
||||
vidplayer_id = self._search_regex(
|
||||
(r'playerId=player([0-9]+)',
|
||||
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
||||
r'data-id=["\'](\d+)'),
|
||||
webpage, 'internal video ID')
|
||||
data_setup = self._search_json(
|
||||
r'<div[^>]+class="[^"]*videoPlayer[^"]*"[^>]*data-setup=\'',
|
||||
webpage, 'data_setup', video_id)
|
||||
|
||||
formats, subtitles = self._extract_png_formats_and_subtitles(data_setup['idAsset'])
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': self._extract_png_formats(vidplayer_id),
|
||||
**self._search_json_ld(webpage, video_id, fatal=False),
|
||||
'title': self._html_extract_title(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'is_live': True,
|
||||
}
|
||||
|
||||
|
||||
class RTVETelevisionIE(InfoExtractor):
|
||||
IE_NAME = 'rtve.es:television'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'
|
||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/?#]+/[^/?#]+/(?P<id>\d+).shtml'
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
|
||||
'url': 'https://www.rtve.es/television/20091103/video-inedito-del-8o-programa/299020.shtml',
|
||||
'info_dict': {
|
||||
'id': '3069778',
|
||||
'id': '572515',
|
||||
'ext': 'mp4',
|
||||
'title': 'Documentos TV - La revolución del móvil',
|
||||
'duration': 3496.948,
|
||||
'title': 'Clase inédita',
|
||||
'duration': 335.817,
|
||||
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
|
||||
'series': 'El coro de la cárcel',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -332,11 +366,8 @@ def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
|
||||
alacarta_url = self._search_regex(
|
||||
r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
|
||||
webpage, 'alacarta url', default=None)
|
||||
if alacarta_url is None:
|
||||
raise ExtractorError(
|
||||
'The webpage doesn\'t contain any video', expected=True)
|
||||
play_url = self._html_search_meta('contentUrl', webpage)
|
||||
if play_url is None:
|
||||
raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
|
||||
|
||||
return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
|
||||
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
|
||||
|
@ -697,7 +697,7 @@ def _real_extract(self, url):
|
||||
try:
|
||||
return self._extract_info_dict(info, full_title, token)
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
|
||||
raise
|
||||
self.report_warning(
|
||||
'You have reached the API rate limit, which is ~600 requests per '
|
||||
|
@ -1,61 +0,0 @@
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SproutIE(AdobePassIE):
    # Extractor for sproutonline.com / universalkids.com video pages.
    # The page only supplies metadata; the media itself is delegated to the
    # ThePlatform extractor through a 'url_transparent' result.
    _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
        'info_dict': {
            'id': 'bm0foJFaTKqb',
            'ext': 'mp4',
            'title': 'Robot Bike Race',
            'description': 'md5:436b1d97117cc437f54c383f4debc66d',
            'timestamp': 1606148940,
            'upload_date': '20201123',
            'uploader': 'NBCU-MPAT',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
        'only_matching': True,
    }, {
        'url': 'https://www.universalkids.com/watch/robot-bike-race',
        'only_matching': True,
    }]
    _GEO_COUNTRIES = ['US']

    def _real_extract(self, url):
        """Look up the media PID for the page and hand off to ThePlatform.

        When the metadata marks the video with entitlement 'auth', an MVPD
        auth token is obtained and added to the ThePlatform query.
        """
        display_id = self._match_id(url)
        mpx_metadata = self._download_json(
            # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
            'https://www.universalkids.com/_api/videos/' + display_id,
            display_id)['mpxMetadata']
        media_pid = mpx_metadata['mediaPid']
        theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        if mpx_metadata.get('entitlement') == 'auth':
            query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
        # Extra options for the ThePlatform extractor travel inside the smuggled URL
        theplatform_url = smuggle_url(
            update_url_query(theplatform_url, query), {
                'force_smil_url': True,
                'geo_countries': self._GEO_COUNTRIES,
            })
        return {
            '_type': 'url_transparent',
            'id': media_pid,
            'url': theplatform_url,
            'series': mpx_metadata.get('seriesName'),
            'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
            'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
            'ie_key': 'ThePlatform',
        }
|
@ -471,8 +471,7 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
urql_state = self._search_json(
|
||||
r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
|
||||
urql_state = self._search_json(r'urqlState\s*[=:]', webpage, 'json data', display_id)
|
||||
|
||||
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
|
||||
|
||||
|
@ -4,7 +4,6 @@
|
||||
import time
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .once import OnceIE
|
||||
from ..networking import HEADRequest, Request
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -26,7 +25,7 @@
|
||||
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
|
||||
|
||||
|
||||
class ThePlatformBaseIE(OnceIE):
|
||||
class ThePlatformBaseIE(AdobePassIE):
|
||||
_TP_TLD = 'com'
|
||||
|
||||
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
|
||||
@ -54,16 +53,13 @@ def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL d
|
||||
|
||||
formats = []
|
||||
for _format in smil_formats:
|
||||
if OnceIE.suitable(_format['url']):
|
||||
formats.extend(self._extract_once_formats(_format['url']))
|
||||
else:
|
||||
media_url = _format['url']
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
hdnea2 = self._get_cookies(media_url).get('hdnea2')
|
||||
if hdnea2:
|
||||
_format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
|
||||
media_url = _format['url']
|
||||
if determine_ext(media_url) == 'm3u8':
|
||||
hdnea2 = self._get_cookies(media_url).get('hdnea2')
|
||||
if hdnea2:
|
||||
_format['url'] = update_url_query(media_url, {'hdnea3': hdnea2.value})
|
||||
|
||||
formats.append(_format)
|
||||
formats.append(_format)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
@ -129,7 +125,7 @@ def _extract_theplatform_metadata(self, path, video_id):
|
||||
return self._parse_theplatform_metadata(info)
|
||||
|
||||
|
||||
class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
|
||||
class ThePlatformIE(ThePlatformBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
|
||||
(?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
|
||||
|
121
yt_dlp/extractor/toutiao.py
Normal file
121
yt_dlp/extractor/toutiao.py
Normal file
@ -0,0 +1,121 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class ToutiaoIE(InfoExtractor):
    # Extractor for toutiao.com/video/<id> pages. Video data is read from
    # the URL-quoted JSON inside the page's RENDER_DATA <script> element.
    IE_NAME = 'toutiao'
    IE_DESC = '今日头条'

    _VALID_URL = r'https?://www\.toutiao\.com/video/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.toutiao.com/video/7505382061495176511/',
        'info_dict': {
            'id': '7505382061495176511',
            'ext': 'mp4',
            'title': '新疆多地现不明飞行物,目击者称和月亮一样亮,几秒内突然加速消失,气象部门回应',
            'comment_count': int,
            'duration': 9.753,
            'like_count': int,
            'release_date': '20250517',
            'release_timestamp': 1747483344,
            'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
            'uploader': '极目新闻',
            'uploader_id': 'MS4wLjABAAAAeateBb9Su8I3MJOZozmvyzWktmba5LMlliRDz1KffnM',
            'view_count': int,
        },
    }, {
        'url': 'https://www.toutiao.com/video/7479446610359878153/',
        'info_dict': {
            'id': '7479446610359878153',
            'ext': 'mp4',
            'title': '小伙竟然利用两块磁铁制作成磁力减震器,简直太有创意了!',
            'comment_count': int,
            'duration': 118.374,
            'like_count': int,
            'release_date': '20250308',
            'release_timestamp': 1741444368,
            'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
            'uploader': '小莉创意发明',
            'uploader_id': 'MS4wLjABAAAA4f7d4mwtApALtHIiq-QM20dwXqe32NUz0DeWF7wbHKw',
            'view_count': int,
        },
    }]

    def _real_initialize(self):
        """Make sure a 'ttwid' cookie exists for toutiao.com.

        If the cookie is already present nothing is requested. Otherwise it
        is registered with the bytedance ttwid endpoint and copied onto the
        '.toutiao.com' domain; if that yields no cookie, login is required.
        """
        if self._get_cookies('https://www.toutiao.com').get('ttwid'):
            return

        urlh = self._request_webpage(
            'https://ttwid.bytedance.com/ttwid/union/register/', None,
            'Fetching ttwid', 'Unable to fetch ttwid', headers={
                'Content-Type': 'application/json',
            }, data=json.dumps({
                'aid': 24,
                'needFid': False,
                'region': 'cn',
                'service': 'www.toutiao.com',
                'union': True,
            }).encode(),
        )

        # The cookie is looked up for the final response URL; any failure
        # inside the lambda results in a falsy value
        if ttwid := try_call(lambda: self._get_cookies(urlh.url)['ttwid'].value):
            self._set_cookie('.toutiao.com', 'ttwid', ttwid)
            return

        self.raise_login_required()

    def _real_extract(self, url):
        """Extract formats and metadata from the page's RENDER_DATA JSON."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # RENDER_DATA holds URL-quoted JSON; the video lives under data -> initialVideo
        video_data = traverse_obj(webpage, (
            {find_element(tag='script', id='RENDER_DATA')},
            {urllib.parse.unquote}, {json.loads}, 'data', 'initialVideo',
        ))

        formats = []
        # Only entries that carry a 'main_url' become formats
        for video in traverse_obj(video_data, (
            'videoPlayInfo', 'video_list', lambda _, v: v['main_url'],
        )):
            formats.append({
                'url': video['main_url'],
                **traverse_obj(video, ('video_meta', {
                    'acodec': ('audio_profile', {str}),
                    'asr': ('audio_sample_rate', {int_or_none}),
                    'audio_channels': ('audio_channels', {float_or_none}, {int_or_none}),
                    'ext': ('vtype', {str}),
                    'filesize': ('size', {int_or_none}),
                    'format_id': ('definition', {str}),
                    'fps': ('fps', {int_or_none}),
                    'height': ('vheight', {int_or_none}),
                    'tbr': ('real_bitrate', {float_or_none(scale=1000)}),
                    'vcodec': ('codec_type', {str}),
                    'width': ('vwidth', {int_or_none}),
                })),
            })

        return {
            'id': video_id,
            'formats': formats,
            **traverse_obj(video_data, {
                'comment_count': ('commentCount', {int_or_none}),
                'duration': ('videoPlayInfo', 'video_duration', {float_or_none}),
                'like_count': ('repinCount', {int_or_none}),
                'release_timestamp': ('publishTime', {int_or_none}),
                'thumbnail': (('poster', 'coverUrl'), {url_or_none}, any),
                'title': ('title', {str}),
                'uploader': ('userInfo', 'name', {str}),
                'uploader_id': ('userInfo', 'userId', {str_or_none}),
                'view_count': ('playCount', {int_or_none}),
                'webpage_url': ('detailUrl', {url_or_none}),
            }),
        }
|
@ -2,12 +2,13 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class TV2DKIE(InfoExtractor):
|
||||
@ -21,35 +22,46 @@ class TV2DKIE(InfoExtractor):
|
||||
tv2fyn|
|
||||
tv2east|
|
||||
tv2lorry|
|
||||
tv2nord
|
||||
tv2nord|
|
||||
tv2kosmopol
|
||||
)\.dk/
|
||||
(:[^/]+/)*
|
||||
(?:[^/?#]+/)*
|
||||
(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
|
||||
'info_dict': {
|
||||
'id': '0_52jmwa0p',
|
||||
'id': 'sPp5z21q',
|
||||
'ext': 'mp4',
|
||||
'title': '19:30 - 28. okt. 2019',
|
||||
'timestamp': 1572290248,
|
||||
'description': '',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/sPp5z21q/poster.jpg?width=720',
|
||||
'timestamp': 1572287400,
|
||||
'upload_date': '20191028',
|
||||
'uploader_id': 'tvsyd',
|
||||
'duration': 1347,
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
}, {
|
||||
'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
|
||||
'info_dict': {
|
||||
'id': '1_7iwll9n0',
|
||||
'id': 'oD9cyq0m',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20211027',
|
||||
'title': 'Gadekamp #6 - Højhuse i København',
|
||||
'uploader_id': 'tv2lorry',
|
||||
'timestamp': 1635345229,
|
||||
'description': '',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/oD9cyq0m/poster.jpg?width=720',
|
||||
'timestamp': 1635348600,
|
||||
'upload_date': '20211027',
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
}, {
|
||||
'url': 'https://www.tvsyd.dk/haderslev/x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
|
||||
'info_dict': {
|
||||
'id': 'x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://www.tv2ostjylland.dk/aarhus/dom-kan-fa-alvorlige-konsekvenser',
|
||||
'info_dict': {
|
||||
'id': 'dom-kan-fa-alvorlige-konsekvenser',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
|
||||
'only_matching': True,
|
||||
@ -71,40 +83,22 @@ class TV2DKIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tv2kosmopol.dk/metropolen/chaufforer-beordres-til-at-kore-videre-i-ulovlige-busser-med-rode-advarselslamper',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
search_space = traverse_obj(webpage, {find_element(tag='article')}) or webpage
|
||||
|
||||
entries = []
|
||||
player_ids = traverse_obj(
|
||||
re.findall(r'x-data="(?:video_player|simple_player)\(({[^"]+})', search_space),
|
||||
(..., {js_to_json}, {json.loads}, ('jwpMediaId', 'videoId'), {str}))
|
||||
|
||||
def add_entry(partner_id, kaltura_id):
|
||||
entries.append(self.url_result(
|
||||
f'kaltura:{partner_id}:{kaltura_id}', 'Kaltura',
|
||||
video_id=kaltura_id))
|
||||
|
||||
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
|
||||
video = extract_attributes(video_el)
|
||||
kaltura_id = video.get('data-entryid')
|
||||
if not kaltura_id:
|
||||
continue
|
||||
partner_id = video.get('data-partnerid')
|
||||
if not partner_id:
|
||||
continue
|
||||
add_entry(partner_id, kaltura_id)
|
||||
if not entries:
|
||||
kaltura_id = self._search_regex(
|
||||
(r'entry_id\s*:\s*["\']([0-9a-z_]+)',
|
||||
r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
|
||||
partner_id = self._search_regex(
|
||||
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
||||
'partner id')
|
||||
add_entry(partner_id, kaltura_id)
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
return self.playlist_result(entries)
|
||||
return self.playlist_from_matches(
|
||||
player_ids, video_id, getter=lambda x: f'jwplatform:{x}', ie=JWPlatformIE)
|
||||
|
||||
|
||||
class TV2DKBornholmPlayIE(InfoExtractor):
|
||||
|
@ -513,7 +513,7 @@ def _parse_video(self, video, with_url=True):
|
||||
|
||||
class TVPVODVideoIE(TVPVODBaseIE):
|
||||
IE_NAME = 'tvp:vod'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
|
||||
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek--?\d+,S-?\d+E-?\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
|
||||
@ -568,6 +568,9 @@ class TVPVODVideoIE(TVPVODBaseIE):
|
||||
'live_status': 'is_live',
|
||||
'thumbnail': 're:https?://.+',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vod.tvp.pl/informacje-i-publicystyka,205/konskie-2025-debata-przedwyborcza-odcinki,2028435/odcinek--1,S01E-1,2028419',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,13 +1,21 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, remove_end, unified_timestamp, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
parse_qs,
|
||||
remove_end,
|
||||
require,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class TvwIE(InfoExtractor):
|
||||
IE_NAME = 'tvw'
|
||||
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
|
||||
'md5': '9ceb94fe2bb7fd726f74f16356825703',
|
||||
@ -115,3 +123,43 @@ def _real_extract(self, url):
|
||||
'is_live': ('eventStatus', {lambda x: x == 'live'}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
# Extractor for the always-on channel pages at tvw.org/tvchannels/<id>.
# Every result is reported as a live stream.
class TvwTvChannelsIE(InfoExtractor):
    IE_NAME = 'tvw:tvchannels'
    _VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://tvw.org/tvchannels/air/',
        'info_dict': {
            'id': 'air',
            'ext': 'mp4',
            'title': r're:TVW Cable Channel Live Stream',
            'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://tvw.org/tvchannels/tvw2/',
        'info_dict': {
            'id': 'tvw2',
            'ext': 'mp4',
            'title': r're:TVW-2 Broadcast Channel',
            'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
            'live_status': 'is_live',
        },
    }]

    def _real_extract(self, url):
        """Resolve the channel page at `url` to its HLS live stream."""
        channel_id = self._match_id(url)
        page = self._download_webpage(url, channel_id)

        # The element with id "invintus-persistent-stream-frame" has a `src` URL whose
        # `encoder` query parameter is a JSON document; its `live247URI` key holds the
        # stream URL. `require` makes a missing/invalid URL an extraction error.
        stream_url = traverse_obj(page, (
            {find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},
            'src', {parse_qs}, 'encoder', 0, {json.loads}, 'live247URI', {url_or_none}, {require('stream url')}))

        hls_formats = self._extract_m3u8_formats(stream_url, channel_id, 'mp4', m3u8_id='hls', live=True)
        # The OpenGraph title is stripped of its trailing " - TVW" suffix
        og_title = self._og_search_title(page, default=None)

        return {
            'id': channel_id,
            'formats': hls_formats,
            'title': remove_end(og_title, ' - TVW'),
            'thumbnail': self._og_search_thumbnail(page, default=None),
            'is_live': True,
        }
|
||||
|
@ -1,4 +1,5 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import itertools
|
||||
import re
|
||||
|
||||
@ -14,12 +15,14 @@
|
||||
parse_duration,
|
||||
qualities,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class TwitCastingIE(InfoExtractor):
|
||||
@ -138,13 +141,7 @@ def _real_extract(self, url):
|
||||
r'data-toggle="true"[^>]+datetime="([^"]+)"',
|
||||
webpage, 'datetime', None))
|
||||
|
||||
stream_server_data = self._download_json(
|
||||
f'https://twitcasting.tv/streamserver.php?target={uploader_id}&mode=client', video_id,
|
||||
'Downloading live info', fatal=False)
|
||||
|
||||
is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"'])
|
||||
if not traverse_obj(stream_server_data, 'llfmp4') and is_live:
|
||||
self.raise_login_required(method='cookies')
|
||||
|
||||
base_dict = {
|
||||
'title': title,
|
||||
@ -165,30 +162,43 @@ def find_dmu(x):
|
||||
return [data_movie_url]
|
||||
|
||||
m3u8_urls = (try_get(webpage, find_dmu, list)
|
||||
or traverse_obj(video_js_data, (..., 'source', 'url'))
|
||||
or ([f'https://twitcasting.tv/{uploader_id}/metastream.m3u8'] if is_live else None))
|
||||
if not m3u8_urls:
|
||||
raise ExtractorError('Failed to get m3u8 playlist')
|
||||
or traverse_obj(video_js_data, (..., 'source', 'url')))
|
||||
|
||||
if is_live:
|
||||
m3u8_url = m3u8_urls[0]
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id='hls',
|
||||
live=True, headers=self._M3U8_HEADERS)
|
||||
stream_data = self._download_json(
|
||||
'https://twitcasting.tv/streamserver.php',
|
||||
video_id, 'Downloading live info', query={
|
||||
'target': uploader_id,
|
||||
'mode': 'client',
|
||||
'player': 'pc_web',
|
||||
})
|
||||
|
||||
if traverse_obj(stream_server_data, ('hls', 'source')):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id='source',
|
||||
live=True, query={'mode': 'source'},
|
||||
note='Downloading source quality m3u8',
|
||||
headers=self._M3U8_HEADERS, fatal=False))
|
||||
password_params = {
|
||||
'word': hashlib.md5(video_password.encode()).hexdigest(),
|
||||
} if video_password else None
|
||||
|
||||
formats = []
|
||||
# low: 640x360, medium: 1280x720, high: 1920x1080
|
||||
qq = qualities(['low', 'medium', 'high'])
|
||||
for quality, m3u8_url in traverse_obj(stream_data, (
|
||||
'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'url': update_url_query(m3u8_url, password_params),
|
||||
'format_id': f'hls-{quality}',
|
||||
'ext': 'mp4',
|
||||
'quality': qq(quality),
|
||||
'protocol': 'm3u8',
|
||||
'http_headers': self._M3U8_HEADERS,
|
||||
})
|
||||
|
||||
if websockets:
|
||||
qq = qualities(['base', 'mobilesource', 'main'])
|
||||
streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {}
|
||||
for mode, ws_url in streams.items():
|
||||
for mode, ws_url in traverse_obj(stream_data, (
|
||||
'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'url': ws_url,
|
||||
'url': update_url_query(ws_url, password_params),
|
||||
'format_id': f'ws-{mode}',
|
||||
'ext': 'mp4',
|
||||
'quality': qq(mode),
|
||||
@ -197,10 +207,15 @@ def find_dmu(x):
|
||||
'protocol': 'websocket_frag',
|
||||
})
|
||||
|
||||
if not formats:
|
||||
self.raise_login_required()
|
||||
|
||||
infodict = {
|
||||
'formats': formats,
|
||||
'_format_sort_fields': ('source', ),
|
||||
}
|
||||
elif not m3u8_urls:
|
||||
raise ExtractorError('Failed to get m3u8 playlist')
|
||||
elif len(m3u8_urls) == 1:
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS)
|
||||
|
@ -187,7 +187,7 @@ def _get_thumbnails(self, thumbnail):
|
||||
'url': thumbnail,
|
||||
}] if thumbnail else None
|
||||
|
||||
def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
|
||||
def _extract_twitch_m3u8_formats(self, path, video_id, token, signature, live_from_start=False):
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
|
||||
'allow_source': 'true',
|
||||
@ -204,7 +204,10 @@ def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
|
||||
for fmt in formats:
|
||||
if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
|
||||
# mpegts does not yet have proper support for av1
|
||||
fmt['downloader_options'] = {'ffmpeg_args_out': ['-f', 'mp4']}
|
||||
fmt.setdefault('downloader_options', {}).update({'ffmpeg_args_out': ['-f', 'mp4']})
|
||||
if live_from_start:
|
||||
fmt.setdefault('downloader_options', {}).update({'ffmpeg_args': ['-live_start_index', '0']})
|
||||
fmt['is_from_start'] = True
|
||||
|
||||
return formats
|
||||
|
||||
@ -550,7 +553,8 @@ def _real_extract(self, url):
|
||||
access_token = self._download_access_token(vod_id, 'video', 'id')
|
||||
|
||||
formats = self._extract_twitch_m3u8_formats(
|
||||
'vod', vod_id, access_token['value'], access_token['signature'])
|
||||
'vod', vod_id, access_token['value'], access_token['signature'],
|
||||
live_from_start=self.get_param('live_from_start'))
|
||||
formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))
|
||||
|
||||
self._prefer_source(formats)
|
||||
@ -633,6 +637,10 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
|
||||
_PAGE_LIMIT = 100
|
||||
|
||||
def _entries(self, channel_name, *args):
|
||||
"""
|
||||
Subclasses must define _make_variables() and _extract_entry(),
|
||||
as well as set _OPERATION_NAME, _ENTRY_KIND, _EDGE_KIND, and _NODE_KIND
|
||||
"""
|
||||
cursor = None
|
||||
variables_common = self._make_variables(channel_name, *args)
|
||||
entries_key = f'{self._ENTRY_KIND}s'
|
||||
@ -672,7 +680,22 @@ def _entries(self, channel_name, *args):
|
||||
break
|
||||
|
||||
|
||||
class TwitchVideosIE(TwitchPlaylistBaseIE):
|
||||
# Common GraphQL settings for extractors that page through a channel's videos
# using the `FilterableVideoTower_Videos` operation.
class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
    _OPERATION_NAME = 'FilterableVideoTower_Videos'
    _ENTRY_KIND = 'video'
    _EDGE_KIND = 'VideoEdge'
    _NODE_KIND = 'Video'

    @staticmethod
    def _make_variables(channel_name, broadcast_type, sort):
        """Build the query variables for the videos operation.

        `sort` is upper-cased before being sent; the other two arguments
        are passed through unchanged.
        """
        variables = {
            'channelOwnerLogin': channel_name,
            'broadcastType': broadcast_type,
        }
        variables['videoSort'] = sort.upper()
        return variables
|
||||
|
||||
|
||||
class TwitchVideosIE(TwitchVideosBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
|
||||
|
||||
_TESTS = [{
|
||||
@ -751,11 +774,6 @@ class TwitchVideosIE(TwitchPlaylistBaseIE):
|
||||
'views': 'Popular',
|
||||
}
|
||||
|
||||
_OPERATION_NAME = 'FilterableVideoTower_Videos'
|
||||
_ENTRY_KIND = 'video'
|
||||
_EDGE_KIND = 'VideoEdge'
|
||||
_NODE_KIND = 'Video'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
@ -764,14 +782,6 @@ def suitable(cls, url):
|
||||
TwitchVideosCollectionsIE))
|
||||
else super().suitable(url))
|
||||
|
||||
@staticmethod
|
||||
def _make_variables(channel_name, broadcast_type, sort):
|
||||
return {
|
||||
'channelOwnerLogin': channel_name,
|
||||
'broadcastType': broadcast_type,
|
||||
'videoSort': sort.upper(),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_entry(node):
|
||||
return _make_video_result(node)
|
||||
@ -919,7 +929,7 @@ def _real_extract(self, url):
|
||||
playlist_title=f'{channel_name} - Collections')
|
||||
|
||||
|
||||
class TwitchStreamIE(TwitchBaseIE):
|
||||
class TwitchStreamIE(TwitchVideosBaseIE):
|
||||
IE_NAME = 'twitch:stream'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
@ -982,6 +992,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
'skip_download': 'Livestream',
|
||||
},
|
||||
}]
|
||||
_PAGE_LIMIT = 1
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
@ -995,6 +1006,20 @@ def suitable(cls, url):
|
||||
TwitchClipsIE))
|
||||
else super().suitable(url))
|
||||
|
||||
@staticmethod
def _extract_entry(node):
    """Turn a video node into a `url` result that defers to TwitchVodIE.

    Returns None when `node` is not a dict or carries no truthy `id`.
    The `timestamp` falls back to 0 when `unified_timestamp` yields a
    falsy value for `publishedAt`.
    """
    has_id = isinstance(node, dict) and node.get('id')
    if not has_id:
        return None

    video_id = node['id']
    entry = {
        '_type': 'url',
        'ie_key': TwitchVodIE.ie_key(),
        'id': 'v' + video_id,
        'url': f'https://www.twitch.tv/videos/{video_id}',
        'title': node.get('title'),
        'timestamp': unified_timestamp(node.get('publishedAt')) or 0,
    }
    return entry
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_name = self._match_id(url).lower()
|
||||
|
||||
@ -1029,6 +1054,16 @@ def _real_extract(self, url):
|
||||
if not stream:
|
||||
raise UserNotLive(video_id=channel_name)
|
||||
|
||||
timestamp = unified_timestamp(stream.get('createdAt'))
|
||||
|
||||
if self.get_param('live_from_start'):
|
||||
self.to_screen(f'{channel_name}: Extracting VOD to download live from start')
|
||||
entry = next(self._entries(channel_name, None, 'time'), None)
|
||||
if entry and entry.pop('timestamp') >= (timestamp or float('inf')):
|
||||
return entry
|
||||
self.report_warning(
|
||||
'Unable to extract the VOD associated with this livestream', video_id=channel_name)
|
||||
|
||||
access_token = self._download_access_token(
|
||||
channel_name, 'stream', 'channelName')
|
||||
|
||||
@ -1038,7 +1073,6 @@ def _real_extract(self, url):
|
||||
self._prefer_source(formats)
|
||||
|
||||
view_count = stream.get('viewers')
|
||||
timestamp = unified_timestamp(stream.get('createdAt'))
|
||||
|
||||
sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
|
||||
uploader = sq_user.get('displayName')
|
||||
@ -1225,8 +1259,8 @@ def _real_extract(self, url):
|
||||
'channel_id': ('broadcaster', 'id', {str}),
|
||||
'channel_follower_count': ('broadcaster', 'followers', 'totalCount', {int_or_none}),
|
||||
'channel_is_verified': ('broadcaster', 'isPartner', {bool}),
|
||||
'uploader': ('broadcaster', 'displayName', {str}),
|
||||
'uploader_id': ('broadcaster', 'id', {str}),
|
||||
'uploader': ('curator', 'displayName', {str}),
|
||||
'uploader_id': ('curator', 'id', {str}),
|
||||
'categories': ('game', 'displayName', {str}, filter, all, filter),
|
||||
}),
|
||||
}
|
||||
|
@ -20,7 +20,6 @@
|
||||
remove_end,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
truncate_string,
|
||||
try_call,
|
||||
try_get,
|
||||
@ -29,6 +28,7 @@
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class TwitterBaseIE(InfoExtractor):
|
||||
@ -1221,20 +1221,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
}]
|
||||
|
||||
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
|
||||
|
||||
@property
|
||||
def _GRAPHQL_ENDPOINT(self):
|
||||
if self.is_logged_in:
|
||||
return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
|
||||
return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
|
||||
_GRAPHQL_ENDPOINT = '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
|
||||
|
||||
def _graphql_to_legacy(self, data, twid):
|
||||
result = traverse_obj(data, (
|
||||
'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
|
||||
lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
|
||||
'tweet_results', 'result', ('tweet', None), {dict},
|
||||
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
|
||||
data, ('tweetResult', 'result', {dict}), default={})
|
||||
result = traverse_obj(data, ('tweetResult', 'result', {dict})) or {}
|
||||
|
||||
typename = result.get('__typename')
|
||||
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
|
||||
@ -1278,37 +1268,6 @@ def _graphql_to_legacy(self, data, twid):
|
||||
|
||||
def _build_graphql_query(self, media_id):
|
||||
return {
|
||||
'variables': {
|
||||
'focalTweetId': media_id,
|
||||
'includePromotedContent': True,
|
||||
'with_rux_injections': False,
|
||||
'withBirdwatchNotes': True,
|
||||
'withCommunity': True,
|
||||
'withDownvotePerspective': False,
|
||||
'withQuickPromoteEligibilityTweetFields': True,
|
||||
'withReactionsMetadata': False,
|
||||
'withReactionsPerspective': False,
|
||||
'withSuperFollowsTweetFields': True,
|
||||
'withSuperFollowsUserFields': True,
|
||||
'withV2Timeline': True,
|
||||
'withVoice': True,
|
||||
},
|
||||
'features': {
|
||||
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
|
||||
'interactive_text_enabled': True,
|
||||
'responsive_web_edit_tweet_api_enabled': True,
|
||||
'responsive_web_enhance_cards_enabled': True,
|
||||
'responsive_web_graphql_timeline_navigation_enabled': False,
|
||||
'responsive_web_text_conversations_enabled': False,
|
||||
'responsive_web_uc_gql_enabled': True,
|
||||
'standardized_nudges_misinfo': True,
|
||||
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
|
||||
'tweetypie_unmention_optimization_enabled': True,
|
||||
'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
|
||||
'verified_phone_label_enabled': False,
|
||||
'vibe_api_enabled': True,
|
||||
},
|
||||
} if self.is_logged_in else {
|
||||
'variables': {
|
||||
'tweetId': media_id,
|
||||
'withCommunity': False,
|
||||
@ -1383,7 +1342,7 @@ def _extract_status(self, twid):
|
||||
'tweet_mode': 'extended',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
|
||||
raise
|
||||
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
|
||||
status = self._call_syndication_api(twid)
|
||||
@ -1637,8 +1596,8 @@ def _find_dimension(target):
|
||||
|
||||
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
IE_NAME = 'twitter:broadcast'
|
||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
|
||||
|
||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?P<type>broadcasts|events)/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
# untitled Periscope video
|
||||
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
|
||||
@ -1646,6 +1605,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1yNGaQLWpejGj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andrea May Sahouri - Periscope Broadcast',
|
||||
'display_id': '1yNGaQLWpejGj',
|
||||
'uploader': 'Andrea May Sahouri',
|
||||
'uploader_id': 'andreamsahouri',
|
||||
'uploader_url': 'https://twitter.com/andreamsahouri',
|
||||
@ -1653,6 +1613,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20200601',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
|
||||
@ -1660,6 +1622,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1ZkKzeyrPbaxv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starship | SN10 | High-Altitude Flight Test',
|
||||
'display_id': '1ZkKzeyrPbaxv',
|
||||
'uploader': 'SpaceX',
|
||||
'uploader_id': 'SpaceX',
|
||||
'uploader_url': 'https://twitter.com/SpaceX',
|
||||
@ -1667,6 +1630,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20210303',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
|
||||
@ -1674,6 +1639,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1OyKAVQrgzwGb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starship Flight Test',
|
||||
'display_id': '1OyKAVQrgzwGb',
|
||||
'uploader': 'SpaceX',
|
||||
'uploader_id': 'SpaceX',
|
||||
'uploader_url': 'https://twitter.com/SpaceX',
|
||||
@ -1681,21 +1647,58 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20230420',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://x.com/i/events/1910629646300762112',
|
||||
'info_dict': {
|
||||
'id': '1LyxBWDRNqyKN',
|
||||
'ext': 'mp4',
|
||||
'title': '#ガンニバル ウォッチパーティー',
|
||||
'concurrent_view_count': int,
|
||||
'display_id': '1910629646300762112',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20250423',
|
||||
'release_timestamp': 1745409000,
|
||||
'tags': ['ガンニバル'],
|
||||
'thumbnail': r're:https?://[^?#]+\.jpg\?token=',
|
||||
'timestamp': 1745403328,
|
||||
'upload_date': '20250423',
|
||||
'uploader': 'ディズニープラス公式',
|
||||
'uploader_id': 'DisneyPlusJP',
|
||||
'uploader_url': 'https://twitter.com/DisneyPlusJP',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
broadcast_id = self._match_id(url)
|
||||
broadcast_type, display_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
if broadcast_type == 'events':
|
||||
timeline = self._call_api(
|
||||
f'live_event/1/{display_id}/timeline.json', display_id)
|
||||
broadcast_id = traverse_obj(timeline, (
|
||||
'twitter_objects', 'broadcasts', ..., ('id', 'broadcast_id'),
|
||||
{str}, any, {require('broadcast ID')}))
|
||||
else:
|
||||
broadcast_id = display_id
|
||||
|
||||
broadcast = self._call_api(
|
||||
'broadcasts/show.json', broadcast_id,
|
||||
{'ids': broadcast_id})['broadcasts'][broadcast_id]
|
||||
if not broadcast:
|
||||
raise ExtractorError('Broadcast no longer exists', expected=True)
|
||||
info = self._parse_broadcast_data(broadcast, broadcast_id)
|
||||
info['title'] = broadcast.get('status') or info.get('title')
|
||||
info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
|
||||
info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': broadcast.get('status') or info.get('title'),
|
||||
'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
|
||||
'uploader_url': format_field(
|
||||
broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
|
||||
})
|
||||
if info['live_status'] == 'is_upcoming':
|
||||
self.raise_no_formats('This live broadcast has not yet started', expected=True)
|
||||
return info
|
||||
|
||||
media_key = broadcast['media_key']
|
||||
@ -1717,21 +1720,22 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
|
||||
'url': 'https://twitter.com/i/spaces/1OwxWwQOPlNxQ',
|
||||
'info_dict': {
|
||||
'id': '1RDxlgyvNXzJL',
|
||||
'id': '1OwxWwQOPlNxQ',
|
||||
'ext': 'm4a',
|
||||
'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
|
||||
'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
|
||||
'uploader': r're:Lucio Di Gaetano.*?',
|
||||
'uploader_id': 'luciodigaetano',
|
||||
'title': 'Everybody in: @mtbarra & @elonmusk discuss the future of EV charging',
|
||||
'description': 'Twitter Space participated by Elon Musk',
|
||||
'live_status': 'was_live',
|
||||
'timestamp': 1659877956,
|
||||
'upload_date': '20220807',
|
||||
'release_timestamp': 1659904215,
|
||||
'release_date': '20220807',
|
||||
'release_date': '20230608',
|
||||
'release_timestamp': 1686256230,
|
||||
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||
'timestamp': 1686254250,
|
||||
'upload_date': '20230608',
|
||||
'uploader': 'Mary Barra',
|
||||
'uploader_id': 'mtbarra',
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# post_live/TimedOut but downloadable
|
||||
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
|
||||
@ -1743,9 +1747,10 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'uploader': 'Google Cloud',
|
||||
'uploader_id': 'googlecloud',
|
||||
'live_status': 'post_live',
|
||||
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||
'timestamp': 1681409554,
|
||||
'upload_date': '20230413',
|
||||
'release_timestamp': 1681839000,
|
||||
'release_timestamp': 1681839082,
|
||||
'release_date': '20230418',
|
||||
'protocol': 'm3u8', # ffmpeg is forced
|
||||
'container': 'm4a_dash', # audio-only format fixup is applied
|
||||
@ -1762,6 +1767,9 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'uploader': '息根とめる',
|
||||
'uploader_id': 'tomeru_ikinone',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20230601',
|
||||
'release_timestamp': 1685617200,
|
||||
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||
'timestamp': 1685617198,
|
||||
'upload_date': '20230601',
|
||||
'protocol': 'm3u8', # ffmpeg is forced
|
||||
@ -1779,9 +1787,10 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'uploader': 'Candace Owens',
|
||||
'uploader_id': 'RealCandaceO',
|
||||
'live_status': 'was_live',
|
||||
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||
'timestamp': 1723931351,
|
||||
'upload_date': '20240817',
|
||||
'release_timestamp': 1723932000,
|
||||
'release_timestamp': 1723932056,
|
||||
'release_date': '20240817',
|
||||
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
|
||||
},
|
||||
@ -1861,18 +1870,21 @@ def _real_extract(self, url):
|
||||
|
||||
return {
|
||||
'id': space_id,
|
||||
'title': metadata.get('title'),
|
||||
'description': f'Twitter Space participated by {participants}',
|
||||
'uploader': traverse_obj(
|
||||
metadata, ('creator_results', 'result', 'legacy', 'name')),
|
||||
'uploader_id': traverse_obj(
|
||||
metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
|
||||
'live_status': live_status,
|
||||
'release_timestamp': try_call(
|
||||
lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
|
||||
'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
|
||||
'formats': formats,
|
||||
'http_headers': headers,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
# started_at is None when stream is_upcoming so fallback to scheduled_start for --wait-for-video
|
||||
'release_timestamp': (('started_at', 'scheduled_start'), {int_or_none(scale=1000)}, any),
|
||||
'timestamp': ('created_at', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
**traverse_obj(metadata, ('creator_results', 'result', 'legacy', {
|
||||
'uploader': ('name', {str}),
|
||||
'uploader_id': ('screen_name', {str_or_none}),
|
||||
'thumbnail': ('profile_image_url_https', {lambda x: x.replace('_normal', '_400x400')}, {url_or_none}),
|
||||
})),
|
||||
}
|
||||
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -13,10 +14,12 @@
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
jwt_decode_hs256,
|
||||
merge_dicts,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
@ -39,6 +42,18 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
_LOGIN_REQUIRED = False
|
||||
_LOGIN_URL = 'https://vimeo.com/log_in'
|
||||
_REFERER_HINT = (
|
||||
'Cannot download embed-only video without embedding URL. Please call yt-dlp '
|
||||
'with the URL of the page that embeds this video.')
|
||||
_IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw=='
|
||||
_IOS_CLIENT_HEADERS = {
|
||||
'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
|
||||
'Accept-Language': 'en',
|
||||
'User-Agent': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0',
|
||||
}
|
||||
_IOS_OAUTH_CACHE_KEY = 'oauth-token-ios'
|
||||
_ios_oauth_token = None
|
||||
_viewer_info = None
|
||||
|
||||
@staticmethod
|
||||
def _smuggle_referrer(url, referrer_url):
|
||||
@ -52,8 +67,21 @@ def _unsmuggle_headers(self, url):
|
||||
headers['Referer'] = data['referer']
|
||||
return url, data, headers
|
||||
|
||||
def _jwt_is_expired(self, token):
    """Return True if the `exp` claim of `token` is less than 120 seconds away (or already past)."""
    seconds_left = jwt_decode_hs256(token)['exp'] - time.time()
    return seconds_left < 120
|
||||
|
||||
def _fetch_viewer_info(self, display_id=None, fatal=True):
    """Return the JSON served by https://vimeo.com/_next/viewer.

    The response is kept in `self._viewer_info` and reused until its `jwt`
    is reported as expired by `_jwt_is_expired`. `display_id` and `fatal`
    are forwarded to `_download_json`.
    """
    cached = self._viewer_info
    if not cached or self._jwt_is_expired(cached['jwt']):
        # Nothing cached yet, or the cached token is about to run out: fetch a fresh one
        self._viewer_info = self._download_json(
            'https://vimeo.com/_next/viewer', display_id, 'Downloading web token info',
            'Failed to download web token info', fatal=fatal, headers={'Accept': 'application/json'})

    return self._viewer_info
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
|
||||
viewer = self._fetch_viewer_info()
|
||||
data = {
|
||||
'action': 'login',
|
||||
'email': username,
|
||||
@ -88,13 +116,15 @@ def _get_video_password(self):
|
||||
expected=True)
|
||||
return password
|
||||
|
||||
def _verify_video_password(self, video_id, password, token):
|
||||
url = f'https://vimeo.com/{video_id}'
|
||||
def _verify_video_password(self, video_id, path=None):
|
||||
video_password = self._get_video_password()
|
||||
token = self._fetch_viewer_info(video_id)['xsrft']
|
||||
url = join_nonempty('https://vimeo.com', path, video_id, delim='/')
|
||||
try:
|
||||
return self._download_webpage(
|
||||
self._request_webpage(
|
||||
f'{url}/password', video_id,
|
||||
'Submitting video password', data=json.dumps({
|
||||
'password': password,
|
||||
'password': video_password,
|
||||
'token': token,
|
||||
}, separators=(',', ':')).encode(), headers={
|
||||
'Accept': '*/*',
|
||||
@ -106,6 +136,10 @@ def _verify_video_password(self, video_id, password, token):
|
||||
raise ExtractorError('Wrong password', expected=True)
|
||||
raise
|
||||
|
||||
def _extract_config_url(self, webpage, **kwargs):
    """Search `webpage` for the value of a `data-config-url="..."` attribute.

    Extra keyword arguments are forwarded to `_html_search_regex`.
    """
    config_url_re = r'\bdata-config-url="([^"]+)"'
    return self._html_search_regex(config_url_re, webpage, 'config URL', **kwargs)
|
||||
|
||||
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
|
||||
vimeo_config = self._search_regex(
|
||||
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
|
||||
@ -153,6 +187,7 @@ def _parse_config(self, config, video_id):
|
||||
sep_pattern = r'/sep/video/'
|
||||
for files_type in ('hls', 'dash'):
|
||||
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
|
||||
# TODO: Also extract 'avc_url'? Investigate if there are 'hevc_url', 'av1_url'?
|
||||
manifest_url = cdn_data.get('url')
|
||||
if not manifest_url:
|
||||
continue
|
||||
@ -233,26 +268,48 @@ def _parse_config(self, config, video_id):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
'release_timestamp': traverse_obj(live_event, ('ingest', 'scheduled_start_time', {parse_iso8601})),
|
||||
'release_timestamp': traverse_obj(live_event, ('ingest', (
|
||||
('scheduled_start_time', {parse_iso8601}),
|
||||
('start_time', {int_or_none}),
|
||||
), any)),
|
||||
# Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
|
||||
# at the same time without actual units specified.
|
||||
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
|
||||
}
|
||||
|
||||
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
|
||||
def _fetch_oauth_token(self):
    """Return an OAuth access token for the iOS client credentials.

    Lookup order: the value already held in `self._ios_oauth_token`, then the
    cache entry stored under `_IOS_OAUTH_CACHE_KEY`, and finally a
    client-credentials request to api.vimeo.com whose `access_token` is
    written back to the cache.
    """
    if not self._ios_oauth_token:
        self._ios_oauth_token = self.cache.load(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY)

    if self._ios_oauth_token:
        return self._ios_oauth_token

    # Neither memory nor cache had a token: request a new one
    request_headers = {
        'Authorization': f'Basic {self._IOS_CLIENT_AUTH}',
        **self._IOS_CLIENT_HEADERS,
    }
    form = urlencode_postdata({
        'grant_type': 'client_credentials',
        'scope': 'private public create edit delete interact upload purchased stats',
    }, quote_via=urllib.parse.quote)
    response = self._download_json(
        'https://api.vimeo.com/oauth/authorize/client', None,
        'Fetching OAuth token', 'Failed to fetch OAuth token',
        headers=request_headers, data=form)

    self._ios_oauth_token = response['access_token']
    self.cache.store(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY, self._ios_oauth_token)
    return self._ios_oauth_token
|
||||
|
||||
def _call_videos_api(self, video_id, unlisted_hash=None, **kwargs):
|
||||
return self._download_json(
|
||||
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
|
||||
video_id, 'Downloading API JSON', headers={
|
||||
'Authorization': f'jwt {jwt_token}',
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._fetch_oauth_token()}',
|
||||
**self._IOS_CLIENT_HEADERS,
|
||||
}, query={
|
||||
'fields': ','.join((
|
||||
'config_url', 'created_time', 'description', 'download', 'license',
|
||||
'metadata.connections.comments.total', 'metadata.connections.likes.total',
|
||||
'release_time', 'stats.plays')),
|
||||
'config_url', 'embed_player_config_url', 'player_embed_url', 'download', 'play',
|
||||
'files', 'description', 'license', 'release_time', 'created_time', 'stats.plays',
|
||||
'metadata.connections.comments.total', 'metadata.connections.likes.total')),
|
||||
}, **kwargs)
|
||||
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=None):
|
||||
# Original/source formats are only available when logged in
|
||||
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
|
||||
return
|
||||
@ -283,12 +340,8 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None,
|
||||
'quality': 1,
|
||||
}
|
||||
|
||||
jwt = jwt or traverse_obj(self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
|
||||
if not jwt:
|
||||
return
|
||||
original_response = api_data or self._call_videos_api(
|
||||
video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
|
||||
video_id, unlisted_hash, fatal=False, expected_status=(403, 404))
|
||||
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
|
||||
download_url = download_data.get('link')
|
||||
if not download_url or download_data.get('quality') != 'source':
|
||||
@ -327,7 +380,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
(?:
|
||||
(?P<u>user)|
|
||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||
(?:.*?/)??
|
||||
(?:(?!event/).*?/)??
|
||||
(?P<q>
|
||||
(?:
|
||||
play_redirect_hls|
|
||||
@ -410,6 +463,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 10,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d',
|
||||
},
|
||||
'params': {
|
||||
@ -500,15 +554,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'The DMCI',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
|
||||
'uploader_id': 'dmci',
|
||||
'timestamp': 1324343742,
|
||||
'timestamp': 1324361742,
|
||||
'upload_date': '20111220',
|
||||
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
|
||||
'description': 'md5:f37b4ad0f3ded6fa16f38ecde16c3c44',
|
||||
'duration': 60,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d',
|
||||
'like_count': int,
|
||||
'tags': 'count:11',
|
||||
'release_timestamp': 1324361742,
|
||||
'release_date': '20111220',
|
||||
},
|
||||
# 'params': {'format': 'Original'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@ -521,15 +576,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '393756517',
|
||||
# 'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1582642091,
|
||||
'timestamp': 1582660091,
|
||||
'uploader_id': 'frameworkla',
|
||||
'title': 'Straight To Hell - Sabrina: Netflix',
|
||||
'uploader': 'Framework Studio',
|
||||
'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
|
||||
'upload_date': '20200225',
|
||||
'duration': 176,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d',
|
||||
'uploader_url': 'https://vimeo.com/frameworkla',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'release_timestamp': 1582660091,
|
||||
'release_date': '20200225',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@ -630,7 +688,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'upload_date': '20150209',
|
||||
'timestamp': 1423518307,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/default',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/default',
|
||||
'duration': 10,
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/user20132939',
|
||||
@ -667,6 +725,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/aliniamedia',
|
||||
'release_date': '20160329',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@ -678,18 +737,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# 'ext': 'm4v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
|
||||
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
|
||||
'description': 'md5:9441e6829ae94f380cc6417d982f63ac',
|
||||
'uploader_id': 'fireworkchampions',
|
||||
'uploader': 'Firework Champions',
|
||||
'upload_date': '20150910',
|
||||
'timestamp': 1441901895,
|
||||
'timestamp': 1441916295,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d',
|
||||
'uploader_url': 'https://vimeo.com/fireworkchampions',
|
||||
'tags': 'count:6',
|
||||
'duration': 229,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1441916295,
|
||||
'release_date': '20150910',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -820,7 +880,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Raja Virdi',
|
||||
'uploader_id': 'rajavirdi',
|
||||
'uploader_url': 'https://vimeo.com/rajavirdi',
|
||||
'duration': 309,
|
||||
'duration': 300,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
@ -860,12 +920,9 @@ def _verify_player_video_password(self, url, video_id, headers):
|
||||
return checked
|
||||
|
||||
def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
|
||||
|
||||
for retry in (False, True):
|
||||
try:
|
||||
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
|
||||
video = self._call_videos_api(video_id, unlisted_hash)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
|
||||
@ -873,15 +930,14 @@ def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
|
||||
({json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
)):
|
||||
self._verify_video_password(
|
||||
video_id, self._get_video_password(), viewer['xsrft'])
|
||||
self._verify_video_password(video_id)
|
||||
continue
|
||||
raise
|
||||
|
||||
info = self._parse_config(self._download_json(
|
||||
video['config_url'], video_id), video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, api_data=video)
|
||||
if source_format:
|
||||
info['formats'].append(source_format)
|
||||
|
||||
@ -904,8 +960,7 @@ def _try_album_password(self, url):
|
||||
r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
|
||||
if not album_id:
|
||||
return
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', album_id, fatal=False)
|
||||
viewer = self._fetch_viewer_info(album_id, fatal=False)
|
||||
if not viewer:
|
||||
webpage = self._download_webpage(url, album_id)
|
||||
viewer = self._parse_json(self._search_regex(
|
||||
@ -963,9 +1018,7 @@ def _real_extract(self, url):
|
||||
raise
|
||||
errmsg = error.cause.response.read()
|
||||
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
|
||||
raise ExtractorError(
|
||||
'Cannot download embed-only video without embedding URL. Please call yt-dlp '
|
||||
'with the URL of the page that embeds this video.', expected=True)
|
||||
raise ExtractorError(self._REFERER_HINT, expected=True)
|
||||
# 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block
|
||||
status = error.cause.status
|
||||
dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked'
|
||||
@ -1010,8 +1063,7 @@ def _real_extract(self, url):
|
||||
channel_id = self._search_regex(
|
||||
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
|
||||
if channel_id:
|
||||
config_url = self._html_search_regex(
|
||||
r'\bdata-config-url="([^"]+)"', webpage, 'config URL', default=None)
|
||||
config_url = self._extract_config_url(webpage, default=None)
|
||||
video_description = clean_html(get_element_by_class('description', webpage))
|
||||
info_dict.update({
|
||||
'channel_id': channel_id,
|
||||
@ -1122,7 +1174,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
|
||||
'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998',
|
||||
'upload_date': '20140906',
|
||||
'timestamp': 1410032453,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'comment_count': int,
|
||||
'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/',
|
||||
'duration': 53,
|
||||
@ -1132,7 +1184,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# requires Referer to be passed along with og:video:url
|
||||
'url': 'https://vimeo.com/ondemand/36938/126682985',
|
||||
@ -1149,13 +1201,14 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
|
||||
'duration': 121,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'like_count': int,
|
||||
'tags': 'count:5',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
'url': 'https://vimeo.com/ondemand/nazmaalik',
|
||||
'only_matching': True,
|
||||
@ -1237,7 +1290,7 @@ class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/nkistudio/videos',
|
||||
'info_dict': {
|
||||
'title': 'Nki',
|
||||
'title': 'AKAMA',
|
||||
'id': 'nkistudio',
|
||||
},
|
||||
'playlist_mincount': 66,
|
||||
@ -1303,8 +1356,7 @@ def _fetch_page(self, album_id, authorization, hashed_pass, page):
|
||||
|
||||
def _real_extract(self, url):
|
||||
album_id = self._match_id(url)
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', album_id, fatal=False)
|
||||
viewer = self._fetch_viewer_info(album_id, fatal=False)
|
||||
if not viewer:
|
||||
webpage = self._download_webpage(url, album_id)
|
||||
viewer = self._parse_json(self._search_regex(
|
||||
@ -1370,10 +1422,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
'uploader_id': 'user170863801',
|
||||
'uploader_url': 'https://vimeo.com/user170863801',
|
||||
'duration': 30,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML'],
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
|
||||
'md5': 'c507a72f780cacc12b2248bb4006d253',
|
||||
@ -1423,12 +1475,8 @@ def _real_extract(self, url):
|
||||
user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
|
||||
data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
|
||||
data = self._download_json(data_url, video_id)
|
||||
viewer = {}
|
||||
if data.get('isLocked') is True:
|
||||
video_password = self._get_video_password()
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id)
|
||||
self._verify_video_password(video_id, video_password, viewer['xsrft'])
|
||||
self._verify_video_password(video_id)
|
||||
data = self._download_json(data_url, video_id)
|
||||
clip_data = data['clipData']
|
||||
config_url = clip_data['configUrl']
|
||||
@ -1436,7 +1484,7 @@ def _real_extract(self, url):
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
|
||||
video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
|
||||
video_id, unlisted_hash=clip_data.get('unlistedHash'))
|
||||
if source_format:
|
||||
info_dict['formats'].append(source_format)
|
||||
info_dict['description'] = clean_html(clip_data.get('description'))
|
||||
@ -1528,20 +1576,22 @@ class VimeoProIE(VimeoBaseInfoExtractor):
|
||||
'uploader_id': 'openstreetmapus',
|
||||
'uploader': 'OpenStreetMap US',
|
||||
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||
'description': 'md5:2c362968038d4499f4d79f88458590c1',
|
||||
'description': 'md5:8cf69a1a435f2d763f4adf601e9c3125',
|
||||
'duration': 1595,
|
||||
'upload_date': '20130610',
|
||||
'timestamp': 1370893156,
|
||||
'timestamp': 1370907556,
|
||||
'license': 'by',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': 'count:1',
|
||||
'release_timestamp': 1370907556,
|
||||
'release_date': '20130610',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# password-protected VimeoPro page with Vimeo player embed
|
||||
'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
|
||||
@ -1549,7 +1599,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
|
||||
'id': '764543723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
|
||||
'uploader': 'CADFEM',
|
||||
'uploader_id': 'cadfem',
|
||||
@ -1561,6 +1611,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
|
||||
'videopassword': 'Conference2022',
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -1597,3 +1648,377 @@ def _real_extract(self, url):
|
||||
|
||||
return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True,
|
||||
description=description)
|
||||
|
||||
|
||||
class VimeoEventIE(VimeoBaseInfoExtractor):
|
||||
IE_NAME = 'vimeo:event'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?vimeo\.com/event/(?P<id>\d+)(?:/
|
||||
(?:
|
||||
(?:embed/)?(?P<unlisted_hash>[\da-f]{10})|
|
||||
videos/(?P<video_id>\d+)
|
||||
)
|
||||
)?'''
|
||||
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>https?://vimeo\.com/event/\d+/embed(?:[/?][^"\']*)?)["\'][^>]*>']
|
||||
_TESTS = [{
|
||||
# stream_privacy.view: 'anybody'
|
||||
'url': 'https://vimeo.com/event/5116195',
|
||||
'info_dict': {
|
||||
'id': '1082194134',
|
||||
'ext': 'mp4',
|
||||
'display_id': '5116195',
|
||||
'title': 'Skidmore College Commencement 2025',
|
||||
'description': 'md5:1902dd5165d21f98aa198297cc729d23',
|
||||
'uploader': 'Skidmore College',
|
||||
'uploader_id': 'user116066434',
|
||||
'uploader_url': 'https://vimeo.com/user116066434',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'duration': 9810,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'timestamp': 1747502974,
|
||||
'upload_date': '20250517',
|
||||
'release_timestamp': 1747502998,
|
||||
'release_date': '20250517',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# stream_privacy.view: 'embed_only'
|
||||
'url': 'https://vimeo.com/event/5034253/embed',
|
||||
'info_dict': {
|
||||
'id': '1071439154',
|
||||
'ext': 'mp4',
|
||||
'display_id': '5034253',
|
||||
'title': 'Advancing Humans with AI',
|
||||
'description': r're:AI is here to stay, but how do we ensure that people flourish in a world of pervasive AI use.{322}$',
|
||||
'uploader': 'MIT Media Lab',
|
||||
'uploader_id': 'mitmedialab',
|
||||
'uploader_url': 'https://vimeo.com/mitmedialab',
|
||||
'duration': 23235,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'chapters': 'count:37',
|
||||
'release_timestamp': 1744290000,
|
||||
'release_date': '20250410',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
'http_headers': {'Referer': 'https://www.media.mit.edu/events/aha-symposium/'},
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# Last entry on 2nd page of the 37 video playlist, but use clip_to_play_id API param shortcut
|
||||
'url': 'https://vimeo.com/event/4753126/videos/1046153257',
|
||||
'info_dict': {
|
||||
'id': '1046153257',
|
||||
'ext': 'mp4',
|
||||
'display_id': '4753126',
|
||||
'title': 'January 12, 2025 The True Vine (Pastor John Mindrup)',
|
||||
'description': 'The True Vine (Pastor \tJohn Mindrup)',
|
||||
'uploader': 'Salem United Church of Christ',
|
||||
'uploader_id': 'user230181094',
|
||||
'uploader_url': 'https://vimeo.com/user230181094',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'duration': 4962,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'timestamp': 1736702464,
|
||||
'upload_date': '20250112',
|
||||
'release_timestamp': 1736702543,
|
||||
'release_date': '20250112',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# "24/7" livestream
|
||||
'url': 'https://vimeo.com/event/4768062',
|
||||
'info_dict': {
|
||||
'id': '1079901414',
|
||||
'ext': 'mp4',
|
||||
'display_id': '4768062',
|
||||
'title': r're:GRACELAND CAM \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'description': '24/7 camera at Graceland Mansion',
|
||||
'uploader': 'Elvis Presley\'s Graceland',
|
||||
'uploader_id': 'visitgraceland',
|
||||
'uploader_url': 'https://vimeo.com/visitgraceland',
|
||||
'release_timestamp': 1745975450,
|
||||
'release_date': '20250430',
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {'skip_download': 'livestream'},
|
||||
}, {
|
||||
# stream_privacy.view: 'unlisted' with unlisted_hash in URL path (stream_privacy.embed: 'whitelist')
|
||||
'url': 'https://vimeo.com/event/4259978/3db517c479',
|
||||
'info_dict': {
|
||||
'id': '939104114',
|
||||
'ext': 'mp4',
|
||||
'display_id': '4259978',
|
||||
'title': 'Enhancing Credibility in Your Community Science Project',
|
||||
'description': 'md5:eab953341168b9c146bc3cfe3f716070',
|
||||
'uploader': 'NOAA Research',
|
||||
'uploader_id': 'noaaresearch',
|
||||
'uploader_url': 'https://vimeo.com/noaaresearch',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'duration': 3961,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'timestamp': 1716408008,
|
||||
'upload_date': '20240522',
|
||||
'release_timestamp': 1716408062,
|
||||
'release_date': '20240522',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# "done" event with video_id in URL and unlisted_hash in VimeoIE URL
|
||||
'url': 'https://vimeo.com/event/595460/videos/498149131/',
|
||||
'info_dict': {
|
||||
'id': '498149131',
|
||||
'ext': 'mp4',
|
||||
'display_id': '595460',
|
||||
'title': '2021 Eighth Annual John Cardinal Foley Lecture on Social Communications',
|
||||
'description': 'Replay: https://vimeo.com/catholicphilly/review/498149131/544f26a12f',
|
||||
'uploader': 'Kearns Media Consulting LLC',
|
||||
'uploader_id': 'kearnsmediaconsulting',
|
||||
'uploader_url': 'https://vimeo.com/kearnsmediaconsulting',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'duration': 4466,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'timestamp': 1612228466,
|
||||
'upload_date': '20210202',
|
||||
'release_timestamp': 1612228538,
|
||||
'release_date': '20210202',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# stream_privacy.view: 'password'; stream_privacy.embed: 'public'
|
||||
'url': 'https://vimeo.com/event/4940578',
|
||||
'info_dict': {
|
||||
'id': '1059263570',
|
||||
'ext': 'mp4',
|
||||
'display_id': '4940578',
|
||||
'title': 'TMAC AKC AGILITY 2-22-2025',
|
||||
'uploader': 'Paws \'N Effect',
|
||||
'uploader_id': 'pawsneffect',
|
||||
'uploader_url': 'https://vimeo.com/pawsneffect',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'duration': 33115,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'timestamp': 1740261836,
|
||||
'upload_date': '20250222',
|
||||
'release_timestamp': 1740261873,
|
||||
'release_date': '20250222',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'params': {
|
||||
'videopassword': '22',
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# API serves a playlist of 37 videos, but the site only streams the newest one (changes every Sunday)
|
||||
'url': 'https://vimeo.com/event/4753126',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Scheduled for 2025.05.15 but never started; "unavailable"; stream_privacy.view: "anybody"
|
||||
'url': 'https://vimeo.com/event/5120811/embed',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vimeo.com/event/5112969/embed?muted=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vimeo.com/event/5097437/embed/interaction?muted=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://vimeo.com/event/5113032/embed?autoplay=1&muted=1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Ended livestream with video_id
|
||||
'url': 'https://vimeo.com/event/595460/videos/507329569/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# stream_privacy.view: 'unlisted' with unlisted_hash in URL path (stream_privacy.embed: 'public')
|
||||
'url': 'https://vimeo.com/event/4606123/embed/358d60ce2e',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# Same result as https://vimeo.com/event/5034253/embed
|
||||
'url': 'https://www.media.mit.edu/events/aha-symposium/',
|
||||
'info_dict': {
|
||||
'id': '1071439154',
|
||||
'ext': 'mp4',
|
||||
'display_id': '5034253',
|
||||
'title': 'Advancing Humans with AI',
|
||||
'description': r're:AI is here to stay, but how do we ensure that people flourish in a world of pervasive AI use.{322}$',
|
||||
'uploader': 'MIT Media Lab',
|
||||
'uploader_id': 'mitmedialab',
|
||||
'uploader_url': 'https://vimeo.com/mitmedialab',
|
||||
'duration': 23235,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'chapters': 'count:37',
|
||||
'release_timestamp': 1744290000,
|
||||
'release_date': '20250410',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}]
|
||||
|
||||
_EVENT_FIELDS = (
|
||||
'title', 'uri', 'schedule', 'stream_description', 'stream_privacy.embed', 'stream_privacy.view',
|
||||
'clip_to_play.name', 'clip_to_play.uri', 'clip_to_play.config_url', 'clip_to_play.live.status',
|
||||
'clip_to_play.privacy.embed', 'clip_to_play.privacy.view', 'clip_to_play.password',
|
||||
'streamable_clip.name', 'streamable_clip.uri', 'streamable_clip.config_url', 'streamable_clip.live.status',
|
||||
)
|
||||
_VIDEOS_FIELDS = ('items', 'uri', 'name', 'config_url', 'duration', 'live.status')
|
||||
|
||||
def _call_events_api(
    self, event_id, ep=None, unlisted_hash=None, note=None,
    fields=(), referrer=None, query=None, headers=None,
):
    """Download JSON from the `live_events` API for the given event.

    `ep` is an optional sub-endpoint appended to the event path, and
    `unlisted_hash` is joined to the event ID with a colon when given.
    `note` is only used to build the progress/error messages.
    `fields`, `referrer` and `query` make up the query string; `referrer`
    is also sent as the `Referer` header. Entries of `headers` override the
    default headers built here.
    """
    resource = join_nonempty('event', ep, note, 'API JSON', delim=' ')

    event_ref = join_nonempty(event_id, unlisted_hash, delim=':')
    api_url = join_nonempty('https://api.vimeo.com/live_events', event_ref, ep, delim='/')

    request_query = filter_dict({
        'fields': ','.join(fields) or [],
        # Correct spelling with 4 R's is deliberate
        'referrer': referrer,
        **(query or {}),
    })
    request_headers = filter_dict({
        'Accept': 'application/json',
        'Authorization': f'jwt {self._fetch_viewer_info(event_id)["jwt"]}',
        'Referer': referrer,
        **(headers or {}),
    })

    return self._download_json(
        api_url, event_id, f'Downloading {resource}', f'Failed to download {resource}',
        query=request_query, headers=request_headers)
|
||||
|
||||
@staticmethod
def _extract_video_id_and_unlisted_hash(video):
    """Split a video's `uri` into `(video_id, unlisted_hash)`.

    The `uri` is expected to look like `/videos/<id>` or
    `/videos/<id>:<hash>`. Returns `(None, None)` when `uri` is missing or
    does not start with `/videos/`; the hash is `None` when absent.
    """
    uri_prefix = '/videos/'
    has_video_uri = traverse_obj(video, ('uri', {lambda x: x.startswith('/videos/')}))
    if not has_video_uri:
        return None, None

    id_and_hash = video['uri'][len(uri_prefix):]
    video_id, _, unlisted_hash = id_and_hash.partition(':')
    # partition() yields '' when there is no ':' separator; normalize that to None
    return video_id, unlisted_hash or None
|
||||
|
||||
def _vimeo_url_result(self, video_id, unlisted_hash=None, event_id=None):
    """Build a url_transparent result that hands the video over to VimeoIE.

    The target URL is `https://vimeo.com/<video_id>`, with `/<unlisted_hash>`
    appended when given. `event_id` becomes the result's `display_id`, and
    `live_status` is fixed to `'was_live'`.
    """
    # VimeoIE can extract more metadata and formats for was_live event videos
    vimeo_url = join_nonempty('https://vimeo.com', video_id, unlisted_hash, delim='/')
    return self.url_result(
        vimeo_url, VimeoIE, video_id,
        display_id=event_id, live_status='was_live', url_transparent=True)
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield the parent class's embed URLs, each passed through `_smuggle_referrer` with the page `url`."""
    parent_embed_urls = super()._extract_embed_urls(url, webpage)
    yield from (cls._smuggle_referrer(embed_url, url) for embed_url in parent_embed_urls)
|
||||
|
||||
def _real_extract(self, url):
    """Extract a Vimeo live event.

    Fetches the event data from the events API (verifying the video password
    and retrying once when the API answers with error code 2204), then picks
    one of three paths:

    * the clip is 'unavailable' and the scheduled start is in the future:
      report that no formats are available and return upcoming-event metadata;
    * the event is 'embed_only': read the config URL from the embed iframe page;
    * otherwise: page through the event's videos to find the requested or
      currently streaming/finished video. Finished ('done') videos are
      delegated to VimeoIE via `_vimeo_url_result`.
    """
    url, _, headers = self._unsmuggle_headers(url)
    # XXX: Keep key name in sync with _unsmuggle_headers
    referrer = headers.get('Referer')
    mobj = self._match_valid_url(url)
    event_id, unlisted_hash, video_id = mobj.group('id', 'unlisted_hash', 'video_id')

    # At most two attempts; the second is only reached after password verification
    for is_retry in (False, True):
        try:
            live_event_data = self._call_events_api(
                event_id, unlisted_hash=unlisted_hash, fields=self._EVENT_FIELDS,
                referrer=referrer, query={'clip_to_play_id': video_id or '0'},
                headers={'Accept': 'application/vnd.vimeo.*+json;version=3.4.9'})
        except ExtractorError as e:
            if is_retry or not isinstance(e.cause, HTTPError) or e.cause.status not in (400, 403):
                raise
            error_data = traverse_obj(e.cause.response.read(), ({json.loads}, {dict})) or {}
            api_error_code = error_data.get('error_code')
            if api_error_code == 2204:
                self._verify_video_password(event_id, path='event')
                continue
            if api_error_code == 3200:
                raise ExtractorError(self._REFERER_HINT, expected=True)
            api_error_msg = error_data.get('error')
            if api_error_msg:
                raise ExtractorError(f'Vimeo says: {api_error_msg}', expected=True)
            raise
        else:
            break

    # stream_privacy.view can be: 'anybody', 'embed_only', 'nobody', 'password', 'unlisted'
    view_policy = live_event_data['stream_privacy']['view']
    if view_policy == 'nobody':
        raise ExtractorError('This event has not been made available to anyone', expected=True)

    clip_data = traverse_obj(live_event_data, ('clip_to_play', {dict})) or {}
    # live.status can be: 'streaming' (is_live), 'done' (was_live), 'unavailable' (is_upcoming OR dead)
    clip_status = traverse_obj(clip_data, ('live', 'status', {str}))
    start_time = traverse_obj(live_event_data, ('schedule', 'start_time', {str}))
    release_timestamp = parse_iso8601(start_time)

    is_upcoming = clip_status == 'unavailable' and release_timestamp and release_timestamp > time.time()

    if is_upcoming:
        self.raise_no_formats(f'This live event is scheduled for {start_time}', expected=True)
        live_status = 'is_upcoming'
        config_url = None

    elif view_policy == 'embed_only':
        embed_page_url = join_nonempty(
            'https://vimeo.com/event', event_id, 'embed', unlisted_hash, delim='/')
        webpage = self._download_webpage(
            embed_page_url, event_id, 'Downloading embed iframe webpage',
            impersonate=True, headers=headers)
        # The _parse_config result will overwrite live_status w/ 'is_live' if livestream is active
        live_status = 'was_live'
        config_url = self._extract_config_url(webpage)

    else:  # view_policy in ('anybody', 'password', 'unlisted')
        if video_id:
            clip_id, clip_hash = self._extract_video_id_and_unlisted_hash(clip_data)
            clip_is_usable = clip_hash or view_policy != 'unlisted'
            if video_id == clip_id and clip_status == 'done' and clip_is_usable:
                # The API already returned the requested clip; no need to page through the videos
                return self._vimeo_url_result(clip_id, clip_hash, event_id)

            def video_filter(_, candidate):
                return self._extract_video_id_and_unlisted_hash(candidate)[0] == video_id
        else:
            def video_filter(_, candidate):
                return candidate['live']['status'] in ('streaming', 'done')

        for page_num in itertools.count(1):
            videos_page = self._call_events_api(
                event_id, 'videos', unlisted_hash=unlisted_hash, note=f'page {page_num}',
                fields=self._VIDEOS_FIELDS, referrer=referrer, query={'page': page_num},
                headers={'Accept': 'application/vnd.vimeo.*;version=3.4.1'})

            video = traverse_obj(videos_page, ('data', video_filter, any))
            if video:
                break
            if not traverse_obj(videos_page, ('paging', 'next', {str})):
                break

        status_to_live_status = {
            'streaming': 'is_live',
            'done': 'was_live',
        }
        live_status = status_to_live_status.get(traverse_obj(video, ('live', 'status', {str})))

        if not live_status:  # requested video_id is unavailable or no videos are available
            raise ExtractorError('This event video is unavailable', expected=True)
        if live_status == 'was_live':
            return self._vimeo_url_result(*self._extract_video_id_and_unlisted_hash(video), event_id)
        config_url = video['config_url']

    if config_url:  # view_policy == 'embed_only' or live_status == 'is_live'
        config = self._download_json(config_url, event_id, 'Downloading config JSON')
        info = filter_dict(self._parse_config(config, event_id))
    else:  # live_status == 'is_upcoming'
        info = {'id': event_id}

    if info.get('live_status') == 'post_live':
        self.report_warning('This live event recently ended and some formats may not yet be available')

    event_metadata = traverse_obj(live_event_data, {
        'title': ('title', {str}),
        'description': ('stream_description', {str}),
    })
    # Keys from the parsed config take precedence over the event-level values
    return {
        **event_metadata,
        'display_id': event_id,
        'live_status': live_status,
        'release_timestamp': release_timestamp,
        **info,
    }
|
||||
|
@ -300,6 +300,24 @@ class VKIE(VKBaseIE):
|
||||
'upload_date': '20250130',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://vkvideo.ru/video-50883936_456244102',
|
||||
'info_dict': {
|
||||
'id': '-50883936_456244102',
|
||||
'ext': 'mp4',
|
||||
'title': 'Добивание Украины // Техник в коме // МОЯ ЗЛОСТЬ №140',
|
||||
'description': 'md5:a9bc46181e9ebd0fdd82cef6c0191140',
|
||||
'uploader': 'Стас Ай, Как Просто!',
|
||||
'uploader_id': '-50883936',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'duration': 4651,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'chapters': 'count:59',
|
||||
'timestamp': 1743333869,
|
||||
'upload_date': '20250330',
|
||||
},
|
||||
},
|
||||
{
|
||||
# live stream, hls and rtmp links, most likely already finished live
|
||||
# stream by the time you are reading this comment
|
||||
@ -540,7 +558,7 @@ def _real_extract(self, url):
|
||||
'title': ('md_title', {unescapeHTML}),
|
||||
'description': ('description', {clean_html}, filter),
|
||||
'thumbnail': ('jpg', {url_or_none}),
|
||||
'uploader': ('md_author', {str}),
|
||||
'uploader': ('md_author', {unescapeHTML}),
|
||||
'uploader_id': (('author_id', 'authorId'), {str_or_none}, any),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), {
|
||||
|
@ -1,7 +1,6 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .once import OnceIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -10,7 +9,7 @@
|
||||
)
|
||||
|
||||
|
||||
class VoxMediaVolumeIE(OnceIE):
|
||||
class VoxMediaVolumeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://volume\.vox-cdn\.com/embed/(?P<id>[0-9a-f]{9})'
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -57,7 +56,8 @@ def _real_extract(self, url):
|
||||
if not provider_video_id:
|
||||
continue
|
||||
if provider_video_type == 'brightcove':
|
||||
info['formats'] = self._extract_once_formats(provider_video_id)
|
||||
# TODO: Find embed example or confirm that Vox has stopped using Brightcove
|
||||
raise ExtractorError('Vox Brightcove embeds are currently unsupported')
|
||||
else:
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
@ -155,20 +155,6 @@ class VoxMediaIE(InfoExtractor):
|
||||
},
|
||||
}],
|
||||
'skip': 'Page no longer contain videos',
|
||||
}, {
|
||||
# volume embed, Brightcove Once
|
||||
'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
|
||||
'md5': '2dbc77b8b0bff1894c2fce16eded637d',
|
||||
'info_dict': {
|
||||
'id': '1231c973d',
|
||||
'ext': 'mp4',
|
||||
'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
|
||||
'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
|
||||
'timestamp': 1402938000,
|
||||
'upload_date': '20140616',
|
||||
'duration': 4114,
|
||||
},
|
||||
'add_ie': ['VoxMediaVolume'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -2,9 +2,11 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
@ -70,8 +72,14 @@ def _real_extract(self, url):
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
if error_desc:
|
||||
if video_info.get('error_code') == 'GEOBLOCKED':
|
||||
error_code = video_info.get('error_code')
|
||||
if error_code == 'GEOBLOCKED':
|
||||
self.raise_geo_restricted(error_desc, video_info.get('geoList'))
|
||||
elif error_code == 'DELIVERY_ERROR':
|
||||
if traverse_obj(video_data, ('delivery', 'code')) == 500:
|
||||
self.report_drm(video_id)
|
||||
error_desc = join_nonempty(
|
||||
error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ')
|
||||
raise ExtractorError(error_desc, expected=True)
|
||||
|
||||
title = video_info['title']
|
||||
|
@ -290,12 +290,14 @@ def _real_extract(self, url):
|
||||
|
||||
elif live_status == 'is_live':
|
||||
video_info = self._call_api(
|
||||
f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
|
||||
f'/video/v1.3/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
|
||||
video_id, note='Downloading live JSON')
|
||||
playback = self._parse_json(video_info['lipPlayback'], video_id)
|
||||
m3u8_url = traverse_obj(playback, (
|
||||
'media', lambda _, v: v['protocol'] == 'HLS', 'path', {url_or_none}), get_all=False)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)
|
||||
# Live subtitles are not downloadable, but extract to silence "ignoring subs" warning
|
||||
formats, _ = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)
|
||||
|
||||
elif live_status == 'post_live':
|
||||
if availability in ('premium_only', 'subscriber_only'):
|
||||
|
@ -45,7 +45,7 @@ class XinpianchangIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id=video_id)
|
||||
webpage = self._download_webpage(url, video_id=video_id, headers={'Referer': url})
|
||||
video_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['detail']['video']
|
||||
|
||||
data = self._download_json(
|
||||
|
@ -35,6 +35,7 @@
|
||||
class _PoTokenContext(enum.Enum):
    """Closed set of string-valued PO Token context identifiers."""

    PLAYER = 'player'
    GVS = 'gvs'
    SUBS = 'subs'
|
||||
|
||||
|
||||
# any clients starting with _ cannot be explicitly requested by the user
|
||||
@ -417,6 +418,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
|
||||
_COOKIE_HOWTO_WIKI_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies'
|
||||
|
||||
def ucid_or_none(self, ucid):
    """Return ``ucid`` if it fully matches ``_YT_CHANNEL_UCID_RE``, else ``None``.

    The pattern is anchored with ``^``/``$`` so partial matches are rejected;
    ``default=None`` makes the lookup non-fatal.
    """
    return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
|
||||
|
||||
@ -451,17 +454,15 @@ def _preferred_lang(self):
|
||||
return preferred_lang
|
||||
|
||||
def _initialize_consent(self):
    """Set the ``SOCS`` consent cookie when no usable one is present.

    Nothing is changed when account auth cookies are present, or when an
    existing ``SOCS`` cookie has a value that does not start with ``'CAA'``.
    """
    if self._has_auth_cookies:
        return
    socs = self._youtube_cookies.get('SOCS')
    if socs and not socs.value.startswith('CAA'):  # not consented
        return
    self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)
|
||||
|
||||
def _initialize_pref(self):
|
||||
cookies = self._get_cookies('https://www.youtube.com/')
|
||||
pref_cookie = cookies.get('PREF')
|
||||
pref_cookie = self._youtube_cookies.get('PREF')
|
||||
pref = {}
|
||||
if pref_cookie:
|
||||
try:
|
||||
@ -472,8 +473,9 @@ def _initialize_pref(self):
|
||||
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
|
||||
|
||||
def _initialize_cookie_auth(self):
    """Record whether YouTube account cookies were supplied at start-up.

    ``_passed_auth_cookies`` is always reset first, then set to ``True`` (with
    a debug message) only when ``_has_auth_cookies`` is truthy.
    """
    self._passed_auth_cookies = False
    if self._has_auth_cookies:
        self._passed_auth_cookies = True
        self.write_debug('Found YouTube account cookies')
|
||||
|
||||
def _real_initialize(self):
|
||||
@ -492,8 +494,7 @@ def _perform_login(self, username, password):
|
||||
|
||||
@property
def _youtube_login_hint(self):
    """Login hint for cookie auth, followed by a pointer to the cookie-export wiki page.

    The wiki URL comes from ``_COOKIE_HOWTO_WIKI_URL`` rather than being
    repeated inline.
    """
    return (f'{self._login_hint(method="cookies")}. Also see {self._COOKIE_HOWTO_WIKI_URL} '
            'for tips on effectively exporting YouTube cookies')
|
||||
|
||||
def _check_login_required(self):
|
||||
@ -553,12 +554,16 @@ def _make_sid_authorization(scheme, sid, origin, additional_parts):
|
||||
|
||||
return f'{scheme} {"_".join(parts)}'
|
||||
|
||||
@property
def _youtube_cookies(self):
    """Cookies returned by ``_get_cookies`` for ``https://www.youtube.com``.

    Evaluated on every access (plain property, not cached).
    """
    return self._get_cookies('https://www.youtube.com')
|
||||
|
||||
def _get_sid_cookies(self):
|
||||
"""
|
||||
Get SAPISID, 1PSAPISID, 3PSAPISID cookie values
|
||||
@returns sapisid, 1psapisid, 3psapisid
|
||||
"""
|
||||
yt_cookies = self._get_cookies('https://www.youtube.com')
|
||||
yt_cookies = self._youtube_cookies
|
||||
yt_sapisid = try_call(lambda: yt_cookies['SAPISID'].value)
|
||||
yt_3papisid = try_call(lambda: yt_cookies['__Secure-3PAPISID'].value)
|
||||
yt_1papisid = try_call(lambda: yt_cookies['__Secure-1PAPISID'].value)
|
||||
@ -595,6 +600,31 @@ def _get_sid_authorization_header(self, origin='https://www.youtube.com', user_s
|
||||
|
||||
return ' '.join(authorizations)
|
||||
|
||||
@property
def is_authenticated(self):
    """Whether account auth cookies are currently present.

    Thin public alias of ``_has_auth_cookies``; evaluated on every access.
    """
    return self._has_auth_cookies
|
||||
|
||||
@property
def _has_auth_cookies(self):
    """Return ``True`` when a ``LOGIN_INFO`` cookie and at least one SID cookie value exist.

    The SID values come from ``_get_sid_cookies()``; ``LOGIN_INFO`` is looked
    up in ``_youtube_cookies``. The result is always a ``bool``.
    """
    yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
    # YouTube doesn't appear to clear 3PSAPISID when rotating cookies (as of 2025-04-26)
    # But LOGIN_INFO is cleared and should exist if logged in
    has_login_info = 'LOGIN_INFO' in self._youtube_cookies
    return bool(has_login_info and (yt_sapisid or yt_1psapisid or yt_3psapisid))
|
||||
|
||||
def _request_webpage(self, *args, **kwargs):
    """Delegate to the parent ``_request_webpage`` and then re-check the auth cookies.

    All arguments are forwarded unchanged and the parent's response is
    returned as-is. A warning is emitted (not once-only) when auth cookies
    were recorded as present at initialization but are no longer detected.
    """
    response = super()._request_webpage(*args, **kwargs)

    # Check that we are still logged-in and cookies have not rotated after every request
    # getattr: `_passed_auth_cookies` may not have been set on this instance yet
    if getattr(self, '_passed_auth_cookies', None) and not self._has_auth_cookies:
        self.report_warning(
            'The provided YouTube account cookies are no longer valid. '
            'They have likely been rotated in the browser as a security measure. '
            f'For tips on how to effectively export YouTube cookies, refer to {self._COOKIE_HOWTO_WIKI_URL} .',
            only_once=False)

    return response
|
||||
|
||||
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
|
||||
note='Downloading API JSON', errnote='Unable to download API page',
|
||||
context=None, api_key=None, api_hostname=None, default_client='web'):
|
||||
@ -695,10 +725,6 @@ def _extract_visitor_data(self, *args):
|
||||
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
|
||||
expected_type=str)
|
||||
|
||||
@functools.cached_property
|
||||
def is_authenticated(self):
|
||||
return bool(self._get_sid_authorization_header())
|
||||
|
||||
def extract_ytcfg(self, video_id, webpage):
|
||||
if not webpage:
|
||||
return {}
|
||||
@ -762,6 +788,7 @@ def _download_webpage_with_retries(self, *args, retry_fatal=False, retry_on_stat
|
||||
|
||||
def _download_ytcfg(self, client, video_id):
|
||||
url = {
|
||||
'mweb': 'https://m.youtube.com',
|
||||
'web': 'https://www.youtube.com',
|
||||
'web_music': 'https://music.youtube.com',
|
||||
'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
|
||||
|
@ -37,6 +37,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
|
||||
'chapters': 'count:20',
|
||||
'comment_count': int,
|
||||
'heatmap': 'count:100',
|
||||
'media_type': 'clip',
|
||||
},
|
||||
}]
|
||||
|
||||
@ -59,6 +60,7 @@ def _real_extract(self, url):
|
||||
'url': f'https://www.youtube.com/watch?v={video_id}',
|
||||
'ie_key': YoutubeIE.ie_key(),
|
||||
'id': clip_id,
|
||||
'media_type': 'clip',
|
||||
'section_start': int(clip_data['startTimeMs']) / 1000,
|
||||
'section_end': int(clip_data['endTimeMs']) / 1000,
|
||||
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility
|
||||
|
@ -35,6 +35,7 @@ class YoutubeYtBeIE(YoutubeBaseInfoExtractor):
|
||||
'duration': 59,
|
||||
'comment_count': int,
|
||||
'channel_follower_count': int,
|
||||
'media_type': 'short',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
|
@ -524,10 +524,16 @@ def _entries(self, tab, item_id, ytcfg, delegated_session_id, visitor_data):
|
||||
response = self._extract_response(
|
||||
item_id=f'{item_id} page {page_num}',
|
||||
query=continuation, headers=headers, ytcfg=ytcfg,
|
||||
check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
|
||||
check_get_keys=(
|
||||
'continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints',
|
||||
# Playlist recommendations may return with no data - ignore
|
||||
('responseContext', 'serviceTrackingParams', ..., 'params', ..., lambda k, v: k == 'key' and v == 'GetRecommendedMusicPlaylists_rid'),
|
||||
))
|
||||
|
||||
if not response:
|
||||
break
|
||||
|
||||
continuation = None
|
||||
# Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
|
||||
# See: https://github.com/ytdl-org/youtube-dl/issues/28702
|
||||
visitor_data = self._extract_visitor_data(response) or visitor_data
|
||||
@ -564,7 +570,13 @@ def _entries(self, tab, item_id, ytcfg, delegated_session_id, visitor_data):
|
||||
yield from func(video_items_renderer)
|
||||
continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
|
||||
|
||||
if not video_items_renderer:
|
||||
# In the case only a continuation is returned, try to follow it.
|
||||
# We extract this after trying to extract non-continuation items as otherwise this
|
||||
# may be prioritized over other continuations.
|
||||
# see: https://github.com/yt-dlp/yt-dlp/issues/12933
|
||||
continuation = continuation or self._extract_continuation({'contents': [continuation_item]})
|
||||
|
||||
if not continuation and not video_items_renderer:
|
||||
break
|
||||
|
||||
@staticmethod
|
||||
@ -999,14 +1011,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 94,
|
||||
'info_dict': {
|
||||
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'title': 'Igor Kleiner Ph.D. - Playlists',
|
||||
'title': 'Igor Kleiner - Playlists',
|
||||
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
|
||||
'uploader': 'Igor Kleiner Ph.D.',
|
||||
'uploader': 'Igor Kleiner ',
|
||||
'uploader_id': '@IgorDataScience',
|
||||
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
|
||||
'channel': 'Igor Kleiner Ph.D.',
|
||||
'channel': 'Igor Kleiner ',
|
||||
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
|
||||
'tags': 'count:23',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'channel_follower_count': int,
|
||||
},
|
||||
@ -1016,18 +1028,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 94,
|
||||
'info_dict': {
|
||||
'id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'title': 'Igor Kleiner Ph.D. - Playlists',
|
||||
'title': 'Igor Kleiner - Playlists',
|
||||
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
|
||||
'uploader': 'Igor Kleiner Ph.D.',
|
||||
'uploader': 'Igor Kleiner ',
|
||||
'uploader_id': '@IgorDataScience',
|
||||
'uploader_url': 'https://www.youtube.com/@IgorDataScience',
|
||||
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
|
||||
'tags': 'count:23',
|
||||
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'channel': 'Igor Kleiner Ph.D.',
|
||||
'channel': 'Igor Kleiner ',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
|
||||
'channel_follower_count': int,
|
||||
},
|
||||
}, {
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'note': 'playlists, series',
|
||||
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
|
||||
'playlist_mincount': 5,
|
||||
@ -1066,22 +1079,23 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'basic, single video playlist',
|
||||
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
|
||||
'info_dict': {
|
||||
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||
'title': 'youtube-dl public playlist',
|
||||
'id': 'PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
|
||||
'title': 'single video playlist',
|
||||
'description': '',
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'modified_date': '20201130',
|
||||
'channel': 'Sergey M.',
|
||||
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'modified_date': '20250417',
|
||||
'channel': 'cole-dlp-test-acc',
|
||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'availability': 'public',
|
||||
'uploader': 'Sergey M.',
|
||||
'uploader_url': 'https://www.youtube.com/@sergeym.6173',
|
||||
'uploader_id': '@sergeym.6173',
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
'uploader_url': 'https://www.youtube.com/@coletdjnz',
|
||||
'uploader_id': '@coletdjnz',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
@ -1171,11 +1185,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 17,
|
||||
}, {
|
||||
'note': 'Community tab',
|
||||
'note': 'Posts tab',
|
||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
|
||||
'info_dict': {
|
||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'title': 'lex will - Community',
|
||||
'title': 'lex will - Posts',
|
||||
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
||||
'channel': 'lex will',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
@ -1188,30 +1202,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 18,
|
||||
}, {
|
||||
'note': 'Channels tab',
|
||||
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
|
||||
'info_dict': {
|
||||
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'title': 'lex will - Channels',
|
||||
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
|
||||
'channel': 'lex will',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
|
||||
'tags': ['bible', 'history', 'prophesy'],
|
||||
'channel_follower_count': int,
|
||||
'uploader_url': 'https://www.youtube.com/@lexwill718',
|
||||
'uploader_id': '@lexwill718',
|
||||
'uploader': 'lex will',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'note': 'Search tab',
|
||||
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
|
||||
'playlist_mincount': 40,
|
||||
'info_dict': {
|
||||
'id': 'UCYO_jab_esuFRV4b17AJtAw',
|
||||
'title': '3Blue1Brown - Search - linear algebra',
|
||||
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
|
||||
'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
|
||||
'tags': ['Mathematics'],
|
||||
'channel': '3Blue1Brown',
|
||||
@ -1232,6 +1230,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||
'info_dict': {
|
||||
@ -1294,24 +1293,25 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
'playlist_mincount': 21,
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'Playlist with "show unavailable videos" button',
|
||||
'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
|
||||
'info_dict': {
|
||||
'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
|
||||
'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'title': 'The Memes Of 2010s.....',
|
||||
'id': 'PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
|
||||
'view_count': int,
|
||||
'channel': 'Phim Siêu Nhân Nhật Bản',
|
||||
'channel': "I'm Not JiNxEd",
|
||||
'tags': [],
|
||||
'description': '',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
|
||||
'description': 'md5:44dc3b315ba69394feaafa2f40e7b2a1',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC5H5H85D1QE5-fuWWQ1hdNg',
|
||||
'channel_id': 'UC5H5H85D1QE5-fuWWQ1hdNg',
|
||||
'modified_date': r're:\d{8}',
|
||||
'availability': 'public',
|
||||
'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
|
||||
'uploader_id': '@phimsieunhannhatban',
|
||||
'uploader': 'Phim Siêu Nhân Nhật Bản',
|
||||
'uploader_url': 'https://www.youtube.com/@imnotjinxed1998',
|
||||
'uploader_id': '@imnotjinxed1998',
|
||||
'uploader': "I'm Not JiNxEd",
|
||||
},
|
||||
'playlist_mincount': 200,
|
||||
'playlist_mincount': 150,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
}, {
|
||||
'note': 'Playlist with unavailable videos in page 7',
|
||||
@ -1334,6 +1334,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 1000,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
|
||||
'info_dict': {
|
||||
@ -1384,7 +1385,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
|
||||
'info_dict': {
|
||||
'id': 'hGkQjiJLjWQ', # This will keep changing
|
||||
'id': 'YDvsBbKfLPA', # This will keep changing
|
||||
'ext': 'mp4',
|
||||
'title': str,
|
||||
'upload_date': r're:\d{8}',
|
||||
@ -1409,6 +1410,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'uploader_id': '@SkyNews',
|
||||
'uploader': 'Sky News',
|
||||
'channel_is_verified': True,
|
||||
'media_type': 'livestream',
|
||||
'timestamp': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -1496,6 +1499,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: fix availability extraction
|
||||
'note': 'VLPL, should redirect to playlist?list=PL...',
|
||||
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
|
||||
'info_dict': {
|
||||
@ -1537,6 +1541,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
}, {
|
||||
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
|
||||
# Treat as a general feed
|
||||
# TODO: fix extraction
|
||||
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
|
||||
'info_dict': {
|
||||
'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
|
||||
@ -1560,21 +1565,21 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'expected_warnings': ['YouTube Music is not directly supported'],
|
||||
}, {
|
||||
'note': 'unlisted single video playlist',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
|
||||
'info_dict': {
|
||||
'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
|
||||
'title': 'yt-dlp unlisted playlist test',
|
||||
'id': 'PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
|
||||
'title': 'unlisted playlist',
|
||||
'availability': 'unlisted',
|
||||
'tags': [],
|
||||
'modified_date': '20220418',
|
||||
'channel': 'colethedj',
|
||||
'modified_date': '20250417',
|
||||
'channel': 'cole-dlp-test-acc',
|
||||
'view_count': int,
|
||||
'description': '',
|
||||
'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
|
||||
'uploader_url': 'https://www.youtube.com/@colethedj1894',
|
||||
'uploader_id': '@colethedj1894',
|
||||
'uploader': 'colethedj',
|
||||
'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
|
||||
'uploader_url': 'https://www.youtube.com/@coletdjnz',
|
||||
'uploader_id': '@coletdjnz',
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
@ -1596,6 +1601,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_count': 1,
|
||||
'params': {'extract_flat': True},
|
||||
}, {
|
||||
# By default, recommended is always empty.
|
||||
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
|
||||
'url': 'https://www.youtube.com/feed/recommended',
|
||||
'info_dict': {
|
||||
@ -1603,7 +1609,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'title': 'recommended',
|
||||
'tags': [],
|
||||
},
|
||||
'playlist_mincount': 50,
|
||||
'playlist_count': 0,
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'extractor_args': {'youtubetab': {'skip': ['webpage']}},
|
||||
@ -1628,6 +1634,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
'skip': 'Query for sorting no longer works',
|
||||
}, {
|
||||
# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
|
||||
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
|
||||
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
|
||||
'info_dict': {
|
||||
@ -1654,11 +1661,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# TODO: fix metadata extraction
|
||||
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
|
||||
'info_dict': {
|
||||
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
|
||||
'modified_date': '20220407',
|
||||
'modified_date': '20250115',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
|
||||
'tags': [],
|
||||
'availability': 'unlisted',
|
||||
@ -1692,6 +1700,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'expected_warnings': ['Preferring "ja"'],
|
||||
}, {
|
||||
# XXX: this should really check flat playlist entries, but the test suite doesn't support that
|
||||
# TODO: fix availability extraction
|
||||
'note': 'preferred lang set with playlist with translated video titles',
|
||||
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
|
||||
'info_dict': {
|
||||
@ -1714,6 +1723,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
}, {
|
||||
# shorts audio pivot for 2GtVksBMYFM.
|
||||
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
|
||||
# TODO: fix extraction
|
||||
'info_dict': {
|
||||
'id': 'sfv_audio_pivot',
|
||||
'title': 'sfv_audio_pivot',
|
||||
@ -1751,6 +1761,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 8,
|
||||
}, {
|
||||
# Should get three playlists for videos, shorts and streams tabs
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
||||
'info_dict': {
|
||||
'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
||||
@ -1758,7 +1769,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'channel_follower_count': int,
|
||||
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
|
||||
'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
|
||||
'description': 'md5:01e53f350ab8ad6fcf7c4fedb3c1b99f',
|
||||
'channel': 'Polka Ch. 尾丸ポルカ',
|
||||
'tags': 'count:35',
|
||||
'uploader_url': 'https://www.youtube.com/@OmaruPolka',
|
||||
@ -1769,14 +1780,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# Shorts tab with channel with handle
|
||||
# TODO: fix channel description
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'url': 'https://www.youtube.com/@NotJustBikes/shorts',
|
||||
'info_dict': {
|
||||
'id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||
'title': 'Not Just Bikes - Shorts',
|
||||
'tags': 'count:10',
|
||||
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
|
||||
'description': 'md5:5e82545b3a041345927a92d0585df247',
|
||||
'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
|
||||
'channel_follower_count': int,
|
||||
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
|
||||
'channel': 'Not Just Bikes',
|
||||
@ -1797,7 +1808,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
|
||||
'channel': '中村悠一',
|
||||
'channel_follower_count': int,
|
||||
'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
|
||||
'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
|
||||
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
|
||||
'uploader_id': '@Yuichi-Nakamura',
|
||||
'uploader': '中村悠一',
|
||||
@ -1815,6 +1826,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# No videos tab but has a shorts tab
|
||||
# TODO: fix metadata extraction
|
||||
'url': 'https://www.youtube.com/c/TKFShorts',
|
||||
'info_dict': {
|
||||
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
|
||||
@ -1851,6 +1863,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
}, {
|
||||
# Shorts url result in shorts tab
|
||||
# TODO: Fix channel id extraction
|
||||
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
|
||||
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
|
||||
'info_dict': {
|
||||
'id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
@ -1879,6 +1892,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'params': {'extract_flat': True},
|
||||
}, {
|
||||
# Live video status should be extracted
|
||||
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
|
||||
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
|
||||
'info_dict': {
|
||||
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
|
||||
@ -1907,6 +1921,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
# Channel renderer metadata. Contains number of videos on the channel
|
||||
# TODO: channels tab removed, change this test to use another page with channel renderer
|
||||
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
|
||||
'info_dict': {
|
||||
'id': 'UCiu-3thuViMebBjw_5nWYrA',
|
||||
@ -1940,7 +1955,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
},
|
||||
}],
|
||||
'params': {'extract_flat': True},
|
||||
'skip': 'channels tab removed',
|
||||
}, {
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'url': 'https://www.youtube.com/@3blue1brown/about',
|
||||
'info_dict': {
|
||||
'id': '@3blue1brown',
|
||||
@ -1950,7 +1967,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
|
||||
'channel': '3Blue1Brown',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
|
||||
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
|
||||
'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
|
||||
'uploader_url': 'https://www.youtube.com/@3blue1brown',
|
||||
'uploader_id': '@3blue1brown',
|
||||
'uploader': '3Blue1Brown',
|
||||
@ -1976,6 +1993,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_count': 5,
|
||||
}, {
|
||||
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'url': 'https://www.youtube.com/@AHimitsu/releases',
|
||||
'info_dict': {
|
||||
'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
|
||||
@ -2015,6 +2033,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'playlist_mincount': 100,
|
||||
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
|
||||
}, {
|
||||
# TODO: fix channel_is_verified extraction
|
||||
'note': 'Tags containing spaces',
|
||||
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
|
||||
'playlist_count': 3,
|
||||
@ -2035,6 +2054,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
|
||||
'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
|
||||
'mark fischbach'],
|
||||
},
|
||||
}, {
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/12933
|
||||
'note': 'streams tab, some scheduled streams. Empty intermediate response with only continuation - must follow',
|
||||
'url': 'https://www.youtube.com/@sbcitygov/streams',
|
||||
'playlist_mincount': 150,
|
||||
'info_dict': {
|
||||
'id': 'UCH6-qfQwlUgz9SAf05jvc_w',
|
||||
'channel': 'sbcitygov',
|
||||
'channel_id': 'UCH6-qfQwlUgz9SAf05jvc_w',
|
||||
'title': 'sbcitygov - Live',
|
||||
'channel_follower_count': int,
|
||||
'description': 'md5:ca1a92059835c071e33b3db52f4a6d67',
|
||||
'uploader_id': '@sbcitygov',
|
||||
'uploader_url': 'https://www.youtube.com/@sbcitygov',
|
||||
'uploader': 'sbcitygov',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCH6-qfQwlUgz9SAf05jvc_w',
|
||||
'tags': [],
|
||||
},
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
|
@ -23,6 +23,8 @@
|
||||
_split_innertube_client,
|
||||
short_client_name,
|
||||
)
|
||||
from .pot._director import initialize_pot_director
|
||||
from .pot.provider import PoTokenContext, PoTokenRequest
|
||||
from ..openload import PhantomJSwrapper
|
||||
from ...jsinterp import JSInterpreter
|
||||
from ...networking.exceptions import HTTPError
|
||||
@ -66,9 +68,13 @@
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ...utils.networking import clean_headers, clean_proxies, select_proxy
|
||||
|
||||
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
|
||||
STREAMING_DATA_INITIAL_PO_TOKEN = '__yt_dlp_po_token'
|
||||
STREAMING_DATA_FETCH_SUBS_PO_TOKEN = '__yt_dlp_fetch_subs_po_token'
|
||||
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
|
||||
|
||||
PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'
|
||||
|
||||
|
||||
@ -376,6 +382,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'Afrojack',
|
||||
'uploader_url': 'https://www.youtube.com/@Afrojack',
|
||||
'uploader_id': '@Afrojack',
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
@ -413,10 +420,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
'timestamp': 1401991663,
|
||||
'media_type': 'video',
|
||||
},
|
||||
},
|
||||
{
|
||||
'note': 'Age-gate video with embed allowed in public site',
|
||||
'note': 'Formerly an age-gate video with embed allowed in public site',
|
||||
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
|
||||
'info_dict': {
|
||||
'id': 'HsUATh_Nc2U',
|
||||
@ -424,8 +432,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'title': 'Godzilla 2 (Official Video)',
|
||||
'description': 'md5:bf77e03fcae5529475e500129b05668a',
|
||||
'upload_date': '20200408',
|
||||
'age_limit': 18,
|
||||
'availability': 'needs_auth',
|
||||
'age_limit': 0,
|
||||
'availability': 'public',
|
||||
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
|
||||
'channel': 'FlyingKitty',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
|
||||
@ -443,8 +451,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': '@FlyingKitty900',
|
||||
'comment_count': int,
|
||||
'channel_is_verified': True,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'skip': 'Age-restricted; requires authentication',
|
||||
},
|
||||
{
|
||||
'note': 'Age-gate video embedable only with clientScreen=EMBED',
|
||||
@ -507,6 +515,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'Herr Lurik',
|
||||
'uploader_url': 'https://www.youtube.com/@HerrLurik',
|
||||
'uploader_id': '@HerrLurik',
|
||||
'media_type': 'video',
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -546,6 +555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'deadmau5',
|
||||
'uploader_url': 'https://www.youtube.com/@deadmau5',
|
||||
'uploader_id': '@deadmau5',
|
||||
'media_type': 'video',
|
||||
},
|
||||
'expected_warnings': [
|
||||
'DASH manifest missing',
|
||||
@ -581,6 +591,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': '@Olympics',
|
||||
'channel_is_verified': True,
|
||||
'timestamp': 1440707674,
|
||||
'media_type': 'livestream',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires avconv',
|
||||
@ -615,6 +626,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/@AllenMeow',
|
||||
'uploader_id': '@AllenMeow',
|
||||
'timestamp': 1299776999,
|
||||
'media_type': 'video',
|
||||
},
|
||||
},
|
||||
# url_encoded_fmt_stream_map is empty string
|
||||
@ -809,6 +821,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
'age_limit': 0,
|
||||
'channel_follower_count': int,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -868,6 +881,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': '@BKCHarvard',
|
||||
'uploader_url': 'https://www.youtube.com/@BKCHarvard',
|
||||
'timestamp': 1422422076,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -904,6 +918,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
'timestamp': 1447987198,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -968,6 +983,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'comment_count': int,
|
||||
'channel_is_verified': True,
|
||||
'timestamp': 1484761047,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -1070,6 +1086,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'tags': 'count:11',
|
||||
'live_status': 'not_live',
|
||||
'channel_follower_count': int,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -1124,6 +1141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
|
||||
'uploader_id': '@ElevageOrVert',
|
||||
'timestamp': 1497343210,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -1163,6 +1181,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
'timestamp': 1377976349,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -1207,6 +1226,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel_follower_count': int,
|
||||
'uploader': 'The Cinematic Orchestra',
|
||||
'comment_count': int,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -1275,6 +1295,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
|
||||
'uploader_id': '@walkaroundjapan7124',
|
||||
'timestamp': 1605884416,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -1371,6 +1392,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
'timestamp': 1395685455,
|
||||
'media_type': 'video',
|
||||
}, 'params': {'format': 'mhtml', 'skip_download': True},
|
||||
}, {
|
||||
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
|
||||
@ -1401,6 +1423,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': '@LeonNguyen',
|
||||
'heatmap': 'count:100',
|
||||
'timestamp': 1641170939,
|
||||
'media_type': 'video',
|
||||
},
|
||||
}, {
|
||||
# date text is premiered video, ensure upload date in UTC (published 1641172509)
|
||||
@ -1434,6 +1457,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
'timestamp': 1641172509,
|
||||
'media_type': 'video',
|
||||
},
|
||||
},
|
||||
{ # continuous livestream.
|
||||
@ -1495,6 +1519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'Lesmiscore',
|
||||
'uploader_url': 'https://www.youtube.com/@lesmiscore',
|
||||
'timestamp': 1648005313,
|
||||
'media_type': 'short',
|
||||
},
|
||||
}, {
|
||||
# Prefer primary title+description language metadata by default
|
||||
@ -1523,6 +1548,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': '@coletdjnz',
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
'timestamp': 1662677394,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
}, {
|
||||
@ -1551,6 +1577,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader': 'cole-dlp-test-acc',
|
||||
'timestamp': 1659073275,
|
||||
'like_count': int,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
|
||||
'expected_warnings': [r'Preferring "fr" translated fields'],
|
||||
@ -1587,6 +1614,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'comment_count': int,
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
|
||||
}, {
|
||||
@ -1687,6 +1715,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'comment_count': int,
|
||||
'channel_is_verified': True,
|
||||
'heatmap': 'count:100',
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
|
||||
@ -1719,6 +1748,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'channel_follower_count': int,
|
||||
'categories': ['People & Blogs'],
|
||||
'tags': [],
|
||||
'media_type': 'short',
|
||||
},
|
||||
},
|
||||
]
|
||||
@ -1754,6 +1784,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'uploader_id': '@ChristopherSykesDocumentaries',
|
||||
'heatmap': 'count:100',
|
||||
'timestamp': 1211825920,
|
||||
'media_type': 'video',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -1784,6 +1815,11 @@ def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self._code_cache = {}
|
||||
self._player_cache = {}
|
||||
self._pot_director = None
|
||||
|
||||
def _real_initialize(self):
    """Run the parent class initialization, then create the PO Token director."""
    super()._real_initialize()
    # Replaces the None placeholder assigned in __init__
    self._pot_director = initialize_pot_director(self)
|
||||
|
||||
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
|
||||
lock = threading.Lock()
|
||||
@ -1819,6 +1855,12 @@ def mpd_feed(format_id, delay):
|
||||
else:
|
||||
retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
|
||||
continue
|
||||
|
||||
# Formats from ended premieres will be missing a manifest_url
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/8543
|
||||
if not f.get('manifest_url'):
|
||||
break
|
||||
|
||||
return f['manifest_url'], f['manifest_stream_number'], is_live
|
||||
return None
|
||||
|
||||
@ -1982,7 +2024,9 @@ def _download_player_url(self, video_id, fatal=False):
|
||||
def _player_js_cache_key(self, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
player_path = remove_start(urllib.parse.urlparse(player_url).path, f'/s/player/{player_id}/')
|
||||
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path)
|
||||
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path) or next((
|
||||
v for k, v in self._INVERSE_PLAYER_JS_VARIANT_MAP.items()
|
||||
if re.fullmatch(re.escape(k).replace('en_US', r'[a-zA-Z0-9_]+'), player_path)), None)
|
||||
if not variant:
|
||||
self.write_debug(
|
||||
f'Unable to determine player JS variant\n'
|
||||
@ -2120,23 +2164,23 @@ def inner(*args, **kwargs):
|
||||
return ret
|
||||
return inner
|
||||
|
||||
def _load_nsig_code_from_cache(self, player_url):
|
||||
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
|
||||
def _load_player_data_from_cache(self, name, player_url):
|
||||
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
|
||||
|
||||
if func_code := self._player_cache.get(cache_id):
|
||||
return func_code
|
||||
if data := self._player_cache.get(cache_id):
|
||||
return data
|
||||
|
||||
func_code = self.cache.load(*cache_id, min_ver='2025.03.31')
|
||||
if func_code:
|
||||
self._player_cache[cache_id] = func_code
|
||||
data = self.cache.load(*cache_id, min_ver='2025.03.31')
|
||||
if data:
|
||||
self._player_cache[cache_id] = data
|
||||
|
||||
return func_code
|
||||
return data
|
||||
|
||||
def _store_nsig_code_to_cache(self, player_url, func_code):
|
||||
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
|
||||
def _store_player_data_to_cache(self, name, player_url, data):
|
||||
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
|
||||
if cache_id not in self._player_cache:
|
||||
self.cache.store(*cache_id, func_code)
|
||||
self._player_cache[cache_id] = func_code
|
||||
self.cache.store(*cache_id, data)
|
||||
self._player_cache[cache_id] = data
|
||||
|
||||
def _decrypt_signature(self, s, video_id, player_url):
|
||||
"""Turn the encrypted s field into a working signature"""
|
||||
@ -2179,7 +2223,7 @@ def _decrypt_nsig(self, s, video_id, player_url):
|
||||
|
||||
self.write_debug(f'Decrypted nsig {s} => {ret}')
|
||||
# Only cache nsig func JS code to disk if successful, and only once
|
||||
self._store_nsig_code_to_cache(player_url, func_code)
|
||||
self._store_player_data_to_cache('nsig', player_url, func_code)
|
||||
return ret
|
||||
|
||||
def _extract_n_function_name(self, jscode, player_url=None):
|
||||
@ -2298,7 +2342,7 @@ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
|
||||
|
||||
def _extract_n_function_code(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
func_code = self._load_nsig_code_from_cache(player_url)
|
||||
func_code = self._load_player_data_from_cache('nsig', player_url)
|
||||
jscode = func_code or self._load_player(video_id, player_url)
|
||||
jsi = JSInterpreter(jscode)
|
||||
|
||||
@ -2334,23 +2378,27 @@ def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=F
|
||||
Extract signatureTimestamp (sts)
|
||||
Required to tell API what sig/player version is in use.
|
||||
"""
|
||||
sts = None
|
||||
if isinstance(ytcfg, dict):
|
||||
sts = int_or_none(ytcfg.get('STS'))
|
||||
if sts := traverse_obj(ytcfg, ('STS', {int_or_none})):
|
||||
return sts
|
||||
|
||||
if not player_url:
|
||||
error_msg = 'Cannot extract signature timestamp without player url'
|
||||
if fatal:
|
||||
raise ExtractorError(error_msg)
|
||||
self.report_warning(error_msg)
|
||||
return None
|
||||
|
||||
sts = self._load_player_data_from_cache('sts', player_url)
|
||||
if sts:
|
||||
return sts
|
||||
|
||||
if code := self._load_player(video_id, player_url, fatal=fatal):
|
||||
sts = int_or_none(self._search_regex(
|
||||
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
|
||||
'JS player signature timestamp', group='sts', fatal=fatal))
|
||||
if sts:
|
||||
self._store_player_data_to_cache('sts', player_url, sts)
|
||||
|
||||
if not sts:
|
||||
# Attempt to extract from player
|
||||
if player_url is None:
|
||||
error_msg = 'Cannot extract signature timestamp without player_url.'
|
||||
if fatal:
|
||||
raise ExtractorError(error_msg)
|
||||
self.report_warning(error_msg)
|
||||
return
|
||||
code = self._load_player(video_id, player_url, fatal=fatal)
|
||||
if code:
|
||||
sts = int_or_none(self._search_regex(
|
||||
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
|
||||
'JS player signature timestamp', group='sts', fatal=fatal))
|
||||
return sts
|
||||
|
||||
def _mark_watched(self, video_id, player_responses):
|
||||
@ -2818,7 +2866,8 @@ def _get_config_po_token(self, client: str, context: _PoTokenContext):
|
||||
continue
|
||||
|
||||
def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None, visitor_data=None,
|
||||
data_sync_id=None, session_index=None, player_url=None, video_id=None, **kwargs):
|
||||
data_sync_id=None, session_index=None, player_url=None, video_id=None, webpage=None,
|
||||
required=False, **kwargs):
|
||||
"""
|
||||
Fetch a PO Token for a given client and context. This function will validate required parameters for a given context and client.
|
||||
|
||||
@ -2832,10 +2881,15 @@ def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None,
|
||||
@param session_index: session index.
|
||||
@param player_url: player URL.
|
||||
@param video_id: video ID.
|
||||
@param webpage: video webpage.
|
||||
@param required: Whether the PO Token is required (i.e. try to fetch unless policy is "never").
|
||||
@param kwargs: Additional arguments to pass down. May be more added in the future.
|
||||
@return: The fetched PO Token. None if it could not be fetched.
|
||||
"""
|
||||
|
||||
# TODO(future): This validation should be moved into pot framework.
|
||||
# Some sort of middleware or validation provider perhaps?
|
||||
|
||||
# GVS WebPO Token is bound to visitor_data / Visitor ID when logged out.
|
||||
# Must have visitor_data for it to function.
|
||||
if player_url and context == _PoTokenContext.GVS and not visitor_data and not self.is_authenticated:
|
||||
@ -2857,6 +2911,7 @@ def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None,
|
||||
f'Got a GVS PO Token for {client} client, but missing Data Sync ID for account. Formats may not work.'
|
||||
f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
|
||||
|
||||
self.write_debug(f'{video_id}: Retrieved a {context.value} PO Token for {client} client from config')
|
||||
return config_po_token
|
||||
|
||||
# Require GVS WebPO Token if logged in for external fetching
|
||||
@ -2866,7 +2921,7 @@ def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None,
|
||||
f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
|
||||
return
|
||||
|
||||
return self._fetch_po_token(
|
||||
po_token = self._fetch_po_token(
|
||||
client=client,
|
||||
context=context.value,
|
||||
ytcfg=ytcfg,
|
||||
@ -2875,11 +2930,68 @@ def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None,
|
||||
session_index=session_index,
|
||||
player_url=player_url,
|
||||
video_id=video_id,
|
||||
video_webpage=webpage,
|
||||
required=required,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
if po_token:
|
||||
self.write_debug(f'{video_id}: Retrieved a {context.value} PO Token for {client} client')
|
||||
return po_token
|
||||
|
||||
def _fetch_po_token(self, client, **kwargs):
    """(Unstable) Request a PO Token for `client` from the PO Token director.

    Reads `context`, `required`, `ytcfg`, `session_index`, `player_url`,
    `video_webpage`, `visitor_data`, `data_sync_id` and `video_id` from `kwargs`.

    Returns None without building a request when the `fetch_pot` extractor
    argument is "never", or when it is "auto" (also used for any unrecognised
    value), the context is not listed in the client's
    PO_TOKEN_REQUIRED_CONTEXTS and `required` is not truthy.
    Otherwise returns the result of the director's get_po_token().
    """
    context = kwargs.get('context')

    # Avoid fetching PO Tokens when not required
    policy = self._configuration_arg('fetch_pot', [''], ie_key=YoutubeIE)[0]
    if policy not in ('never', 'auto', 'always'):
        policy = 'auto'

    if policy == 'never':
        return None
    if policy == 'auto':
        context_is_required = (
            _PoTokenContext(context) in self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS'])
        if not context_is_required and not kwargs.get('required', False):
            return None

    request_headers = self.get_param('http_headers').copy()
    request_proxies = self._downloader.proxies.copy()
    clean_headers(request_headers)
    clean_proxies(request_proxies, request_headers)

    innertube_host = self._select_api_hostname(None, default_client=client)

    # Values handed to the request unchanged, under the same names
    passthrough = {
        key: kwargs.get(key)
        for key in (
            'session_index', 'player_url', 'video_webpage',
            'visitor_data', 'data_sync_id', 'video_id',
        )
    }

    pot_request = PoTokenRequest(
        context=PoTokenContext(context),
        innertube_context=traverse_obj(kwargs, ('ytcfg', 'INNERTUBE_CONTEXT')),
        innertube_host=innertube_host,
        internal_client_name=client,
        is_authenticated=self.is_authenticated,
        request_cookiejar=self._downloader.cookiejar,

        # All requests that would need to be proxied should be in the
        # context of www.youtube.com or the innertube host
        request_proxy=(
            select_proxy('https://www.youtube.com', request_proxies)
            or select_proxy(f'https://{innertube_host}', request_proxies)
        ),
        request_headers=request_headers,
        request_timeout=self.get_param('socket_timeout'),
        request_verify_tls=not self.get_param('nocheckcertificate'),
        request_source_address=self.get_param('source_address'),

        bypass_cache=False,
        **passthrough,
    )

    return self._pot_director.get_po_token(pot_request)
|
||||
|
||||
@staticmethod
|
||||
def _is_agegated(player_response):
|
||||
@ -3028,6 +3140,8 @@ def append_client(*client_names):
|
||||
player_url = self._download_player_url(video_id)
|
||||
tried_iframe_fallback = True
|
||||
|
||||
pr = initial_pr if client == 'web' else None
|
||||
|
||||
visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
|
||||
data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
|
||||
|
||||
@ -3037,16 +3151,24 @@ def append_client(*client_names):
|
||||
'video_id': video_id,
|
||||
'data_sync_id': data_sync_id if self.is_authenticated else None,
|
||||
'player_url': player_url if require_js_player else None,
|
||||
'webpage': webpage,
|
||||
'session_index': self._extract_session_index(master_ytcfg, player_ytcfg),
|
||||
'ytcfg': player_ytcfg,
|
||||
'ytcfg': player_ytcfg or self._get_default_ytcfg(client),
|
||||
}
|
||||
|
||||
player_po_token = self.fetch_po_token(
|
||||
# Don't need a player PO token for WEB if using player response from webpage
|
||||
player_po_token = None if pr else self.fetch_po_token(
|
||||
context=_PoTokenContext.PLAYER, **fetch_po_token_args)
|
||||
|
||||
gvs_po_token = self.fetch_po_token(
|
||||
context=_PoTokenContext.GVS, **fetch_po_token_args)
|
||||
|
||||
fetch_subs_po_token_func = functools.partial(
|
||||
self.fetch_po_token,
|
||||
context=_PoTokenContext.SUBS,
|
||||
**fetch_po_token_args,
|
||||
)
|
||||
|
||||
required_pot_contexts = self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']
|
||||
|
||||
if (
|
||||
@ -3073,7 +3195,6 @@ def append_client(*client_names):
|
||||
only_once=True)
|
||||
deprioritize_pr = True
|
||||
|
||||
pr = initial_pr if client == 'web' else None
|
||||
try:
|
||||
pr = pr or self._extract_player_response(
|
||||
client, video_id,
|
||||
@ -3091,10 +3212,13 @@ def append_client(*client_names):
|
||||
if pr_id := self._invalid_player_response(pr, video_id):
|
||||
skipped_clients[client] = pr_id
|
||||
elif pr:
|
||||
# Save client name for introspection later
|
||||
sd = traverse_obj(pr, ('streamingData', {dict})) or {}
|
||||
# Save client details for introspection later
|
||||
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
|
||||
sd = pr.setdefault('streamingData', {})
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = client
|
||||
sd[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
|
||||
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
|
||||
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = client
|
||||
f[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
|
||||
@ -3103,9 +3227,19 @@ def append_client(*client_names):
|
||||
else:
|
||||
prs.append(pr)
|
||||
|
||||
# web_embedded can work around age-gate and age-verification for some embeddable videos
|
||||
if self._is_agegated(pr) and variant != 'web_embedded':
|
||||
append_client(f'web_embedded.{base_client}')
|
||||
# Unauthenticated users will only get web_embedded client formats if age-gated
|
||||
if self._is_agegated(pr) and not self.is_authenticated:
|
||||
self.to_screen(
|
||||
f'{video_id}: This video is age-restricted; some formats may be missing '
|
||||
f'without authentication. {self._youtube_login_hint}', only_once=True)
|
||||
|
||||
# EU countries require age-verification for accounts to access age-restricted videos
|
||||
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
|
||||
if self.is_authenticated and self._is_agegated(pr):
|
||||
embedding_is_disabled = variant == 'web_embedded' and self._is_unplayable(pr)
|
||||
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
|
||||
self.to_screen(
|
||||
f'{video_id}: This video is age-restricted and YouTube is requiring '
|
||||
'account age-verification; some formats may be missing', only_once=True)
|
||||
@ -3146,6 +3280,25 @@ def _report_pot_format_skipped(self, video_id, client_name, proto):
|
||||
else:
|
||||
self.report_warning(msg, only_once=True)
|
||||
|
||||
def _report_pot_subtitles_skipped(self, video_id, client_name, msg=None):
    """Report that some subtitles of `client_name` were discarded for lack of a PO Token.

    A truthy `msg` replaces the default message. The message is emitted as a
    warning only when subtitles were requested (any of the `writesubtitles`,
    `writeautomaticsub` or `listsubtitles` params is truthy) and the client
    is not one of the default clients; otherwise it is written to the debug
    output. In both cases it is emitted with only_once=True.
    """
    if not msg:
        msg = (
            f'{video_id}: Some {client_name} client subtitles require a PO Token which was not provided. '
            'They will be discarded since they are not downloadable as-is. '
            f'You can manually pass a Subtitles PO Token for this client with '
            f'--extractor-args "youtube:po_token={client_name}.subs+XXX" . '
            f'For more information, refer to {PO_TOKEN_GUIDE_URL}')

    subtitle_params = (
        self.get_param('writesubtitles'),
        self.get_param('writeautomaticsub'),
        self.get_param('listsubtitles'),
    )

    # Only raise a warning for non-default clients, to not confuse users.
    if any(subtitle_params) and client_name not in (*self._DEFAULT_CLIENTS, *self._DEFAULT_AUTHED_CLIENTS):
        self.report_warning(msg, only_once=True)
    else:
        self.write_debug(msg, only_once=True)
|
||||
|
||||
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
||||
CHUNK_SIZE = 10 << 20
|
||||
PREFERRED_LANG_VALUE = 10
|
||||
@ -3232,12 +3385,16 @@ def build_fragments(f):
|
||||
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
|
||||
encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
||||
if not all((sc, fmt_url, player_url, encrypted_sig)):
|
||||
self.report_warning(
|
||||
f'Some {client_name} client https formats have been skipped as they are missing a url. '
|
||||
f'{"Your account" if self.is_authenticated else "The current session"} may have '
|
||||
f'the SSAP (server-side ads) experiment which interferes with yt-dlp. '
|
||||
f'Please see https://github.com/yt-dlp/yt-dlp/issues/12482 for more details.',
|
||||
video_id, only_once=True)
|
||||
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
|
||||
if client_name == 'web':
|
||||
msg += 'YouTube is forcing SABR streaming for this client. '
|
||||
else:
|
||||
msg += (
|
||||
f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
|
||||
f'{"your account" if self.is_authenticated else "the current session"}. '
|
||||
)
|
||||
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
|
||||
self.report_warning(msg, video_id, only_once=True)
|
||||
continue
|
||||
try:
|
||||
fmt_url += '&{}={}'.format(
|
||||
@ -3324,8 +3481,8 @@ def build_fragments(f):
|
||||
'width': int_or_none(fmt.get('width')),
|
||||
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
|
||||
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
|
||||
# Strictly de-prioritize broken, damaged and 3gp formats
|
||||
'preference': -20 if require_po_token else -10 if is_damaged else -2 if itag == '17' else None,
|
||||
# Strictly de-prioritize damaged and 3gp formats
|
||||
'preference': -10 if is_damaged else -2 if itag == '17' else None,
|
||||
}
|
||||
mime_mobj = re.match(
|
||||
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
||||
@ -3433,6 +3590,9 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
|
||||
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
|
||||
for sub in traverse_obj(subs, (..., ..., {dict})):
|
||||
# HLS subs (m3u8) do not need a PO token; save client name for debugging
|
||||
sub[STREAMING_DATA_CLIENT_NAME] = client_name
|
||||
subtitles = self._merge_subtitles(subs, subtitles)
|
||||
for f in fmts:
|
||||
if process_manifest_format(f, 'hls', client_name, self._search_regex(
|
||||
@ -3444,6 +3604,9 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
|
||||
if po_token:
|
||||
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||||
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
|
||||
for sub in traverse_obj(subs, (..., ..., {dict})):
|
||||
# TODO: Investigate if DASH subs ever need a PO token; save client name for debugging
|
||||
sub[STREAMING_DATA_CLIENT_NAME] = client_name
|
||||
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
|
||||
for f in formats:
|
||||
if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
|
||||
@ -3635,7 +3798,7 @@ def feed_entry(name):
|
||||
reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
|
||||
subreason = clean_html(self._get_text(pemr, 'subreason') or '')
|
||||
if subreason:
|
||||
if subreason == 'The uploader has not made this video available in your country.':
|
||||
if subreason.startswith('The uploader has not made this video available in your country'):
|
||||
countries = get_first(microformats, 'availableCountries')
|
||||
if not countries:
|
||||
regions_allowed = search_meta('regionsAllowed')
|
||||
@ -3646,6 +3809,15 @@ def feed_entry(name):
|
||||
if 'sign in' in reason.lower():
|
||||
reason = remove_end(reason, 'This helps protect our community. Learn more')
|
||||
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
|
||||
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
|
||||
reason += '. YouTube is requiring a captcha challenge before playback'
|
||||
elif "This content isn't available, try again later" in reason:
|
||||
reason = (
|
||||
f'{remove_end(reason.strip(), ".")}. {"Your account" if self.is_authenticated else "The current session"} '
|
||||
f'has been rate-limited by YouTube for up to an hour. It is recommended to use `-t sleep` to add a delay '
|
||||
f'between video requests to avoid exceeding the rate limit. For more information, refer to '
|
||||
f'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#this-content-isnt-available-try-again-later'
|
||||
)
|
||||
self.raise_no_formats(reason, expected=True)
|
||||
|
||||
keywords = get_first(video_details, 'keywords', expected_type=list) or []
|
||||
@ -3752,53 +3924,94 @@ def is_bad_format(fmt):
|
||||
'tags': keywords,
|
||||
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
|
||||
'live_status': live_status,
|
||||
'media_type': 'livestream' if get_first(video_details, 'isLiveContent') else None,
|
||||
'media_type': (
|
||||
'livestream' if get_first(video_details, 'isLiveContent')
|
||||
else 'short' if get_first(microformats, 'isShortsEligible')
|
||||
else 'video'),
|
||||
'release_timestamp': live_start_time,
|
||||
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
|
||||
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),
|
||||
}
|
||||
|
||||
def get_lang_code(track):
|
||||
return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
|
||||
or track.get('languageCode'))
|
||||
|
||||
def process_language(container, base_url, lang_code, sub_name, client_name, query):
|
||||
lang_subs = container.setdefault(lang_code, [])
|
||||
for fmt in self._SUBTITLE_FORMATS:
|
||||
query = {**query, 'fmt': fmt}
|
||||
lang_subs.append({
|
||||
'ext': fmt,
|
||||
'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
|
||||
'name': sub_name,
|
||||
STREAMING_DATA_CLIENT_NAME: client_name,
|
||||
})
|
||||
|
||||
subtitles = {}
|
||||
pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
|
||||
if pctr:
|
||||
def get_lang_code(track):
|
||||
return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
|
||||
or track.get('languageCode'))
|
||||
skipped_subs_clients = set()
|
||||
|
||||
# Converted into dicts to remove duplicates
|
||||
captions = {
|
||||
get_lang_code(sub): sub
|
||||
for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
|
||||
translation_languages = {
|
||||
lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
|
||||
for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
|
||||
# Only web/mweb clients provide translationLanguages, so include initial_pr in the traversal
|
||||
translation_languages = {
|
||||
lang['languageCode']: self._get_text(lang['languageName'], max_runs=1)
|
||||
for lang in traverse_obj(player_responses, (
|
||||
..., 'captions', 'playerCaptionsTracklistRenderer', 'translationLanguages',
|
||||
lambda _, v: v['languageCode'] and v['languageName']))
|
||||
}
|
||||
# NB: Constructing the full subtitle dictionary is slow
|
||||
get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
|
||||
self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
|
||||
|
||||
def process_language(container, base_url, lang_code, sub_name, query):
|
||||
lang_subs = container.setdefault(lang_code, [])
|
||||
for fmt in self._SUBTITLE_FORMATS:
|
||||
query.update({
|
||||
'fmt': fmt,
|
||||
})
|
||||
lang_subs.append({
|
||||
'ext': fmt,
|
||||
'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
|
||||
'name': sub_name,
|
||||
})
|
||||
# Filter out initial_pr which does not have streamingData (smuggled client context)
|
||||
prs = traverse_obj(player_responses, (
|
||||
lambda _, v: v['streamingData'] and v['captions']['playerCaptionsTracklistRenderer']))
|
||||
all_captions = traverse_obj(prs, (
|
||||
..., 'captions', 'playerCaptionsTracklistRenderer', 'captionTracks', ..., {dict}))
|
||||
need_subs_langs = {get_lang_code(sub) for sub in all_captions if sub.get('kind') != 'asr'}
|
||||
need_caps_langs = {
|
||||
remove_start(get_lang_code(sub), 'a-')
|
||||
for sub in all_captions if sub.get('kind') == 'asr'}
|
||||
|
||||
# NB: Constructing the full subtitle dictionary is slow
|
||||
get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
|
||||
self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
|
||||
for lang_code, caption_track in captions.items():
|
||||
base_url = caption_track.get('baseUrl')
|
||||
orig_lang = parse_qs(base_url).get('lang', [None])[-1]
|
||||
if not base_url:
|
||||
continue
|
||||
for pr in prs:
|
||||
pctr = pr['captions']['playerCaptionsTracklistRenderer']
|
||||
client_name = pr['streamingData'][STREAMING_DATA_CLIENT_NAME]
|
||||
innertube_client_name = pr['streamingData'][STREAMING_DATA_INNERTUBE_CONTEXT]['client']['clientName']
|
||||
required_contexts = self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
|
||||
fetch_subs_po_token_func = pr['streamingData'][STREAMING_DATA_FETCH_SUBS_PO_TOKEN]
|
||||
|
||||
pot_params = {}
|
||||
already_fetched_pot = False
|
||||
|
||||
for caption_track in traverse_obj(pctr, ('captionTracks', lambda _, v: v['baseUrl'])):
|
||||
base_url = caption_track['baseUrl']
|
||||
qs = parse_qs(base_url)
|
||||
lang_code = get_lang_code(caption_track)
|
||||
requires_pot = (
|
||||
# We can detect the experiment for now
|
||||
any(e in traverse_obj(qs, ('exp', ...)) for e in ('xpe', 'xpv'))
|
||||
or _PoTokenContext.SUBS in required_contexts)
|
||||
|
||||
if not already_fetched_pot:
|
||||
already_fetched_pot = True
|
||||
if subs_po_token := fetch_subs_po_token_func(required=requires_pot):
|
||||
pot_params.update({
|
||||
'pot': subs_po_token,
|
||||
'potc': '1',
|
||||
'c': innertube_client_name,
|
||||
})
|
||||
|
||||
if not pot_params and requires_pot:
|
||||
skipped_subs_clients.add(client_name)
|
||||
self._report_pot_subtitles_skipped(video_id, client_name)
|
||||
break
|
||||
|
||||
orig_lang = qs.get('lang', [None])[-1]
|
||||
lang_name = self._get_text(caption_track, 'name', max_runs=1)
|
||||
if caption_track.get('kind') != 'asr':
|
||||
if not lang_code:
|
||||
continue
|
||||
process_language(
|
||||
subtitles, base_url, lang_code, lang_name, {})
|
||||
subtitles, base_url, lang_code, lang_name, client_name, pot_params)
|
||||
if not caption_track.get('isTranslatable'):
|
||||
continue
|
||||
for trans_code, trans_name in translation_languages.items():
|
||||
@ -3818,10 +4031,25 @@ def process_language(container, base_url, lang_code, sub_name, query):
|
||||
# Add an "-orig" label to the original language so that it can be distinguished.
|
||||
# The subs are returned without "-orig" as well for compatibility
|
||||
process_language(
|
||||
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
|
||||
automatic_captions, base_url, f'{trans_code}-orig',
|
||||
f'{trans_name} (Original)', client_name, pot_params)
|
||||
# Setting tlang=lang returns damaged subtitles.
|
||||
process_language(automatic_captions, base_url, trans_code, trans_name,
|
||||
{} if orig_lang == orig_trans_code else {'tlang': trans_code})
|
||||
process_language(
|
||||
automatic_captions, base_url, trans_code, trans_name, client_name,
|
||||
pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params})
|
||||
|
||||
# Avoid duplication if we've already got everything we need
|
||||
need_subs_langs.difference_update(subtitles)
|
||||
need_caps_langs.difference_update(automatic_captions)
|
||||
if not (need_subs_langs or need_caps_langs):
|
||||
break
|
||||
|
||||
if skipped_subs_clients and (need_subs_langs or need_caps_langs):
|
||||
self._report_pot_subtitles_skipped(video_id, True, msg=join_nonempty(
|
||||
f'{video_id}: There are missing subtitles languages because a PO token was not provided.',
|
||||
need_subs_langs and f'Subtitles for these languages are missing: {", ".join(need_subs_langs)}.',
|
||||
need_caps_langs and f'Automatic captions for {len(need_caps_langs)} languages are missing.',
|
||||
delim=' '))
|
||||
|
||||
info['automatic_captions'] = automatic_captions
|
||||
info['subtitles'] = subtitles
|
||||
@ -3874,7 +4102,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
|
||||
if not traverse_obj(initial_data, 'contents'):
|
||||
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
|
||||
initial_data = None
|
||||
if not initial_data:
|
||||
if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
|
||||
query = {'videoId': video_id}
|
||||
query.update(self._get_checkok_params())
|
||||
initial_data = self._extract_response(
|
||||
|
309
yt_dlp/extractor/youtube/pot/README.md
Normal file
309
yt_dlp/extractor/youtube/pot/README.md
Normal file
@ -0,0 +1,309 @@
|
||||
# YoutubeIE PO Token Provider Framework
|
||||
|
||||
As part of the YouTube extractor, we have a framework for providing PO Tokens programmatically. This can be used by plugins.
|
||||
|
||||
Refer to the [PO Token Guide](https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide) for more information on PO Tokens.
|
||||
|
||||
> [!TIP]
|
||||
> If publishing a PO Token Provider plugin to GitHub, add the [yt-dlp-pot-provider](https://github.com/topics/yt-dlp-pot-provider) topic to your repository to help users find it.
|
||||
|
||||
|
||||
## Public APIs
|
||||
|
||||
- `yt_dlp.extractor.youtube.pot.cache`
|
||||
- `yt_dlp.extractor.youtube.pot.provider`
|
||||
- `yt_dlp.extractor.youtube.pot.utils`
|
||||
|
||||
Everything else is internal-only and no guarantees are made about the API stability.
|
||||
|
||||
> [!WARNING]
|
||||
> We will try our best to maintain stability with the public APIs.
|
||||
> However, due to the nature of extractors and YouTube, we may need to remove or change APIs in the future.
|
||||
> If you are using these APIs outside yt-dlp plugins, please account for this by importing them safely.
|
||||
|
||||
## PO Token Provider
|
||||
|
||||
`yt_dlp.extractor.youtube.pot.provider`
|
||||
|
||||
```python
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
PoTokenRequest,
|
||||
PoTokenContext,
|
||||
PoTokenProvider,
|
||||
PoTokenResponse,
|
||||
PoTokenProviderError,
|
||||
PoTokenProviderRejectedRequest,
|
||||
register_provider,
|
||||
register_preference,
|
||||
ExternalRequestFeature,
|
||||
)
|
||||
from yt_dlp.networking.common import Request
|
||||
from yt_dlp.extractor.youtube.pot.utils import get_webpo_content_binding
|
||||
from yt_dlp.utils import traverse_obj
|
||||
from yt_dlp.networking.exceptions import RequestError
|
||||
import json
|
||||
|
||||
|
||||
@register_provider
|
||||
class MyPoTokenProviderPTP(PoTokenProvider): # Provider class name must end with "PTP"
|
||||
PROVIDER_VERSION = '0.2.1'
|
||||
# Define a unique display name for the provider
|
||||
PROVIDER_NAME = 'my-provider'
|
||||
BUG_REPORT_LOCATION = 'https://issues.example.com/report'
|
||||
|
||||
# -- Validation shortcuts. Set these to None to disable. --
|
||||
|
||||
# Innertube Client Name.
|
||||
# For example, "WEB", "ANDROID", "TVHTML5".
|
||||
# For a list of WebPO client names,
|
||||
# see yt_dlp.extractor.youtube.pot.utils.WEBPO_CLIENTS.
|
||||
# Also see yt_dlp.extractor.youtube._base.INNERTUBE_CLIENTS
|
||||
# for a list of client names currently supported by the YouTube extractor.
|
||||
_SUPPORTED_CLIENTS = ('WEB', 'TVHTML5')
|
||||
|
||||
_SUPPORTED_CONTEXTS = (
|
||||
PoTokenContext.GVS,
|
||||
)
|
||||
|
||||
# If your provider makes external requests to websites (i.e. to youtube.com)
|
||||
# using another library or service (i.e., not _request_webpage),
|
||||
# set the request features that are supported here.
|
||||
# If only using _request_webpage to make external requests, set this to None.
|
||||
_SUPPORTED_EXTERNAL_REQUEST_FEATURES = (
|
||||
ExternalRequestFeature.PROXY_SCHEME_HTTP,
|
||||
ExternalRequestFeature.SOURCE_ADDRESS,
|
||||
ExternalRequestFeature.DISABLE_TLS_VERIFICATION
|
||||
)
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""
|
||||
Check if the provider is available (e.g. all required dependencies are available)
|
||||
This is used to determine if the provider should be used and to provide debug information.
|
||||
|
||||
IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.
|
||||
|
||||
Since this is called multiple times, we recommend caching the result.
|
||||
"""
|
||||
return True
|
||||
|
||||
def close(self):
|
||||
# Optional close hook, called when YoutubeDL is closed.
|
||||
pass
|
||||
|
||||
def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
|
||||
# ℹ️ If needed, validate the request before making the request to the external source.
|
||||
# Raise yt_dlp.extractor.youtube.pot.provider.PoTokenProviderRejectedRequest if the request is not supported.
|
||||
if request.is_authenticated:
|
||||
raise PoTokenProviderRejectedRequest(
|
||||
'This provider does not support authenticated requests'
|
||||
)
|
||||
|
||||
# ℹ️ Settings are pulled from extractor args passed to yt-dlp with the key `youtubepot-<PROVIDER_KEY>`.
|
||||
# For this example, the extractor arg would be:
|
||||
# `--extractor-args "youtubepot-mypotokenprovider:url=https://custom.example.com/get_pot"`
|
||||
external_provider_url = self._configuration_arg(
|
||||
'url', default=['https://provider.example.com/get_pot'])[0]
|
||||
|
||||
# See below for logging guidelines
|
||||
self.logger.trace(f'Using external provider URL: {external_provider_url}')
|
||||
|
||||
# You should use the internal HTTP client to make requests where possible,
|
||||
# as it will handle cookies and other networking settings passed to yt-dlp.
|
||||
try:
|
||||
# See docstring in _request_webpage method for request tips
|
||||
response = self._request_webpage(
|
||||
Request(external_provider_url, data=json.dumps({
|
||||
'content_binding': get_webpo_content_binding(request),
|
||||
'proxy': request.request_proxy,
|
||||
'headers': request.request_headers,
|
||||
'source_address': request.request_source_address,
|
||||
'verify_tls': request.request_verify_tls,
|
||||
# Important: If your provider has its own caching, please respect `bypass_cache`.
|
||||
# This may be used in the future to request a fresh PO Token if required.
|
||||
'do_not_cache': request.bypass_cache,
|
||||
}).encode(), proxies={'all': None}),
|
||||
pot_request=request,
|
||||
note=(
|
||||
f'Requesting {request.context.value} PO Token '
|
||||
f'for {request.internal_client_name} client from external provider'),
|
||||
)
|
||||
|
||||
except RequestError as e:
|
||||
# ℹ️ If there is an error, raise PoTokenProviderError.
|
||||
# You can specify whether it is expected or not. If it is unexpected,
|
||||
# the log will include a link to the bug report location (BUG_REPORT_LOCATION).
|
||||
raise PoTokenProviderError(
|
||||
'Networking error while fetching to get PO Token from external provider',
|
||||
expected=True
|
||||
) from e
|
||||
|
||||
# Note: PO Token is expected to be base64url encoded
|
||||
po_token = traverse_obj(response, 'po_token')
|
||||
if not po_token:
|
||||
raise PoTokenProviderError(
|
||||
'Bad PO Token Response from external provider',
|
||||
expected=False
|
||||
)
|
||||
|
||||
return PoTokenResponse(
|
||||
po_token=po_token,
|
||||
# Optional, add a custom expiration timestamp for the token. Use for caching.
|
||||
# By default, yt-dlp will use the default ttl from a registered cache spec (see below)
|
||||
# Set to 0 or -1 to not cache this response.
|
||||
expires_at=None,
|
||||
)
|
||||
|
||||
|
||||
# If there are multiple PO Token Providers that can handle the same PoTokenRequest,
|
||||
# you can define a preference function to increase/decrease the priority of providers.
|
||||
|
||||
@register_preference(MyPoTokenProviderPTP)
|
||||
def my_provider_preference(provider: PoTokenProvider, request: PoTokenRequest) -> int:
|
||||
return 50
|
||||
```
|
||||
|
||||
## Logging Guidelines
|
||||
|
||||
- Use the `self.logger` object to log messages.
|
||||
- When making HTTP requests or any other expensive operation, use `self.logger.info` to log a message to standard non-verbose output.
|
||||
- This lets users know what is happening when a time-expensive operation is taking place.
|
||||
- It is recommended to include the PO Token context and internal client name in the message if possible.
|
||||
- For example, `self.logger.info(f'Requesting {request.context.value} PO Token for {request.internal_client_name} client from external provider')`.
|
||||
- Use `self.logger.debug` to log a message to the verbose output (`--verbose`).
|
||||
- For debugging information visible to users posting verbose logs.
|
||||
- Try not to log too much; prefer trace logging for detailed debug messages.
|
||||
- Use `self.logger.trace` to log a message to the PO Token debug output (`--extractor-args "youtube:pot_trace=true"`).
|
||||
- Log as much as you like here as needed for debugging your provider.
|
||||
- Avoid logging PO Tokens or any sensitive information to debug or info output.
|
||||
|
||||
## Debugging
|
||||
|
||||
- Use `-v --extractor-args "youtube:pot_trace=true"` to enable PO Token debug output.
|
||||
|
||||
## Caching
|
||||
|
||||
> [!WARNING]
|
||||
> The following describes more advanced features that most users/developers will not need to use.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> yt-dlp currently has a built-in LRU Memory Cache Provider and a cache spec provider for WebPO Tokens.
|
||||
> You should only need to implement cache providers if you want an external cache, or a cache spec if you are handling non-WebPO Tokens.
|
||||
|
||||
### Cache Providers
|
||||
|
||||
`yt_dlp.extractor.youtube.pot.cache`
|
||||
|
||||
```python
|
||||
from yt_dlp.extractor.youtube.pot.cache import (
|
||||
PoTokenCacheProvider,
|
||||
register_preference,
|
||||
register_provider
|
||||
)
|
||||
|
||||
from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest
|
||||
|
||||
|
||||
@register_provider
|
||||
class MyCacheProviderPCP(PoTokenCacheProvider): # Provider class name must end with "PCP"
|
||||
PROVIDER_VERSION = '0.1.0'
|
||||
# Define a unique display name for the provider
|
||||
PROVIDER_NAME = 'my-cache-provider'
|
||||
BUG_REPORT_LOCATION = 'https://issues.example.com/report'
|
||||
|
||||
def is_available(self) -> bool:
|
||||
"""
|
||||
Check if the provider is available (e.g. all required dependencies are available)
|
||||
This is used to determine if the provider should be used and to provide debug information.
|
||||
|
||||
IMPORTANT: This method SHOULD NOT make any network requests or perform any expensive operations.
|
||||
|
||||
Since this is called multiple times, we recommend caching the result.
|
||||
"""
|
||||
return True
|
||||
|
||||
def get(self, key: str):
|
||||
# ℹ️ Similar to PO Token Providers, Cache Providers and Cache Spec Providers
|
||||
# are passed down extractor args matching key youtubepot-<PROVIDER_KEY>.
|
||||
some_setting = self._configuration_arg('some_setting', default=['default_value'])[0]
|
||||
return self.my_cache.get(key)
|
||||
|
||||
def store(self, key: str, value: str, expires_at: int):
|
||||
# ⚠ expires_at MUST be respected.
|
||||
# Cache entries should not be returned if they have expired.
|
||||
self.my_cache.store(key, value, expires_at)
|
||||
|
||||
def delete(self, key: str):
|
||||
self.my_cache.delete(key)
|
||||
|
||||
def close(self):
|
||||
# Optional close hook, called when the YoutubeDL instance is closed.
|
||||
pass
|
||||
|
||||
# If there are multiple PO Token Cache Providers available, you can
|
||||
# define a preference function to increase/decrease the priority of providers.
|
||||
|
||||
# IMPORTANT: Provider preferences should be ordered by cache lookup time (faster caches get a higher preference).
|
||||
# For example, a memory cache should have a higher preference than a disk cache.
|
||||
|
||||
# VERY IMPORTANT: yt-dlp has a built-in memory cache with a priority of 10000.
|
||||
# Your cache provider should be lower than this.
|
||||
|
||||
|
||||
@register_preference(MyCacheProviderPCP)
|
||||
def my_cache_preference(provider: PoTokenCacheProvider, request: PoTokenRequest) -> int:
|
||||
return 50
|
||||
```
|
||||
|
||||
### Cache Specs
|
||||
|
||||
`yt_dlp.extractor.youtube.pot.cache`
|
||||
|
||||
These are used to provide information on how to cache a particular PO Token Request.
|
||||
You might have a different cache spec for different kinds of PO Tokens.
|
||||
|
||||
```python
|
||||
from yt_dlp.extractor.youtube.pot.cache import (
|
||||
PoTokenCacheSpec,
|
||||
PoTokenCacheSpecProvider,
|
||||
CacheProviderWritePolicy,
|
||||
register_spec,
|
||||
)
|
||||
from yt_dlp.utils import traverse_obj
|
||||
from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest
|
||||
|
||||
|
||||
@register_spec
|
||||
class MyCacheSpecProviderPCSP(PoTokenCacheSpecProvider): # Provider class name must end with "PCSP"
|
||||
PROVIDER_VERSION = '0.1.0'
|
||||
# Define a unique display name for the provider
|
||||
PROVIDER_NAME = 'mycachespec'
|
||||
BUG_REPORT_LOCATION = 'https://issues.example.com/report'
|
||||
|
||||
def generate_cache_spec(self, request: PoTokenRequest):
|
||||
|
||||
client_name = traverse_obj(request.innertube_context, ('client', 'clientName'))
|
||||
if client_name != 'ANDROID':
|
||||
# ℹ️ If the request is not supported by the cache spec, return None
|
||||
return None
|
||||
|
||||
# Generate a cache spec for the request
|
||||
return PoTokenCacheSpec(
|
||||
# Key bindings to uniquely identify the request. These are used to generate a cache key.
|
||||
key_bindings={
|
||||
'client_name': client_name,
|
||||
'content_binding': 'unique_content_binding',
|
||||
'ip': traverse_obj(request.innertube_context, ('client', 'remoteHost')),
|
||||
'source_address': request.request_source_address,
|
||||
'proxy': request.request_proxy,
|
||||
},
|
||||
# Default Cache TTL in seconds
|
||||
default_ttl=21600,
|
||||
|
||||
# Optional: Specify a write policy.
|
||||
# WRITE_FIRST will write to the highest priority provider only,
|
||||
# whereas WRITE_ALL will write to all providers.
|
||||
# WRITE_FIRST may be useful if the PO Token is short-lived
|
||||
# and there is no use writing to all providers.
|
||||
write_policy=CacheProviderWritePolicy.WRITE_ALL,
|
||||
)
|
||||
```
|
3
yt_dlp/extractor/youtube/pot/__init__.py
Normal file
3
yt_dlp/extractor/youtube/pot/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
# Trigger import of built-in providers
|
||||
from ._builtin.memory_cache import MemoryLRUPCP as _MemoryLRUPCP # noqa: F401
|
||||
from ._builtin.webpo_cachespec import WebPoPCSP as _WebPoPCSP # noqa: F401
|
0
yt_dlp/extractor/youtube/pot/_builtin/__init__.py
Normal file
0
yt_dlp/extractor/youtube/pot/_builtin/__init__.py
Normal file
78
yt_dlp/extractor/youtube/pot/_builtin/memory_cache.py
Normal file
78
yt_dlp/extractor/youtube/pot/_builtin/memory_cache.py
Normal file
@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import typing
|
||||
from threading import Lock
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||||
from yt_dlp.extractor.youtube.pot._registry import _pot_memory_cache
|
||||
from yt_dlp.extractor.youtube.pot.cache import (
|
||||
PoTokenCacheProvider,
|
||||
register_preference,
|
||||
register_provider,
|
||||
)
|
||||
|
||||
|
||||
def initialize_global_cache(max_size: int):
    """
    Set up the shared in-memory PO Token cache on first use and return its parts.

    The cache dict, its lock and the maximum size are stored in
    `_pot_memory_cache.value`. They are created the first time this is called
    and the same objects are handed back on later calls.

    Returns a (cache, lock, max_size) tuple.
    Raises ValueError if the cache already exists with a different max_size.
    """
    state = _pot_memory_cache.value

    if state.get('cache') is None:
        # First call: create the storage, its lock and remember the size
        state['cache'] = {}
        state['lock'] = Lock()
        state['max_size'] = max_size

    # The size recorded at initialization cannot be changed by later callers
    if state['max_size'] != max_size:
        raise ValueError('Cannot change max_size of initialized global memory cache')

    return state['cache'], state['lock'], state['max_size']
|
||||
|
||||
|
||||
@register_provider
class MemoryLRUPCP(PoTokenCacheProvider, BuiltinIEContentProvider):
    """
    In-memory PO Token cache provider with least-recently-used eviction.

    Entries are kept in a dict as `key -> (value, expires_at)`. Recency is tracked
    through the dict's insertion order: reading or writing an entry re-inserts it
    at the end, and the first key is evicted once `max_size` is exceeded.
    Every access to the dict is made while holding `self.lock`.
    """
    PROVIDER_NAME = 'memory'
    DEFAULT_CACHE_SIZE = 25

    def __init__(
        self,
        *args,
        initialize_cache: typing.Callable[[int], tuple[dict[str, tuple[str, int]], Lock, int]] = initialize_global_cache,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        # initialize_cache returns the (cache, lock, max_size) triple to operate on
        cache_parts = initialize_cache(self.DEFAULT_CACHE_SIZE)
        self.cache, self.lock, self.max_size = cache_parts

    def is_available(self) -> bool:
        return True

    def get(self, key: str) -> str | None:
        """Return the cached value for key, or None if it is missing or expired"""
        with self.lock:
            try:
                cached_value, expiry = self.cache.pop(key)
            except KeyError:
                return None

            now = int(dt.datetime.now(dt.timezone.utc).timestamp())
            if expiry >= now:
                # Still valid: re-insert so the entry becomes the most recently used
                self.cache[key] = (cached_value, expiry)
                return cached_value

            # Expired entries stay removed
            return None

    def store(self, key: str, value: str, expires_at: int):
        """Cache value under key until expires_at; already expired entries are not stored"""
        with self.lock:
            now = int(dt.datetime.now(dt.timezone.utc).timestamp())
            if expires_at < now:
                return

            # Drop any existing entry first so the new one is inserted at the end
            self.cache.pop(key, None)
            self.cache[key] = (value, expires_at)

            if len(self.cache) > self.max_size:
                # The first key in iteration order is the least recently used
                del self.cache[next(iter(self.cache))]

    def delete(self, key: str):
        """Remove key from the cache if it is present"""
        with self.lock:
            self.cache.pop(key, None)
|
||||
|
||||
|
||||
@register_preference(MemoryLRUPCP)
def memorylru_preference(*_args, **_kwargs):
    """Return a constant preference of 10000 for the memory cache, whatever the arguments"""
    # Memory LRU Cache SHOULD be the highest priority
    return 10000
|
48
yt_dlp/extractor/youtube/pot/_builtin/webpo_cachespec.py
Normal file
48
yt_dlp/extractor/youtube/pot/_builtin/webpo_cachespec.py
Normal file
@ -0,0 +1,48 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider
|
||||
from yt_dlp.extractor.youtube.pot.cache import (
|
||||
CacheProviderWritePolicy,
|
||||
PoTokenCacheSpec,
|
||||
PoTokenCacheSpecProvider,
|
||||
register_spec,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
PoTokenRequest,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot.utils import ContentBindingType, get_webpo_content_binding
|
||||
from yt_dlp.utils import traverse_obj
|
||||
|
||||
|
||||
@register_spec
class WebPoPCSP(PoTokenCacheSpecProvider, BuiltinIEContentProvider):
    """Cache spec provider for requests that have a WebPO content binding"""
    PROVIDER_NAME = 'webpo'

    def generate_cache_spec(self, request: PoTokenRequest) -> PoTokenCacheSpec | None:
        """
        Build the cache spec for request.

        Returns None when no content binding (or binding type) can be determined.
        """
        visitor_id_arg = self._configuration_arg('bind_to_visitor_id', default=['true'])[0]
        content_binding, content_binding_type = get_webpo_content_binding(
            request, bind_to_visitor_id=visitor_id_arg == 'true')

        if not (content_binding and content_binding_type):
            return None

        # Tokens bound to a video ID are only written to the first cache provider
        write_policy = (
            CacheProviderWritePolicy.WRITE_FIRST
            if content_binding_type == ContentBindingType.VIDEO_ID
            else CacheProviderWritePolicy.WRITE_ALL)

        key_bindings = {
            't': 'webpo',
            'cb': content_binding,
            'cbt': content_binding_type.value,
            'ip': traverse_obj(request.innertube_context, ('client', 'remoteHost')),
            'sa': request.request_source_address,
            'px': request.request_proxy,
        }

        return PoTokenCacheSpec(
            key_bindings=key_bindings,
            # Integrity token response usually states it has a ttl of 12 hours (43200 seconds).
            # We will default to 6 hours to be safe.
            default_ttl=21600,
            write_policy=write_policy,
        )
|
468
yt_dlp/extractor/youtube/pot/_director.py
Normal file
468
yt_dlp/extractor/youtube/pot/_director.py
Normal file
@ -0,0 +1,468 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import dataclasses
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import json
|
||||
import typing
|
||||
import urllib.parse
|
||||
from collections.abc import Iterable
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._provider import (
|
||||
BuiltinIEContentProvider,
|
||||
IEContentProvider,
|
||||
IEContentProviderLogger,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot._registry import (
|
||||
_pot_cache_provider_preferences,
|
||||
_pot_cache_providers,
|
||||
_pot_pcs_providers,
|
||||
_pot_providers,
|
||||
_ptp_preferences,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot.cache import (
|
||||
CacheProviderWritePolicy,
|
||||
PoTokenCacheProvider,
|
||||
PoTokenCacheProviderError,
|
||||
PoTokenCacheSpec,
|
||||
PoTokenCacheSpecProvider,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot.provider import (
|
||||
PoTokenProvider,
|
||||
PoTokenProviderError,
|
||||
PoTokenProviderRejectedRequest,
|
||||
PoTokenRequest,
|
||||
PoTokenResponse,
|
||||
provider_bug_report_message,
|
||||
)
|
||||
from yt_dlp.utils import bug_reports_message, format_field, join_nonempty
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from yt_dlp.extractor.youtube.pot.cache import CacheProviderPreference
|
||||
from yt_dlp.extractor.youtube.pot.provider import Preference
|
||||
|
||||
|
||||
class YoutubeIEContentProviderLogger(IEContentProviderLogger):
    """
    Logger that forwards content provider messages to an extractor's output methods.

    A message is emitted only when `log_level` is at or below the level of the
    method being called. Messages are prefixed with `[<prefix>] ` via format_field.
    """

    def __init__(self, ie, prefix, log_level: IEContentProviderLogger.LogLevel | None = None):
        self.__ie = ie
        self.prefix = prefix
        # Default to INFO when no level is given
        self.log_level = self.LogLevel.INFO if log_level is None else log_level

    def _format_msg(self, message: str):
        return f"{format_field(self.prefix, None, '[%s] ')}{message}"

    def _enabled_for(self, level):
        """Whether messages of the given level pass the configured log_level"""
        return self.log_level <= level

    def trace(self, message: str):
        if not self._enabled_for(self.LogLevel.TRACE):
            return
        # Trace output goes through the debug channel, marked with a TRACE label
        self.__ie.write_debug(self._format_msg('TRACE: ' + message))

    def debug(self, message: str):
        if not self._enabled_for(self.LogLevel.DEBUG):
            return
        self.__ie.write_debug(self._format_msg(message))

    def info(self, message: str):
        if not self._enabled_for(self.LogLevel.INFO):
            return
        self.__ie.to_screen(self._format_msg(message))

    def warning(self, message: str, *, once=False):
        if not self._enabled_for(self.LogLevel.WARNING):
            return
        self.__ie.report_warning(self._format_msg(message), only_once=once)

    def error(self, message: str):
        if not self._enabled_for(self.LogLevel.ERROR):
            return
        # Reported via the downloader with is_error=False
        self.__ie._downloader.report_error(self._format_msg(message), is_error=False)
|
||||
|
||||
|
||||
class PoTokenCache:
    """Front-end over the registered PO Token cache providers.

    A cache key is derived from the cache spec returned by the first cache
    spec provider that yields a valid spec for the request. Lookups and
    writes then go through the cache providers in preference order.
    """

    def __init__(
        self,
        logger: IEContentProviderLogger,
        cache_providers: list[PoTokenCacheProvider],
        cache_spec_providers: list[PoTokenCacheSpecProvider],
        cache_provider_preferences: list[CacheProviderPreference] | None = None,
    ):
        # Providers are indexed by PROVIDER_KEY; falsy arguments give empty containers
        self.cache_providers: dict[str, PoTokenCacheProvider] = {
            cache_provider.PROVIDER_KEY: cache_provider
            for cache_provider in cache_providers or []
        }
        self.cache_provider_preferences: list[CacheProviderPreference] = cache_provider_preferences or []
        self.cache_spec_providers: dict[str, PoTokenCacheSpecProvider] = {
            spec_provider.PROVIDER_KEY: spec_provider
            for spec_provider in cache_spec_providers or []
        }
        self.logger = logger

    def _get_cache_providers(self, request: PoTokenRequest) -> Iterable[PoTokenCacheProvider]:
        """Sorts available cache providers by preference, given a request"""
        scores = {}
        for candidate in self.cache_providers.values():
            # A provider's score is the sum of every registered preference for this request
            scores[candidate] = sum(rate(candidate, request) for rate in self.cache_provider_preferences)

        if self.logger.log_level <= self.logger.LogLevel.TRACE:
            # calling is_available() for every PO Token provider upfront may have some overhead
            self.logger.trace(f'PO Token Cache Providers: {provider_display_list(self.cache_providers.values())}')
            summary = ', '.join(f'{candidate.PROVIDER_KEY}={score}' for candidate, score in scores.items())
            self.logger.trace(f'Cache Provider preferences for this request: {summary}')

        # Ranking happens now; availability is checked lazily as the result is consumed
        ranked = sorted(self.cache_providers.values(), key=scores.get, reverse=True)
        return (candidate for candidate in ranked if candidate.is_available())

    def _get_cache_spec(self, request: PoTokenRequest) -> PoTokenCacheSpec | None:
        """Return the first valid cache spec produced for *request*, or None.

        The returned spec is a copy with ``_provider`` set to the spec provider
        that generated it. Invalid specs and provider errors are logged and
        the next spec provider is tried.
        """
        for provider in self.cache_spec_providers.values():
            if not provider.is_available():
                continue
            try:
                spec = provider.generate_cache_spec(request)
                if not spec:
                    continue
                if validate_cache_spec(spec):
                    spec = dataclasses.replace(spec, _provider=provider)
                    self.logger.trace(
                        f'Retrieved cache spec {spec} from cache spec provider "{provider.PROVIDER_NAME}"')
                    return spec
                self.logger.error(
                    f'PoTokenCacheSpecProvider "{provider.PROVIDER_KEY}" generate_cache_spec() '
                    f'returned invalid spec {spec}{provider_bug_report_message(provider)}')
            except Exception as e:
                self.logger.error(
                    f'Error occurred with "{provider.PROVIDER_NAME}" PO Token cache spec provider: '
                    f'{e!r}{provider_bug_report_message(provider)}')
        return None

    def _generate_key_bindings(self, spec: PoTokenCacheSpec) -> dict[str, str]:
        """Build the bindings that make up the cache key for *spec*.

        Bindings whose value is None are dropped, a cache version marker is
        added, and the spec provider's key is included when one is attached.
        """
        bindings = {name: value for name, value in spec.key_bindings.items() if value is not None}
        # Allow us to invalidate caches if such need arises
        bindings['_dlp_cache'] = 'v1'
        if spec._provider:
            bindings['_p'] = spec._provider.PROVIDER_KEY
        self.logger.trace(f'Generated cache key bindings: {bindings}')
        return bindings

    def _generate_key(self, bindings: dict) -> str:
        """Return the SHA-256 hex digest of the bindings, ordered by key."""
        ordered = dict(sorted(bindings.items()))
        return hashlib.sha256(repr(ordered).encode()).hexdigest()

    def get(self, request: PoTokenRequest) -> PoTokenResponse | None:
        """Look up a cached PO Token response for *request*.

        Cache providers are tried in preference order. An entry that does not
        decode to a valid response is deleted from that provider. A hit from
        any provider other than the first is written back with the
        WRITE_FIRST policy. Returns None when nothing usable is found.
        """
        spec = self._get_cache_spec(request)
        if not spec:
            self.logger.trace('No cache spec available for this request, unable to fetch from cache')
            return None

        cache_key = self._generate_key(self._generate_key_bindings(spec))
        self.logger.trace(f'Attempting to access PO Token cache using key: {cache_key}')

        for position, provider in enumerate(self._get_cache_providers(request)):
            try:
                self.logger.trace(
                    f'Attempting to fetch PO Token response from "{provider.PROVIDER_NAME}" cache provider')
                cache_response = provider.get(cache_key)
                if not cache_response:
                    continue

                try:
                    po_token_response = PoTokenResponse(**json.loads(cache_response))
                except (TypeError, ValueError):
                    # json.JSONDecodeError is a ValueError subclass, so it is covered here
                    po_token_response = None

                if validate_response(po_token_response):
                    self.logger.trace(
                        f'PO Token response retrieved from cache using "{provider.PROVIDER_NAME}" provider: '
                        f'{po_token_response}')
                    if position > 0:
                        # Write back to the highest priority cache provider,
                        # so we stop trying to fetch from lower priority providers
                        self.logger.trace('Writing PO Token response to highest priority cache provider')
                        self.store(request, po_token_response, write_policy=CacheProviderWritePolicy.WRITE_FIRST)
                    return po_token_response

                self.logger.error(
                    f'Invalid PO Token response retrieved from cache provider "{provider.PROVIDER_NAME}": '
                    f'{cache_response}{provider_bug_report_message(provider)}')
                provider.delete(cache_key)
            except PoTokenCacheProviderError as e:
                bug_note = '' if e.expected else provider_bug_report_message(provider)
                self.logger.warning(
                    f'Error from "{provider.PROVIDER_NAME}" PO Token cache provider: '
                    f'{e!r}{bug_note}')
            except Exception as e:
                self.logger.error(
                    f'Error occurred with "{provider.PROVIDER_NAME}" PO Token cache provider: '
                    f'{e!r}{provider_bug_report_message(provider)}',
                )
        return None

    def store(
        self,
        request: PoTokenRequest,
        response: PoTokenResponse,
        write_policy: CacheProviderWritePolicy | None = None,
    ):
        """Write *response* to the cache providers for *request*.

        Nothing is stored when no cache spec is available or the response is
        invalid. A response without ``expires_at`` expires at the current time
        plus the spec's ``default_ttl``. *write_policy* overrides the policy
        of the spec when given.
        """
        spec = self._get_cache_spec(request)
        if not spec:
            self.logger.trace('No cache spec available for this request. Not caching.')
            return

        if not validate_response(response):
            self.logger.error(
                f'Invalid PO Token response provided to PoTokenCache.store(): '
                f'{response}{bug_reports_message()}')
            return

        cache_key = self._generate_key(self._generate_key_bindings(spec))
        self.logger.trace(f'Attempting to access PO Token cache using key: {cache_key}')

        now = int(dt.datetime.now(dt.timezone.utc).timestamp())
        default_expires_at = now + spec.default_ttl
        cache_response = dataclasses.replace(response, expires_at=response.expires_at or default_expires_at)

        write_policy = write_policy or spec.write_policy
        self.logger.trace(f'Using write policy: {write_policy}')
        first_only = write_policy == CacheProviderWritePolicy.WRITE_FIRST

        for provider in self._get_cache_providers(request):
            try:
                self.logger.trace(
                    f'Caching PO Token response in "{provider.PROVIDER_NAME}" cache provider '
                    f'(key={cache_key}, expires_at={cache_response.expires_at})')
                provider.store(
                    key=cache_key,
                    value=json.dumps(dataclasses.asdict(cache_response)),
                    expires_at=cache_response.expires_at)
            except PoTokenCacheProviderError as e:
                bug_note = '' if e.expected else provider_bug_report_message(provider)
                self.logger.warning(
                    f'Error from "{provider.PROVIDER_NAME}" PO Token cache provider: '
                    f'{e!r}{bug_note}')
            except Exception as e:
                self.logger.error(
                    f'Error occurred with "{provider.PROVIDER_NAME}" PO Token cache provider: '
                    f'{e!r}{provider_bug_report_message(provider)}')

            # WRITE_FIRST should not write to lower priority providers in the case the highest priority provider fails
            if first_only:
                return

    def close(self):
        """Close every cache provider, then every cache spec provider."""
        for cache_provider in self.cache_providers.values():
            cache_provider.close()
        for cache_spec_provider in self.cache_spec_providers.values():
            cache_spec_provider.close()
|
||||
|
||||
|
||||
class PoTokenRequestDirector:
    """Routes PO Token requests to the cache and the registered providers."""

    def __init__(self, logger: IEContentProviderLogger, cache: PoTokenCache):
        self.logger = logger
        self.cache = cache
        # Providers are keyed by PROVIDER_KEY; preferences are callables scoring a provider
        self.providers: dict[str, PoTokenProvider] = {}
        self.preferences: list[Preference] = []

    def register_provider(self, provider: PoTokenProvider):
        """Add *provider*, replacing any provider registered under the same key."""
        self.providers[provider.PROVIDER_KEY] = provider

    def register_preference(self, preference: Preference):
        """Add a preference callable used when ranking providers."""
        self.preferences.append(preference)

    def _get_providers(self, request: PoTokenRequest) -> Iterable[PoTokenProvider]:
        """Sorts available providers by preference, given a request"""
        scores = {}
        for candidate in self.providers.values():
            # A provider's score is the sum of every registered preference for this request
            scores[candidate] = sum(rate(candidate, request) for rate in self.preferences)

        if self.logger.log_level <= self.logger.LogLevel.TRACE:
            # calling is_available() for every PO Token provider upfront may have some overhead
            self.logger.trace(f'PO Token Providers: {provider_display_list(self.providers.values())}')
            summary = ', '.join(f'{candidate.PROVIDER_NAME}={score}' for candidate, score in scores.items())
            self.logger.trace(f'Provider preferences for this request: {summary}')

        # Ranking happens now; availability is checked lazily as the result is consumed
        ranked = sorted(self.providers.values(), key=scores.get, reverse=True)
        return (candidate for candidate in ranked if candidate.is_available())

    def _get_po_token(self, request) -> PoTokenResponse | None:
        """Ask each available provider in turn for a PO Token response.

        Each provider receives its own copy of the request. Rejections,
        provider errors, unexpected errors and invalid responses are logged
        and the next provider is tried. Returns None when none succeeds.
        """
        for provider in self._get_providers(request):
            try:
                self.logger.trace(
                    f'Attempting to fetch a PO Token from "{provider.PROVIDER_NAME}" provider')
                response = provider.request_pot(request.copy())
            except PoTokenProviderRejectedRequest as e:
                self.logger.trace(
                    f'PO Token Provider "{provider.PROVIDER_NAME}" rejected this request, '
                    f'trying next available provider. Reason: {e}')
                continue
            except PoTokenProviderError as e:
                bug_note = '' if e.expected else provider_bug_report_message(provider)
                self.logger.warning(
                    f'Error fetching PO Token from "{provider.PROVIDER_NAME}" provider: '
                    f'{e!r}{bug_note}')
                continue
            except Exception as e:
                self.logger.error(
                    f'Unexpected error when fetching PO Token from "{provider.PROVIDER_NAME}" provider: '
                    f'{e!r}{provider_bug_report_message(provider)}')
                continue

            self.logger.trace(f'PO Token response from "{provider.PROVIDER_NAME}" provider: {response}')

            if validate_response(response):
                return response

            self.logger.error(
                f'Invalid PO Token response received from "{provider.PROVIDER_NAME}" provider: '
                f'{response}{provider_bug_report_message(provider)}')

        self.logger.trace('No PO Token providers were able to provide a valid PO Token')
        return None

    def get_po_token(self, request: PoTokenRequest) -> str | None:
        """Return a cleaned PO Token for *request*, or None if none is available.

        The cache is consulted first unless ``request.bypass_cache`` is set.
        A freshly fetched response is stored in the cache when its
        ``expires_at`` is None or greater than zero.
        """
        cached = None if request.bypass_cache else self.cache.get(request)
        if cached:
            return clean_pot(cached.po_token)

        if not self.providers:
            self.logger.trace('No PO Token providers registered')
            return None

        pot_response = self._get_po_token(request)
        if not pot_response:
            return None

        pot_response.po_token = clean_pot(pot_response.po_token)

        expires_at = pot_response.expires_at
        if expires_at is not None and expires_at <= 0:
            self.logger.trace(
                f'PO Token response will not be cached (expires_at={pot_response.expires_at})')
        else:
            self.cache.store(request, pot_response)

        return pot_response.po_token

    def close(self):
        """Close all registered providers, then the cache."""
        for provider in self.providers.values():
            provider.close()
        self.cache.close()
|
||||
|
||||
|
||||
EXTRACTOR_ARG_PREFIX = 'youtubepot'
|
||||
|
||||
|
||||
def initialize_pot_director(ie):
    """Create a PoTokenRequestDirector populated from the provider registries.

    The log level is TRACE when the ``pot_trace`` extractor argument is
    ``true``, DEBUG when the ``verbose`` param is set, and INFO otherwise.
    Every registered cache provider, cache spec provider and PO Token provider
    class is instantiated with its own logger and extractor-arg settings.
    The director's ``close`` is registered as a downloader close hook.
    """
    assert ie._downloader is not None, 'Downloader not set'

    levels = IEContentProviderLogger.LogLevel
    trace_setting = ie._configuration_arg('pot_trace', ['false'], ie_key='youtube', casesense=False)
    if trace_setting[0] == 'true':
        log_level = levels.TRACE
    elif ie.get_param('verbose', False):
        log_level = levels.DEBUG
    else:
        log_level = levels.INFO

    def get_provider_logger_and_settings(provider, logger_key):
        # Settings come from the extractor args keyed by "<prefix>-<lowercased provider key>"
        logger_prefix = f'{logger_key}:{provider.PROVIDER_NAME}'
        extractor_key = f'{EXTRACTOR_ARG_PREFIX}-{provider.PROVIDER_KEY.lower()}'
        provider_logger = YoutubeIEContentProviderLogger(ie, logger_prefix, log_level=log_level)
        provider_settings = ie.get_param('extractor_args', {}).get(extractor_key, {})
        return provider_logger, provider_settings

    def instantiate(provider_cls, logger_key):
        provider_logger, provider_settings = get_provider_logger_and_settings(provider_cls, logger_key)
        return provider_cls(ie, provider_logger, provider_settings)

    cache_providers = [
        instantiate(provider_cls, 'pot:cache')
        for provider_cls in _pot_cache_providers.value.values()
    ]
    cache_spec_providers = [
        instantiate(provider_cls, 'pot:cache:spec')
        for provider_cls in _pot_pcs_providers.value.values()
    ]

    cache = PoTokenCache(
        logger=YoutubeIEContentProviderLogger(ie, 'pot:cache', log_level=log_level),
        cache_providers=cache_providers,
        cache_spec_providers=cache_spec_providers,
        cache_provider_preferences=list(_pot_cache_provider_preferences.value),
    )
    director = PoTokenRequestDirector(
        logger=YoutubeIEContentProviderLogger(ie, 'pot', log_level=log_level),
        cache=cache,
    )

    ie._downloader.add_close_hook(director.close)

    for provider_cls in _pot_providers.value.values():
        director.register_provider(instantiate(provider_cls, 'pot'))

    for preference in _ptp_preferences.value:
        director.register_preference(preference)

    log = director.logger
    if log.log_level <= log.LogLevel.DEBUG:
        # calling is_available() for every PO Token provider upfront may have some overhead
        log.debug(f'PO Token Providers: {provider_display_list(director.providers.values())}')
        log.debug(f'PO Token Cache Providers: {provider_display_list(cache.cache_providers.values())}')
        log.debug(f'PO Token Cache Spec Providers: {provider_display_list(cache.cache_spec_providers.values())}')
        log.trace(f'Registered {len(director.preferences)} provider preferences')
        log.trace(f'Registered {len(cache.cache_provider_preferences)} cache provider preferences')

    return director
|
||||
|
||||
|
||||
def provider_display_list(providers: Iterable[IEContentProvider]):
    """Return a comma-separated description of *providers*, or ``'none'`` if empty.

    Providers that are not built in are shown with their version and marked
    ``external``; providers reporting themselves unavailable are marked
    ``unavailable``.
    """
    def provider_display_name(provider):
        external = not isinstance(provider, BuiltinIEContentProvider)
        label = join_nonempty(
            provider.PROVIDER_NAME,
            provider.PROVIDER_VERSION if external else None)

        flags = []
        if external:
            flags.append('external')
        if not provider.is_available():
            flags.append('unavailable')

        if not flags:
            return label
        return label + f' ({", ".join(flags)})'

    return ', '.join(map(provider_display_name, providers)) or 'none'
|
||||
|
||||
|
||||
def clean_pot(po_token: str):
    """Return *po_token* normalised to URL-safe base64.

    The token is percent-decoded, base64-decoded and re-encoded. This will
    strip invalid characters off (e.g. additional url params the user may
    accidentally include).

    Raises ValueError('Invalid PO Token') when the token cannot be decoded.
    """
    unquoted = urllib.parse.unquote(po_token)
    try:
        raw = base64.urlsafe_b64decode(unquoted)
        return base64.urlsafe_b64encode(raw).decode()
    except ValueError:
        # binascii.Error is a ValueError subclass, so decode failures land here too
        raise ValueError('Invalid PO Token')
|
||||
|
||||
|
||||
def validate_response(response: PoTokenResponse | None):
    """Return True when *response* is a usable PoTokenResponse.

    The token must be a non-empty string accepted by clean_pot(). The
    ``expires_at`` field must be None, or an int that is either zero/negative
    or later than the current UTC timestamp.
    """
    if not isinstance(response, PoTokenResponse):
        return False

    token = response.po_token
    if not (isinstance(token, str) and token):
        return False

    try:
        clean_pot(token)
    except ValueError:
        return False

    expires_at = response.expires_at
    if isinstance(expires_at, int):
        now = int(dt.datetime.now(dt.timezone.utc).timestamp())
        return expires_at <= 0 or expires_at > now

    # Anything that is not an int is only acceptable when it is None
    return expires_at is None
|
||||
|
||||
|
||||
def validate_cache_spec(spec: PoTokenCacheSpec):
    """Return True when *spec* is a well-formed PoTokenCacheSpec.

    The write policy and default TTL must have the expected types, and the
    key bindings must be a dict of str keys to str-or-None values with at
    least one value that is not None.
    """
    if not isinstance(spec, PoTokenCacheSpec):
        return False
    if not isinstance(spec.write_policy, CacheProviderWritePolicy):
        return False
    if not isinstance(spec.default_ttl, int):
        return False

    bindings = spec.key_bindings
    if not isinstance(bindings, dict):
        return False
    if not all(isinstance(name, str) for name in bindings):
        return False
    if not all(value is None or isinstance(value, str) for value in bindings.values()):
        return False

    # A spec whose bindings are all None cannot identify a cache entry
    return any(value is not None for value in bindings.values())
|
156
yt_dlp/extractor/youtube/pot/_provider.py
Normal file
156
yt_dlp/extractor/youtube/pot/_provider.py
Normal file
@ -0,0 +1,156 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import enum
|
||||
import functools
|
||||
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
from yt_dlp.utils import NO_DEFAULT, bug_reports_message, classproperty, traverse_obj
|
||||
from yt_dlp.version import __version__
|
||||
|
||||
# xxx: these could be generalized outside YoutubeIE eventually
|
||||
|
||||
|
||||
class IEContentProviderLogger(abc.ABC):
    """Abstract logger interface handed to content providers."""

    class LogLevel(enum.IntEnum):
        """Ordered log levels; a lower value is more verbose."""

        TRACE = 0
        DEBUG = 10
        INFO = 20
        WARNING = 30
        ERROR = 40

        @classmethod
        def _missing_(cls, value):
            # Strings are matched against member names case-insensitively;
            # anything unrecognised falls back to INFO.
            if isinstance(value, str):
                member = cls.__members__.get(value.upper())
                if member is not None:
                    return member
            return cls.INFO

    log_level = LogLevel.INFO

    @abc.abstractmethod
    def trace(self, message: str):
        """Log *message* at TRACE level."""

    @abc.abstractmethod
    def debug(self, message: str):
        """Log *message* at DEBUG level."""

    @abc.abstractmethod
    def info(self, message: str):
        """Log *message* at INFO level."""

    @abc.abstractmethod
    def warning(self, message: str, *, once=False):
        """Log *message* at WARNING level."""

    @abc.abstractmethod
    def error(self, message: str):
        """Log *message* at ERROR level."""
|
||||
|
||||
|
||||
class IEContentProviderError(Exception):
    """Base error for content providers.

    The ``expected`` flag is stored on the instance for callers to inspect.
    """

    def __init__(self, msg=None, expected=False):
        self.expected = expected
        super().__init__(msg)
|
||||
|
||||
|
||||
class IEContentProvider(abc.ABC):
    """Base class for content providers attached to an extractor.

    Subclass families pass ``suffix=...`` in the class statement; the provider
    key and name are the class name with that suffix removed.
    """

    PROVIDER_VERSION: str = '0.0.0'
    BUG_REPORT_LOCATION: str = '(developer has not provided a bug report location)'

    def __init__(
        self,
        ie: InfoExtractor,
        logger: IEContentProviderLogger,
        settings: dict[str, list[str]], *_, **__,
    ):
        # Extra positional/keyword arguments are accepted and ignored
        self.ie = ie
        self.settings = settings or {}
        self.logger = logger
        super().__init__()

    @classmethod
    def __init_subclass__(cls, *, suffix=None, **kwargs):
        # Only overwrite the suffix when one was given, so subclasses inherit it otherwise
        if suffix:
            cls._PROVIDER_KEY_SUFFIX = suffix
        return super().__init_subclass__(**kwargs)

    @classproperty
    def PROVIDER_NAME(cls) -> str:
        suffix_length = len(cls._PROVIDER_KEY_SUFFIX)
        return cls.__name__[:-suffix_length]

    @classproperty
    def BUG_REPORT_MESSAGE(cls):
        return f'please report this issue to the provider developer at {cls.BUG_REPORT_LOCATION} .'

    @classproperty
    def PROVIDER_KEY(cls) -> str:
        assert hasattr(cls, '_PROVIDER_KEY_SUFFIX'), 'Content Provider implementation must define a suffix for the provider key'
        assert cls.__name__.endswith(cls._PROVIDER_KEY_SUFFIX), f'PoTokenProvider class names must end with "{cls._PROVIDER_KEY_SUFFIX}"'
        suffix_length = len(cls._PROVIDER_KEY_SUFFIX)
        return cls.__name__[:-suffix_length]

    @abc.abstractmethod
    def is_available(self) -> bool:
        """
        Check if the provider is available (e.g. all required dependencies are available)
        This is used to determine if the provider should be used and to provide debug information.

        IMPORTANT: This method should not make any network requests or perform any expensive operations.
        It is called multiple times.
        """
        raise NotImplementedError

    def close(self):  # noqa: B027
        """Hook for releasing resources; does nothing by default."""

    def _configuration_arg(self, key, default=NO_DEFAULT, *, casesense=False):
        """
        @returns            A list of values for the setting given by "key"
                            or "default" if no such key is present
        @param default      The default value to return when the key is not present (default: [])
        @param casesense    When false, the values are converted to lower case
        """
        values = traverse_obj(self.settings, key)
        if values is None:
            return [] if default is NO_DEFAULT else default
        if casesense:
            return list(values)
        return [value.lower() for value in values]
|
||||
|
||||
|
||||
class BuiltinIEContentProvider(IEContentProvider, abc.ABC):
    """Base class for providers that ship with the program itself."""

    # Built-in providers are versioned with the package version
    PROVIDER_VERSION = __version__
    # Replaces the per-provider bug report text with the project-wide message
    BUG_REPORT_MESSAGE = bug_reports_message(before='')
|
||||
|
||||
|
||||
def register_provider_generic(
    provider,
    base_class,
    registry,
):
    """Generic function to register a provider class

    Stores *provider* in *registry* under its PROVIDER_KEY and returns it,
    so the function can back a class decorator. Asserts that the provider
    derives from *base_class* and that its key is not registered yet.
    """
    assert issubclass(provider, base_class), f'{provider} must be a subclass of {base_class.__name__}'
    key = provider.PROVIDER_KEY
    assert key not in registry, f'{base_class.__name__} {key} already registered'
    registry[key] = provider
    return provider
|
||||
|
||||
|
||||
def register_preference_generic(
    base_class,
    registry,
    *providers,
):
    """Generic function to register a preference for a provider

    Returns a decorator. The decorated preference is wrapped so that it only
    applies to instances of *providers* (or to every provider when none are
    given) and yields 0 otherwise. The wrapper is added to *registry*; the
    decorator hands back the undecorated preference.
    """
    assert all(issubclass(provider, base_class) for provider in providers)

    def decorator(preference):
        @functools.wraps(preference)
        def scoped(provider, *args, **kwargs):
            applies = not providers or isinstance(provider, providers)
            return preference(provider, *args, **kwargs) if applies else 0

        registry.add(scoped)
        return preference

    return decorator
|
8
yt_dlp/extractor/youtube/pot/_registry.py
Normal file
8
yt_dlp/extractor/youtube/pot/_registry.py
Normal file
@ -0,0 +1,8 @@
|
||||
from yt_dlp.globals import Indirect
|
||||
|
||||
_pot_providers = Indirect({})
|
||||
_ptp_preferences = Indirect(set())
|
||||
_pot_pcs_providers = Indirect({})
|
||||
_pot_cache_providers = Indirect({})
|
||||
_pot_cache_provider_preferences = Indirect(set())
|
||||
_pot_memory_cache = Indirect({})
|
97
yt_dlp/extractor/youtube/pot/cache.py
Normal file
97
yt_dlp/extractor/youtube/pot/cache.py
Normal file
@ -0,0 +1,97 @@
|
||||
"""PUBLIC API"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import dataclasses
|
||||
import enum
|
||||
import typing
|
||||
|
||||
from yt_dlp.extractor.youtube.pot._provider import (
|
||||
IEContentProvider,
|
||||
IEContentProviderError,
|
||||
register_preference_generic,
|
||||
register_provider_generic,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot._registry import (
|
||||
_pot_cache_provider_preferences,
|
||||
_pot_cache_providers,
|
||||
_pot_pcs_providers,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest
|
||||
|
||||
|
||||
class PoTokenCacheProviderError(IEContentProviderError):
    """An error raised by a PO Token cache provider"""
|
||||
|
||||
|
||||
class PoTokenCacheProvider(IEContentProvider, abc.ABC, suffix='PCP'):
    """Abstract string key/value store used to cache PO Token responses."""

    @abc.abstractmethod
    def get(self, key: str) -> str | None:
        """Return the value stored under *key*, or None."""

    @abc.abstractmethod
    def store(self, key: str, value: str, expires_at: int):
        """Store *value* under *key* together with its expiry."""

    @abc.abstractmethod
    def delete(self, key: str):
        """Remove the entry stored under *key*."""
|
||||
|
||||
|
||||
class CacheProviderWritePolicy(enum.Enum):
    """Controls how many cache providers receive a write."""

    # Write to all cache providers
    WRITE_ALL = enum.auto()
    # Write to only the first cache provider
    WRITE_FIRST = enum.auto()
|
||||
|
||||
|
||||
@dataclasses.dataclass
class PoTokenCacheSpec:
    """Describes how a PO Token response for a request should be cached."""

    # Values the cache key is derived from; entries with a None value are ignored
    key_bindings: dict[str, str | None]
    # Lifetime added to the current timestamp when a response carries no expiry
    default_ttl: int
    # Which cache providers receive the write
    write_policy: CacheProviderWritePolicy = CacheProviderWritePolicy.WRITE_ALL

    # Internal
    _provider: PoTokenCacheSpecProvider | None = None
|
||||
|
||||
|
||||
class PoTokenCacheSpecProvider(IEContentProvider, abc.ABC, suffix='PCSP'):
    """Produces cache specs for PO Token requests."""

    def is_available(self) -> bool:
        # Spec providers are available unless a subclass says otherwise
        return True

    @abc.abstractmethod
    def generate_cache_spec(self, request: PoTokenRequest) -> PoTokenCacheSpec | None:
        """Generate a cache spec for the given request"""
|
||||
|
||||
|
||||
def register_provider(provider: type[PoTokenCacheProvider]):
    """Register a PoTokenCacheProvider class"""
    registry = _pot_cache_providers.value
    return register_provider_generic(provider, PoTokenCacheProvider, registry)
|
||||
|
||||
|
||||
def register_spec(provider: type[PoTokenCacheSpecProvider]):
    """Register a PoTokenCacheSpecProvider class"""
    registry = _pot_pcs_providers.value
    return register_provider_generic(provider, PoTokenCacheSpecProvider, registry)
|
||||
|
||||
|
||||
def register_preference(
        *providers: type[PoTokenCacheProvider]) -> typing.Callable[[CacheProviderPreference], CacheProviderPreference]:
    """Register a preference for a PoTokenCacheProvider"""
    registry = _pot_cache_provider_preferences.value
    return register_preference_generic(PoTokenCacheProvider, registry, *providers)
|
||||
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
CacheProviderPreference = typing.Callable[[PoTokenCacheProvider, PoTokenRequest], int]
|
281
yt_dlp/extractor/youtube/pot/provider.py
Normal file
281
yt_dlp/extractor/youtube/pot/provider.py
Normal file
@ -0,0 +1,281 @@
|
||||
"""PUBLIC API"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
import copy
|
||||
import dataclasses
|
||||
import enum
|
||||
import functools
|
||||
import typing
|
||||
import urllib.parse
|
||||
|
||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||
from yt_dlp.extractor.youtube.pot._provider import (
|
||||
IEContentProvider,
|
||||
IEContentProviderError,
|
||||
register_preference_generic,
|
||||
register_provider_generic,
|
||||
)
|
||||
from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences
|
||||
from yt_dlp.networking import Request, Response
|
||||
from yt_dlp.utils import traverse_obj
|
||||
from yt_dlp.utils.networking import HTTPHeaderDict
|
||||
|
||||
__all__ = [
|
||||
'ExternalRequestFeature',
|
||||
'PoTokenContext',
|
||||
'PoTokenProvider',
|
||||
'PoTokenProviderError',
|
||||
'PoTokenProviderRejectedRequest',
|
||||
'PoTokenRequest',
|
||||
'PoTokenResponse',
|
||||
'provider_bug_report_message',
|
||||
'register_preference',
|
||||
'register_provider',
|
||||
]
|
||||
|
||||
|
||||
class PoTokenContext(enum.Enum):
    """The context a PO Token is requested for."""

    GVS = 'gvs'
    PLAYER = 'player'
    SUBS = 'subs'
|
||||
|
||||
|
||||
@dataclasses.dataclass
class PoTokenRequest:
    """Everything a provider is given when asked for a PO Token."""

    # YouTube parameters
    context: PoTokenContext
    innertube_context: InnertubeContext
    innertube_host: str | None = None
    session_index: str | None = None
    player_url: str | None = None
    is_authenticated: bool = False
    video_webpage: str | None = None
    internal_client_name: str | None = None

    # Content binding parameters
    visitor_data: str | None = None
    data_sync_id: str | None = None
    video_id: str | None = None

    # Networking parameters
    request_cookiejar: YoutubeDLCookieJar = dataclasses.field(default_factory=YoutubeDLCookieJar)
    request_proxy: str | None = None
    request_headers: HTTPHeaderDict = dataclasses.field(default_factory=HTTPHeaderDict)
    request_timeout: float | None = None
    request_source_address: str | None = None
    request_verify_tls: bool = True

    # Generate a new token, do not use a cached token
    # The token should still be cached for future requests
    bypass_cache: bool = False

    def copy(self):
        """Return a copy with its own headers and innertube context.

        The headers are rebuilt into a new HTTPHeaderDict and the innertube
        context is deep-copied; all other fields are carried over as-is.
        """
        headers_copy = HTTPHeaderDict(self.request_headers)
        context_copy = copy.deepcopy(self.innertube_context)
        return dataclasses.replace(
            self,
            request_headers=headers_copy,
            innertube_context=context_copy,
        )
|
||||
|
||||
|
||||
@dataclasses.dataclass
class PoTokenResponse:
    """A PO Token together with an optional expiry."""

    po_token: str
    # None means the provider supplied no expiry
    expires_at: int | None = None
|
||||
|
||||
|
||||
class PoTokenProviderRejectedRequest(IEContentProviderError):
    """Raised by a provider that cannot handle the given PoTokenRequest"""
|
||||
|
||||
|
||||
class PoTokenProviderError(IEContentProviderError):
    """Raised when a provider fails while fetching a PO Token"""
|
||||
|
||||
|
||||
class ExternalRequestFeature(enum.Enum):
    """Request features a provider may declare support for in its external requests."""

    # Proxy schemes
    PROXY_SCHEME_HTTP = enum.auto()
    PROXY_SCHEME_HTTPS = enum.auto()
    PROXY_SCHEME_SOCKS4 = enum.auto()
    PROXY_SCHEME_SOCKS4A = enum.auto()
    PROXY_SCHEME_SOCKS5 = enum.auto()
    PROXY_SCHEME_SOCKS5H = enum.auto()
    # Other request options
    SOURCE_ADDRESS = enum.auto()
    DISABLE_TLS_VERIFICATION = enum.auto()
|
||||
|
||||
|
||||
class PoTokenProvider(IEContentProvider, abc.ABC, suffix='PTP'):
|
||||
|
||||
# Set to None to disable the check
|
||||
_SUPPORTED_CONTEXTS: tuple[PoTokenContext] | None = ()
|
||||
|
||||
# Innertube Client Name.
|
||||
# For example, "WEB", "ANDROID", "TVHTML5".
|
||||
# For a list of WebPO client names, see yt_dlp.extractor.youtube.pot.utils.WEBPO_CLIENTS.
|
||||
# Also see yt_dlp.extractor.youtube._base.INNERTUBE_CLIENTS
|
||||
# for a list of client names currently supported by the YouTube extractor.
|
||||
_SUPPORTED_CLIENTS: tuple[str] | None = ()
|
||||
|
||||
# If making external requests to websites (i.e. to youtube.com)
|
||||
# using another library or service (i.e., not _request_webpage),
|
||||
# add the request features that are supported.
|
||||
# If only using _request_webpage to make external requests, set this to None.
|
||||
_SUPPORTED_EXTERNAL_REQUEST_FEATURES: tuple[ExternalRequestFeature] | None = ()
|
||||
|
||||
def __validate_request(self, request: PoTokenRequest):
|
||||
if not self.is_available():
|
||||
raise PoTokenProviderRejectedRequest(f'{self.PROVIDER_NAME} is not available')
|
||||
|
||||
# Validate request using built-in settings
|
||||
if (
|
||||
self._SUPPORTED_CONTEXTS is not None
|
||||
and request.context not in self._SUPPORTED_CONTEXTS
|
||||
):
|
||||
raise PoTokenProviderRejectedRequest(
|
||||
f'PO Token Context "{request.context}" is not supported by {self.PROVIDER_NAME}')
|
||||
|
||||
if self._SUPPORTED_CLIENTS is not None:
|
||||
client_name = traverse_obj(
|
||||
request.innertube_context, ('client', 'clientName'))
|
||||
if client_name not in self._SUPPORTED_CLIENTS:
|
||||
raise PoTokenProviderRejectedRequest(
|
||||
f'Client "{client_name}" is not supported by {self.PROVIDER_NAME}. '
|
||||
f'Supported clients: {", ".join(self._SUPPORTED_CLIENTS) or "none"}')
|
||||
|
||||
self.__validate_external_request_features(request)
|
||||
|
||||
@functools.cached_property
|
||||
def _supported_proxy_schemes(self):
|
||||
return {
|
||||
scheme: feature
|
||||
for scheme, feature in {
|
||||
'http': ExternalRequestFeature.PROXY_SCHEME_HTTP,
|
||||
'https': ExternalRequestFeature.PROXY_SCHEME_HTTPS,
|
||||
'socks4': ExternalRequestFeature.PROXY_SCHEME_SOCKS4,
|
||||
'socks4a': ExternalRequestFeature.PROXY_SCHEME_SOCKS4A,
|
||||
'socks5': ExternalRequestFeature.PROXY_SCHEME_SOCKS5,
|
||||
'socks5h': ExternalRequestFeature.PROXY_SCHEME_SOCKS5H,
|
||||
}.items()
|
||||
if feature in (self._SUPPORTED_EXTERNAL_REQUEST_FEATURES or [])
|
||||
}
|
||||
|
||||
def __validate_external_request_features(self, request: PoTokenRequest):
|
||||
if self._SUPPORTED_EXTERNAL_REQUEST_FEATURES is None:
|
||||
return
|
||||
|
||||
if request.request_proxy:
|
||||
scheme = urllib.parse.urlparse(request.request_proxy).scheme
|
||||
if scheme.lower() not in self._supported_proxy_schemes:
|
||||
raise PoTokenProviderRejectedRequest(
|
||||
f'External requests by "{self.PROVIDER_NAME}" provider do not '
|
||||
f'support proxy scheme "{scheme}". Supported proxy schemes: '
|
||||
f'{", ".join(self._supported_proxy_schemes) or "none"}')
|
||||
|
||||
if (
|
||||
request.request_source_address
|
||||
and ExternalRequestFeature.SOURCE_ADDRESS not in self._SUPPORTED_EXTERNAL_REQUEST_FEATURES
|
||||
):
|
||||
raise PoTokenProviderRejectedRequest(
|
||||
f'External requests by "{self.PROVIDER_NAME}" provider '
|
||||
f'do not support setting source address')
|
||||
|
||||
if (
|
||||
not request.request_verify_tls
|
||||
and ExternalRequestFeature.DISABLE_TLS_VERIFICATION not in self._SUPPORTED_EXTERNAL_REQUEST_FEATURES
|
||||
):
|
||||
raise PoTokenProviderRejectedRequest(
|
||||
f'External requests by "{self.PROVIDER_NAME}" provider '
|
||||
f'do not support ignoring TLS certificate failures')
|
||||
|
||||
def request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
|
||||
self.__validate_request(request)
|
||||
return self._real_request_pot(request)
|
||||
|
||||
@abc.abstractmethod
|
||||
def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
|
||||
"""To be implemented by subclasses"""
|
||||
pass
|
||||
|
||||
# Helper functions
|
||||
|
||||
def _request_webpage(self, request: Request, pot_request: PoTokenRequest | None = None, note=None, **kwargs) -> Response:
    """Make a request using the internal HTTP Client.
    Use this instead of calling requests, urllib3 or other HTTP client libraries directly!

    YouTube cookies will be automatically applied if this request is made to YouTube.

    The given request is copied before it is modified, so the caller's object is left untouched.
    Additional keyword arguments are accepted but are not used by this method.

    @param request: The request to make
    @param pot_request: The PoTokenRequest to use. Request parameters will be merged from it.
    @param note: Custom log message to display when making the request. Set to `False` to disable logging.
    @returns: The response returned by the downloader's `urlopen`.

    Tips:
    - Disable proxy (e.g. if calling local service): Request(..., proxies={'all': None})
    - Set request timeout: Request(..., extensions={'timeout': 5.0})
    """
    outgoing = request.copy()

    # Merge some ctx request settings into the request
    # Most of these will already be used by the configured ydl instance,
    # however, the YouTube extractor may override some.
    if pot_request is not None:
        # Headers already present on the request are passed last to HTTPHeaderDict
        outgoing.headers = HTTPHeaderDict(pot_request.request_headers, outgoing.headers)

        # Proxies set explicitly on the request win over the PoTokenRequest proxy
        inherited_proxies = {'all': pot_request.request_proxy} if pot_request.request_proxy else {}
        outgoing.proxies = outgoing.proxies or inherited_proxies

        inherited_cookiejar = pot_request.request_cookiejar
        if inherited_cookiejar is not None:
            # Keep a cookiejar the caller already attached to the request
            outgoing.extensions.setdefault('cookiejar', inherited_cookiejar)

    if note is not False:
        message = str(note) if note else 'Requesting webpage'
        self.logger.info(message)

    return self.ie._downloader.urlopen(outgoing)
|
||||
|
||||
|
||||
def register_provider(provider: type[PoTokenProvider]):
    """Register a PoTokenProvider class

    @param provider: The provider class to register.
    @returns: The result of `register_provider_generic` for this provider.
    """
    registry = _pot_providers.value
    return register_provider_generic(provider=provider, base_class=PoTokenProvider, registry=registry)
|
||||
|
||||
|
||||
def provider_bug_report_message(provider: IEContentProvider, before=';'):
    """Build the bug report message of a provider, appended to some preceding text.

    @param provider: Object whose `BUG_REPORT_MESSAGE` supplies the message text.
    @param before: Text the message follows. Trailing whitespace is stripped from it.
                   If it is empty or ends with '.', '!' or '?', the message starts a
                   new sentence and its first character is capitalized.
    @returns: `before` and the message joined by a single space, or only the
              message when `before` is empty after stripping.
    """
    msg = provider.BUG_REPORT_MESSAGE

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Slice instead of indexing so that an empty message does not raise IndexError
        msg = msg[:1].title() + msg[1:]

    return f'{before} {msg}' if before else msg
|
||||
|
||||
|
||||
def register_preference(*providers: type[PoTokenProvider]) -> typing.Callable[[Preference], Preference]:
    """Register a preference for a PoTokenProvider

    @param providers: The provider classes forwarded to `register_preference_generic`.
    @returns: The result of `register_preference_generic`.
    """
    preferences = _ptp_preferences.value
    return register_preference_generic(PoTokenProvider, preferences, *providers)
|
||||
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
Preference = typing.Callable[[PoTokenProvider, PoTokenRequest], int]
|
||||
__all__.append('Preference')
|
||||
|
||||
# Barebones innertube context. There may be more fields.
|
||||
class ClientInfo(typing.TypedDict, total=False):
|
||||
hl: str | None
|
||||
gl: str | None
|
||||
remoteHost: str | None
|
||||
deviceMake: str | None
|
||||
deviceModel: str | None
|
||||
visitorData: str | None
|
||||
userAgent: str | None
|
||||
clientName: str
|
||||
clientVersion: str
|
||||
osName: str | None
|
||||
osVersion: str | None
|
||||
|
||||
class InnertubeContext(typing.TypedDict, total=False):
|
||||
client: ClientInfo
|
||||
request: dict
|
||||
user: dict
|
73
yt_dlp/extractor/youtube/pot/utils.py
Normal file
73
yt_dlp/extractor/youtube/pot/utils.py
Normal file
@ -0,0 +1,73 @@
|
||||
"""PUBLIC API"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import contextlib
|
||||
import enum
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from yt_dlp.extractor.youtube.pot.provider import PoTokenContext, PoTokenRequest
|
||||
from yt_dlp.utils import traverse_obj
|
||||
|
||||
__all__ = ['WEBPO_CLIENTS', 'ContentBindingType', 'get_webpo_content_binding']
|
||||
|
||||
# Innertube client names that get_webpo_content_binding() accepts by default
# (this tuple is the default value of its `webpo_clients` parameter).
WEBPO_CLIENTS = (
    'WEB', 'MWEB', 'TVHTML5',
    'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'WEB_REMIX',
    'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
)
|
||||
|
||||
|
||||
class ContentBindingType(enum.Enum):
    """Kind of value that get_webpo_content_binding() returned as the content binding."""

    # The binding is the request's visitor_data, unmodified
    VISITOR_DATA = 'visitor_data'
    # The binding is the request's data_sync_id (authenticated requests)
    DATASYNC_ID = 'datasync_id'
    # The binding is the request's video_id
    VIDEO_ID = 'video_id'
    # The binding is the visitor ID extracted from the request's visitor_data
    VISITOR_ID = 'visitor_id'
|
||||
|
||||
|
||||
def get_webpo_content_binding(
    request: PoTokenRequest,
    webpo_clients=WEBPO_CLIENTS,
    bind_to_visitor_id=False,
) -> tuple[str | None, ContentBindingType | None]:
    """Work out the content binding for a PO Token request and what kind of value it is.

    @param request: The PoTokenRequest to inspect.
    @param webpo_clients: Client names for which a binding is computed; for any
                          other (or a missing) client name the result is (None, None).
    @param bind_to_visitor_id: For unauthenticated session-bound requests, try to
                               bind to the visitor ID extracted from the visitor data
                               and fall back to the raw visitor data if that fails.
    @returns: (content binding, ContentBindingType), or (None, None) if no binding applies.
    """
    no_binding = (None, None)

    client_name = traverse_obj(request.innertube_context, ('client', 'clientName'))
    if not (client_name and client_name in webpo_clients):
        return no_binding

    # GVS requests, and every WEB_REMIX request, bind to the session rather than the video
    session_bound = request.context == PoTokenContext.GVS or client_name == 'WEB_REMIX'
    if session_bound:
        if request.is_authenticated:
            return request.data_sync_id, ContentBindingType.DATASYNC_ID

        visitor_id = _extract_visitor_id(request.visitor_data) if bind_to_visitor_id else None
        if visitor_id:
            return visitor_id, ContentBindingType.VISITOR_ID
        return request.visitor_data, ContentBindingType.VISITOR_DATA

    if request.context in (PoTokenContext.PLAYER, PoTokenContext.SUBS):
        return request.video_id, ContentBindingType.VIDEO_ID

    return no_binding
|
||||
|
||||
|
||||
def _extract_visitor_id(visitor_data):
    """Best-effort extraction of the 11-character visitor ID from visitor data.

    The visitor data is URL-unquoted and decoded as URL-safe base64; bytes 2-12 of
    the decoded payload are taken as the visitor ID. Any failure along the way is
    suppressed and reported as None.

    @param visitor_data: The (possibly URL-quoted) visitor data string. May be None or empty.
    @returns: The visitor ID, or None if it could not be extracted.
    """
    if not visitor_data:
        return None

    # Attempt to extract the visitor ID from the visitor_data protobuf
    # xxx: ideally should use a protobuf parser
    with contextlib.suppress(Exception):
        encoded = urllib.parse.unquote_plus(visitor_data)
        # base64url values may come without their trailing "=" padding, which the
        # decoder would reject. Surplus padding is ignored by the (non-strict)
        # decoder, so appending it unconditionally is safe for padded input too.
        decoded = base64.urlsafe_b64decode(encoded + '==')
        visitor_id = decoded[2:13].decode()
        # check that the visitor id is exactly 11 URL-safe base64 characters
        if re.fullmatch(r'[A-Za-z0-9_-]{11}', visitor_id):
            return visitor_id

    return None
|
File diff suppressed because it is too large
Load Diff
@ -590,39 +590,12 @@ def dict_item(key, val):
|
||||
return ret, True
|
||||
return ret, False
|
||||
|
||||
for m in re.finditer(rf'''(?x)
|
||||
(?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
|
||||
(?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr):
|
||||
var = m.group('var1') or m.group('var2')
|
||||
start, end = m.span()
|
||||
sign = m.group('pre_sign') or m.group('post_sign')
|
||||
ret = local_vars[var]
|
||||
local_vars[var] += 1 if sign[0] == '+' else -1
|
||||
if m.group('pre_sign'):
|
||||
ret = local_vars[var]
|
||||
expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
|
||||
|
||||
if not expr:
|
||||
return None, should_return
|
||||
|
||||
m = re.match(fr'''(?x)
|
||||
(?P<assign>
|
||||
(?P<out>{_NAME_RE})(?:\[(?P<index>{_NESTED_BRACKETS})\])?\s*
|
||||
(?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
|
||||
=(?!=)(?P<expr>.*)$
|
||||
)|(?P<return>
|
||||
(?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
|
||||
)|(?P<attribute>
|
||||
(?P<var>{_NAME_RE})(?:
|
||||
(?P<nullish>\?)?\.(?P<member>[^(]+)|
|
||||
\[(?P<member2>{_NESTED_BRACKETS})\]
|
||||
)\s*
|
||||
)|(?P<indexing>
|
||||
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
||||
)|(?P<function>
|
||||
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
|
||||
)''', expr)
|
||||
if m and m.group('assign'):
|
||||
''', expr)
|
||||
if m: # We are assigning a value to a variable
|
||||
left_val = local_vars.get(m.group('out'))
|
||||
|
||||
if not m.group('index'):
|
||||
@ -640,7 +613,35 @@ def dict_item(key, val):
|
||||
m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
|
||||
return left_val[idx], should_return
|
||||
|
||||
elif expr.isdigit():
|
||||
for m in re.finditer(rf'''(?x)
|
||||
(?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
|
||||
(?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr):
|
||||
var = m.group('var1') or m.group('var2')
|
||||
start, end = m.span()
|
||||
sign = m.group('pre_sign') or m.group('post_sign')
|
||||
ret = local_vars[var]
|
||||
local_vars[var] += 1 if sign[0] == '+' else -1
|
||||
if m.group('pre_sign'):
|
||||
ret = local_vars[var]
|
||||
expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]
|
||||
|
||||
if not expr:
|
||||
return None, should_return
|
||||
|
||||
m = re.match(fr'''(?x)
|
||||
(?P<return>
|
||||
(?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
|
||||
)|(?P<attribute>
|
||||
(?P<var>{_NAME_RE})(?:
|
||||
(?P<nullish>\?)?\.(?P<member>[^(]+)|
|
||||
\[(?P<member2>{_NESTED_BRACKETS})\]
|
||||
)\s*
|
||||
)|(?P<indexing>
|
||||
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
|
||||
)|(?P<function>
|
||||
(?P<fname>{_NAME_RE})\((?P<args>.*)\)$
|
||||
)''', expr)
|
||||
if expr.isdigit():
|
||||
return int(expr), should_return
|
||||
|
||||
elif expr == 'break':
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
from .common import (
|
||||
HEADRequest,
|
||||
PATCHRequest,
|
||||
PUTRequest,
|
||||
Request,
|
||||
RequestDirector,
|
||||
|
@ -6,7 +6,8 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from ._helper import InstanceStoreMixin, select_proxy
|
||||
from ._helper import InstanceStoreMixin
|
||||
from ..utils.networking import select_proxy
|
||||
from .common import (
|
||||
Features,
|
||||
Request,
|
||||
|
@ -13,7 +13,6 @@
|
||||
from .exceptions import RequestError
|
||||
from ..dependencies import certifi
|
||||
from ..socks import ProxyType, sockssocket
|
||||
from ..utils import format_field, traverse_obj
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
@ -82,19 +81,6 @@ def unquote_if_non_empty(s):
|
||||
}
|
||||
|
||||
|
||||
def select_proxy(url, proxies):
    """Unified proxy selector for all backends

    Returns None when `proxies` has a 'no' entry and the URL's host[:port] is
    bypassed by that entry or by the system proxy settings. Otherwise the proxy
    is looked up in `proxies` by the URL scheme ('http' if the URL has none),
    with 'all' passed as the alternative key.
    """
    parsed_url = urllib.parse.urlparse(url)

    if 'no' in proxies:
        hostport = parsed_url.hostname + format_field(parsed_url.port, None, ':%s')
        bypassed = (
            urllib.request.proxy_bypass_environment(hostport, {'no': proxies['no']})
            or urllib.request.proxy_bypass(hostport)  # check system settings
        )
        if bypassed:
            return None

    return traverse_obj(proxies, parsed_url.scheme or 'http', 'all')
|
||||
|
||||
|
||||
def get_redirect_method(method, status):
|
||||
"""Unified redirect method handling"""
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
from ..dependencies import brotli, requests, urllib3
|
||||
from ..utils import bug_reports_message, int_or_none, variadic
|
||||
from ..utils.networking import normalize_url
|
||||
from ..utils.networking import normalize_url, select_proxy
|
||||
|
||||
if requests is None:
|
||||
raise ImportError('requests module is not installed')
|
||||
@ -41,7 +41,6 @@
|
||||
create_socks_proxy_socket,
|
||||
get_redirect_method,
|
||||
make_socks_proxy_opts,
|
||||
select_proxy,
|
||||
)
|
||||
from .common import (
|
||||
Features,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user