mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-06-27 08:58:30 +00:00)

commit defa3c11b6: Merge remote-tracking branch 'origin/master' into tvw-news

.github/workflows/build.yml (vendored): 6 lines changed
@@ -192,7 +192,7 @@ jobs:
with:
path: ./repo
- name: Virtualized Install, Prepare & Build
uses: yt-dlp/run-on-arch-action@v2
uses: yt-dlp/run-on-arch-action@v3
with:
# Ref: https://github.com/uraimo/run-on-arch-action/issues/55
env: |
@@ -411,7 +411,7 @@ jobs:
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.13.0-py3-none-any.whl"

- name: Prepare
run: |
@@ -460,7 +460,7 @@ jobs:
run: |
python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.13.0-py3-none-any.whl"

- name: Prepare
run: |

.github/workflows/core.yml (vendored): 4 lines changed
@@ -6,7 +6,7 @@ on:
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- '!yt_dlp/extractor/**.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py
@@ -16,7 +16,7 @@ on:
- devscripts/**
- test/**
- yt_dlp/**.py
- '!yt_dlp/extractor/*.py'
- '!yt_dlp/extractor/**.py'
- yt_dlp/extractor/__init__.py
- yt_dlp/extractor/common.py
- yt_dlp/extractor/extractors.py

.github/workflows/quick-test.yml (vendored): 2 lines changed
@@ -38,3 +38,5 @@ jobs:
run: ruff check --output-format github .
- name: Run autopep8
run: autopep8 --diff .
- name: Check file mode
run: git ls-files --format="%(objectmode) %(path)" yt_dlp/ | ( ! grep -v "^100644" )

CONTRIBUTORS: 10 lines changed
@@ -760,3 +760,13 @@ vallovic
arabcoders
mireq
mlabeeb03
1271
CasperMcFadden95
Kicer86
Kiritomo
leeblackc
meGAmeS1
NeonMan
pj47x
troex
WouterGordts

Changelog.md: 79 lines changed
@@ -4,6 +4,85 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->

### 2025.04.30

#### Important changes
- **New option `--preset-alias`/`-t` has been added**
This provides convenient predefined aliases for common use cases. Available presets include `mp4`, `mp3`, `mkv`, `aac`, and `sleep`. See [the README](https://github.com/yt-dlp/yt-dlp/blob/master/README.md#preset-aliases) for more details.

#### Core changes
- [Add `--preset-alias` option](https://github.com/yt-dlp/yt-dlp/commit/88eb1e7a9a2720ac89d653c0d0e40292388823bb) ([#12839](https://github.com/yt-dlp/yt-dlp/issues/12839)) by [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **utils**
- `_yield_json_ld`: [Make function less fatal](https://github.com/yt-dlp/yt-dlp/commit/45f01de00e1bc076b7f676a669736326178647b1) ([#12855](https://github.com/yt-dlp/yt-dlp/issues/12855)) by [seproDev](https://github.com/seproDev)
- `url_or_none`: [Support WebSocket URLs](https://github.com/yt-dlp/yt-dlp/commit/a473e592337edb8ca40cde52c1fcaee261c54df9) ([#12848](https://github.com/yt-dlp/yt-dlp/issues/12848)) by [doe1080](https://github.com/doe1080)
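
As a quick illustration of the `url_or_none` change above (a sketch based on the changelog entry, not on the patch itself):

```python
from yt_dlp.utils import url_or_none

# With WebSocket support, ws/wss URLs should now pass through unchanged
# instead of being rejected; invalid strings still return None.
print(url_or_none('wss://example.com/live/socket'))  # 'wss://example.com/live/socket'
print(url_or_none('not a url'))                      # None
```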
#### Extractor changes
- **abematv**: [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/f5736bb35bde62348caebf7b188668655e316deb) ([#12859](https://github.com/yt-dlp/yt-dlp/issues/12859)) by [Kiritomo](https://github.com/Kiritomo)
- **atresplayer**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/839d64325356310e6de6cd9cad28fb546619ca63) ([#11424](https://github.com/yt-dlp/yt-dlp/issues/11424)) by [meGAmeS1](https://github.com/meGAmeS1), [seproDev](https://github.com/seproDev)
- **bpb**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/80736b9c90818adee933a155079b8535bc06819f) ([#13015](https://github.com/yt-dlp/yt-dlp/issues/13015)) by [bashonly](https://github.com/bashonly)
- **cda**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/9032f981362ea0be90626fab51ec37934feded6d) ([#12975](https://github.com/yt-dlp/yt-dlp/issues/12975)) by [bashonly](https://github.com/bashonly)
- **cdafolder**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cb271d445bc2d866c9a3404b1d8f59bcb77447df) ([#12919](https://github.com/yt-dlp/yt-dlp/issues/12919)) by [fireattack](https://github.com/fireattack), [Kicer86](https://github.com/Kicer86)
- **crowdbunker**: [Make format extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/4ebf41309d04a6e196944f1c0f5f0154cff0055a) ([#12836](https://github.com/yt-dlp/yt-dlp/issues/12836)) by [seproDev](https://github.com/seproDev)
- **dacast**: [Support tokenized URLs](https://github.com/yt-dlp/yt-dlp/commit/e7e3b7a55c456da4a5a812b4fefce4dce8e6a616) ([#12979](https://github.com/yt-dlp/yt-dlp/issues/12979)) by [bashonly](https://github.com/bashonly)
- **dzen.ru**: [Rework extractors](https://github.com/yt-dlp/yt-dlp/commit/a3f2b54c2535d862de6efa9cfaa6ca9a2b2f7dd6) ([#12852](https://github.com/yt-dlp/yt-dlp/issues/12852)) by [seproDev](https://github.com/seproDev)
- **generic**: [Fix MPD extraction for `file://` URLs](https://github.com/yt-dlp/yt-dlp/commit/34a061a295d156934417c67ee98070b94943006b) ([#12978](https://github.com/yt-dlp/yt-dlp/issues/12978)) by [bashonly](https://github.com/bashonly)
- **getcourseru**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/741fd809bc4d301c19b53877692ae510334a6750) ([#12943](https://github.com/yt-dlp/yt-dlp/issues/12943)) by [troex](https://github.com/troex)
- **ivoox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7faa18b83dcfc74a1a1e2034e6b0369c495ca645) ([#12768](https://github.com/yt-dlp/yt-dlp/issues/12768)) by [NeonMan](https://github.com/NeonMan), [seproDev](https://github.com/seproDev)
- **kika**: [Add playlist extractor](https://github.com/yt-dlp/yt-dlp/commit/3c1c75ecb8ab352f422b59af46fff2be992e4115) ([#12832](https://github.com/yt-dlp/yt-dlp/issues/12832)) by [1100101](https://github.com/1100101)
- **linkedin**
- [Support feed URLs](https://github.com/yt-dlp/yt-dlp/commit/73a26f9ee68610e33c0b4407b77355f2ab7afd0e) ([#12927](https://github.com/yt-dlp/yt-dlp/issues/12927)) by [seproDev](https://github.com/seproDev)
- events: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b37ff4de5baf4e4e70c6a0ec34e136a279ad20af) ([#12926](https://github.com/yt-dlp/yt-dlp/issues/12926)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- **loco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f5a37ea40e20865b976ffeeff13eeae60292eb23) ([#12934](https://github.com/yt-dlp/yt-dlp/issues/12934)) by [seproDev](https://github.com/seproDev)
- **lrtradio**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/74e90dd9b8f9c1a5c48a2515126654f4d398d687) ([#12801](https://github.com/yt-dlp/yt-dlp/issues/12801)) by [subrat-lima](https://github.com/subrat-lima)
- **manyvids**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/77aa15e98f34c4ad425aabf39dd1ee37b48f772c) ([#10907](https://github.com/yt-dlp/yt-dlp/issues/10907)) by [pj47x](https://github.com/pj47x)
- **mixcloud**: [Refactor extractor](https://github.com/yt-dlp/yt-dlp/commit/db6d1f145ad583e0220637726029f8f2fa6200a0) ([#12830](https://github.com/yt-dlp/yt-dlp/issues/12830)) by [seproDev](https://github.com/seproDev), [WouterGordts](https://github.com/WouterGordts)
- **mlbtv**: [Fix device ID caching](https://github.com/yt-dlp/yt-dlp/commit/36da6360e130197df927ee93409519ce3f4075f5) ([#12980](https://github.com/yt-dlp/yt-dlp/issues/12980)) by [bashonly](https://github.com/bashonly)
- **niconico**
- [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/25cd7c1ecbb6cbf21dd3a6e59608e4af94715ecc) ([#13008](https://github.com/yt-dlp/yt-dlp/issues/13008)) by [doe1080](https://github.com/doe1080)
- [Remove DMC formats support](https://github.com/yt-dlp/yt-dlp/commit/7d05aa99c65352feae1cd9a3ff8784b64bfe382a) ([#12916](https://github.com/yt-dlp/yt-dlp/issues/12916)) by [doe1080](https://github.com/doe1080)
- live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1d45e30537bf83e069184a440703e4c43b2e0198) ([#12809](https://github.com/yt-dlp/yt-dlp/issues/12809)) by [Snack-X](https://github.com/Snack-X)
- **panopto**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/9d26daa04ad5108257bc5e30f7f040c7f1fe7a5a) ([#12925](https://github.com/yt-dlp/yt-dlp/issues/12925)) by [seproDev](https://github.com/seproDev)
- **parti**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/425017531fbc3369becb5a44013e26f26efabf45) ([#12769](https://github.com/yt-dlp/yt-dlp/issues/12769)) by [benfaerber](https://github.com/benfaerber)
- **raiplay**: [Fix DRM detection](https://github.com/yt-dlp/yt-dlp/commit/dce82346245e35a46fda836ca2089805d2347935) ([#12971](https://github.com/yt-dlp/yt-dlp/issues/12971)) by [DTrombett](https://github.com/DTrombett)
- **reddit**: [Support `--ignore-no-formats-error`](https://github.com/yt-dlp/yt-dlp/commit/28f04e8a5e383ff531db646190b4be45554610d6) ([#12993](https://github.com/yt-dlp/yt-dlp/issues/12993)) by [bashonly](https://github.com/bashonly)
- **royalive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e1847535e28788414a25546a45bebcada2f34558) ([#12817](https://github.com/yt-dlp/yt-dlp/issues/12817)) by [CasperMcFadden95](https://github.com/CasperMcFadden95)
- **rtve**: [Rework extractors](https://github.com/yt-dlp/yt-dlp/commit/f07ee91c71920ab1187a7ea756720e81aa406a9d) ([#10388](https://github.com/yt-dlp/yt-dlp/issues/10388)) by [meGAmeS1](https://github.com/meGAmeS1), [seproDev](https://github.com/seproDev)
- **rumble**: [Improve format extraction](https://github.com/yt-dlp/yt-dlp/commit/58d0c83457b93b3c9a81eb6bc5a4c65f25e949df) ([#12838](https://github.com/yt-dlp/yt-dlp/issues/12838)) by [seproDev](https://github.com/seproDev)
- **tokfmpodcast**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/91832111a12d87499294a0f430829b8c2254c339) ([#12842](https://github.com/yt-dlp/yt-dlp/issues/12842)) by [selfisekai](https://github.com/selfisekai)
- **tv2dk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a3e91df30a45943f40759d2c1e0b6c2ca4b2a263) ([#12945](https://github.com/yt-dlp/yt-dlp/issues/12945)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- **tvp**: vod: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/4e69a626cce51428bc1d66dc606a56d9498b03a5) ([#12923](https://github.com/yt-dlp/yt-dlp/issues/12923)) by [seproDev](https://github.com/seproDev)
- **tvw**: tvchannels: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed8ad1b4d6b9d7a1426ff5192ff924f3371e4721) ([#12721](https://github.com/yt-dlp/yt-dlp/issues/12721)) by [fries1234](https://github.com/fries1234)
- **twitcasting**: [Fix livestream extraction](https://github.com/yt-dlp/yt-dlp/commit/de271a06fd6d20d4f55597ff7f90e4d913de0a52) ([#12977](https://github.com/yt-dlp/yt-dlp/issues/12977)) by [bashonly](https://github.com/bashonly)
- **twitch**: clips: [Fix uploader metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1ae6bff564a65af41e94f1a4727892471ecdd05a) ([#13022](https://github.com/yt-dlp/yt-dlp/issues/13022)) by [1271](https://github.com/1271)
- **twitter**
- [Fix extraction when logged-in](https://github.com/yt-dlp/yt-dlp/commit/1cf39ddf3d10b6512daa7dd139e5f6c0dc548bbc) ([#13024](https://github.com/yt-dlp/yt-dlp/issues/13024)) by [bashonly](https://github.com/bashonly)
- spaces: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/70599e53b736bb75922b737e6e0d4f76e419bb20) ([#12911](https://github.com/yt-dlp/yt-dlp/issues/12911)) by [doe1080](https://github.com/doe1080)
- **vimeo**: [Extract from mobile API](https://github.com/yt-dlp/yt-dlp/commit/22ac81a0692019ac833cf282e4ef99718e9ef3fa) ([#13034](https://github.com/yt-dlp/yt-dlp/issues/13034)) by [bashonly](https://github.com/bashonly)
- **vk**
- [Fix chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/5361a7c6e2933c919716e0cb1e3116c28c40419f) ([#12821](https://github.com/yt-dlp/yt-dlp/issues/12821)) by [seproDev](https://github.com/seproDev)
- [Fix uploader extraction](https://github.com/yt-dlp/yt-dlp/commit/2381881fe58a723853350a6ab750a5efc9f10c85) ([#12985](https://github.com/yt-dlp/yt-dlp/issues/12985)) by [seproDev](https://github.com/seproDev)
- **youtube**
- [Add context to video request rate limit error](https://github.com/yt-dlp/yt-dlp/commit/26feac3dd142536ad08ad1ed731378cb88e63602) ([#12958](https://github.com/yt-dlp/yt-dlp/issues/12958)) by [coletdjnz](https://github.com/coletdjnz)
- [Add extractor arg to skip "initial_data" request](https://github.com/yt-dlp/yt-dlp/commit/ed6c6d7eefbc78fa72e4e60ad6edaa3ee2acc715) ([#12865](https://github.com/yt-dlp/yt-dlp/issues/12865)) by [leeblackc](https://github.com/leeblackc)
- [Add warning on video captcha challenge](https://github.com/yt-dlp/yt-dlp/commit/f484c51599a6cd01eb078ea7dc9bbba942967774) ([#12939](https://github.com/yt-dlp/yt-dlp/issues/12939)) by [coletdjnz](https://github.com/coletdjnz)
- [Cache signature timestamps](https://github.com/yt-dlp/yt-dlp/commit/61c9a938b390b8334ee3a879fe2d93f714e30138) ([#13047](https://github.com/yt-dlp/yt-dlp/issues/13047)) by [bashonly](https://github.com/bashonly)
- [Detect and warn when account cookies are rotated](https://github.com/yt-dlp/yt-dlp/commit/8cb08028f5be2acb9835ce1670b196b9b077052f) ([#13014](https://github.com/yt-dlp/yt-dlp/issues/13014)) by [coletdjnz](https://github.com/coletdjnz)
- [Detect player JS variants for any locale](https://github.com/yt-dlp/yt-dlp/commit/c2d6659d1069f8cff97e1fd61d1c59e949e1e63d) ([#13003](https://github.com/yt-dlp/yt-dlp/issues/13003)) by [bashonly](https://github.com/bashonly)
- [Do not strictly deprioritize `missing_pot` formats](https://github.com/yt-dlp/yt-dlp/commit/74fc2ae12c24eb6b4e02c6360c89bd05f3c8f740) ([#13061](https://github.com/yt-dlp/yt-dlp/issues/13061)) by [bashonly](https://github.com/bashonly)
- [Improve warning for SABR-only/SSAP player responses](https://github.com/yt-dlp/yt-dlp/commit/fd8394bc50301ac5e930aa65aa71ab1b8372b8ab) ([#13049](https://github.com/yt-dlp/yt-dlp/issues/13049)) by [bashonly](https://github.com/bashonly)
- tab: [Extract continuation from empty page](https://github.com/yt-dlp/yt-dlp/commit/72ba4879304c2082fecbb472e6cc05ee2d154a3b) ([#12938](https://github.com/yt-dlp/yt-dlp/issues/12938)) by [coletdjnz](https://github.com/coletdjnz)
- **zdf**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/7be14109a6bd493a2e881da4f9e30adaf3e7e5d5) ([#12779](https://github.com/yt-dlp/yt-dlp/issues/12779)) by [bashonly](https://github.com/bashonly), [InvalidUsernameException](https://github.com/InvalidUsernameException)

#### Downloader changes
- **niconicodmc**: [Remove downloader](https://github.com/yt-dlp/yt-dlp/commit/8d127b18f81131453eaba05d3bb810d9b73adb75) ([#12916](https://github.com/yt-dlp/yt-dlp/issues/12916)) by [doe1080](https://github.com/doe1080)

#### Networking changes
- [Add PATCH request shortcut](https://github.com/yt-dlp/yt-dlp/commit/ceab4d5ed63a1f135a1816fe967c9d9a1ec7e6e8) ([#12884](https://github.com/yt-dlp/yt-dlp/issues/12884)) by [doe1080](https://github.com/doe1080)

#### Misc. changes
- **ci**: [Add file mode test to code check](https://github.com/yt-dlp/yt-dlp/commit/3690e91265d1d0bbeffaf6a9b8cc9baded1367bd) ([#13036](https://github.com/yt-dlp/yt-dlp/issues/13036)) by [Grub4K](https://github.com/Grub4K)
- **cleanup**: Miscellaneous: [505b400](https://github.com/yt-dlp/yt-dlp/commit/505b400795af557bdcfd9d4fa7e9133b26ef431c) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)

### 2025.03.31

#### Core changes

README.md: 30 lines changed
@@ -386,6 +386,12 @@ ## General Options:
recursive options. As a safety measure, each
alias may be triggered a maximum of 100
times. This option can be used multiple times
-t, --preset-alias PRESET Applies a predefined set of options. e.g.
--preset-alias mp3. The following presets
are available: mp3, aac, mp4, mkv, sleep.
See the "Preset Aliases" section at the end
for more info. This option can be used
multiple times

## Network Options:
--proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. To
@@ -1098,6 +1104,23 @@ ## Extractor Options:
can use this option multiple times to give
arguments for different extractors

## Preset Aliases:
-t mp3 -f 'ba[acodec^=mp3]/ba/b' -x --audio-format
mp3

-t aac -f
'ba[acodec^=aac]/ba[acodec^=mp4a.40.]/ba/b'
-x --audio-format aac

-t mp4 --merge-output-format mp4 --remux-video mp4
-S vcodec:h264,lang,quality,res,fps,hdr:12,a
codec:aac

-t mkv --merge-output-format mkv --remux-video mkv

-t sleep --sleep-subtitles 5 --sleep-requests 0.75
--sleep-interval 10 --max-sleep-interval 20
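
As an aside, a minimal Python sketch of what the `mp3` preset above corresponds to when using yt-dlp as a library; the mapping to `YoutubeDL` options is an assumption based on the expansion shown above, since the preset itself is a CLI feature:

```python
import yt_dlp

# Rough embedded-API equivalent of `yt-dlp -t mp3 URL`
# (-f 'ba[acodec^=mp3]/ba/b' -x --audio-format mp3, per the table above).
ydl_opts = {
    'format': 'ba[acodec^=mp3]/ba/b',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',  # what -x/--audio-format maps to
        'preferredcodec': 'mp3',
    }],
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```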

# CONFIGURATION

You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations:
@@ -1769,7 +1792,7 @@ # EXTRACTOR ARGUMENTS
#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
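
The `player_client` and `player_skip` arguments documented above can also be passed when embedding yt-dlp. A small illustrative sketch (the specific values are examples, not something prescribed by this diff):

```python
import yt_dlp

ydl_opts = {
    'extractor_args': {
        'youtube': {
            'player_client': ['default', '-ios'],  # like youtube:player_client=default,-ios
            'player_skip': ['initial_data'],       # skip the initial data/next ep request
        },
    },
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
    print(info['title'])
```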
@@ -1799,9 +1822,6 @@ #### generic
#### vikichannel
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`

#### niconico
* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.**

#### youtubewebarchive
* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`

@@ -2153,7 +2173,7 @@ ### New features
* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.
* **YouTube improvements**:
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
@@ -245,5 +245,14 @@
"when": "76ac023ff02f06e8c003d104f02a03deeddebdcd",
"short": "[ie/youtube:tab] Improve shorts title extraction (#11997)",
"authors": ["bashonly", "d3d9"]
},
{
"action": "add",
"when": "88eb1e7a9a2720ac89d653c0d0e40292388823bb",
"short": "[priority] **New option `--preset-alias`/`-t` has been added**\nThis provides convenient predefined aliases for common use cases. Available presets include `mp4`, `mp3`, `mkv`, `aac`, and `sleep`. See [the README](https://github.com/yt-dlp/yt-dlp/blob/master/README.md#preset-aliases) for more details."
},
{
"action": "remove",
"when": "d596824c2f8428362c072518856065070616e348"
}
]

@@ -82,7 +82,7 @@ test = [
"pytest-rerunfailures~=14.0",
]
pyinstaller = [
"pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
"pyinstaller>=6.13.0", # Windows temp cleanup fixed in 6.13.0
]

[project.urls]
@@ -394,6 +394,8 @@ # Supported sites
- **dvtv**: http://video.aktualne.cz/
- **dw**: (**Currently broken**)
- **dw:article**: (**Currently broken**)
- **dzen.ru**: Дзен (dzen) formerly Яндекс.Дзен (Yandex Zen)
- **dzen.ru:channel**
- **EaglePlatform**
- **EbaumsWorld**
- **Ebay**
@@ -634,6 +636,7 @@ # Supported sites
- **ivi**: ivi.ru
- **ivi:compilation**: ivi.ru compilations
- **ivideon**: Ivideon TV
- **Ivoox**
- **IVXPlayer**
- **iwara**: [*iwara*](## "netrc machine")
- **iwara:playlist**: [*iwara*](## "netrc machine")
@@ -671,6 +674,7 @@ # Supported sites
- **Kicker**
- **KickStarter**
- **Kika**: KiKA.de
- **KikaPlaylist**
- **kinja:embed**
- **KinoPoisk**
- **Kommunetv**
@@ -723,6 +727,7 @@ # Supported sites
- **limelight:channel**
- **limelight:channel_list**
- **LinkedIn**: [*linkedin*](## "netrc machine")
- **linkedin:events**: [*linkedin*](## "netrc machine")
- **linkedin:learning**: [*linkedin*](## "netrc machine")
- **linkedin:learning:course**: [*linkedin*](## "netrc machine")
- **Liputan6**
@@ -738,6 +743,7 @@ # Supported sites
- **loom**
- **loom:folder**
- **LoveHomePorn**
- **LRTRadio**
- **LRTStream**
- **LRTVOD**
- **LSMLREmbed**
@@ -759,7 +765,7 @@ # Supported sites
- **ManotoTV**: Manoto TV (Episode)
- **ManotoTVLive**: Manoto TV (Live)
- **ManotoTVShow**: Manoto TV (Show)
- **ManyVids**: (**Currently broken**)
- **ManyVids**
- **MaoriTV**
- **Markiza**: (**Currently broken**)
- **MarkizaPage**: (**Currently broken**)
@@ -946,7 +952,7 @@ # Supported sites
- **nickelodeonru**
- **niconico**: [*niconico*](## "netrc machine") ニコニコ動画
- **niconico:history**: NicoNico user history or likes. Requires cookies.
- **niconico:live**: ニコニコ生放送
- **niconico:live**: [*niconico*](## "netrc machine") ニコニコ生放送
- **niconico:playlist**
- **niconico:series**
- **niconico:tag**: NicoNico video tag URLs
@@ -1053,6 +1059,8 @@ # Supported sites
- **Parler**: Posts on parler.com
- **parliamentlive.tv**: UK parliament videos
- **Parlview**: (**Currently broken**)
- **parti:livestream**
- **parti:video**
- **patreon**
- **patreon:campaign**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
@@ -1227,6 +1235,7 @@ # Supported sites
- **RoosterTeeth**: [*roosterteeth*](## "netrc machine")
- **RoosterTeethSeries**: [*roosterteeth*](## "netrc machine")
- **RottenTomatoes**
- **RoyaLive**
- **Rozhlas**
- **RozhlasVltava**
- **RTBF**: [*rtbf*](## "netrc machine") (**Currently broken**)
@@ -1247,9 +1256,8 @@ # Supported sites
- **RTVCKaltura**
- **RTVCPlay**
- **RTVCPlayEmbed**
- **rtve.es:alacarta**: RTVE a la carta
- **rtve.es:alacarta**: RTVE a la carta and Play
- **rtve.es:audio**: RTVE audio
- **rtve.es:infantil**: RTVE infantil
- **rtve.es:live**: RTVE.es live streams
- **rtve.es:television**
- **rtvslo.si**
@@ -1562,7 +1570,8 @@ # Supported sites
- **tvp:vod:series**
- **TVPlayer**
- **TVPlayHome**
- **Tvw**
- **tvw**
- **tvw:tvchannels**
- **Tweakers**
- **TwitCasting**
- **TwitCastingLive**
@@ -1821,14 +1830,12 @@ # Supported sites
- **ZattooLive**: [*zattoo*](## "netrc machine")
- **ZattooMovies**: [*zattoo*](## "netrc machine")
- **ZattooRecordings**: [*zattoo*](## "netrc machine")
- **ZDF**
- **ZDFChannel**
- **zdf**
- **zdf:channel**
- **Zee5**: [*zee5*](## "netrc machine")
- **zee5:series**
- **ZeeNews**: (**Currently broken**)
- **ZenPorn**
- **ZenYandex**
- **ZenYandexChannel**
- **ZetlandDKArticle**
- **Zhihu**
- **zingmp3**: zingmp3.vn
@@ -30,7 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
from .http import HttpFD
from .ism import IsmFD
from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD, NiconicoLiveFD
from .niconico import NiconicoLiveFD
from .rtmp import RtmpFD
from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
@@ -50,7 +50,6 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
'http_dash_segments_generator': DashSegmentsFD,
'ism': IsmFD,
'mhtml': MhtmlFD,
'niconico_dmc': NiconicoDmcFD,
'niconico_live': NiconicoLiveFD,
'fc2_live': FC2LiveFD,
'websocket_frag': WebSocketFragmentFD,
@@ -67,7 +66,6 @@ def shorten_protocol_name(proto, simplify=False):
'rtmp_ffmpeg': 'rtmpF',
'http_dash_segments': 'dash',
'http_dash_segments_generator': 'dashG',
'niconico_dmc': 'dmc',
'websocket_frag': 'WSfrag',
}
if simplify:
@@ -2,60 +2,12 @@
import threading
import time

from . import get_suitable_downloader
from .common import FileDownloader
from .external import FFmpegFD
from ..networking import Request
from ..utils import DownloadError, str_or_none, try_get


class NiconicoDmcFD(FileDownloader):
""" Downloading niconico douga from DMC with heartbeat """

def real_download(self, filename, info_dict):
from ..extractor.niconico import NiconicoIE

self.to_screen(f'[{self.FD_NAME}] Downloading from DMC')
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)

fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)

success = download_complete = False
timer = [None]
heartbeat_lock = threading.Lock()
heartbeat_url = heartbeat_info_dict['url']
heartbeat_data = heartbeat_info_dict['data'].encode()
heartbeat_interval = heartbeat_info_dict.get('interval', 30)

request = Request(heartbeat_url, heartbeat_data)

def heartbeat():
try:
self.ydl.urlopen(request).read()
except Exception:
self.to_screen(f'[{self.FD_NAME}] Heartbeat failed')

with heartbeat_lock:
if not download_complete:
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
timer[0].start()

heartbeat_info_dict['ping']()
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
try:
heartbeat()
if type(fd).__name__ == 'HlsFD':
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
success = fd.real_download(filename, info_dict)
finally:
if heartbeat_lock:
with heartbeat_lock:
timer[0].cancel()
download_complete = True
return success


class NiconicoLiveFD(FileDownloader):
""" Downloads niconico live without being stopped """
@@ -338,7 +338,6 @@
from .canalplus import CanalplusIE
from .canalsurmas import CanalsurmasIE
from .caracoltv import CaracolTvPlayIE
from .cartoonnetwork import CartoonNetworkIE
from .cbc import (
CBCIE,
CBCGemIE,
@@ -929,7 +928,10 @@
)
from .jiosaavn import (
JioSaavnAlbumIE,
JioSaavnArtistIE,
JioSaavnPlaylistIE,
JioSaavnShowIE,
JioSaavnShowPlaylistIE,
JioSaavnSongIE,
)
from .joj import JojIE
@@ -1042,6 +1044,7 @@
LimelightMediaIE,
)
from .linkedin import (
LinkedInEventsIE,
LinkedInIE,
LinkedInLearningCourseIE,
LinkedInLearningIE,
@@ -1963,7 +1966,6 @@
SpreakerShowIE,
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .sproutvideo import (
SproutVideoIE,
VidsIoIE,
@@ -1,32 +1,24 @@
import re

from .theplatform import ThePlatformIE
from ..utils import (
int_or_none,
parse_age_limit,
try_get,
update_url_query,
)
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils.traversal import traverse_obj


class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
class AMCNetworksIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/?#]+)+)/[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
'url': 'https://www.amc.com/shows/dark-winds/videos/dark-winds-a-look-at-season-3--1072027',
'info_dict': {
'id': '4Lq1dzOnZGt0',
'id': '6369261343112',
'ext': 'mp4',
'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
'upload_date': '20201120',
'timestamp': 1605904350,
'uploader': 'AMCN',
'title': 'Dark Winds: A Look at Season 3',
'uploader_id': '6240731308001',
'duration': 176.427,
'thumbnail': r're:https://[^/]+\.boltdns\.net/.+/image\.jpg',
'tags': [],
'timestamp': 1740414792,
'upload_date': '20250224',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': '404 Not Found',
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True,
@@ -52,96 +44,18 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
'only_matching': True,
}]
_REQUESTOR_ID_MAP = {
'amc': 'AMC',
'bbcamerica': 'BBCA',
'ifc': 'IFC',
'sundancetv': 'SUNDANCE',
'wetv': 'WETV',
}

def _real_extract(self, url):
site, display_id = self._match_valid_url(url).groups()
requestor_id = self._REQUESTOR_ID_MAP[site]
page_data = self._download_json(
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
display_id)['data']
properties = page_data.get('properties') or {}
query = {
'mbr': 'true',
'manifest': 'm3u',
}
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
initial_data = self._search_json(
r'window\.initialData\s*=\s*JSON\.parse\(String\.raw`', webpage, 'initial data', display_id)
video_id = traverse_obj(initial_data, ('initialData', 'properties', 'videoId', {str}))
if not video_id: # All locked videos are now DRM-protected
self.report_drm(display_id)
account_id = initial_data['config']['brightcove']['accountId']
player_id = initial_data['config']['brightcove']['playerId']

video_player_count = 0
try:
for v in page_data['children']:
if v.get('type') == 'video-player':
release_pid = v['properties']['currentVideo']['meta']['releasePid']
tp_path = 'M_UwQC/' + release_pid
media_url = 'https://link.theplatform.com/s/' + tp_path
video_player_count += 1
except KeyError:
pass
if video_player_count > 1:
self.report_warning(
f'The JSON data has {video_player_count} video players. Only one will be extracted')

# Fall back to videoPid if releasePid not found.
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
if not video_player_count:
tp_path = 'M_UwQC/media/' + properties['videoPid']
media_url = 'https://link.theplatform.com/s/' + tp_path

theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid']
title = theplatform_metadata['title']
rating = try_get(
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
video_category = properties.get('videoCategory')
if video_category and video_category.endswith('-Auth'):
resource = self._get_mvpd_resource(
requestor_id, title, video_id, rating)
query['auth'] = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
media_url = update_url_query(media_url, query)
formats, subtitles = self._extract_theplatform_smil(
media_url, video_id)

thumbnails = []
thumbnail_urls = [properties.get('imageDesktop')]
if 'thumbnail' in info:
thumbnail_urls.append(info.pop('thumbnail'))
for thumbnail_url in thumbnail_urls:
if not thumbnail_url:
continue
mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
thumbnails.append({
'url': thumbnail_url,
'width': int(mobj.group(1)) if mobj else None,
'height': int(mobj.group(2)) if mobj else None,
})

info.update({
'age_limit': parse_age_limit(rating),
'formats': formats,
'id': video_id,
'subtitles': subtitles,
'thumbnails': thumbnails,
})
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
if ns_keys:
ns = next(iter(ns_keys))
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
episode_number = int_or_none(
theplatform_metadata.get(ns + '$episode'))
season_number = int_or_none(
theplatform_metadata.get(ns + '$season'))
series = theplatform_metadata.get(ns + '$show') or None
info.update({
'episode': episode,
'episode_number': episode_number,
'season_number': season_number,
'series': series,
})
return info
return self.url_result(
f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
BrightcoveNewIE, video_id)
@@ -1,30 +1,32 @@
import functools
import json
import re

from .common import InfoExtractor
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
extract_attributes,
determine_ext,
format_field,
get_element_by_class,
get_element_by_id,
get_element_html_by_class,
get_elements_html_by_class,
int_or_none,
orderedSet,
parse_count,
parse_duration,
traverse_obj,
unified_strdate,
parse_iso8601,
url_or_none,
urlencode_postdata,
urljoin,
)
from ..utils.traversal import traverse_obj


class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/?#]+)/(?P<id>[^/?#&]+)'
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
@@ -34,12 +36,17 @@ class BitChuteIE(InfoExtractor):
'ext': 'mp4',
'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'view_count': int,
'duration': 16.0,
'timestamp': 1483425443,
},
}, {
# test case: video with different channel and uploader
@@ -49,13 +56,18 @@ class BitChuteIE(InfoExtractor):
'id': 'Yti_j9A-UZ4',
'ext': 'mp4',
'title': 'Israel at War | Full Measure',
'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:e60198b89971966d6030d22b3268f08f',
'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'sharylattkisson',
'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
'uploader_id': '9K0kUWA9zmd9',
'channel_id': 'NpdxoCRv3ZLb',
'view_count': int,
'duration': 554.0,
'timestamp': 1699296106,
},
}, {
# video not downloadable in browser, but we can recover it
@@ -66,25 +78,21 @@ class BitChuteIE(InfoExtractor):
'ext': 'mp4',
'filesize': 71537926,
'title': 'STYXHEXENHAMMER666 - Election Fraud, Clinton 2020, EU Armies, and Gun Control',
'description': 'md5:228ee93bd840a24938f536aeac9cf749',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'md5:2029c7c212ccd4b040f52bb2d036ef4e',
'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute',
'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'view_count': int,
'duration': 1701.0,
'tags': ['bitchute'],
'timestamp': 1542130287,
},
'params': {'check_formats': None},
}, {
# restricted video
'url': 'https://www.bitchute.com/video/WEnQU7XGcTdl/',
'info_dict': {
'id': 'WEnQU7XGcTdl',
'ext': 'mp4',
'title': 'Impartial Truth - Ein Letzter Appell an die Vernunft',
},
'params': {'skip_download': True},
'skip': 'Georestricted in DE',
}, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
'only_matching': True,
@@ -96,11 +104,8 @@ class BitChuteIE(InfoExtractor):
'only_matching': True,
}]
_GEO_BYPASS = False

_HEADERS = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
'Referer': 'https://www.bitchute.com/',
}
_UPLOADER_URL_TMPL = 'https://www.bitchute.com/profile/%s/'
_CHANNEL_URL_TMPL = 'https://www.bitchute.com/channel/%s/'

def _check_format(self, video_url, video_id):
urls = orderedSet(
@@ -112,7 +117,7 @@ def _check_format(self, video_url, video_id):
for url in urls:
try:
response = self._request_webpage(
HEADRequest(url), video_id=video_id, note=f'Checking {url}', headers=self._HEADERS)
HEADRequest(url), video_id=video_id, note=f'Checking {url}')
except ExtractorError as e:
self.to_screen(f'{video_id}: URL is invalid, skipping: {e.cause}')
continue
@@ -121,54 +126,79 @@ def _check_format(self, video_url, video_id):
'filesize': int_or_none(response.headers.get('Content-Length')),
}

def _raise_if_restricted(self, webpage):
page_title = clean_html(get_element_by_class('page-title', webpage)) or ''
if re.fullmatch(r'(?:Channel|Video) Restricted', page_title):
reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
self.raise_geo_restricted(reason)

@staticmethod
def _make_url(html):
path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
return urljoin('https://www.bitchute.com', path)
def _call_api(self, endpoint, data, display_id, fatal=True):
note = endpoint.rpartition('/')[2]
try:
return self._download_json(
f'https://api.bitchute.com/api/beta/{endpoint}', display_id,
f'Downloading {note} API JSON', f'Unable to download {note} API JSON',
data=json.dumps(data).encode(),
headers={
'Accept': 'application/json',
'Content-Type': 'application/json',
})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
errors = '. '.join(traverse_obj(e.cause.response.read().decode(), (
{json.loads}, 'errors', lambda _, v: v['context'] == 'reason', 'message', {str})))
if errors and 'location' in errors:
# Can always be fatal since the video/media call will reach this code first
self.raise_geo_restricted(errors)
if fatal:
raise
self.report_warning(e.msg)

def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)

self._raise_if_restricted(webpage)
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
entries = self._parse_html5_media_entries(url, webpage, video_id)
data = {'video_id': video_id}
media_url = self._call_api('video/media', data, video_id)['media_url']

formats = []
for format_ in traverse_obj(entries, (0, 'formats', ...)):
if determine_ext(media_url) == 'm3u8':
formats.extend(
self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls', live=True))
else:
if self.get_param('check_formats') is not False:
format_.update(self._check_format(format_.pop('url'), video_id) or {})
if 'url' not in format_:
continue
formats.append(format_)
if fmt := self._check_format(media_url, video_id):
formats.append(fmt)
else:
formats.append({'url': media_url})

if not formats:
self.raise_no_formats(
'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id)

details = get_element_by_class('details', webpage) or ''
uploader_html = get_element_html_by_class('creator', details) or ''
channel_html = get_element_html_by_class('name', details) or ''
video = self._call_api('video', data, video_id, fatal=False)
channel = None
if channel_id := traverse_obj(video, ('channel', 'channel_id', {str})):
channel = self._call_api('channel', {'channel_id': channel_id}, video_id, fatal=False)

return {
**traverse_obj(video, {
'title': ('video_name', {str}),
'description': ('description', {str}),
'thumbnail': ('thumbnail_url', {url_or_none}),
'channel': ('channel', 'channel_name', {str}),
'channel_id': ('channel', 'channel_id', {str}),
'channel_url': ('channel', 'channel_url', {urljoin('https://www.bitchute.com/')}),
'uploader_id': ('profile_id', {str}),
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
'timestamp': ('date_published', {parse_iso8601}),
'duration': ('duration', {parse_duration}),
'tags': ('hashtags', ..., {str}, filter, all, filter),
'view_count': ('view_count', {int_or_none}),
'is_live': ('state_id', {lambda x: x == 'live'}),
}),
**traverse_obj(channel, {
'channel': ('channel_name', {str}),
'channel_id': ('channel_id', {str}),
'channel_url': ('url_slug', {format_field(template=self._CHANNEL_URL_TMPL)}, filter),
'uploader': ('profile_name', {str}),
'uploader_id': ('profile_id', {str}),
'uploader_url': ('profile_id', {format_field(template=self._UPLOADER_URL_TMPL)}, filter),
}),
'id': video_id,
'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
'description': self._og_search_description(webpage, default=None),
'thumbnail': self._og_search_thumbnail(webpage),
'uploader': clean_html(uploader_html),
'uploader_url': self._make_url(uploader_html),
'channel': clean_html(channel_html),
'channel_url': self._make_url(channel_html),
'upload_date': unified_strdate(self._search_regex(
r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
'formats': formats,
}

@@ -190,7 +220,7 @@ class BitChuteChannelIE(InfoExtractor):
'ext': 'mp4',
'title': 'This is the first video on #BitChute !',
'description': 'md5:a0337e7b1fe39e32336974af8173a034',
'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:https?://.+/.+\.jpg$',
'uploader': 'BitChute',
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
@@ -198,6 +228,9 @@ class BitChuteChannelIE(InfoExtractor):
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
'duration': 16,
'view_count': int,
'uploader_id': 'I5NgtHZn9vPj',
'channel_id': '1VBwRfyNcKdX',
'timestamp': 1483425443,
},
},
],
@@ -213,6 +246,7 @@ class BitChuteChannelIE(InfoExtractor):
'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e',
},
'skip': '404 Not Found',
}, {
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
'only_matching': True,
@@ -7,6 +7,7 @@
join_nonempty,
js_to_json,
mimetype2ext,
parse_resolution,
unified_strdate,
url_or_none,
urljoin,
@@ -110,24 +111,23 @@ def _parse_vue_attributes(self, name, string, video_id):

return attributes

@staticmethod
def _process_source(source):
def _process_source(self, source):
url = url_or_none(source['src'])
if not url:
return None

source_type = source.get('type', '')
extension = mimetype2ext(source_type)
is_video = source_type.startswith('video')
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
note = self._search_regex(r'[_-]([a-z]+)\.[\da-z]+(?:$|\?)', url, 'note', default=None)

return {
'url': url,
'ext': extension,
'vcodec': None if is_video else 'none',
'vcodec': None if source_type.startswith('video') else 'none',
'quality': 10 if note == 'high' else 0,
'format_note': note,
'format_id': join_nonempty(extension, note),
**parse_resolution(source.get('label')),
}

def _real_extract(self, url):
@@ -1,59 +0,0 @@
from .turner import TurnerBaseIE
from ..utils import int_or_none


class CartoonNetworkIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
_TEST = {
'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
'info_dict': {
'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
'ext': 'mp4',
'title': 'How to Draw Upgrade',
'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
},
'params': {
# m3u8 download
'skip_download': True,
},
}

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)

def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
metadata_re = ''
if content_re:
metadata_re = r'|video_metadata\.content_' + content_re
return self._search_regex(
rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";',
webpage, name, fatal=fatal)

media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)

info = self._extract_ngtv_info(
media_id, {'networkId': 'cartoonnetwork'}, {
'url': url,
'site_name': 'CartoonNetwork',
'auth_required': find_field('authType', 'auth type') != 'unauth',
})

series = find_field(
'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
info.update({
'id': media_id,
'display_id': display_id,
'title': title,
'description': self._html_search_meta('description', webpage),
'series': series,
'episode': title,
})

for field in ('season', 'episode'):
field_name = field + 'Number'
info[field + '_number'] = int_or_none(find_field(
field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))

return info
@ -13,16 +13,17 @@
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
float_or_none,
int_or_none,
merge_dicts,
multipart_encode,
parse_duration,
traverse_obj,
try_call,
try_get,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj


class CDAIE(InfoExtractor):
@ -290,34 +291,47 @@ def extract_format(page, version):
if not video or 'file' not in video:
self.report_warning(f'Unable to extract {version} version information')
return
if video['file'].startswith('uggc'):
video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
video_quality = video.get('quality')
qualities = video.get('qualities', {})
video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
info_dict['formats'].append({
'url': video['file'],
'format_id': video_quality,
'height': int_or_none(video_quality[:-1]),
})
if video.get('file'):
if video['file'].startswith('uggc'):
video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
info_dict['formats'].append({
'url': video['file'],
'format_id': video_quality,
'height': int_or_none(video_quality[:-1]),
})
for quality, cda_quality in qualities.items():
if quality == video_quality:
continue
data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
data = json.dumps(data).encode()
video_url = self._download_json(
response = self._download_json(
f'https://www.cda.pl/video/{video_id}', video_id, headers={
'Content-Type': 'application/json',
'X-Requested-With': 'XMLHttpRequest',
}, data=data, note=f'Fetching {quality} url',
errnote=f'Failed to fetch {quality} url', fatal=False)
if try_get(video_url, lambda x: x['result']['status']) == 'ok':
video_url = try_get(video_url, lambda x: x['result']['resp'])
if (
traverse_obj(response, ('result', 'status')) != 'ok'
or not traverse_obj(response, ('result', 'resp', {url_or_none}))
):
continue
video_url = response['result']['resp']
ext = determine_ext(video_url)
if ext == 'mpd':
info_dict['formats'].extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
elif ext == 'm3u8':
info_dict['formats'].extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
else:
info_dict['formats'].append({
'url': video_url,
'format_id': quality,
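The extract_format() hunk above keeps CDA's file-URL de-obfuscation: URLs starting with 'uggc' (ROT13 of 'http') are ROT13-decoded, and an 'adc.mp4' suffix is rewritten to '.mp4'. A standalone sketch of just that step; the sample value is a made-up illustration, not real CDA data.

```python
import codecs

def deobfuscate_file_url(file_url):
    """De-obfuscation step from extract_format(): ROT13-decode 'uggc...' URLs and fix the
    'adc.mp4' suffix. The input below is fabricated for illustration."""
    if file_url.startswith('uggc'):
        file_url = codecs.decode(file_url, 'rot_13')
        if file_url.endswith('adc.mp4'):
            file_url = file_url.replace('adc.mp4', '.mp4')
    return file_url

sample = codecs.encode('https://example.com/video/1234adc.mp4', 'rot_13')
print(sample)                        # obfuscated input, starts with 'uggcf'
print(deobfuscate_file_url(sample))  # https://example.com/video/1234.mp4
```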
@ -9,6 +9,7 @@
ExtractorError,
classproperty,
float_or_none,
parse_qs,
traverse_obj,
url_or_none,
)
@ -91,11 +92,15 @@ def _usp_signing_secret(self):
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
return self._search_regex(
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
'usp signing secret', group='secret', fatal=False) or 'hGDtqMKYVeFdofrAfFmBcrsakaZELajI'

def _real_extract(self, url):
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
query = {
'contentId': f'{user_id}-vod-{video_id}',
'provider': 'universe',
**traverse_obj(url, ({parse_qs}, 'uss_token', {'signedKey': -1})),
}
info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False)
access = self._download_json(
'https://playback.dacast.com/content/access', video_id,
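The Dacast hunk above forwards the page URL's uss_token query parameter to the playback API as signedKey. A minimal stdlib-only sketch of that idea; the page URL and value below are hypothetical, and the real code routes this through yt-dlp's traversal helpers instead.

```python
from urllib.parse import parse_qs, urlparse

def build_content_query(page_url, user_id, video_id):
    """Sketch of the query built in the hunk above: forward the page's 'uss_token'
    parameter (if present) as 'signedKey'. The page URL is a made-up example."""
    query = {
        'contentId': f'{user_id}-vod-{video_id}',
        'provider': 'universe',
    }
    tokens = parse_qs(urlparse(page_url).query).get('uss_token')
    if tokens:
        query['signedKey'] = tokens[-1]  # the diff takes the last value (index -1)
    return query

print(build_content_query('https://example.com/watch?uss_token=abc123', 'user42', '9876'))
# {'contentId': 'user42-vod-9876', 'provider': 'universe', 'signedKey': 'abc123'}
```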
@ -1,9 +1,15 @@
|
||||
from .zdf import ZDFBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class DreiSatIE(ZDFBaseIE):
|
||||
IE_NAME = '3sat'
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/?#]+/)*(?P<id>[^/?#&]+)\.html'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.3sat.de/dokumentation/reise/traumziele-suedostasiens-die-philippinen-und-vietnam-102.html',
|
||||
'info_dict': {
|
||||
@ -12,40 +18,59 @@ class DreiSatIE(ZDFBaseIE):
|
||||
'title': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
|
||||
'description': 'md5:26329ce5197775b596773b939354079d',
|
||||
'duration': 2625.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~2400x1350?cb=1699870351148',
|
||||
'thumbnail': 'https://www.3sat.de/assets/traumziele-suedostasiens-die-philippinen-und-vietnam-100~original?cb=1699870351148',
|
||||
'episode': 'Traumziele Südostasiens (1/2): Die Philippinen und Vietnam',
|
||||
'episode_id': 'POS_cc7ff51c-98cf-4d12-b99d-f7a551de1c95',
|
||||
'timestamp': 1738593000,
|
||||
'upload_date': '20250203',
|
||||
'timestamp': 1747920900,
|
||||
'upload_date': '20250522',
|
||||
},
|
||||
}, {
|
||||
# Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
|
||||
'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
|
||||
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
|
||||
'url': 'https://www.3sat.de/film/ab-18/ab-18---mein-fremdes-ich-100.html',
|
||||
'md5': 'f92638413a11d759bdae95c9d8ec165c',
|
||||
'info_dict': {
|
||||
'id': '141007_ab18_10wochensommer_film',
|
||||
'id': '221128_mein_fremdes_ich2_ab18',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ab 18! - 10 Wochen Sommer',
|
||||
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
|
||||
'duration': 2660,
|
||||
'timestamp': 1608604200,
|
||||
'upload_date': '20201222',
|
||||
'title': 'Ab 18! - Mein fremdes Ich',
|
||||
'description': 'md5:cae0c0b27b7426d62ca0dda181738bf0',
|
||||
'duration': 2625.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/ab-18---mein-fremdes-ich-106~original?cb=1666081865812',
|
||||
'episode': 'Ab 18! - Mein fremdes Ich',
|
||||
'episode_id': 'POS_6225d1ca-a0d5-45e3-870b-e783ee6c8a3f',
|
||||
'timestamp': 1695081600,
|
||||
'upload_date': '20230919',
|
||||
},
|
||||
'skip': '410 Gone',
|
||||
}, {
|
||||
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
|
||||
'url': 'https://www.3sat.de/gesellschaft/37-grad-leben/aus-dem-leben-gerissen-102.html',
|
||||
'md5': 'a903eaf8d1fd635bd3317cd2ad87ec84',
|
||||
'info_dict': {
|
||||
'id': '140913_sendung_schweizweit',
|
||||
'id': '250323_0903_sendung_sgl',
|
||||
'ext': 'mp4',
|
||||
'title': 'Waidmannsheil',
|
||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||
'timestamp': 1410623100,
|
||||
'upload_date': '20140913',
|
||||
'title': 'Plötzlich ohne dich',
|
||||
'description': 'md5:380cc10659289dd91510ad8fa717c66b',
|
||||
'duration': 1620.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/37-grad-leben-106~original?cb=1645537156810',
|
||||
'episode': 'Plötzlich ohne dich',
|
||||
'episode_id': 'POS_faa7a93c-c0f2-4d51-823f-ce2ac3ee191b',
|
||||
'timestamp': 1743162540,
|
||||
'upload_date': '20250328',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}, {
|
||||
# Video with chapters
|
||||
'url': 'https://www.3sat.de/kultur/buchmesse/dein-buch-das-beste-von-der-leipziger-buchmesse-2025-teil-1-100.html',
|
||||
'md5': '6b95790ce52e75f0d050adcdd2711ee6',
|
||||
'info_dict': {
|
||||
'id': '250330_dein_buch1_bum',
|
||||
'ext': 'mp4',
|
||||
'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
|
||||
'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844',
|
||||
'duration': 5399.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903',
|
||||
'chapters': 'count:24',
|
||||
'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
|
||||
'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb',
|
||||
'timestamp': 1743327000,
|
||||
'upload_date': '20250330',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
|
||||
'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
|
||||
@ -58,11 +83,42 @@ class DreiSatIE(ZDFBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player = self._search_json(
|
||||
r'data-zdfplayer-jsb=(["\'])', webpage, 'player JSON', video_id)
|
||||
player_url = player['content']
|
||||
api_token = f'Bearer {player["apiToken"]}'
|
||||
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
if webpage:
|
||||
player = self._extract_player(webpage, url, fatal=False)
|
||||
if player:
|
||||
return self._extract_regular(url, player, video_id)
|
||||
content = self._call_api(player_url, video_id, 'video metadata', api_token)
|
||||
|
||||
return self._extract_mobile(video_id)
|
||||
video_target = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||
ptmd_path = traverse_obj(video_target, (
|
||||
(('streams', 'default'), None),
|
||||
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'),
|
||||
{str}, any, {require('ptmd path')}))
|
||||
ptmd_url = self._expand_ptmd_template(player_url, ptmd_path)
|
||||
aspect_ratio = self._parse_aspect_ratio(video_target.get('aspectRatio'))
|
||||
info = self._extract_ptmd(ptmd_url, video_id, api_token, aspect_ratio)
|
||||
|
||||
return merge_dicts(info, {
|
||||
**traverse_obj(content, {
|
||||
'title': (('title', 'teaserHeadline'), {str}, any),
|
||||
'episode': (('title', 'teaserHeadline'), {str}, any),
|
||||
'description': (('leadParagraph', 'teasertext'), {str}, any),
|
||||
'timestamp': ('editorialDate', {parse_iso8601}),
|
||||
}),
|
||||
**traverse_obj(video_target, {
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'chapters': ('streamAnchorTag', {self._extract_chapters}),
|
||||
}),
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(content, ('teaserImageRef', 'layouts', {dict}))),
|
||||
**traverse_obj(content, ('programmeItem', 0, 'http://zdf.de/rels/target', {
|
||||
'series_id': ('http://zdf.de/rels/cmdm/series', 'seriesUuid', {str}),
|
||||
'series': ('http://zdf.de/rels/cmdm/series', 'seriesTitle', {str}),
|
||||
'season': ('http://zdf.de/rels/cmdm/season', 'seasonTitle', {str}),
|
||||
'season_number': ('http://zdf.de/rels/cmdm/season', 'seasonNumber', {int_or_none}),
|
||||
'season_id': ('http://zdf.de/rels/cmdm/season', 'seasonUuid', {str}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode_id': ('contentId', {str}),
|
||||
})),
|
||||
})
|
||||
|
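The 3sat rewrite above maps metadata from whichever of several API keys happens to be present ('title' or 'teaserHeadline', 'leadParagraph' or 'teasertext'). A plain-Python sketch of that first-available-key fallback; the field names come from the hunk, the sample payload is fabricated.

```python
def first_of(mapping, *keys):
    """Return the first non-empty string among candidate keys, mirroring the
    ('title', 'teaserHeadline') / ('leadParagraph', 'teasertext') fallbacks above."""
    for key in keys:
        value = mapping.get(key)
        if isinstance(value, str) and value:
            return value
    return None

# Made-up API payload for illustration only
content = {'teaserHeadline': 'Traumziele Südostasiens', 'teasertext': 'Doku in zwei Teilen'}
info = {
    'title': first_of(content, 'title', 'teaserHeadline'),
    'description': first_of(content, 'leadParagraph', 'teasertext'),
}
print(info)  # {'title': 'Traumziele Südostasiens', 'description': 'Doku in zwei Teilen'}
```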
@ -16,7 +16,6 @@
MEDIA_EXTENSIONS,
ExtractorError,
UnsupportedError,
base_url,
determine_ext,
determine_protocol,
dict_get,
@ -38,6 +37,7 @@
unescapeHTML,
unified_timestamp,
unsmuggle_url,
update_url,
update_url_query,
url_or_none,
urlhandle_detect_ext,
@ -2538,12 +2538,13 @@ def _real_extract(self, url):
return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
xspf_base_url=full_response.url),
xspf_base_url=new_url),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
doc,
mpd_base_url=base_url(full_response.url),
# Do not use yt_dlp.utils.base_url here since it will raise on file:// URLs
mpd_base_url=update_url(new_url, query=None, fragment=None).rpartition('/')[0],
mpd_url=url)
info_dict['live_status'] = 'is_live' if doc.get('type') == 'dynamic' else None
self._extra_manifest_info(info_dict, url)
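The generic-extractor hunk above computes the DASH base URL by stripping the query and fragment and keeping everything up to the final '/', because yt_dlp.utils.base_url() raises on file:// URLs. A stdlib sketch of the same computation, with example URLs that are illustrative only:

```python
from urllib.parse import urlsplit, urlunsplit

def mpd_base_url(manifest_url):
    """Base-URL computation from the hunk above: drop query and fragment, then keep
    everything up to the final '/'. Also works for file:// URLs."""
    parts = urlsplit(manifest_url)
    stripped = urlunsplit((parts.scheme, parts.netloc, parts.path, '', ''))
    return stripped.rpartition('/')[0]

print(mpd_base_url('https://cdn.example.com/streams/show/manifest.mpd?token=abc#t=0'))
# https://cdn.example.com/streams/show
print(mpd_base_url('file:///tmp/downloads/manifest.mpd'))
# file:///tmp/downloads
```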
@ -8,7 +8,7 @@


class GetCourseRuPlayerIE(InfoExtractor):
_VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
_VALID_URL = r'https?://(?:player02\.getcourse\.ru|cf-api-2\.vhcdn\.com)/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
_TESTS = [{
'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
@ -20,6 +20,16 @@ class GetCourseRuPlayerIE(InfoExtractor):
'duration': 1693,
},
'skip': 'JWT expired',
}, {
'url': 'https://cf-api-2.vhcdn.com/sign-player/?json=example',
'info_dict': {
'id': '435735291',
'title': '8afd7c489952108e00f019590f3711f3',
'ext': 'mp4',
'thumbnail': 'https://preview-htz.vhcdn.com/preview/8afd7c489952108e00f019590f3711f3/preview.jpg?version=1682170973&host=vh-72',
'duration': 777,
},
'skip': 'JWT expired',
}]

def _real_extract(self, url):
@ -168,7 +178,7 @@ def _real_extract(self, url):

playlist_id = self._search_regex(
r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
title = self._og_search_title(webpage) or self._html_extract_title(webpage)
title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)

return self.playlist_from_matches(
re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
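The GetCourse hunk above widens _VALID_URL to the cf-api-2.vhcdn.com mirror and reuses it in _EMBED_REGEX to find player iframes in pages. A hedged sketch of that embed matching; the HTML snippet is fabricated, not a real page.

```python
import re

# The pattern mirrors the widened _VALID_URL / _EMBED_REGEX pair above; the HTML is made up.
VALID_URL = r'https?://(?:player02\.getcourse\.ru|cf-api-2\.vhcdn\.com)/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
EMBED_RE = rf'<iframe[^>]+\bsrc=[\'"](?P<url>{VALID_URL}[^\'"]*)'

sample_html = '''
<iframe width="720" src="https://cf-api-2.vhcdn.com/sign-player/?json=abc&s=123"></iframe>
<iframe src="https://player02.getcourse.ru/sign-player/?json=def"></iframe>
'''

for match in re.finditer(EMBED_RE, sample_html):
    print(match.group('url'))
# https://cf-api-2.vhcdn.com/sign-player/?json=abc&s=123
# https://player02.getcourse.ru/sign-player/?json=def
```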
@ -1,23 +1,33 @@
|
||||
import functools
|
||||
import itertools
|
||||
import math
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
InAdvancePagedList,
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
make_archive_id,
|
||||
orderedSet,
|
||||
smuggle_url,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
unsmuggle_url,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class JioSaavnBaseIE(InfoExtractor):
|
||||
_URL_BASE_RE = r'https?://(?:www\.)?(?:jio)?saavn\.com'
|
||||
_API_URL = 'https://www.jiosaavn.com/api.php'
|
||||
_VALID_BITRATES = {'16', '32', '64', '128', '320'}
|
||||
|
||||
@ -30,16 +40,20 @@ def requested_bitrates(self):
|
||||
f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
|
||||
return requested_bitrates
|
||||
|
||||
def _extract_formats(self, song_data):
|
||||
def _extract_formats(self, item_data):
|
||||
# Show/episode JSON data has a slightly different structure than song JSON data
|
||||
if media_url := traverse_obj(item_data, ('more_info', 'encrypted_media_url', {str})):
|
||||
item_data.setdefault('encrypted_media_url', media_url)
|
||||
|
||||
for bitrate in self.requested_bitrates:
|
||||
media_data = self._download_json(
|
||||
self._API_URL, song_data['id'],
|
||||
self._API_URL, item_data['id'],
|
||||
f'Downloading format info for {bitrate}',
|
||||
fatal=False, data=urlencode_postdata({
|
||||
'__call': 'song.generateAuthToken',
|
||||
'_format': 'json',
|
||||
'bitrate': bitrate,
|
||||
'url': song_data['encrypted_media_url'],
|
||||
'url': item_data['encrypted_media_url'],
|
||||
}))
|
||||
if not traverse_obj(media_data, ('auth_url', {url_or_none})):
|
||||
self.report_warning(f'Unable to extract format info for {bitrate}')
|
||||
@ -53,24 +67,6 @@ def _extract_formats(self, song_data):
|
||||
'vcodec': 'none',
|
||||
}
|
||||
|
||||
def _extract_song(self, song_data, url=None):
|
||||
info = traverse_obj(song_data, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('song', {clean_html}),
|
||||
'album': ('album', {clean_html}),
|
||||
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'view_count': ('play_count', {int_or_none}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
|
||||
'webpage_url': ('perma_url', {url_or_none}),
|
||||
})
|
||||
if webpage_url := info.get('webpage_url') or url:
|
||||
info['display_id'] = url_basename(webpage_url)
|
||||
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
|
||||
|
||||
return info
|
||||
|
||||
def _call_api(self, type_, token, note='API', params={}):
|
||||
return self._download_json(
|
||||
self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
|
||||
@ -84,19 +80,89 @@ def _call_api(self, type_, token, note='API', params={}):
|
||||
**params,
|
||||
})
|
||||
|
||||
def _yield_songs(self, playlist_data):
|
||||
for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
|
||||
song_info = self._extract_song(song_data)
|
||||
url = smuggle_url(song_info['webpage_url'], {
|
||||
'id': song_data['id'],
|
||||
'encrypted_media_url': song_data['encrypted_media_url'],
|
||||
})
|
||||
yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
|
||||
@staticmethod
|
||||
def _extract_song(song_data, url=None):
|
||||
info = traverse_obj(song_data, {
|
||||
'id': ('id', {str}),
|
||||
'title': (('song', 'title'), {clean_html}, any),
|
||||
'album': ((None, 'more_info'), 'album', {clean_html}, any),
|
||||
'duration': ((None, 'more_info'), 'duration', {int_or_none}, any),
|
||||
'channel': ((None, 'more_info'), 'label', {str}, any),
|
||||
'channel_id': ((None, 'more_info'), 'label_id', {str}, any),
|
||||
'channel_url': ((None, 'more_info'), 'label_url', {urljoin('https://www.jiosaavn.com/')}, any),
|
||||
'release_date': ((None, 'more_info'), 'release_date', {unified_strdate}, any),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
|
||||
'view_count': ('play_count', {int_or_none}),
|
||||
'language': ('language', {lambda x: ISO639Utils.short2long(x.casefold()) or 'und'}),
|
||||
'webpage_url': ('perma_url', {url_or_none}),
|
||||
'artists': ('more_info', 'artistMap', 'primary_artists', ..., 'name', {str}, filter, all),
|
||||
})
|
||||
if webpage_url := info.get('webpage_url') or url:
|
||||
info['display_id'] = url_basename(webpage_url)
|
||||
info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
|
||||
|
||||
if primary_artists := traverse_obj(song_data, ('primary_artists', {lambda x: x.split(', ') if x else None})):
|
||||
info['artists'].extend(primary_artists)
|
||||
if featured_artists := traverse_obj(song_data, ('featured_artists', {str}, filter)):
|
||||
info['artists'].extend(featured_artists.split(', '))
|
||||
info['artists'] = orderedSet(info['artists']) or None
|
||||
|
||||
return info
|
||||
|
||||
@staticmethod
|
||||
def _extract_episode(episode_data, url=None):
|
||||
info = JioSaavnBaseIE._extract_song(episode_data, url)
|
||||
info.pop('_old_archive_ids', None)
|
||||
info.update(traverse_obj(episode_data, {
|
||||
'description': ('more_info', 'description', {str}),
|
||||
'timestamp': ('more_info', 'release_time', {unified_timestamp}),
|
||||
'series': ('more_info', 'show_title', {str}),
|
||||
'series_id': ('more_info', 'show_id', {str}),
|
||||
'season': ('more_info', 'season_title', {str}),
|
||||
'season_number': ('more_info', 'season_no', {int_or_none}),
|
||||
'season_id': ('more_info', 'season_id', {str}),
|
||||
'episode_number': ('more_info', 'episode_number', {int_or_none}),
|
||||
'cast': ('starring', {lambda x: x.split(', ') if x else None}),
|
||||
}))
|
||||
return info
|
||||
|
||||
def _extract_jiosaavn_result(self, url, endpoint, response_key, parse_func):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
data = traverse_obj(smuggled_data, ({
|
||||
'id': ('id', {str}),
|
||||
'encrypted_media_url': ('encrypted_media_url', {str}),
|
||||
}))
|
||||
|
||||
if 'id' in data and 'encrypted_media_url' in data:
|
||||
result = {'id': data['id']}
|
||||
else:
|
||||
# only extract metadata if this is not a url_transparent result
|
||||
data = self._call_api(endpoint, self._match_id(url))[response_key][0]
|
||||
result = parse_func(data, url)
|
||||
|
||||
result['formats'] = list(self._extract_formats(data))
|
||||
return result
|
||||
|
||||
def _yield_items(self, playlist_data, keys=None, parse_func=None):
|
||||
"""Subclasses using this method must set _ENTRY_IE"""
|
||||
if parse_func is None:
|
||||
parse_func = self._extract_song
|
||||
|
||||
for item_data in traverse_obj(playlist_data, (
|
||||
*variadic(keys, (str, bytes, dict, set)), lambda _, v: v['id'] and v['perma_url'],
|
||||
)):
|
||||
info = parse_func(item_data)
|
||||
url = smuggle_url(info['webpage_url'], traverse_obj(item_data, {
|
||||
'id': ('id', {str}),
|
||||
'encrypted_media_url': ((None, 'more_info'), 'encrypted_media_url', {str}, any),
|
||||
}))
|
||||
yield self.url_result(url, self._ENTRY_IE, url_transparent=True, **info)
|
||||
|
||||
|
||||
class JioSaavnSongIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:song'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'(?:/song/[^/?#]+/|/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
|
||||
'md5': '3b84396d15ed9e083c3106f1fa589c04',
|
||||
@ -106,12 +172,38 @@ class JioSaavnSongIE(JioSaavnBaseIE):
|
||||
'ext': 'm4a',
|
||||
'title': 'Leja Re',
|
||||
'album': 'Leja Re',
|
||||
'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 205,
|
||||
'view_count': int,
|
||||
'release_year': 2018,
|
||||
'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
|
||||
'_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
|
||||
'channel': 'T-Series',
|
||||
'language': 'hin',
|
||||
'channel_id': '34297',
|
||||
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
|
||||
'release_date': '20181124',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/song/chuttamalle/P1FfWjZkQ0Q',
|
||||
'md5': '96296c58d6ce488a417ef0728fd2d680',
|
||||
'info_dict': {
|
||||
'id': 'O94kBTtw',
|
||||
'display_id': 'P1FfWjZkQ0Q',
|
||||
'ext': 'm4a',
|
||||
'title': 'Chuttamalle',
|
||||
'album': 'Devara Part 1 - Telugu',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 222,
|
||||
'view_count': int,
|
||||
'release_year': 2024,
|
||||
'artists': 'count:3',
|
||||
'_old_archive_ids': ['jiosaavnsong P1FfWjZkQ0Q'],
|
||||
'channel': 'T-Series',
|
||||
'language': 'tel',
|
||||
'channel_id': '34297',
|
||||
'channel_url': 'https://www.jiosaavn.com/label/t-series-albums/6DLuXO3VoTo_',
|
||||
'release_date': '20240926',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
|
||||
@ -119,26 +211,51 @@ class JioSaavnSongIE(JioSaavnBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url)
|
||||
song_data = traverse_obj(smuggled_data, ({
|
||||
'id': ('id', {str}),
|
||||
'encrypted_media_url': ('encrypted_media_url', {str}),
|
||||
}))
|
||||
return self._extract_jiosaavn_result(url, 'song', 'songs', self._extract_song)
|
||||
|
||||
if 'id' in song_data and 'encrypted_media_url' in song_data:
|
||||
result = {'id': song_data['id']}
|
||||
else:
|
||||
# only extract metadata if this is not a url_transparent result
|
||||
song_data = self._call_api('song', self._match_id(url))['songs'][0]
|
||||
result = self._extract_song(song_data, url)
|
||||
|
||||
result['formats'] = list(self._extract_formats(song_data))
|
||||
return result
|
||||
class JioSaavnShowIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:show'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/[^/?#]+/(?P<id>[^/?#]{11,})/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/shows/non-food-ways-to-boost-your-energy/XFMcKICOCgc_',
|
||||
'md5': '0733cd254cfe74ef88bea1eaedcf1f4f',
|
||||
'info_dict': {
|
||||
'id': 'qqzh3RKZ',
|
||||
'display_id': 'XFMcKICOCgc_',
|
||||
'ext': 'mp3',
|
||||
'title': 'Non-Food Ways To Boost Your Energy',
|
||||
'description': 'md5:26e7129644b5c6aada32b8851c3997c8',
|
||||
'episode': 'Episode 1',
|
||||
'timestamp': 1640563200,
|
||||
'series': 'Holistic Lifestyle With Neha Ranglani',
|
||||
'series_id': '52397',
|
||||
'season': 'Holistic Lifestyle With Neha Ranglani',
|
||||
'season_number': 1,
|
||||
'season_id': '61273',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'duration': 311,
|
||||
'view_count': int,
|
||||
'release_year': 2021,
|
||||
'language': 'eng',
|
||||
'channel': 'Saavn OG',
|
||||
'channel_id': '1953876',
|
||||
'episode_number': 1,
|
||||
'upload_date': '20211227',
|
||||
'release_date': '20211227',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/shows/himesh-reshammiya/Kr8fmfSN4vo_',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self._extract_jiosaavn_result(url, 'episode', 'episodes', self._extract_episode)
|
||||
|
||||
|
||||
class JioSaavnAlbumIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:album'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/album/[^/?#]+/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
|
||||
'info_dict': {
|
||||
@ -147,18 +264,19 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}]
|
||||
_ENTRY_IE = JioSaavnSongIE
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
album_data = self._call_api('album', display_id)
|
||||
|
||||
return self.playlist_result(
|
||||
self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
|
||||
self._yield_items(album_data, 'songs'), display_id, traverse_obj(album_data, ('title', {str})))
|
||||
|
||||
|
||||
class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
|
||||
'info_dict': {
|
||||
@ -172,15 +290,16 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE):
|
||||
'id': 'DVR,pFUOwyXqIp77B1JF,A__',
|
||||
'title': 'Mood Hindi',
|
||||
},
|
||||
'playlist_mincount': 801,
|
||||
'playlist_mincount': 750,
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_',
|
||||
'info_dict': {
|
||||
'id': 'Me5RridRfDk_',
|
||||
'title': 'Taaza Tunes',
|
||||
},
|
||||
'playlist_mincount': 301,
|
||||
'playlist_mincount': 50,
|
||||
}]
|
||||
_ENTRY_IE = JioSaavnSongIE
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, token, page):
|
||||
@ -189,7 +308,7 @@ def _fetch_page(self, token, page):
|
||||
|
||||
def _entries(self, token, first_page_data, page):
|
||||
page_data = first_page_data if not page else self._fetch_page(token, page + 1)
|
||||
yield from self._yield_songs(page_data)
|
||||
yield from self._yield_items(page_data, 'songs')
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@ -199,3 +318,95 @@ def _real_extract(self, url):
|
||||
return self.playlist_result(InAdvancePagedList(
|
||||
functools.partial(self._entries, display_id, playlist_data),
|
||||
total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))
|
||||
|
||||
|
||||
class JioSaavnShowPlaylistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:show:playlist'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/shows/(?P<show>[^#/?]+)/(?P<season>\d+)/[^/?#]+'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/shows/talking-music/1/PjReFP-Sguk_',
|
||||
'info_dict': {
|
||||
'id': 'talking-music-1',
|
||||
'title': 'Talking Music',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}]
|
||||
_ENTRY_IE = JioSaavnShowIE
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
def _fetch_page(self, show_id, season_id, page):
|
||||
return self._call_api('show', show_id, f'show page {page}', {
|
||||
'p': page,
|
||||
'__call': 'show.getAllEpisodes',
|
||||
'show_id': show_id,
|
||||
'season_number': season_id,
|
||||
'api_version': '4',
|
||||
'sort_order': 'desc',
|
||||
})
|
||||
|
||||
def _entries(self, show_id, season_id, page):
|
||||
page_data = self._fetch_page(show_id, season_id, page + 1)
|
||||
yield from self._yield_items(page_data, keys=None, parse_func=self._extract_episode)
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_slug, season_id = self._match_valid_url(url).group('show', 'season')
|
||||
playlist_id = f'{show_slug}-{season_id}'
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
show_info = self._search_json(
|
||||
r'window\.__INITIAL_DATA__\s*=', webpage, 'initial data',
|
||||
playlist_id, transform_source=js_to_json)['showView']
|
||||
show_id = show_info['current_id']
|
||||
|
||||
entries = OnDemandPagedList(functools.partial(self._entries, show_id, season_id), self._PAGE_SIZE)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, traverse_obj(show_info, ('show', 'title', 'text', {str})))
|
||||
|
||||
|
||||
class JioSaavnArtistIE(JioSaavnBaseIE):
|
||||
IE_NAME = 'jiosaavn:artist'
|
||||
_VALID_URL = JioSaavnBaseIE._URL_BASE_RE + r'/artist/[^/?#]+/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiosaavn.com/artist/krsna-songs/rYLBEve2z3U_',
|
||||
'info_dict': {
|
||||
'id': 'rYLBEve2z3U_',
|
||||
'title': 'KR$NA',
|
||||
},
|
||||
'playlist_mincount': 38,
|
||||
}, {
|
||||
'url': 'https://www.jiosaavn.com/artist/sanam-puri-songs/SkNEv3qRhDE_',
|
||||
'info_dict': {
|
||||
'id': 'SkNEv3qRhDE_',
|
||||
'title': 'Sanam Puri',
|
||||
},
|
||||
'playlist_mincount': 51,
|
||||
}]
|
||||
_ENTRY_IE = JioSaavnSongIE
|
||||
_PAGE_SIZE = 50
|
||||
|
||||
def _fetch_page(self, artist_id, page):
|
||||
return self._call_api('artist', artist_id, f'artist page {page + 1}', {
|
||||
'p': page,
|
||||
'n_song': self._PAGE_SIZE,
|
||||
'n_album': self._PAGE_SIZE,
|
||||
'sub_type': '',
|
||||
'includeMetaTags': '',
|
||||
'api_version': '4',
|
||||
'category': 'alphabetical',
|
||||
'sort_order': 'asc',
|
||||
})
|
||||
|
||||
def _entries(self, artist_id, first_page):
|
||||
for page in itertools.count():
|
||||
playlist_data = first_page if not page else self._fetch_page(artist_id, page)
|
||||
if not traverse_obj(playlist_data, ('topSongs', ..., {dict})):
|
||||
break
|
||||
yield from self._yield_items(playlist_data, 'topSongs')
|
||||
|
||||
def _real_extract(self, url):
|
||||
artist_id = self._match_id(url)
|
||||
first_page = self._fetch_page(artist_id, 0)
|
||||
|
||||
return self.playlist_result(
|
||||
self._entries(artist_id, first_page), artist_id,
|
||||
traverse_obj(first_page, ('name', {str})))
|
||||
|
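The JioSaavn artist extractor above pages through results with itertools.count() and stops as soon as a page comes back without items. A self-contained sketch of that pagination pattern; fetch_page() is a stand-in for the API call and returns fake data.

```python
import itertools

def fetch_page(page):
    """Stand-in for the API call; returns two fake pages of items for illustration."""
    fake_pages = {0: ['song-a', 'song-b'], 1: ['song-c']}
    return {'topSongs': fake_pages.get(page, [])}

def iter_all_songs():
    """Pagination pattern from the artist extractor above: keep requesting pages and stop
    when a page has no items."""
    for page in itertools.count():
        data = fetch_page(page)
        if not data.get('topSongs'):
            break
        yield from data['topSongs']

print(list(iter_all_songs()))  # ['song-a', 'song-b', 'song-c']
```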
@ -1,4 +1,5 @@
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -9,12 +10,12 @@
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
srt_subtitles_timecode,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_elements, require, traverse_obj
|
||||
|
||||
|
||||
class LinkedInBaseIE(InfoExtractor):
|
||||
@ -277,3 +278,110 @@ def _real_extract(self, url):
|
||||
entries, course_slug,
|
||||
course_data.get('title'),
|
||||
course_data.get('description'))
|
||||
|
||||
|
||||
class LinkedInEventsIE(LinkedInBaseIE):
|
||||
IE_NAME = 'linkedin:events'
|
||||
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/events/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.linkedin.com/events/7084656651378536448/comments/',
|
||||
'info_dict': {
|
||||
'id': '7084656651378536448',
|
||||
'ext': 'mp4',
|
||||
'title': '#37 Aprende a hacer una entrevista en inglés para tu próximo trabajo remoto',
|
||||
'description': '¡Agarra para anotar que se viene tremendo evento!',
|
||||
'duration': 1765,
|
||||
'timestamp': 1689113772,
|
||||
'upload_date': '20230711',
|
||||
'release_timestamp': 1689174012,
|
||||
'release_date': '20230712',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.linkedin.com/events/27-02energyfreedombyenergyclub7295762520814874625/comments/',
|
||||
'info_dict': {
|
||||
'id': '27-02energyfreedombyenergyclub7295762520814874625',
|
||||
'ext': 'mp4',
|
||||
'title': '27.02 Energy Freedom by Energy Club',
|
||||
'description': 'md5:1292e6f31df998914c293787a02c3b91',
|
||||
'duration': 6420,
|
||||
'timestamp': 1739445333,
|
||||
'upload_date': '20250213',
|
||||
'release_timestamp': 1740657620,
|
||||
'release_date': '20250227',
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://www.linkedin.com/').get('li_at'):
|
||||
self.raise_login_required()
|
||||
|
||||
def _real_extract(self, url):
|
||||
event_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, event_id)
|
||||
|
||||
base_data = traverse_obj(webpage, (
|
||||
{find_elements(tag='code', attr='style', value='display: none')}, ..., {json.loads}, 'included', ...))
|
||||
meta_data = traverse_obj(base_data, (
|
||||
lambda _, v: v['$type'] == 'com.linkedin.voyager.dash.events.ProfessionalEvent', any)) or {}
|
||||
|
||||
live_status = {
|
||||
'PAST': 'was_live',
|
||||
'ONGOING': 'is_live',
|
||||
'FUTURE': 'is_upcoming',
|
||||
}.get(meta_data.get('lifecycleState'))
|
||||
|
||||
if live_status == 'is_upcoming':
|
||||
player_data = {}
|
||||
if event_time := traverse_obj(meta_data, ('displayEventTime', {str})):
|
||||
message = f'This live event is scheduled for {event_time}'
|
||||
else:
|
||||
message = 'This live event has not yet started'
|
||||
self.raise_no_formats(message, expected=True, video_id=event_id)
|
||||
else:
|
||||
# TODO: Add support for audio-only live events
|
||||
player_data = traverse_obj(base_data, (
|
||||
lambda _, v: v['$type'] == 'com.linkedin.videocontent.VideoPlayMetadata',
|
||||
any, {require('video player data')}))
|
||||
|
||||
formats, subtitles = [], {}
|
||||
for prog_fmts in traverse_obj(player_data, ('progressiveStreams', ..., {dict})):
|
||||
for fmt_url in traverse_obj(prog_fmts, ('streamingLocations', ..., 'url', {url_or_none})):
|
||||
formats.append({
|
||||
'url': fmt_url,
|
||||
**traverse_obj(prog_fmts, {
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'tbr': ('bitRate', {int_or_none(scale=1000)}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'ext': ('mediaType', {mimetype2ext}),
|
||||
}),
|
||||
})
|
||||
|
||||
for m3u8_url in traverse_obj(player_data, (
|
||||
'adaptiveStreams', lambda _, v: v['protocol'] == 'HLS', 'masterPlaylists', ..., 'url', {url_or_none},
|
||||
)):
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, event_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': event_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(meta_data, {
|
||||
'title': ('name', {str}),
|
||||
'description': ('description', 'text', {str}),
|
||||
'timestamp': ('createdAt', {int_or_none(scale=1000)}),
|
||||
# timeRange.start is available when the stream is_upcoming
|
||||
'release_timestamp': ('timeRange', 'start', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
**traverse_obj(player_data, {
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
# liveStreamCreatedAt is only available when the stream is_live or was_live
|
||||
'release_timestamp': ('liveStreamCreatedAt', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
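The new LinkedInEventsIE above maps LinkedIn's lifecycleState onto a yt-dlp style live_status and, for upcoming events, builds a "scheduled for ..." message instead of extracting formats. A minimal sketch of that mapping; the metadata dicts are trimmed, made-up examples.

```python
def live_status_for_event(meta_data):
    """State mapping from the events extractor above; meta_data is a fabricated payload."""
    status = {
        'PAST': 'was_live',
        'ONGOING': 'is_live',
        'FUTURE': 'is_upcoming',
    }.get(meta_data.get('lifecycleState'))

    if status == 'is_upcoming':
        event_time = meta_data.get('displayEventTime')
        message = (f'This live event is scheduled for {event_time}' if event_time
                   else 'This live event has not yet started')
        return status, message
    return status, None

print(live_status_for_event({'lifecycleState': 'FUTURE', 'displayEventTime': 'Mar 27, 7:00 PM'}))
print(live_status_for_event({'lifecycleState': 'PAST'}))
# ('is_upcoming', 'This live event is scheduled for Mar 27, 7:00 PM')
# ('was_live', None)
```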
@ -365,13 +365,15 @@ def _real_initialize(self):
'All videos are only available to registered users', method='password')

def _set_device_id(self, username):
if not self._device_id:
self._device_id = self.cache.load(
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
if self._device_id:
return
device_id_cache = self.cache.load(self._NETRC_MACHINE, 'device_ids', default={})
self._device_id = device_id_cache.get(username)
if self._device_id:
return
self._device_id = str(uuid.uuid4())
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
device_id_cache[username] = self._device_id
self.cache.store(self._NETRC_MACHINE, 'device_ids', device_id_cache)

def _perform_login(self, username, password):
try:
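The _set_device_id() fix above stops the per-username device-ID cache from being overwritten with a single-user dict: the whole mapping is loaded, the missing entry is added, and the whole mapping is stored back. A standalone sketch of that behaviour using a JSON file as a stand-in for yt-dlp's cache store:

```python
import json
import uuid
from pathlib import Path

CACHE_FILE = Path('device_ids.json')  # stand-in for yt-dlp's cache store

def get_device_id(username):
    """Per-username device-ID cache, following the fix above: read the whole mapping,
    add the missing entry, and write the whole mapping back so other users' IDs survive."""
    cache = json.loads(CACHE_FILE.read_text()) if CACHE_FILE.exists() else {}
    device_id = cache.get(username)
    if device_id:
        return device_id
    device_id = str(uuid.uuid4())
    cache[username] = device_id           # keep existing entries intact
    CACHE_FILE.write_text(json.dumps(cache))
    return device_id

print(get_device_id('alice'))
print(get_device_id('bob'))
print(get_device_id('alice'))  # same ID as the first call; bob's entry is preserved
```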
@ -16,7 +16,7 @@
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_bitrate,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
@ -24,8 +24,6 @@
|
||||
qualities,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
@ -34,13 +32,70 @@
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class NiconicoIE(InfoExtractor):
|
||||
class NiconicoBaseIE(InfoExtractor):
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['JP']
|
||||
_LOGIN_BASE = 'https://account.nicovideo.jp'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies('https://www.nicovideo.jp').get('user_session'))
|
||||
|
||||
def _raise_login_error(self, message, expected=True):
|
||||
raise ExtractorError(f'Unable to login: {message}', expected=expected)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self.is_logged_in:
|
||||
return
|
||||
|
||||
self._request_webpage(
|
||||
f'{self._LOGIN_BASE}/login', None, 'Requesting session cookies')
|
||||
webpage = self._download_webpage(
|
||||
f'{self._LOGIN_BASE}/login/redirector', None,
|
||||
'Logging in', 'Unable to log in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': f'{self._LOGIN_BASE}/login',
|
||||
}, data=urlencode_postdata({
|
||||
'mail_tel': username,
|
||||
'password': password,
|
||||
}))
|
||||
|
||||
if self.is_logged_in:
|
||||
return
|
||||
elif err_msg := traverse_obj(webpage, (
|
||||
{find_element(cls='notice error')}, {find_element(cls='notice__text')}, {clean_html},
|
||||
)):
|
||||
self._raise_login_error(err_msg or 'Invalid username or password')
|
||||
elif 'oneTimePw' in webpage:
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage, 'post url', group='url')
|
||||
mfa, urlh = self._download_webpage_handle(
|
||||
urljoin(self._LOGIN_BASE, post_url), None,
|
||||
'Performing MFA', 'Unable to complete MFA', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=urlencode_postdata({
|
||||
'otp': self._get_tfa_info('6 digit number shown on app'),
|
||||
}))
|
||||
if self.is_logged_in:
|
||||
return
|
||||
elif 'error-code' in parse_qs(urlh.url):
|
||||
err_msg = traverse_obj(mfa, ({find_element(cls='pageMainMsg')}, {clean_html}))
|
||||
self._raise_login_error(err_msg or 'MFA session expired')
|
||||
elif 'formError' in mfa:
|
||||
err_msg = traverse_obj(mfa, (
|
||||
{find_element(cls='formError')}, {find_element(tag='div')}, {clean_html}))
|
||||
self._raise_login_error(err_msg or 'MFA challenge failed')
|
||||
|
||||
self._raise_login_error('Unexpected login error', expected=False)
|
||||
|
||||
|
||||
class NiconicoIE(NiconicoBaseIE):
|
||||
IE_NAME = 'niconico'
|
||||
IE_DESC = 'ニコニコ動画'
|
||||
_GEO_COUNTRIES = ['JP']
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||
@ -180,229 +235,6 @@ class NiconicoIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||
_NETRC_MACHINE = 'niconico'
|
||||
_API_HEADERS = {
|
||||
'X-Frontend-ID': '6',
|
||||
'X-Frontend-Version': '0',
|
||||
'X-Niconico-Language': 'en-us',
|
||||
'Referer': 'https://www.nicovideo.jp/',
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_ok = True
|
||||
login_form_strs = {
|
||||
'mail_tel': username,
|
||||
'password': password,
|
||||
}
|
||||
self._request_webpage(
|
||||
'https://account.nicovideo.jp/login', None,
|
||||
note='Acquiring Login session')
|
||||
page = self._download_webpage(
|
||||
'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
|
||||
note='Logging in', errnote='Unable to log in',
|
||||
data=urlencode_postdata(login_form_strs),
|
||||
headers={
|
||||
'Referer': 'https://account.nicovideo.jp/login',
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
if 'oneTimePw' in page:
|
||||
post_url = self._search_regex(
|
||||
r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
|
||||
page = self._download_webpage(
|
||||
urljoin('https://account.nicovideo.jp', post_url), None,
|
||||
note='Performing MFA', errnote='Unable to complete MFA',
|
||||
data=urlencode_postdata({
|
||||
'otp': self._get_tfa_info('6 digits code'),
|
||||
}), headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
if 'oneTimePw' in page or 'formError' in page:
|
||||
err_msg = self._html_search_regex(
|
||||
r'formError["\']+>(.*?)</div>', page, 'form_error',
|
||||
default='There\'s an error but the message can\'t be parsed.',
|
||||
flags=re.DOTALL)
|
||||
self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
|
||||
return False
|
||||
login_ok = 'class="notice error"' not in page
|
||||
if not login_ok:
|
||||
self.report_warning('Unable to log in: bad username or password')
|
||||
return login_ok
|
||||
|
||||
def _get_heartbeat_info(self, info_dict):
|
||||
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
||||
dmc_protocol = info_dict['expected_protocol']
|
||||
|
||||
api_data = (
|
||||
info_dict.get('_api_data')
|
||||
or self._parse_json(
|
||||
self._html_search_regex(
|
||||
'data-api-data="([^"]+)"',
|
||||
self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
|
||||
'API data', default='{}'),
|
||||
video_id))
|
||||
|
||||
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
||||
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
||||
|
||||
def ping():
|
||||
tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
|
||||
if tracking_id:
|
||||
tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
|
||||
watch_request_response = self._download_json(
|
||||
tracking_url, video_id,
|
||||
note='Acquiring permission for downloading video', fatal=False,
|
||||
headers=self._API_HEADERS)
|
||||
if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
|
||||
self.report_warning('Failed to acquire permission for playing video. Video download may fail.')
|
||||
|
||||
yesno = lambda x: 'yes' if x else 'no'
|
||||
|
||||
if dmc_protocol == 'http':
|
||||
protocol = 'http'
|
||||
protocol_parameters = {
|
||||
'http_output_download_parameters': {
|
||||
'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
|
||||
},
|
||||
}
|
||||
elif dmc_protocol == 'hls':
|
||||
protocol = 'm3u8'
|
||||
segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
|
||||
parsed_token = self._parse_json(session_api_data['token'], video_id)
|
||||
encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
|
||||
protocol_parameters = {
|
||||
'hls_parameters': {
|
||||
'segment_duration': segment_duration,
|
||||
'transfer_preset': '',
|
||||
'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
|
||||
'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
|
||||
},
|
||||
}
|
||||
if 'hls_encryption' in parsed_token and encryption:
|
||||
protocol_parameters['hls_parameters']['encryption'] = {
|
||||
parsed_token['hls_encryption']: {
|
||||
'encrypted_key': encryption['encryptedKey'],
|
||||
'key_uri': encryption['keyUri'],
|
||||
},
|
||||
}
|
||||
else:
|
||||
protocol = 'm3u8_native'
|
||||
else:
|
||||
raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')
|
||||
|
||||
session_response = self._download_json(
|
||||
session_api_endpoint['url'], video_id,
|
||||
query={'_format': 'json'},
|
||||
headers={'Content-Type': 'application/json'},
|
||||
note='Downloading JSON metadata for {}'.format(info_dict['format_id']),
|
||||
data=json.dumps({
|
||||
'session': {
|
||||
'client_info': {
|
||||
'player_id': session_api_data.get('playerId'),
|
||||
},
|
||||
'content_auth': {
|
||||
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
||||
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
||||
'service_id': 'nicovideo',
|
||||
'service_user_id': session_api_data.get('serviceUserId'),
|
||||
},
|
||||
'content_id': session_api_data.get('contentId'),
|
||||
'content_src_id_sets': [{
|
||||
'content_src_ids': [{
|
||||
'src_id_to_mux': {
|
||||
'audio_src_ids': [audio_src_id],
|
||||
'video_src_ids': [video_src_id],
|
||||
},
|
||||
}],
|
||||
}],
|
||||
'content_type': 'movie',
|
||||
'content_uri': '',
|
||||
'keep_method': {
|
||||
'heartbeat': {
|
||||
'lifetime': session_api_data.get('heartbeatLifetime'),
|
||||
},
|
||||
},
|
||||
'priority': session_api_data['priority'],
|
||||
'protocol': {
|
||||
'name': 'http',
|
||||
'parameters': {
|
||||
'http_parameters': {
|
||||
'parameters': protocol_parameters,
|
||||
},
|
||||
},
|
||||
},
|
||||
'recipe_id': session_api_data.get('recipeId'),
|
||||
'session_operation_auth': {
|
||||
'session_operation_auth_by_signature': {
|
||||
'signature': session_api_data.get('signature'),
|
||||
'token': session_api_data.get('token'),
|
||||
},
|
||||
},
|
||||
'timing_constraint': 'unlimited',
|
||||
},
|
||||
}).encode())
|
||||
|
||||
info_dict['url'] = session_response['data']['session']['content_uri']
|
||||
info_dict['protocol'] = protocol
|
||||
|
||||
# get heartbeat info
|
||||
heartbeat_info_dict = {
|
||||
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
||||
'data': json.dumps(session_response['data']),
|
||||
# interval, convert milliseconds to seconds, then halve to make a buffer.
|
||||
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
|
||||
'ping': ping,
|
||||
}
|
||||
|
||||
return info_dict, heartbeat_info_dict
|
||||
|
||||
def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
|
||||
|
||||
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
||||
return None
|
||||
|
||||
format_id = '-'.join(
|
||||
[remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])
|
||||
|
||||
vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
|
||||
|
||||
return {
|
||||
'url': 'niconico_dmc:{}/{}/{}'.format(video_id, video_quality['id'], audio_quality['id']),
|
||||
'format_id': format_id,
|
||||
'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
|
||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||
'acodec': 'aac',
|
||||
'vcodec': 'h264',
|
||||
**traverse_obj(audio_quality, ('metadata', {
|
||||
'abr': ('bitrate', {float_or_none(scale=1000)}),
|
||||
'asr': ('samplingRate', {int_or_none}),
|
||||
})),
|
||||
**traverse_obj(video_quality, ('metadata', {
|
||||
'vbr': ('bitrate', {float_or_none(scale=1000)}),
|
||||
'height': ('resolution', 'height', {int_or_none}),
|
||||
'width': ('resolution', 'width', {int_or_none}),
|
||||
})),
|
||||
'quality': -2 if 'low' in video_quality['id'] else None,
|
||||
'protocol': 'niconico_dmc',
|
||||
'expected_protocol': dmc_protocol, # XXX: This is not a documented field
|
||||
'http_headers': {
|
||||
'Origin': 'https://www.nicovideo.jp',
|
||||
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||
},
|
||||
}
|
||||
|
||||
def _yield_dmc_formats(self, api_data, video_id):
|
||||
dmc_data = traverse_obj(api_data, ('media', 'delivery', 'movie'))
|
||||
audios = traverse_obj(dmc_data, ('audios', ..., {dict}))
|
||||
videos = traverse_obj(dmc_data, ('videos', ..., {dict}))
|
||||
protocols = traverse_obj(dmc_data, ('session', 'protocols', ..., {str}))
|
||||
if not all((audios, videos, protocols)):
|
||||
return
|
||||
|
||||
for audio_quality, video_quality, protocol in itertools.product(audios, videos, protocols):
|
||||
if fmt := self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol):
|
||||
yield fmt
|
||||
|
||||
def _yield_dms_formats(self, api_data, video_id):
|
||||
fmt_filter = lambda _, v: v['isAvailable'] and v['id']
|
||||
@ -485,8 +317,8 @@ def _real_extract(self, url):
|
||||
'needs_premium': ('isPremium', {bool}),
|
||||
'needs_subscription': ('isAdmission', {bool}),
|
||||
})) or {'needs_auth': True}))
|
||||
formats = [*self._yield_dmc_formats(api_data, video_id),
|
||||
*self._yield_dms_formats(api_data, video_id)]
|
||||
|
||||
formats = list(self._yield_dms_formats(api_data, video_id))
|
||||
if not formats:
|
||||
fail_msg = clean_html(self._html_search_regex(
|
||||
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
|
||||
@ -921,7 +753,7 @@ def _real_extract(self, url):
|
||||
return self.playlist_result(self._entries(list_id), list_id)
|
||||
|
||||
|
||||
class NiconicoLiveIE(InfoExtractor):
|
||||
class NiconicoLiveIE(NiconicoBaseIE):
|
||||
IE_NAME = 'niconico:live'
|
||||
IE_DESC = 'ニコニコ生放送'
|
||||
_VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
|
||||
@ -953,8 +785,6 @@ class NiconicoLiveIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_KNOWN_LATENCY = ('high', 'low')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
|
||||
@ -970,22 +800,19 @@ def _real_extract(self, url):
|
||||
})
|
||||
|
||||
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
|
||||
latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
|
||||
if latency not in self._KNOWN_LATENCY:
|
||||
latency = 'high'
|
||||
|
||||
ws = self._request_webpage(
|
||||
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
|
||||
video_id=video_id, note='Connecting to WebSocket server')
|
||||
|
||||
self.write_debug('[debug] Sending HLS server request')
|
||||
self.write_debug('Sending HLS server request')
|
||||
ws.send(json.dumps({
|
||||
'type': 'startWatching',
|
||||
'data': {
|
||||
'stream': {
|
||||
'quality': 'abr',
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': latency,
|
||||
'protocol': 'hls',
|
||||
'latency': 'high',
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
},
|
||||
@ -1049,18 +876,29 @@ def _real_extract(self, url):
|
||||
for cookie in cookies:
|
||||
self._set_cookie(
|
||||
cookie['domain'], cookie['name'], cookie['value'],
|
||||
expire_time=unified_timestamp(cookie['expires']), path=cookie['path'], secure=cookie['secure'])
|
||||
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
|
||||
|
||||
fmt_common = {
|
||||
'live_latency': 'high',
|
||||
'origin': hostname,
|
||||
'protocol': 'niconico_live',
|
||||
'video_id': video_id,
|
||||
'ws': ws,
|
||||
}
|
||||
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
|
||||
a_map = {96: 'audio_low', 192: 'audio_high'}
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
||||
for fmt, q in zip(formats, reversed(qualities[1:])):
|
||||
fmt.update({
|
||||
'format_id': q,
|
||||
'protocol': 'niconico_live',
|
||||
'ws': ws,
|
||||
'video_id': video_id,
|
||||
'live_latency': latency,
|
||||
'origin': hostname,
|
||||
})
|
||||
for fmt in formats:
|
||||
if fmt.get('acodec') == 'none':
|
||||
fmt['format_id'] = next(q_iter, fmt['format_id'])
|
||||
elif fmt.get('vcodec') == 'none':
|
||||
abr = parse_bitrate(fmt['url'].lower())
|
||||
fmt.update({
|
||||
'abr': abr,
|
||||
'format_id': a_map.get(abr, fmt['format_id']),
|
||||
})
|
||||
fmt.update(fmt_common)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
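The niconico live hunk above renames audio-only HLS renditions by bitrate (96 kbps -> 'audio_low', 192 kbps -> 'audio_high'), deriving the bitrate from the playlist URL. A hedged sketch of that mapping; parse_kbps() is a tiny stand-in for yt-dlp's parse_bitrate() and the format dicts are fabricated.

```python
import re

A_MAP = {96: 'audio_low', 192: 'audio_high'}  # mapping used in the hunk above

def parse_kbps(url):
    """Stand-in for parse_bitrate(): find a number followed by 'kbps' in the URL."""
    match = re.search(r'(\d+)\s*kbps', url.lower())
    return int(match.group(1)) if match else None

def audio_format_id(fmt):
    """Rename an audio-only rendition the way the hunk does, keeping the original id
    as a fallback when the bitrate is unknown."""
    abr = parse_kbps(fmt['url'])
    return A_MAP.get(abr, fmt['format_id'])

print(audio_format_id({'url': 'https://example.com/audio_192kbps/playlist.m3u8', 'format_id': 'hls-0'}))
print(audio_format_id({'url': 'https://example.com/audio_64kbps/playlist.m3u8', 'format_id': 'hls-1'}))
# audio_high
# hls-1
```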
@ -181,6 +181,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 119.0,
|
||||
},
|
||||
'skip': 'HTTP Error 500: Internal Server Error',
|
||||
}, {
|
||||
# article with audio and no video
|
||||
'url': 'https://www.nytimes.com/2023/09/29/health/mosquitoes-genetic-engineering.html',
|
||||
@ -190,13 +191,14 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
'ext': 'mp3',
|
||||
'title': 'The Gamble: Can Genetically Modified Mosquitoes End Disease?',
|
||||
'description': 'md5:9ff8b47acbaf7f3ca8c732f5c815be2e',
|
||||
'timestamp': 1695960700,
|
||||
'timestamp': 1696008129,
|
||||
'upload_date': '20230929',
|
||||
'creator': 'Stephanie Nolen, Natalija Gormalova',
|
||||
'creators': ['Stephanie Nolen', 'Natalija Gormalova'],
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 1322,
|
||||
},
|
||||
}, {
|
||||
# lede_media_block already has sourceId
|
||||
'url': 'https://www.nytimes.com/2023/11/29/business/dealbook/kamala-harris-biden-voters.html',
|
||||
'md5': '3eb5ddb1d6f86254fe4f233826778737',
|
||||
'info_dict': {
|
||||
@ -207,7 +209,7 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
'timestamp': 1701290997,
|
||||
'upload_date': '20231129',
|
||||
'uploader': 'By The New York Times',
|
||||
'creator': 'Katie Rogers',
|
||||
'creators': ['Katie Rogers'],
|
||||
'thumbnail': r're:https?://\w+\.nyt.com/images/.*\.jpg',
|
||||
'duration': 97.631,
|
||||
},
|
||||
@ -222,10 +224,22 @@ class NYTimesArticleIE(NYTimesBaseIE):
|
||||
'title': 'Drunk and Asleep on the Job: Air Traffic Controllers Pushed to the Brink',
|
||||
'description': 'md5:549e5a5e935bf7d048be53ba3d2c863d',
|
||||
'upload_date': '20231202',
|
||||
'creator': 'Emily Steel, Sydney Ember',
|
||||
'creators': ['Emily Steel', 'Sydney Ember'],
|
||||
'timestamp': 1701511264,
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# lede_media_block does not have sourceId
|
||||
'url': 'https://www.nytimes.com/2025/04/30/well/move/hip-mobility-routine.html',
|
||||
'info_dict': {
|
||||
'id': 'hip-mobility-routine',
|
||||
'title': 'Tight Hips? These Moves Can Help.',
|
||||
'description': 'Sitting all day is hard on your hips. Try this simple routine for better mobility.',
|
||||
'creators': ['Alyssa Ages', 'Theodore Tae'],
|
||||
'timestamp': 1746003629,
|
||||
'upload_date': '20250430',
|
||||
},
|
||||
'playlist_count': 7,
|
||||
}, {
|
||||
'url': 'https://www.nytimes.com/2023/12/02/business/media/netflix-squid-game-challenge.html',
|
||||
'only_matching': True,
|
||||
@ -256,14 +270,18 @@ def _extract_content_from_block(self, block):
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
webpage = self._download_webpage(url, page_id, impersonate=True)
|
||||
art_json = self._search_json(
|
||||
r'window\.__preloadedData\s*=', webpage, 'media details', page_id,
|
||||
transform_source=lambda x: x.replace('undefined', 'null'))['initialData']['data']['article']
|
||||
content = art_json['sprinkledBody']['content']
|
||||
|
||||
blocks = traverse_obj(art_json, (
|
||||
'sprinkledBody', 'content', ..., ('ledeMedia', None),
|
||||
lambda _, v: v['__typename'] in ('Video', 'Audio')))
|
||||
blocks = []
|
||||
block_filter = lambda k, v: k == 'media' and v['__typename'] in ('Video', 'Audio')
|
||||
if lede_media_block := traverse_obj(content, (..., 'ledeMedia', block_filter, any)):
|
||||
lede_media_block.setdefault('sourceId', art_json.get('sourceId'))
|
||||
blocks.append(lede_media_block)
|
||||
blocks.extend(traverse_obj(content, (..., block_filter)))
|
||||
if not blocks:
|
||||
raise ExtractorError('Unable to extract any media blocks from webpage')
|
||||
|
||||
@ -273,8 +291,7 @@ def _real_extract(self, url):
|
||||
'sprinkledBody', 'content', ..., 'summary', 'content', ..., 'text', {str}),
|
||||
get_all=False) or self._html_search_meta(['og:description', 'twitter:description'], webpage),
|
||||
'timestamp': traverse_obj(art_json, ('firstPublished', {parse_iso8601})),
|
||||
'creator': ', '.join(
|
||||
traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName'))), # TODO: change to 'creators' (list)
|
||||
'creators': traverse_obj(art_json, ('bylines', ..., 'creators', ..., 'displayName', {str})),
|
||||
'thumbnails': self._extract_thumbnails(traverse_obj(
|
||||
art_json, ('promotionalMedia', 'assetCrops', ..., 'renditions', ...))),
|
||||
}
|
||||
|
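Note on the NYTimesArticleIE hunks above: media blocks are now collected by filtering `sprinkledBody.content` for entries whose `__typename` is `Video` or `Audio`, taking the `ledeMedia` block first and giving it the article-level `sourceId` when it lacks its own. A simplified standalone sketch on an invented payload (the extractor itself does this with `traverse_obj`):

```python
def collect_media_blocks(article):
    content = article['sprinkledBody']['content']

    def is_media(value):
        return isinstance(value, dict) and value.get('__typename') in ('Video', 'Audio')

    blocks = []
    # the lede media comes first and may lack its own sourceId
    lede = next(
        (b['ledeMedia']['media'] for b in content
         if is_media((b.get('ledeMedia') or {}).get('media'))),
        None)
    if lede:
        lede.setdefault('sourceId', article.get('sourceId'))
        blocks.append(lede)
    # then every other inline media block
    blocks.extend(b['media'] for b in content if is_media(b.get('media')))
    return blocks


article = {
    'sourceId': '100000001',
    'sprinkledBody': {'content': [
        {'ledeMedia': {'media': {'__typename': 'Video'}}},
        {'media': {'__typename': 'Audio', 'sourceId': '100000002'}},
        {'media': {'__typename': 'Image'}},  # ignored
    ]},
}
print(collect_media_blocks(article))
```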
@ -1,5 +1,3 @@
|
||||
import re
|
||||
|
||||
from .youtube import YoutubeIE
|
||||
from .zdf import ZDFBaseIE
|
||||
from ..utils import (
|
||||
@ -7,44 +5,27 @@
|
||||
merge_dicts,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PhoenixIE(ZDFBaseIE):
|
||||
IE_NAME = 'phoenix.de'
|
||||
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
|
||||
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/?#]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
|
||||
_TESTS = [{
|
||||
# Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
|
||||
'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
|
||||
'md5': '34ec321e7eb34231fd88616c65c92db0',
|
||||
'url': 'https://www.phoenix.de/sendungen/dokumentationen/spitzbergen-a-893349.html',
|
||||
'md5': 'a79e86d9774d0b3f2102aff988a0bd32',
|
||||
'info_dict': {
|
||||
'id': '210222_phx_nachgehakt_corona_protest',
|
||||
'id': '221215_phx_spitzbergen',
|
||||
'ext': 'mp4',
|
||||
'title': 'Wohin führt der Protest in der Pandemie?',
|
||||
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
|
||||
'duration': 1691,
|
||||
'timestamp': 1613902500,
|
||||
'upload_date': '20210221',
|
||||
'title': 'Spitzbergen',
|
||||
'description': 'Film von Tilmann Bünz',
|
||||
'duration': 728.0,
|
||||
'timestamp': 1555600500,
|
||||
'upload_date': '20190418',
|
||||
'uploader': 'Phoenix',
|
||||
'series': 'corona nachgehakt',
|
||||
'episode': 'Wohin führt der Protest in der Pandemie?',
|
||||
},
|
||||
}, {
|
||||
# Youtube embed
|
||||
'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
|
||||
'info_dict': {
|
||||
'id': 'hMQtqFYjomk',
|
||||
'ext': 'mp4',
|
||||
'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
|
||||
'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
|
||||
'duration': 3509,
|
||||
'upload_date': '20201219',
|
||||
'uploader': 'phoenix',
|
||||
'uploader_id': 'phoenix',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'thumbnail': 'https://www.phoenix.de/sixcms/media.php/21/Bergspitzen1.png',
|
||||
'series': 'Dokumentationen',
|
||||
'episode': 'Spitzbergen',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
|
||||
@ -90,8 +71,8 @@ def _real_extract(self, url):
|
||||
content_id = details['tracking']['nielsen']['content']['assetid']
|
||||
|
||||
info = self._extract_ptmd(
|
||||
f'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/{content_id}',
|
||||
content_id, None, url)
|
||||
f'https://tmd.phoenix.de/tmd/2/android_native_6/vod/ptmd/phoenix/{content_id}',
|
||||
content_id)
|
||||
|
||||
duration = int_or_none(try_get(
|
||||
details, lambda x: x['tracking']['nielsen']['content']['length']))
|
||||
@ -101,20 +82,8 @@ def _real_extract(self, url):
|
||||
str)
|
||||
episode = title if details.get('contentType') == 'episode' else None
|
||||
|
||||
thumbnails = []
|
||||
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
|
||||
for thumbnail_key, thumbnail_url in teaser_images.items():
|
||||
thumbnail_url = urljoin(url, thumbnail_url)
|
||||
if not thumbnail_url:
|
||||
continue
|
||||
thumbnail = {
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||
if m:
|
||||
thumbnail['width'] = int(m.group(1))
|
||||
thumbnail['height'] = int(m.group(2))
|
||||
thumbnails.append(thumbnail)
|
||||
thumbnails = self._extract_thumbnails(teaser_images)
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': content_id,
|
||||
|
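Note on the phoenix.de hunks above: the PTMD request now uses the `android_native_6` profile, and the hand-rolled teaser-image parsing is replaced by the shared ZDF thumbnail helper. Roughly, that helper turns layout keys such as `640x360` into sized thumbnail entries — a standalone approximation, not the helper's exact signature or output:

```python
import re

def layouts_to_thumbnails(layouts):
    # layouts maps keys like '640x360' or 'original' to image URLs
    thumbnails = []
    for key, url in layouts.items():
        if not url:
            continue
        thumb = {'id': key, 'url': url}
        if mobj := re.fullmatch(r'(\d+)x(\d+)', key):
            thumb['width'], thumb['height'] = int(mobj[1]), int(mobj[2])
        thumbnails.append(thumb)
    return thumbnails


print(layouts_to_thumbnails({
    '640x360': 'https://www.phoenix.de/sixcms/media.php/21/Bergspitzen1-640.png',
    'original': 'https://www.phoenix.de/sixcms/media.php/21/Bergspitzen1.png',
}))
```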
@ -7,11 +7,13 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import unpack
|
||||
|
||||
|
||||
class PlaySuisseIE(InfoExtractor):
|
||||
@ -26,12 +28,12 @@ class PlaySuisseIE(InfoExtractor):
|
||||
{
|
||||
# episode in a series
|
||||
'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
|
||||
'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
|
||||
'md5': 'e20d1ede6872a03b41905ca1060a1ef2',
|
||||
'info_dict': {
|
||||
'id': '763211',
|
||||
'ext': 'mp4',
|
||||
'title': 'Knochen',
|
||||
'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
|
||||
'description': 'md5:3bdd80e2ce20227c47aab1df2a79a519',
|
||||
'duration': 3344,
|
||||
'series': 'Wilder',
|
||||
'season': 'Season 1',
|
||||
@ -42,24 +44,33 @@ class PlaySuisseIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
# film
|
||||
'url': 'https://www.playsuisse.ch/watch/808675',
|
||||
'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
|
||||
'url': 'https://www.playsuisse.ch/detail/2573198',
|
||||
'md5': '1f115bb0a5191477b1a5771643a4283d',
|
||||
'info_dict': {
|
||||
'id': '808675',
|
||||
'id': '2573198',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der Läufer',
|
||||
'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
|
||||
'duration': 5280,
|
||||
'title': 'Azor',
|
||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||
'genres': ['Fiction'],
|
||||
'creators': ['Andreas Fontana'],
|
||||
'cast': ['Fabrizio Rongione', 'Stéphanie Cléau', 'Gilles Privat', 'Alexandre Trocki'],
|
||||
'location': 'France; Argentine',
|
||||
'release_year': 2021,
|
||||
'duration': 5981,
|
||||
'thumbnail': 're:https://playsuisse-img.akamaized.net/',
|
||||
},
|
||||
}, {
|
||||
# series (treated as a playlist)
|
||||
'url': 'https://www.playsuisse.ch/detail/1115687',
|
||||
'info_dict': {
|
||||
'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
|
||||
'id': '1115687',
|
||||
'series': 'They all came out to Montreux',
|
||||
'title': 'They all came out to Montreux',
|
||||
'description': 'md5:0fefd8c5b4468a0bb35e916887681520',
|
||||
'genres': ['Documentary'],
|
||||
'creators': ['Oliver Murray'],
|
||||
'location': 'Switzerland',
|
||||
'release_year': 2021,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
@ -120,6 +131,12 @@ class PlaySuisseIE(InfoExtractor):
|
||||
id
|
||||
name
|
||||
description
|
||||
descriptionLong
|
||||
year
|
||||
contentTypes
|
||||
directors
|
||||
mainCast
|
||||
productionCountries
|
||||
duration
|
||||
episodeNumber
|
||||
seasonNumber
|
||||
@ -215,9 +232,7 @@ def _perform_login(self, username, password):
|
||||
if not self._ID_TOKEN:
|
||||
raise ExtractorError('Login failed')
|
||||
|
||||
def _get_media_data(self, media_id):
|
||||
# NOTE In the web app, the "locale" header is used to switch between languages,
|
||||
# However this doesn't seem to take effect when passing the header here.
|
||||
def _get_media_data(self, media_id, locale=None):
|
||||
response = self._download_json(
|
||||
'https://www.playsuisse.ch/api/graphql',
|
||||
media_id, data=json.dumps({
|
||||
@ -225,7 +240,7 @@ def _get_media_data(self, media_id):
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
'variables': {'assetId': media_id},
|
||||
}).encode(),
|
||||
headers={'Content-Type': 'application/json', 'locale': 'de'})
|
||||
headers={'Content-Type': 'application/json', 'locale': locale or 'de'})
|
||||
|
||||
return response['data']['assetV2']
|
||||
|
||||
@ -234,7 +249,7 @@ def _real_extract(self, url):
|
||||
self.raise_login_required(method='password')
|
||||
|
||||
media_id = self._match_id(url)
|
||||
media_data = self._get_media_data(media_id)
|
||||
media_data = self._get_media_data(media_id, traverse_obj(parse_qs(url), ('locale', 0)))
|
||||
info = self._extract_single(media_data)
|
||||
if media_data.get('episodes'):
|
||||
info.update({
|
||||
@ -257,15 +272,22 @@ def _extract_single(self, media_data):
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
|
||||
return {
|
||||
'id': media_data['id'],
|
||||
'title': media_data.get('name'),
|
||||
'description': media_data.get('description'),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': int_or_none(media_data.get('duration')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'series': media_data.get('seriesName'),
|
||||
'season_number': int_or_none(media_data.get('seasonNumber')),
|
||||
'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
|
||||
'episode_number': int_or_none(media_data.get('episodeNumber')),
|
||||
**traverse_obj(media_data, {
|
||||
'id': ('id', {str}),
|
||||
'title': ('name', {str}),
|
||||
'description': (('descriptionLong', 'description'), {str}, any),
|
||||
'genres': ('contentTypes', ..., {str}),
|
||||
'creators': ('directors', ..., {str}),
|
||||
'cast': ('mainCast', ..., {str}),
|
||||
'location': ('productionCountries', ..., {str}, all, {unpack(join_nonempty, delim='; ')}, filter),
|
||||
'release_year': ('year', {str}, {lambda x: x[:4]}, {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'series': ('seriesName', {str}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode': ('name', {str}, {lambda x: x if media_data['episodeNumber'] is not None else None}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
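Note on the PlaySuisse hunks above: the GraphQL query now also requests `descriptionLong`, `year`, `contentTypes`, `directors`, `mainCast` and `productionCountries`, and the metadata is assembled with a `traverse_obj` mapping. A plain-Python approximation of that mapping on an invented asset (the `year` value format is an assumption):

```python
def map_asset(asset):
    episode_no = asset.get('episodeNumber')
    return {
        'id': asset.get('id'),
        'title': asset.get('name'),
        'description': asset.get('descriptionLong') or asset.get('description'),
        'genres': asset.get('contentTypes') or [],
        'creators': asset.get('directors') or [],
        'cast': asset.get('mainCast') or [],
        'location': '; '.join(asset.get('productionCountries') or []) or None,
        'release_year': int(asset['year'][:4]) if asset.get('year') else None,
        'duration': asset.get('duration'),
        'series': asset.get('seriesName'),
        'season_number': asset.get('seasonNumber'),
        'episode': asset.get('name') if episode_no is not None else None,
        'episode_number': episode_no,
    }


print(map_asset({
    'id': '2573198', 'name': 'Azor', 'description': '',
    'contentTypes': ['Fiction'], 'directors': ['Andreas Fontana'],
    'mainCast': ['Fabrizio Rongione'], 'productionCountries': ['France', 'Argentine'],
    'year': '2021-01-01', 'duration': 5981,
}))
```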
@ -321,6 +321,27 @@ class RaiPlayIE(RaiBaseIE):
|
||||
'timestamp': 1348495020,
|
||||
'upload_date': '20120924',
|
||||
},
|
||||
}, {
|
||||
# checking program_info gives false positive for DRM
|
||||
'url': 'https://www.raiplay.it/video/2022/10/Ad-ogni-costo---Un-giorno-in-Pretura---Puntata-del-15102022-1dfd1295-ea38-4bac-b51e-f87e2881693b.html',
|
||||
'md5': '572c6f711b7c5f2d670ba419b4ae3b08',
|
||||
'info_dict': {
|
||||
'id': '1dfd1295-ea38-4bac-b51e-f87e2881693b',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ad ogni costo - Un giorno in Pretura - Puntata del 15/10/2022',
|
||||
'alt_title': 'St 2022/23 - Un giorno in pretura - Ad ogni costo',
|
||||
'description': 'md5:4046d97b2687f74f06a8b8270ba5599f',
|
||||
'uploader': 'Rai 3',
|
||||
'duration': 3773.0,
|
||||
'thumbnail': 'https://www.raiplay.it/dl/img/2022/10/12/1665586539957_2048x2048.png',
|
||||
'creators': ['Rai 3'],
|
||||
'series': 'Un giorno in pretura',
|
||||
'season': '2022/23',
|
||||
'episode': 'Ad ogni costo',
|
||||
'timestamp': 1665507240,
|
||||
'upload_date': '20221011',
|
||||
'release_year': 2025,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
|
||||
'only_matching': True,
|
||||
@ -340,9 +361,8 @@ def _real_extract(self, url):
|
||||
media = self._download_json(
|
||||
f'{base}.json', video_id, 'Downloading video JSON')
|
||||
|
||||
if not self.get_param('allow_unplayable_formats'):
|
||||
if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')):
|
||||
self.report_drm(video_id)
|
||||
if traverse_obj(media, ('rights_management', 'rights', 'drm')):
|
||||
self.report_drm(video_id)
|
||||
|
||||
video = media['video']
|
||||
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
|
||||
|
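Note on the RaiPlay hunks above: the DRM check no longer also consults `program_info`, which could flag DRM on a playable video when only the parent programme carries DRM entries. A toy illustration with an invented payload:

```python
def get_nested(d, *path):
    for key in path:
        d = d.get(key) if isinstance(d, dict) else None
    return d


media = {
    'program_info': {'rights_management': {'rights': {'drm': [{'type': 'widevine'}]}}},
    'rights_management': {'rights': {}},  # the episode itself is DRM-free
}

old_check = bool(get_nested(media, 'rights_management', 'rights', 'drm')
                 or get_nested(media, 'program_info', 'rights_management', 'rights', 'drm'))
new_check = bool(get_nested(media, 'rights_management', 'rights', 'drm'))
print(old_check, new_check)  # True False -> old logic reported DRM, new one does not
```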
@ -388,7 +388,8 @@ def add_thumbnail(src):
|
||||
})
|
||||
if entries:
|
||||
return self.playlist_result(entries, video_id, **info)
|
||||
raise ExtractorError('No media found', expected=True)
|
||||
self.raise_no_formats('No media found', expected=True, video_id=video_id)
|
||||
return {**info, 'id': video_id}
|
||||
|
||||
# Check if media is hosted on reddit:
|
||||
reddit_video = traverse_obj(data, (
|
||||
|
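Note on the Reddit hunk above: instead of aborting with `ExtractorError`, the extractor now calls `raise_no_formats` and still returns the metadata, so `--ignore-no-formats-error` keeps the info dict for media-less posts. A rough, hedged sketch of that behavioural difference (the names here are illustrative, not yt-dlp's real helpers):

```python
class NoFormatsError(Exception):
    pass


def extract(post, ignore_no_formats_error=False):
    info = {'id': post['id'], 'title': post.get('title')}
    if not post.get('media'):
        # old behaviour: always raised, so the metadata was lost
        if not ignore_no_formats_error:
            raise NoFormatsError('No media found')
        return info
    info['formats'] = post['media']
    return info


print(extract({'id': 'abc123', 'title': 'text-only post'}, ignore_no_formats_error=True))
```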
@ -1,61 +0,0 @@
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SproutIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
|
||||
'info_dict': {
|
||||
'id': 'bm0foJFaTKqb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Robot Bike Race',
|
||||
'description': 'md5:436b1d97117cc437f54c383f4debc66d',
|
||||
'timestamp': 1606148940,
|
||||
'upload_date': '20201123',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.universalkids.com/watch/robot-bike-race',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
mpx_metadata = self._download_json(
|
||||
# http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
|
||||
'https://www.universalkids.com/_api/videos/' + display_id,
|
||||
display_id)['mpxMetadata']
|
||||
media_pid = mpx_metadata['mediaPid']
|
||||
theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if mpx_metadata.get('entitlement') == 'auth':
|
||||
query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
|
||||
theplatform_url = smuggle_url(
|
||||
update_url_query(theplatform_url, query), {
|
||||
'force_smil_url': True,
|
||||
'geo_countries': self._GEO_COUNTRIES,
|
||||
})
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': media_pid,
|
||||
'url': theplatform_url,
|
||||
'series': mpx_metadata.get('seriesName'),
|
||||
'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
|
||||
'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
|
||||
'ie_key': 'ThePlatform',
|
||||
}
|
@ -471,8 +471,7 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._og_search_title(webpage)
|
||||
|
||||
urql_state = self._search_json(
|
||||
r'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage, 'json data', display_id)
|
||||
urql_state = self._search_json(r'urqlState\s*[=:]', webpage, 'json data', display_id)
|
||||
|
||||
data = traverse_obj(urql_state, (..., 'data', {str}, {json.loads}), get_all=False) or {}
|
||||
|
||||
|
@ -2,12 +2,13 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .jwplatform import JWPlatformIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
js_to_json,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class TV2DKIE(InfoExtractor):
|
||||
@ -21,35 +22,46 @@ class TV2DKIE(InfoExtractor):
|
||||
tv2fyn|
|
||||
tv2east|
|
||||
tv2lorry|
|
||||
tv2nord
|
||||
tv2nord|
|
||||
tv2kosmopol
|
||||
)\.dk/
|
||||
(:[^/]+/)*
|
||||
(?:[^/?#]+/)*
|
||||
(?P<id>[^/?\#&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
|
||||
'info_dict': {
|
||||
'id': '0_52jmwa0p',
|
||||
'id': 'sPp5z21q',
|
||||
'ext': 'mp4',
|
||||
'title': '19:30 - 28. okt. 2019',
|
||||
'timestamp': 1572290248,
|
||||
'description': '',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/sPp5z21q/poster.jpg?width=720',
|
||||
'timestamp': 1572287400,
|
||||
'upload_date': '20191028',
|
||||
'uploader_id': 'tvsyd',
|
||||
'duration': 1347,
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
}, {
|
||||
'url': 'https://www.tv2lorry.dk/gadekamp/gadekamp-6-hoejhuse-i-koebenhavn',
|
||||
'info_dict': {
|
||||
'id': '1_7iwll9n0',
|
||||
'id': 'oD9cyq0m',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20211027',
|
||||
'title': 'Gadekamp #6 - Højhuse i København',
|
||||
'uploader_id': 'tv2lorry',
|
||||
'timestamp': 1635345229,
|
||||
'description': '',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/oD9cyq0m/poster.jpg?width=720',
|
||||
'timestamp': 1635348600,
|
||||
'upload_date': '20211027',
|
||||
},
|
||||
'add_ie': ['Kaltura'],
|
||||
}, {
|
||||
'url': 'https://www.tvsyd.dk/haderslev/x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
|
||||
'info_dict': {
|
||||
'id': 'x-factor-brodre-fulde-af-selvtillid-er-igen-hjemme-hos-mor-vores-diagnoser-har-vaeret-en-fordel',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
'url': 'https://www.tv2ostjylland.dk/aarhus/dom-kan-fa-alvorlige-konsekvenser',
|
||||
'info_dict': {
|
||||
'id': 'dom-kan-fa-alvorlige-konsekvenser',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
|
||||
'only_matching': True,
|
||||
@ -71,40 +83,22 @@ class TV2DKIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.tv2kosmopol.dk/metropolen/chaufforer-beordres-til-at-kore-videre-i-ulovlige-busser-med-rode-advarselslamper',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
search_space = traverse_obj(webpage, {find_element(tag='article')}) or webpage
|
||||
|
||||
entries = []
|
||||
player_ids = traverse_obj(
|
||||
re.findall(r'x-data="(?:video_player|simple_player)\(({[^"]+})', search_space),
|
||||
(..., {js_to_json}, {json.loads}, ('jwpMediaId', 'videoId'), {str}))
|
||||
|
||||
def add_entry(partner_id, kaltura_id):
|
||||
entries.append(self.url_result(
|
||||
f'kaltura:{partner_id}:{kaltura_id}', 'Kaltura',
|
||||
video_id=kaltura_id))
|
||||
|
||||
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
|
||||
video = extract_attributes(video_el)
|
||||
kaltura_id = video.get('data-entryid')
|
||||
if not kaltura_id:
|
||||
continue
|
||||
partner_id = video.get('data-partnerid')
|
||||
if not partner_id:
|
||||
continue
|
||||
add_entry(partner_id, kaltura_id)
|
||||
if not entries:
|
||||
kaltura_id = self._search_regex(
|
||||
(r'entry_id\s*:\s*["\']([0-9a-z_]+)',
|
||||
r'\\u002FentryId\\u002F(\w+)\\u002F'), webpage, 'kaltura id')
|
||||
partner_id = self._search_regex(
|
||||
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
|
||||
'partner id')
|
||||
add_entry(partner_id, kaltura_id)
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
return self.playlist_result(entries)
|
||||
return self.playlist_from_matches(
|
||||
player_ids, video_id, getter=lambda x: f'jwplatform:{x}', ie=JWPlatformIE)
|
||||
|
||||
|
||||
class TV2DKBornholmPlayIE(InfoExtractor):
|
||||
|
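Note on the TV2DK hunks above: the Kaltura `data-entryid` scraping is replaced by reading the Alpine.js `x-data="video_player(...)"`/`simple_player(...)` attributes and handing the contained `jwpMediaId`/`videoId` values to the JWPlatform extractor. A standalone sketch on a toy HTML snippet (IDs invented, and a simplified quote-based regex used here instead of `js_to_json`):

```python
import re

html = '''
<article>
  <div x-data="video_player({jwpMediaId: 'sPp5z21q', autoplay: false})"></div>
  <div x-data="simple_player({videoId: 'oD9cyq0m'})"></div>
</article>
'''

player_ids = []
for blob in re.findall(r'x-data="(?:video_player|simple_player)\(({[^"]+})', html):
    mobj = re.search(r"(?:jwpMediaId|videoId)\s*:\s*'(\w+)'", blob)
    if mobj:
        player_ids.append(mobj.group(1))

# each ID becomes a jwplatform: URL that the JWPlatform extractor resolves
print([f'jwplatform:{pid}' for pid in player_ids])
```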
@ -14,12 +14,13 @@
|
||||
parse_duration,
|
||||
qualities,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class TwitCastingIE(InfoExtractor):
|
||||
@ -138,13 +139,7 @@ def _real_extract(self, url):
|
||||
r'data-toggle="true"[^>]+datetime="([^"]+)"',
|
||||
webpage, 'datetime', None))
|
||||
|
||||
stream_server_data = self._download_json(
|
||||
f'https://twitcasting.tv/streamserver.php?target={uploader_id}&mode=client', video_id,
|
||||
'Downloading live info', fatal=False)
|
||||
|
||||
is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"'])
|
||||
if not traverse_obj(stream_server_data, 'llfmp4') and is_live:
|
||||
self.raise_login_required(method='cookies')
|
||||
|
||||
base_dict = {
|
||||
'title': title,
|
||||
@ -165,28 +160,37 @@ def find_dmu(x):
|
||||
return [data_movie_url]
|
||||
|
||||
m3u8_urls = (try_get(webpage, find_dmu, list)
|
||||
or traverse_obj(video_js_data, (..., 'source', 'url'))
|
||||
or ([f'https://twitcasting.tv/{uploader_id}/metastream.m3u8'] if is_live else None))
|
||||
if not m3u8_urls:
|
||||
raise ExtractorError('Failed to get m3u8 playlist')
|
||||
or traverse_obj(video_js_data, (..., 'source', 'url')))
|
||||
|
||||
if is_live:
|
||||
m3u8_url = m3u8_urls[0]
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id='hls',
|
||||
live=True, headers=self._M3U8_HEADERS)
|
||||
stream_data = self._download_json(
|
||||
'https://twitcasting.tv/streamserver.php',
|
||||
video_id, 'Downloading live info', query={
|
||||
'target': uploader_id,
|
||||
'mode': 'client',
|
||||
'player': 'pc_web',
|
||||
})
|
||||
|
||||
if traverse_obj(stream_server_data, ('hls', 'source')):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id='source',
|
||||
live=True, query={'mode': 'source'},
|
||||
note='Downloading source quality m3u8',
|
||||
headers=self._M3U8_HEADERS, fatal=False))
|
||||
formats = []
|
||||
# low: 640x360, medium: 1280x720, high: 1920x1080
|
||||
qq = qualities(['low', 'medium', 'high'])
|
||||
for quality, m3u8_url in traverse_obj(stream_data, (
|
||||
'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'url': m3u8_url,
|
||||
'format_id': f'hls-{quality}',
|
||||
'ext': 'mp4',
|
||||
'quality': qq(quality),
|
||||
'protocol': 'm3u8',
|
||||
'http_headers': self._M3U8_HEADERS,
|
||||
})
|
||||
|
||||
if websockets:
|
||||
qq = qualities(['base', 'mobilesource', 'main'])
|
||||
streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {}
|
||||
for mode, ws_url in streams.items():
|
||||
for mode, ws_url in traverse_obj(stream_data, (
|
||||
'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'url': ws_url,
|
||||
'format_id': f'ws-{mode}',
|
||||
@ -197,10 +201,15 @@ def find_dmu(x):
|
||||
'protocol': 'websocket_frag',
|
||||
})
|
||||
|
||||
if not formats:
|
||||
self.raise_login_required()
|
||||
|
||||
infodict = {
|
||||
'formats': formats,
|
||||
'_format_sort_fields': ('source', ),
|
||||
}
|
||||
elif not m3u8_urls:
|
||||
raise ExtractorError('Failed to get m3u8 playlist')
|
||||
elif len(m3u8_urls) == 1:
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS)
|
||||
|
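Note on the TwitCasting hunks above: live info now comes from `streamserver.php` with `player=pc_web`, HLS formats are built from the `tc-hls.streams` map (low/medium/high, roughly 360p/720p/1080p), WebSocket `llfmp4` streams are still added, and login is only demanded when nothing was found. A standalone sketch on an invented payload (the real extractor layers its m3u8/WebSocket helpers on top of this):

```python
stream_data = {
    'tc-hls': {'streams': {
        'low': 'https://example.invalid/low/stream.m3u8',        # ~640x360
        'medium': 'https://example.invalid/medium/stream.m3u8',  # ~1280x720
        'high': 'https://example.invalid/high/stream.m3u8',      # ~1920x1080
    }},
    'llfmp4': {'streams': {'main': 'wss://example.invalid/ws/main'}},
}

HLS_ORDER = ['low', 'medium', 'high']          # later entries are preferred
WS_ORDER = ['base', 'mobilesource', 'main']

formats = []
for quality, url in stream_data.get('tc-hls', {}).get('streams', {}).items():
    formats.append({
        'url': url, 'format_id': f'hls-{quality}', 'ext': 'mp4',
        'protocol': 'm3u8', 'quality': HLS_ORDER.index(quality),
    })
for mode, ws_url in stream_data.get('llfmp4', {}).get('streams', {}).items():
    formats.append({
        'url': ws_url, 'format_id': f'ws-{mode}',
        'protocol': 'websocket_frag', 'quality': WS_ORDER.index(mode),
    })

if not formats:
    raise RuntimeError('No formats found; an account may be required')
print([f['format_id'] for f in formats])
```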
@ -1225,8 +1225,8 @@ def _real_extract(self, url):
|
||||
'channel_id': ('broadcaster', 'id', {str}),
|
||||
'channel_follower_count': ('broadcaster', 'followers', 'totalCount', {int_or_none}),
|
||||
'channel_is_verified': ('broadcaster', 'isPartner', {bool}),
|
||||
'uploader': ('broadcaster', 'displayName', {str}),
|
||||
'uploader_id': ('broadcaster', 'id', {str}),
|
||||
'uploader': ('curator', 'displayName', {str}),
|
||||
'uploader_id': ('curator', 'id', {str}),
|
||||
'categories': ('game', 'displayName', {str}, filter, all, filter),
|
||||
}),
|
||||
}
|
||||
|
@ -1221,20 +1221,10 @@ class TwitterIE(TwitterBaseIE):
|
||||
}]
|
||||
|
||||
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
|
||||
|
||||
@property
|
||||
def _GRAPHQL_ENDPOINT(self):
|
||||
if self.is_logged_in:
|
||||
return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
|
||||
return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
|
||||
_GRAPHQL_ENDPOINT = '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
|
||||
|
||||
def _graphql_to_legacy(self, data, twid):
|
||||
result = traverse_obj(data, (
|
||||
'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
|
||||
lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
|
||||
'tweet_results', 'result', ('tweet', None), {dict},
|
||||
), default={}, get_all=False) if self.is_logged_in else traverse_obj(
|
||||
data, ('tweetResult', 'result', {dict}), default={})
|
||||
result = traverse_obj(data, ('tweetResult', 'result', {dict})) or {}
|
||||
|
||||
typename = result.get('__typename')
|
||||
if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
|
||||
@ -1278,37 +1268,6 @@ def _graphql_to_legacy(self, data, twid):
|
||||
|
||||
def _build_graphql_query(self, media_id):
|
||||
return {
|
||||
'variables': {
|
||||
'focalTweetId': media_id,
|
||||
'includePromotedContent': True,
|
||||
'with_rux_injections': False,
|
||||
'withBirdwatchNotes': True,
|
||||
'withCommunity': True,
|
||||
'withDownvotePerspective': False,
|
||||
'withQuickPromoteEligibilityTweetFields': True,
|
||||
'withReactionsMetadata': False,
|
||||
'withReactionsPerspective': False,
|
||||
'withSuperFollowsTweetFields': True,
|
||||
'withSuperFollowsUserFields': True,
|
||||
'withV2Timeline': True,
|
||||
'withVoice': True,
|
||||
},
|
||||
'features': {
|
||||
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
|
||||
'interactive_text_enabled': True,
|
||||
'responsive_web_edit_tweet_api_enabled': True,
|
||||
'responsive_web_enhance_cards_enabled': True,
|
||||
'responsive_web_graphql_timeline_navigation_enabled': False,
|
||||
'responsive_web_text_conversations_enabled': False,
|
||||
'responsive_web_uc_gql_enabled': True,
|
||||
'standardized_nudges_misinfo': True,
|
||||
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
|
||||
'tweetypie_unmention_optimization_enabled': True,
|
||||
'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
|
||||
'verified_phone_label_enabled': False,
|
||||
'vibe_api_enabled': True,
|
||||
},
|
||||
} if self.is_logged_in else {
|
||||
'variables': {
|
||||
'tweetId': media_id,
|
||||
'withCommunity': False,
|
||||
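Note on the Twitter hunks above: with the logged-in `TweetDetail` endpoint and its large variable/feature set removed, the extractor always queries `TweetResultByRestId`, so `_graphql_to_legacy` only needs the single `tweetResult.result` lookup. A toy version of that lookup on an invented response:

```python
response = {
    'data': {
        'tweetResult': {
            'result': {'__typename': 'Tweet', 'rest_id': '1234567890', 'legacy': {'full_text': '...'}},
        },
    },
}


def tweet_result(data):
    # the threaded-conversation traversal used for logged-in sessions is gone
    result = (data.get('tweetResult') or {}).get('result') or {}
    return result if isinstance(result, dict) else {}


result = tweet_result(response['data'])
print(result.get('__typename'), result.get('rest_id'))
```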
@ -1717,21 +1676,22 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
|
||||
'url': 'https://twitter.com/i/spaces/1OwxWwQOPlNxQ',
|
||||
'info_dict': {
|
||||
'id': '1RDxlgyvNXzJL',
|
||||
'id': '1OwxWwQOPlNxQ',
|
||||
'ext': 'm4a',
|
||||
'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
|
||||
'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
|
||||
'uploader': r're:Lucio Di Gaetano.*?',
|
||||
'uploader_id': 'luciodigaetano',
|
||||
'title': 'Everybody in: @mtbarra & @elonmusk discuss the future of EV charging',
|
||||
'description': 'Twitter Space participated by Elon Musk',
|
||||
'live_status': 'was_live',
|
||||
'timestamp': 1659877956,
|
||||
'upload_date': '20220807',
|
||||
'release_timestamp': 1659904215,
|
||||
'release_date': '20220807',
|
||||
'release_date': '20230608',
|
||||
'release_timestamp': 1686256230,
|
||||
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||
'timestamp': 1686254250,
|
||||
'upload_date': '20230608',
|
||||
'uploader': 'Mary Barra',
|
||||
'uploader_id': 'mtbarra',
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# post_live/TimedOut but downloadable
|
||||
'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
|
||||
@ -1743,9 +1703,10 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'uploader': 'Google Cloud',
|
||||
'uploader_id': 'googlecloud',
|
||||
'live_status': 'post_live',
|
||||
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||
'timestamp': 1681409554,
|
||||
'upload_date': '20230413',
|
||||
'release_timestamp': 1681839000,
|
||||
'release_timestamp': 1681839082,
|
||||
'release_date': '20230418',
|
||||
'protocol': 'm3u8', # ffmpeg is forced
|
||||
'container': 'm4a_dash', # audio-only format fixup is applied
|
||||
@ -1762,6 +1723,9 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'uploader': '息根とめる',
|
||||
'uploader_id': 'tomeru_ikinone',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20230601',
|
||||
'release_timestamp': 1685617200,
|
||||
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||
'timestamp': 1685617198,
|
||||
'upload_date': '20230601',
|
||||
'protocol': 'm3u8', # ffmpeg is forced
|
||||
@ -1779,9 +1743,10 @@ class TwitterSpacesIE(TwitterBaseIE):
|
||||
'uploader': 'Candace Owens',
|
||||
'uploader_id': 'RealCandaceO',
|
||||
'live_status': 'was_live',
|
||||
'thumbnail': r're:https?://pbs\.twimg\.com/profile_images/.+',
|
||||
'timestamp': 1723931351,
|
||||
'upload_date': '20240817',
|
||||
'release_timestamp': 1723932000,
|
||||
'release_timestamp': 1723932056,
|
||||
'release_date': '20240817',
|
||||
'protocol': 'm3u8_native', # not ffmpeg, detected as video space
|
||||
},
|
||||
@ -1861,18 +1826,21 @@ def _real_extract(self, url):
|
||||
|
||||
return {
|
||||
'id': space_id,
|
||||
'title': metadata.get('title'),
|
||||
'description': f'Twitter Space participated by {participants}',
|
||||
'uploader': traverse_obj(
|
||||
metadata, ('creator_results', 'result', 'legacy', 'name')),
|
||||
'uploader_id': traverse_obj(
|
||||
metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
|
||||
'live_status': live_status,
|
||||
'release_timestamp': try_call(
|
||||
lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
|
||||
'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
|
||||
'formats': formats,
|
||||
'http_headers': headers,
|
||||
'live_status': live_status,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
# started_at is None when stream is_upcoming so fallback to scheduled_start for --wait-for-video
|
||||
'release_timestamp': (('started_at', 'scheduled_start'), {int_or_none(scale=1000)}, any),
|
||||
'timestamp': ('created_at', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
**traverse_obj(metadata, ('creator_results', 'result', 'legacy', {
|
||||
'uploader': ('name', {str}),
|
||||
'uploader_id': ('screen_name', {str_or_none}),
|
||||
'thumbnail': ('profile_image_url_https', {lambda x: x.replace('_normal', '_400x400')}, {url_or_none}),
|
||||
})),
|
||||
}
|
||||
|
||||
|
||||
|
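Note on the TwitterSpaces hunks above: the Space metadata is now mapped with `traverse_obj` — `release_timestamp` prefers `started_at` and falls back to `scheduled_start` for upcoming Spaces, timestamps are converted from milliseconds, and the uploader's avatar is upscaled by swapping `_normal` for `_400x400`. A plain-Python approximation on an invented payload:

```python
metadata = {
    'title': 'Example Space',
    'created_at': 1686254250000,
    'started_at': 1686256230000,
    'scheduled_start': 1686255000000,
    'creator_results': {'result': {'legacy': {
        'name': 'Mary Barra', 'screen_name': 'mtbarra',
        'profile_image_url_https': 'https://pbs.twimg.com/profile_images/123/abc_normal.jpg',
    }}},
}

creator = metadata['creator_results']['result']['legacy']
info = {
    'title': metadata.get('title'),
    # started_at is None while a Space is still upcoming, so fall back to scheduled_start
    'release_timestamp': (metadata.get('started_at') or metadata.get('scheduled_start') or 0) // 1000 or None,
    'timestamp': (metadata.get('created_at') or 0) // 1000 or None,
    'uploader': creator.get('name'),
    'uploader_id': creator.get('screen_name'),
    'thumbnail': (creator.get('profile_image_url_https') or '').replace('_normal', '_400x400') or None,
}
print(info)
```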
@ -39,6 +39,14 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
_NETRC_MACHINE = 'vimeo'
|
||||
_LOGIN_REQUIRED = False
|
||||
_LOGIN_URL = 'https://vimeo.com/log_in'
|
||||
_IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw=='
|
||||
_IOS_CLIENT_HEADERS = {
|
||||
'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
|
||||
'Accept-Language': 'en',
|
||||
'User-Agent': 'Vimeo/11.10.0 (com.vimeo; build:250424.164813.0; iOS 18.4.1) Alamofire/5.9.0 VimeoNetworking/5.0.0',
|
||||
}
|
||||
_IOS_OAUTH_CACHE_KEY = 'oauth-token-ios'
|
||||
_ios_oauth_token = None
|
||||
|
||||
@staticmethod
|
||||
def _smuggle_referrer(url, referrer_url):
|
||||
@ -88,13 +96,16 @@ def _get_video_password(self):
|
||||
expected=True)
|
||||
return password
|
||||
|
||||
def _verify_video_password(self, video_id, password, token):
|
||||
def _verify_video_password(self, video_id):
|
||||
video_password = self._get_video_password()
|
||||
token = self._download_json(
|
||||
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')['xsrft']
|
||||
url = f'https://vimeo.com/{video_id}'
|
||||
try:
|
||||
return self._download_webpage(
|
||||
self._request_webpage(
|
||||
f'{url}/password', video_id,
|
||||
'Submitting video password', data=json.dumps({
|
||||
'password': password,
|
||||
'password': video_password,
|
||||
'token': token,
|
||||
}, separators=(',', ':')).encode(), headers={
|
||||
'Accept': '*/*',
|
||||
@ -239,20 +250,39 @@ def _parse_config(self, config, video_id):
|
||||
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
|
||||
}
|
||||
|
||||
def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None, **kwargs):
|
||||
def _fetch_oauth_token(self):
|
||||
if not self._ios_oauth_token:
|
||||
self._ios_oauth_token = self.cache.load(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY)
|
||||
|
||||
if not self._ios_oauth_token:
|
||||
self._ios_oauth_token = self._download_json(
|
||||
'https://api.vimeo.com/oauth/authorize/client', None,
|
||||
'Fetching OAuth token', 'Failed to fetch OAuth token',
|
||||
headers={
|
||||
'Authorization': f'Basic {self._IOS_CLIENT_AUTH}',
|
||||
**self._IOS_CLIENT_HEADERS,
|
||||
}, data=urlencode_postdata({
|
||||
'grant_type': 'client_credentials',
|
||||
'scope': 'private public create edit delete interact upload purchased stats',
|
||||
}, quote_via=urllib.parse.quote))['access_token']
|
||||
self.cache.store(self._NETRC_MACHINE, self._IOS_OAUTH_CACHE_KEY, self._ios_oauth_token)
|
||||
|
||||
return self._ios_oauth_token
|
||||
|
||||
def _call_videos_api(self, video_id, unlisted_hash=None, **kwargs):
|
||||
return self._download_json(
|
||||
join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'),
|
||||
video_id, 'Downloading API JSON', headers={
|
||||
'Authorization': f'jwt {jwt_token}',
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._fetch_oauth_token()}',
|
||||
**self._IOS_CLIENT_HEADERS,
|
||||
}, query={
|
||||
'fields': ','.join((
|
||||
'config_url', 'created_time', 'description', 'download', 'license',
|
||||
'metadata.connections.comments.total', 'metadata.connections.likes.total',
|
||||
'release_time', 'stats.plays')),
|
||||
'config_url', 'embed_player_config_url', 'player_embed_url', 'download', 'play',
|
||||
'files', 'description', 'license', 'release_time', 'created_time', 'stats.plays',
|
||||
'metadata.connections.comments.total', 'metadata.connections.likes.total')),
|
||||
}, **kwargs)
|
||||
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None, api_data=None):
|
||||
def _extract_original_format(self, url, video_id, unlisted_hash=None, api_data=None):
|
||||
# Original/source formats are only available when logged in
|
||||
if not self._get_cookies('https://vimeo.com/').get('vimeo'):
|
||||
return
|
||||
@ -283,12 +313,8 @@ def _extract_original_format(self, url, video_id, unlisted_hash=None, jwt=None,
|
||||
'quality': 1,
|
||||
}
|
||||
|
||||
jwt = jwt or traverse_obj(self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id, 'Downloading jwt token', fatal=False), ('jwt', {str}))
|
||||
if not jwt:
|
||||
return
|
||||
original_response = api_data or self._call_videos_api(
|
||||
video_id, jwt, unlisted_hash, fatal=False, expected_status=(403, 404))
|
||||
video_id, unlisted_hash, fatal=False, expected_status=(403, 404))
|
||||
for download_data in traverse_obj(original_response, ('download', ..., {dict})):
|
||||
download_url = download_data.get('link')
|
||||
if not download_url or download_data.get('quality') != 'source':
|
||||
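Note on the Vimeo hunks above: the videos API is now called with an OAuth bearer token obtained through the iOS client's `client_credentials` grant (and cached) instead of a viewer JWT. A minimal stdlib sketch of that flow — not the extractor itself; it needs the base64 client auth string shipped in the extractor (elided here) and skips caching and error handling:

```python
import json
import urllib.parse
import urllib.request

IOS_CLIENT_AUTH = '...'  # base64 client credentials from the extractor (placeholder)
API_HEADERS = {
    'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
    'Accept-Language': 'en',
}


def fetch_oauth_token():
    # unauthenticated client_credentials grant, as in _fetch_oauth_token
    req = urllib.request.Request(
        'https://api.vimeo.com/oauth/authorize/client',
        data=urllib.parse.urlencode({
            'grant_type': 'client_credentials',
            'scope': 'private public create edit delete interact upload purchased stats',
        }, quote_via=urllib.parse.quote).encode(),
        headers={'Authorization': f'Basic {IOS_CLIENT_AUTH}', **API_HEADERS})
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)['access_token']


def call_videos_api(video_id, token):
    # the token is sent as a Bearer header together with the iOS client headers
    req = urllib.request.Request(
        f'https://api.vimeo.com/videos/{video_id}?fields=config_url,play,files,download',
        headers={'Authorization': f'Bearer {token}', **API_HEADERS})
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)
```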
@ -410,6 +436,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'duration': 10,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d',
|
||||
},
|
||||
'params': {
|
||||
@ -500,15 +527,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'The DMCI',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
|
||||
'uploader_id': 'dmci',
|
||||
'timestamp': 1324343742,
|
||||
'timestamp': 1324361742,
|
||||
'upload_date': '20111220',
|
||||
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
|
||||
'description': 'md5:f37b4ad0f3ded6fa16f38ecde16c3c44',
|
||||
'duration': 60,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d',
|
||||
'like_count': int,
|
||||
'tags': 'count:11',
|
||||
'release_timestamp': 1324361742,
|
||||
'release_date': '20111220',
|
||||
},
|
||||
# 'params': {'format': 'Original'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@ -521,15 +549,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'id': '393756517',
|
||||
# 'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1582642091,
|
||||
'timestamp': 1582660091,
|
||||
'uploader_id': 'frameworkla',
|
||||
'title': 'Straight To Hell - Sabrina: Netflix',
|
||||
'uploader': 'Framework Studio',
|
||||
'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
|
||||
'upload_date': '20200225',
|
||||
'duration': 176,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d',
|
||||
'uploader_url': 'https://vimeo.com/frameworkla',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'release_timestamp': 1582660091,
|
||||
'release_date': '20200225',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@ -630,7 +661,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'description': str, # FIXME: Dynamic SEO spam description
|
||||
'upload_date': '20150209',
|
||||
'timestamp': 1423518307,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/default',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/default',
|
||||
'duration': 10,
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/user20132939',
|
||||
@ -667,6 +698,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': int,
|
||||
'uploader_url': 'https://vimeo.com/aliniamedia',
|
||||
'release_date': '20160329',
|
||||
'view_count': int,
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
@ -678,18 +710,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
# 'ext': 'm4v',
|
||||
'ext': 'mp4',
|
||||
'title': 'Eastnor Castle 2015 Firework Champions - The Promo!',
|
||||
'description': 'md5:5967e090768a831488f6e74b7821b3c1',
|
||||
'description': 'md5:9441e6829ae94f380cc6417d982f63ac',
|
||||
'uploader_id': 'fireworkchampions',
|
||||
'uploader': 'Firework Champions',
|
||||
'upload_date': '20150910',
|
||||
'timestamp': 1441901895,
|
||||
'timestamp': 1441916295,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/534715882-6ff8e4660cbf2fea68282876d8d44f318825dfe572cc4016e73b3266eac8ae3a-d',
|
||||
'uploader_url': 'https://vimeo.com/fireworkchampions',
|
||||
'tags': 'count:6',
|
||||
'duration': 229,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'release_timestamp': 1441916295,
|
||||
'release_date': '20150910',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@ -820,7 +853,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader': 'Raja Virdi',
|
||||
'uploader_id': 'rajavirdi',
|
||||
'uploader_url': 'https://vimeo.com/rajavirdi',
|
||||
'duration': 309,
|
||||
'duration': 300,
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/1716727772-[\da-f]+-d',
|
||||
},
|
||||
# 'params': {'format': 'source'},
|
||||
@ -860,12 +893,9 @@ def _verify_player_video_password(self, url, video_id, headers):
|
||||
return checked
|
||||
|
||||
def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')
|
||||
|
||||
for retry in (False, True):
|
||||
try:
|
||||
video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash)
|
||||
video = self._call_videos_api(video_id, unlisted_hash)
|
||||
break
|
||||
except ExtractorError as e:
|
||||
if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400
|
||||
@ -873,15 +903,14 @@ def _extract_from_api(self, video_id, unlisted_hash=None):
|
||||
self._webpage_read_content(e.cause.response, e.cause.response.url, video_id, fatal=False),
|
||||
({json.loads}, 'invalid_parameters', ..., 'field'),
|
||||
)):
|
||||
self._verify_video_password(
|
||||
video_id, self._get_video_password(), viewer['xsrft'])
|
||||
self._verify_video_password(video_id)
|
||||
continue
|
||||
raise
|
||||
|
||||
info = self._parse_config(self._download_json(
|
||||
video['config_url'], video_id), video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, jwt=viewer['jwt'], api_data=video)
|
||||
f'https://vimeo.com/{video_id}', video_id, unlisted_hash, api_data=video)
|
||||
if source_format:
|
||||
info['formats'].append(source_format)
|
||||
|
||||
@ -1122,7 +1151,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
|
||||
'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998',
|
||||
'upload_date': '20140906',
|
||||
'timestamp': 1410032453,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'comment_count': int,
|
||||
'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/',
|
||||
'duration': 53,
|
||||
@ -1132,7 +1161,7 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# requires Referer to be passed along with og:video:url
|
||||
'url': 'https://vimeo.com/ondemand/36938/126682985',
|
||||
@ -1149,13 +1178,14 @@ class VimeoOndemandIE(VimeoIE): # XXX: Do not subclass from concrete IE
|
||||
'duration': 121,
|
||||
'comment_count': int,
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'like_count': int,
|
||||
'tags': 'count:5',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
'url': 'https://vimeo.com/ondemand/nazmaalik',
|
||||
'only_matching': True,
|
||||
@ -1237,7 +1267,7 @@ class VimeoUserIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
|
||||
_TESTS = [{
|
||||
'url': 'https://vimeo.com/nkistudio/videos',
|
||||
'info_dict': {
|
||||
'title': 'Nki',
|
||||
'title': 'AKAMA',
|
||||
'id': 'nkistudio',
|
||||
},
|
||||
'playlist_mincount': 66,
|
||||
@ -1370,10 +1400,10 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
|
||||
'uploader_id': 'user170863801',
|
||||
'uploader_url': 'https://vimeo.com/user170863801',
|
||||
'duration': 30,
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1912612821-09a43bd2e75c203d503aed89de7534f28fc4474a48f59c51999716931a246af5-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to parse XML'],
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
|
||||
'md5': 'c507a72f780cacc12b2248bb4006d253',
|
||||
@ -1423,12 +1453,8 @@ def _real_extract(self, url):
|
||||
user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
|
||||
data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
|
||||
data = self._download_json(data_url, video_id)
|
||||
viewer = {}
|
||||
if data.get('isLocked') is True:
|
||||
video_password = self._get_video_password()
|
||||
viewer = self._download_json(
|
||||
'https://vimeo.com/_rv/viewer', video_id)
|
||||
self._verify_video_password(video_id, video_password, viewer['xsrft'])
|
||||
self._verify_video_password(video_id)
|
||||
data = self._download_json(data_url, video_id)
|
||||
clip_data = data['clipData']
|
||||
config_url = clip_data['configUrl']
|
||||
@ -1436,7 +1462,7 @@ def _real_extract(self, url):
|
||||
info_dict = self._parse_config(config, video_id)
|
||||
source_format = self._extract_original_format(
|
||||
f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action',
|
||||
video_id, unlisted_hash=clip_data.get('unlistedHash'), jwt=viewer.get('jwt'))
|
||||
video_id, unlisted_hash=clip_data.get('unlistedHash'))
|
||||
if source_format:
|
||||
info_dict['formats'].append(source_format)
|
||||
info_dict['description'] = clean_html(clip_data.get('description'))
|
||||
@ -1528,20 +1554,22 @@ class VimeoProIE(VimeoBaseInfoExtractor):
|
||||
'uploader_id': 'openstreetmapus',
|
||||
'uploader': 'OpenStreetMap US',
|
||||
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
|
||||
'description': 'md5:2c362968038d4499f4d79f88458590c1',
|
||||
'description': 'md5:8cf69a1a435f2d763f4adf601e9c3125',
|
||||
'duration': 1595,
|
||||
'upload_date': '20130610',
|
||||
'timestamp': 1370893156,
|
||||
'timestamp': 1370907556,
|
||||
'license': 'by',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': 'count:1',
|
||||
'release_timestamp': 1370907556,
|
||||
'release_date': '20130610',
|
||||
},
|
||||
'params': {
|
||||
'format': 'best[protocol=https]',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}, {
|
||||
# password-protected VimeoPro page with Vimeo player embed
|
||||
'url': 'https://vimeopro.com/cadfem/simulation-conference-mechanische-systeme-in-perfektion',
|
||||
@ -1549,7 +1577,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
|
||||
'id': '764543723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Mechanische Systeme in Perfektion: Realität erfassen, Innovation treiben',
|
||||
'thumbnail': 'https://i.vimeocdn.com/video/1543784598-a1a750494a485e601110136b9fe11e28c2131942452b3a5d30391cb3800ca8fd-d_1280',
|
||||
'thumbnail': r're:https://i\.vimeocdn\.com/video/\d+-[\da-f]+-d',
|
||||
'description': 'md5:2a9d195cd1b0f6f79827107dc88c2420',
|
||||
'uploader': 'CADFEM',
|
||||
'uploader_id': 'cadfem',
|
||||
@ -1561,6 +1589,7 @@ class VimeoProIE(VimeoBaseInfoExtractor):
|
||||
'videopassword': 'Conference2022',
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -300,6 +300,24 @@ class VKIE(VKBaseIE):
|
||||
'upload_date': '20250130',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://vkvideo.ru/video-50883936_456244102',
|
||||
'info_dict': {
|
||||
'id': '-50883936_456244102',
|
||||
'ext': 'mp4',
|
||||
'title': 'Добивание Украины // Техник в коме // МОЯ ЗЛОСТЬ №140',
|
||||
'description': 'md5:a9bc46181e9ebd0fdd82cef6c0191140',
|
||||
'uploader': 'Стас Ай, Как Просто!',
|
||||
'uploader_id': '-50883936',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'duration': 4651,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'chapters': 'count:59',
|
||||
'timestamp': 1743333869,
|
||||
'upload_date': '20250330',
|
||||
},
|
||||
},
|
||||
{
|
||||
# live stream, hls and rtmp links, most likely already finished live
|
||||
# stream by the time you are reading this comment
|
||||
@ -540,7 +558,7 @@ def _real_extract(self, url):
|
||||
'title': ('md_title', {unescapeHTML}),
|
||||
'description': ('description', {clean_html}, filter),
|
||||
'thumbnail': ('jpg', {url_or_none}),
|
||||
'uploader': ('md_author', {str}),
|
||||
'uploader': ('md_author', {unescapeHTML}),
|
||||
'uploader_id': (('author_id', 'authorId'), {str_or_none}, any),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), {
|
||||
|
@ -2,9 +2,11 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class WatIE(InfoExtractor):
|
||||
@ -70,8 +72,14 @@ def _real_extract(self, url):
|
||||
|
||||
error_desc = video_info.get('error_desc')
|
||||
if error_desc:
|
||||
if video_info.get('error_code') == 'GEOBLOCKED':
|
||||
error_code = video_info.get('error_code')
|
||||
if error_code == 'GEOBLOCKED':
|
||||
self.raise_geo_restricted(error_desc, video_info.get('geoList'))
|
||||
elif error_code == 'DELIVERY_ERROR':
|
||||
if traverse_obj(video_data, ('delivery', 'code')) == 500:
|
||||
self.report_drm(video_id)
|
||||
error_desc = join_nonempty(
|
||||
error_desc, traverse_obj(video_data, ('delivery', 'error', {str})), delim=': ')
|
||||
raise ExtractorError(error_desc, expected=True)
|
||||
|
||||
title = video_info['title']
|
||||
|
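Note on the Wat hunk above: error handling now distinguishes `GEOBLOCKED` from `DELIVERY_ERROR`, reports DRM when the delivery code is 500, and appends the delivery error text otherwise. A toy illustration with an invented payload (the return values stand in for the extractor's raise/report helpers):

```python
def classify_error(video_info, video_data):
    error_desc = video_info.get('error_desc')
    if not error_desc:
        return None
    code = video_info.get('error_code')
    if code == 'GEOBLOCKED':
        return ('geo_restricted', error_desc)
    if code == 'DELIVERY_ERROR':
        delivery = video_data.get('delivery') or {}
        if delivery.get('code') == 500:
            return ('drm', error_desc)
        error_desc = ': '.join(filter(None, [error_desc, delivery.get('error')]))
    return ('error', error_desc)


print(classify_error(
    {'error_desc': 'Delivery failed', 'error_code': 'DELIVERY_ERROR'},
    {'delivery': {'code': 500}}))
```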
@ -290,12 +290,14 @@ def _real_extract(self, url):
|
||||
|
||||
elif live_status == 'is_live':
|
||||
video_info = self._call_api(
|
||||
f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
|
||||
f'/video/v1.3/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
|
||||
video_id, note='Downloading live JSON')
|
||||
playback = self._parse_json(video_info['lipPlayback'], video_id)
|
||||
m3u8_url = traverse_obj(playback, (
|
||||
'media', lambda _, v: v['protocol'] == 'HLS', 'path', {url_or_none}), get_all=False)
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)
|
||||
# Live subtitles are not downloadable, but extract to silence "ignoring subs" warning
|
||||
formats, _ = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True)
|
||||
|
||||
elif live_status == 'post_live':
|
||||
if availability in ('premium_only', 'subscriber_only'):
|
||||
|
@ -417,6 +417,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
||||
_NETRC_MACHINE = 'youtube'
|
||||
|
||||
_COOKIE_HOWTO_WIKI_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies'
|
||||
|
||||
def ucid_or_none(self, ucid):
|
||||
return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
|
||||
|
||||
@ -451,17 +453,15 @@ def _preferred_lang(self):
|
||||
return preferred_lang
|
||||
|
||||
def _initialize_consent(self):
|
||||
cookies = self._get_cookies('https://www.youtube.com/')
|
||||
if cookies.get('__Secure-3PSID'):
|
||||
if self._has_auth_cookies:
|
||||
return
|
||||
socs = cookies.get('SOCS')
|
||||
socs = self._youtube_cookies.get('SOCS')
|
||||
if socs and not socs.value.startswith('CAA'): # not consented
|
||||
return
|
||||
self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes)
|
||||
|
||||
def _initialize_pref(self):
|
||||
cookies = self._get_cookies('https://www.youtube.com/')
|
||||
pref_cookie = cookies.get('PREF')
|
||||
pref_cookie = self._youtube_cookies.get('PREF')
|
||||
pref = {}
|
||||
if pref_cookie:
|
||||
try:
|
||||
@ -472,8 +472,9 @@ def _initialize_pref(self):
|
||||
self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
|
||||
|
||||
def _initialize_cookie_auth(self):
|
||||
yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
|
||||
if yt_sapisid or yt_1psapisid or yt_3psapisid:
|
||||
self._passed_auth_cookies = False
|
||||
if self._has_auth_cookies:
|
||||
self._passed_auth_cookies = True
|
||||
self.write_debug('Found YouTube account cookies')
|
||||
|
||||
def _real_initialize(self):
|
||||
@ -492,8 +493,7 @@ def _perform_login(self, username, password):
|
||||
|
||||
@property
|
||||
def _youtube_login_hint(self):
|
||||
return (f'{self._login_hint(method="cookies")}. Also see '
|
||||
'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies '
|
||||
return (f'{self._login_hint(method="cookies")}. Also see {self._COOKIE_HOWTO_WIKI_URL} '
|
||||
'for tips on effectively exporting YouTube cookies')
|
||||
|
||||
def _check_login_required(self):
|
||||
@ -553,12 +553,16 @@ def _make_sid_authorization(scheme, sid, origin, additional_parts):
|
||||
|
||||
return f'{scheme} {"_".join(parts)}'
|
||||
|
||||
@property
|
||||
def _youtube_cookies(self):
|
||||
return self._get_cookies('https://www.youtube.com')
|
||||
|
||||
def _get_sid_cookies(self):
|
||||
"""
|
||||
Get SAPISID, 1PSAPISID, 3PSAPISID cookie values
|
||||
@returns sapisid, 1psapisid, 3psapisid
|
||||
"""
|
||||
yt_cookies = self._get_cookies('https://www.youtube.com')
|
||||
yt_cookies = self._youtube_cookies
|
||||
yt_sapisid = try_call(lambda: yt_cookies['SAPISID'].value)
|
||||
yt_3papisid = try_call(lambda: yt_cookies['__Secure-3PAPISID'].value)
|
||||
yt_1papisid = try_call(lambda: yt_cookies['__Secure-1PAPISID'].value)
@ -595,6 +599,31 @@ def _get_sid_authorization_header(self, origin='https://www.youtube.com', user_s

return ' '.join(authorizations)

@property
def is_authenticated(self):
return self._has_auth_cookies

@property
def _has_auth_cookies(self):
yt_sapisid, yt_1psapisid, yt_3psapisid = self._get_sid_cookies()
# YouTube doesn't appear to clear 3PSAPISID when rotating cookies (as of 2025-04-26)
# But LOGIN_INFO is cleared and should exist if logged in
has_login_info = 'LOGIN_INFO' in self._youtube_cookies
return bool(has_login_info and (yt_sapisid or yt_1psapisid or yt_3psapisid))

def _request_webpage(self, *args, **kwargs):
response = super()._request_webpage(*args, **kwargs)

# Check that we are still logged-in and cookies have not rotated after every request
if getattr(self, '_passed_auth_cookies', None) and not self._has_auth_cookies:
self.report_warning(
'The provided YouTube account cookies are no longer valid. '
'They have likely been rotated in the browser as a security measure. '
f'For tips on how to effectively export YouTube cookies, refer to {self._COOKIE_HOWTO_WIKI_URL} .',
only_once=False)

return response

def _call_api(self, ep, query, video_id, fatal=True, headers=None,
note='Downloading API JSON', errnote='Unable to download API page',
context=None, api_key=None, api_hostname=None, default_client='web'):
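To make the new login check above easier to follow, here is a small stand-alone sketch (not the committed code) of the _has_auth_cookies rule: a session only counts as authenticated when LOGIN_INFO is present together with at least one SAPISID-family cookie, which is how rotated cookies get detected.

# --- illustrative sketch, not part of the commit ---
from http.cookies import SimpleCookie

def has_auth_cookies(cookie_header):
    jar = SimpleCookie()
    jar.load(cookie_header)
    sid_names = ('SAPISID', '__Secure-1PAPISID', '__Secure-3PAPISID')
    return 'LOGIN_INFO' in jar and any(name in jar for name in sid_names)

print(has_auth_cookies('__Secure-3PAPISID=abc'))        # False: LOGIN_INFO was cleared on rotation
print(has_auth_cookies('LOGIN_INFO=xyz; SAPISID=abc'))  # True
# --- end of sketch ---
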
@ -695,10 +724,6 @@ def _extract_visitor_data(self, *args):
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
expected_type=str)

@functools.cached_property
def is_authenticated(self):
return bool(self._get_sid_authorization_header())

def extract_ytcfg(self, video_id, webpage):
if not webpage:
return {}

@ -37,6 +37,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
'chapters': 'count:20',
'comment_count': int,
'heatmap': 'count:100',
'media_type': 'clip',
},
}]

@ -59,6 +60,7 @@ def _real_extract(self, url):
'url': f'https://www.youtube.com/watch?v={video_id}',
'ie_key': YoutubeIE.ie_key(),
'id': clip_id,
'media_type': 'clip',
'section_start': int(clip_data['startTimeMs']) / 1000,
'section_end': int(clip_data['endTimeMs']) / 1000,
'_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility

@ -35,6 +35,7 @@ class YoutubeYtBeIE(YoutubeBaseInfoExtractor):
'duration': 59,
'comment_count': int,
'channel_follower_count': int,
'media_type': 'short',
},
'params': {
'noplaylist': True,

@ -376,6 +376,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Afrojack',
'uploader_url': 'https://www.youtube.com/@Afrojack',
'uploader_id': '@Afrojack',
'media_type': 'video',
},
'params': {
'youtube_include_dash_manifest': True,

@ -413,10 +414,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1401991663,
'media_type': 'video',
},
},
{
'note': 'Age-gate video with embed allowed in public site',
'note': 'Formerly an age-gate video with embed allowed in public site',
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
'info_dict': {
'id': 'HsUATh_Nc2U',

@ -424,8 +426,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': 'Godzilla 2 (Official Video)',
'description': 'md5:bf77e03fcae5529475e500129b05668a',
'upload_date': '20200408',
'age_limit': 18,
'availability': 'needs_auth',
'age_limit': 0,
'availability': 'public',
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
'channel': 'FlyingKitty',
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',

@ -443,8 +445,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@FlyingKitty900',
'comment_count': int,
'channel_is_verified': True,
'media_type': 'video',
},
'skip': 'Age-restricted; requires authentication',
},
{
'note': 'Age-gate video embedable only with clientScreen=EMBED',

@ -507,6 +509,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Herr Lurik',
'uploader_url': 'https://www.youtube.com/@HerrLurik',
'uploader_id': '@HerrLurik',
'media_type': 'video',
},
},
{

@ -546,6 +549,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'deadmau5',
'uploader_url': 'https://www.youtube.com/@deadmau5',
'uploader_id': '@deadmau5',
'media_type': 'video',
},
'expected_warnings': [
'DASH manifest missing',

@ -581,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@Olympics',
'channel_is_verified': True,
'timestamp': 1440707674,
'media_type': 'livestream',
},
'params': {
'skip_download': 'requires avconv',

@ -615,6 +620,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@AllenMeow',
'uploader_id': '@AllenMeow',
'timestamp': 1299776999,
'media_type': 'video',
},
},
# url_encoded_fmt_stream_map is empty string

@ -809,6 +815,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'like_count': int,
'age_limit': 0,
'channel_follower_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -868,6 +875,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@BKCHarvard',
'uploader_url': 'https://www.youtube.com/@BKCHarvard',
'timestamp': 1422422076,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -904,6 +912,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1447987198,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -968,6 +977,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'timestamp': 1484761047,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -1070,6 +1080,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'tags': 'count:11',
'live_status': 'not_live',
'channel_follower_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -1124,6 +1135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
'uploader_id': '@ElevageOrVert',
'timestamp': 1497343210,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -1163,6 +1175,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1377976349,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -1207,6 +1220,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_follower_count': int,
'uploader': 'The Cinematic Orchestra',
'comment_count': int,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -1275,6 +1289,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
'uploader_id': '@walkaroundjapan7124',
'timestamp': 1605884416,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -1371,6 +1386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1395685455,
'media_type': 'video',
}, 'params': {'format': 'mhtml', 'skip_download': True},
}, {
# Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)

@ -1401,6 +1417,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@LeonNguyen',
'heatmap': 'count:100',
'timestamp': 1641170939,
'media_type': 'video',
},
}, {
# date text is premiered video, ensure upload date in UTC (published 1641172509)

@ -1434,6 +1451,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_is_verified': True,
'heatmap': 'count:100',
'timestamp': 1641172509,
'media_type': 'video',
},
},
{ # continuous livestream.

@ -1495,6 +1513,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Lesmiscore',
'uploader_url': 'https://www.youtube.com/@lesmiscore',
'timestamp': 1648005313,
'media_type': 'short',
},
}, {
# Prefer primary title+description language metadata by default

@ -1523,6 +1542,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@coletdjnz',
'uploader': 'cole-dlp-test-acc',
'timestamp': 1662677394,
'media_type': 'video',
},
'params': {'skip_download': True},
}, {

@ -1551,6 +1571,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'cole-dlp-test-acc',
'timestamp': 1659073275,
'like_count': int,
'media_type': 'video',
},
'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
'expected_warnings': [r'Preferring "fr" translated fields'],

@ -1587,6 +1608,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
'media_type': 'video',
},
'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
}, {

@ -1687,6 +1709,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'comment_count': int,
'channel_is_verified': True,
'heatmap': 'count:100',
'media_type': 'video',
},
'params': {
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},

@ -1719,6 +1742,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_follower_count': int,
'categories': ['People & Blogs'],
'tags': [],
'media_type': 'short',
},
},
]

@ -1754,6 +1778,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': '@ChristopherSykesDocumentaries',
'heatmap': 'count:100',
'timestamp': 1211825920,
'media_type': 'video',
},
'params': {
'skip_download': True,

@ -1819,6 +1844,12 @@ def mpd_feed(format_id, delay):
else:
retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
continue

# Formats from ended premieres will be missing a manifest_url
# See https://github.com/yt-dlp/yt-dlp/issues/8543
if not f.get('manifest_url'):
break

return f['manifest_url'], f['manifest_stream_number'], is_live
return None

@ -1982,7 +2013,9 @@ def _download_player_url(self, video_id, fatal=False):
def _player_js_cache_key(self, player_url):
player_id = self._extract_player_info(player_url)
player_path = remove_start(urllib.parse.urlparse(player_url).path, f'/s/player/{player_id}/')
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path)
variant = self._INVERSE_PLAYER_JS_VARIANT_MAP.get(player_path) or next((
v for k, v in self._INVERSE_PLAYER_JS_VARIANT_MAP.items()
if re.fullmatch(re.escape(k).replace('en_US', r'[a-zA-Z0-9_]+'), player_path)), None)
if not variant:
self.write_debug(
f'Unable to determine player JS variant\n'
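The interesting part of this hunk is the new fallback lookup, which tolerates player paths whose locale segment is not en_US. A stand-alone sketch of that matching follows; the map entry is a made-up example, not taken from the diff.

# --- illustrative sketch, not part of the commit ---
import re

INVERSE_MAP = {'player_ias.vflset/en_US/base.js': 'main'}  # hypothetical entry

def variant_for(player_path):
    return INVERSE_MAP.get(player_path) or next(
        (v for k, v in INVERSE_MAP.items()
         if re.fullmatch(re.escape(k).replace('en_US', r'[a-zA-Z0-9_]+'), player_path)),
        None)

print(variant_for('player_ias.vflset/de_DE/base.js'))  # 'main' despite the locale difference
# --- end of sketch ---
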
@ -2120,23 +2153,23 @@ def inner(*args, **kwargs):
return ret
return inner

def _load_nsig_code_from_cache(self, player_url):
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
def _load_player_data_from_cache(self, name, player_url):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))

if func_code := self._player_cache.get(cache_id):
return func_code
if data := self._player_cache.get(cache_id):
return data

func_code = self.cache.load(*cache_id, min_ver='2025.03.31')
if func_code:
self._player_cache[cache_id] = func_code
data = self.cache.load(*cache_id, min_ver='2025.03.31')
if data:
self._player_cache[cache_id] = data

return func_code
return data

def _store_nsig_code_to_cache(self, player_url, func_code):
cache_id = ('youtube-nsig', self._player_js_cache_key(player_url))
def _store_player_data_to_cache(self, name, player_url, data):
cache_id = (f'youtube-{name}', self._player_js_cache_key(player_url))
if cache_id not in self._player_cache:
self.cache.store(*cache_id, func_code)
self._player_cache[cache_id] = func_code
self.cache.store(*cache_id, data)
self._player_cache[cache_id] = data

def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
@ -2179,7 +2212,7 @@ def _decrypt_nsig(self, s, video_id, player_url):

self.write_debug(f'Decrypted nsig {s} => {ret}')
# Only cache nsig func JS code to disk if successful, and only once
self._store_nsig_code_to_cache(player_url, func_code)
self._store_player_data_to_cache('nsig', player_url, func_code)
return ret

def _extract_n_function_name(self, jscode, player_url=None):

@ -2298,7 +2331,7 @@ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):

def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
func_code = self._load_nsig_code_from_cache(player_url)
func_code = self._load_player_data_from_cache('nsig', player_url)
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)

@ -2334,23 +2367,27 @@ def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=F
Extract signatureTimestamp (sts)
Required to tell API what sig/player version is in use.
"""
sts = None
if isinstance(ytcfg, dict):
sts = int_or_none(ytcfg.get('STS'))
if sts := traverse_obj(ytcfg, ('STS', {int_or_none})):
return sts

if not player_url:
error_msg = 'Cannot extract signature timestamp without player url'
if fatal:
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return None

sts = self._load_player_data_from_cache('sts', player_url)
if sts:
return sts

if code := self._load_player(video_id, player_url, fatal=fatal):
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
if sts:
self._store_player_data_to_cache('sts', player_url, sts)

if not sts:
# Attempt to extract from player
if player_url is None:
error_msg = 'Cannot extract signature timestamp without player_url.'
if fatal:
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return
code = self._load_player(video_id, player_url, fatal=fatal)
if code:
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
return sts

def _mark_watched(self, video_id, player_responses):
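For reference, the signature-timestamp regex kept by this refactor can be exercised on its own; the player JS snippet below is a made-up sample, not real player code.

# --- illustrative sketch, not part of the commit ---
import re

player_js = 'var config={signatureTimestamp:20348,other:1};'  # invented sample input
m = re.search(r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js)
print(int(m.group('sts')))  # 20348
# --- end of sketch ---
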
@ -3103,9 +3140,19 @@ def append_client(*client_names):
else:
prs.append(pr)

# web_embedded can work around age-gate and age-verification for some embeddable videos
if self._is_agegated(pr) and variant != 'web_embedded':
append_client(f'web_embedded.{base_client}')
# Unauthenticated users will only get web_embedded client formats if age-gated
if self._is_agegated(pr) and not self.is_authenticated:
self.to_screen(
f'{video_id}: This video is age-restricted; some formats may be missing '
f'without authentication. {self._youtube_login_hint}', only_once=True)

# EU countries require age-verification for accounts to access age-restricted videos
# If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
if self.is_authenticated and self._is_agegated(pr):
embedding_is_disabled = variant == 'web_embedded' and self._is_unplayable(pr)
if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
self.to_screen(
f'{video_id}: This video is age-restricted and YouTube is requiring '
'account age-verification; some formats may be missing', only_once=True)

@ -3232,12 +3279,16 @@ def build_fragments(f):
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
if not all((sc, fmt_url, player_url, encrypted_sig)):
self.report_warning(
f'Some {client_name} client https formats have been skipped as they are missing a url. '
f'{"Your account" if self.is_authenticated else "The current session"} may have '
f'the SSAP (server-side ads) experiment which interferes with yt-dlp. '
f'Please see https://github.com/yt-dlp/yt-dlp/issues/12482 for more details.',
video_id, only_once=True)
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
if client_name == 'web':
msg += 'YouTube is forcing SABR streaming for this client. '
else:
msg += (
f'YouTube may have enabled the SABR-only or Server-Side Ad Placement experiment for '
f'{"your account" if self.is_authenticated else "the current session"}. '
)
msg += 'See https://github.com/yt-dlp/yt-dlp/issues/12482 for more details'
self.report_warning(msg, video_id, only_once=True)
continue
try:
fmt_url += '&{}={}'.format(

@ -3324,8 +3375,8 @@ def build_fragments(f):
'width': int_or_none(fmt.get('width')),
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
# Strictly de-prioritize broken, damaged and 3gp formats
'preference': -20 if require_po_token else -10 if is_damaged else -2 if itag == '17' else None,
# Strictly de-prioritize damaged and 3gp formats
'preference': -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')

@ -3648,6 +3699,13 @@ def feed_entry(name):
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
reason += '. YouTube is requiring a captcha challenge before playback'
elif "This content isn't available, try again later" in reason:
reason = (
f'{remove_end(reason.strip(), ".")}. {"Your account" if self.is_authenticated else "The current session"} '
f'has been rate-limited by YouTube for up to an hour. It is recommended to use `-t sleep` to add a delay '
f'between video requests to avoid exceeding the rate limit. For more information, refer to '
f'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#this-content-isnt-available-try-again-later'
)
self.raise_no_formats(reason, expected=True)

keywords = get_first(video_details, 'keywords', expected_type=list) or []

@ -3754,7 +3812,10 @@ def is_bad_format(fmt):
'tags': keywords,
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'live_status': live_status,
'media_type': 'livestream' if get_first(video_details, 'isLiveContent') else None,
'media_type': (
'livestream' if get_first(video_details, 'isLiveContent')
else 'short' if get_first(microformats, 'isShortsEligible')
else 'video'),
'release_timestamp': live_start_time,
'_format_sort_fields': ( # source_preference is lower for potentially damaged formats
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),
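The new media_type expression above maps player-response fields to one of three labels. A stand-alone sketch of the same decision follows; the real code applies get_first over lists of response dicts, which is flattened to plain dicts here, and the sample inputs are invented.

# --- illustrative sketch, not part of the commit ---
def media_type(video_details, microformats):
    if video_details.get('isLiveContent'):
        return 'livestream'
    if microformats.get('isShortsEligible'):
        return 'short'
    return 'video'

print(media_type({'isLiveContent': False}, {'isShortsEligible': True}))  # short
# --- end of sketch ---
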
File diff suppressed because it is too large

@ -150,6 +150,15 @@ def format_option_strings(option):
return opts


_PRESET_ALIASES = {
'mp3': ['-f', 'ba[acodec^=mp3]/ba/b', '-x', '--audio-format', 'mp3'],
'aac': ['-f', 'ba[acodec^=aac]/ba[acodec^=mp4a.40.]/ba/b', '-x', '--audio-format', 'aac'],
'mp4': ['--merge-output-format', 'mp4', '--remux-video', 'mp4', '-S', 'vcodec:h264,lang,quality,res,fps,hdr:12,acodec:aac'],
'mkv': ['--merge-output-format', 'mkv', '--remux-video', 'mkv'],
'sleep': ['--sleep-subtitles', '5', '--sleep-requests', '0.75', '--sleep-interval', '10', '--max-sleep-interval', '20'],
}


class _YoutubeDLOptionParser(optparse.OptionParser):
# optparse is deprecated since Python 3.2. So assume a stable interface even for private methods
ALIAS_DEST = '_triggered_aliases'

@ -215,6 +224,22 @@ def _match_long_opt(self, opt):
return e.possibilities[0]
raise

def format_option_help(self, formatter=None):
assert formatter, 'Formatter can not be None'
formatted_help = super().format_option_help(formatter=formatter)
formatter.indent()
heading = formatter.format_heading('Preset Aliases')
formatter.indent()
result = []
for name, args in _PRESET_ALIASES.items():
option = optparse.Option('-t', help=shlex.join(args))
formatter.option_strings[option] = f'-t {name}'
result.append(formatter.format_option(option))
formatter.dedent()
formatter.dedent()
help_lines = '\n'.join(result)
return f'{formatted_help}\n{heading}{help_lines}'


def create_parser():
def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip):

@ -317,6 +342,13 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
parser.rargs[:0] = shlex.split(
opts if value is None else opts.format(*map(shlex.quote, value)))

def _preset_alias_callback(option, opt_str, value, parser):
if not value:
return
if value not in _PRESET_ALIASES:
raise optparse.OptionValueError(f'Unknown preset alias: {value}')
parser.rargs[:0] = _PRESET_ALIASES[value]

general = optparse.OptionGroup(parser, 'General Options')
general.add_option(
'-h', '--help', dest='print_help', action='store_true',
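To illustrate what _preset_alias_callback effectively does, here is a rough stand-alone sketch of how `-t mp3` expands into ordinary arguments before option parsing. The argv handling is simplified and is not the actual optparse wiring; only the 'mp3' preset from the table above is reproduced.

# --- illustrative sketch, not part of the commit ---
import shlex

_PRESET_ALIASES = {'mp3': ['-f', 'ba[acodec^=mp3]/ba/b', '-x', '--audio-format', 'mp3']}

argv = ['-t', 'mp3', 'https://example.com/video']
expanded = []
i = 0
while i < len(argv):
    if argv[i] == '-t' and i + 1 < len(argv):
        expanded += _PRESET_ALIASES[argv[i + 1]]  # splice the preset in place of -t PRESET
        i += 2
    else:
        expanded.append(argv[i])
        i += 1

print(shlex.join(expanded))
# -f 'ba[acodec^=mp3]/ba/b' -x --audio-format mp3 https://example.com/video
# --- end of sketch ---
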
@ -519,6 +551,15 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
'Alias options can trigger more aliases; so be careful to avoid defining recursive options. '
f'As a safety measure, each alias may be triggered a maximum of {_YoutubeDLOptionParser.ALIAS_TRIGGER_LIMIT} times. '
'This option can be used multiple times'))
general.add_option(
'-t', '--preset-alias',
metavar='PRESET', dest='_', type='str',
action='callback', callback=_preset_alias_callback,
help=(
'Applies a predefined set of options. e.g. --preset-alias mp3. '
f'The following presets are available: {", ".join(_PRESET_ALIASES)}. '
'See the "Preset Aliases" section at the end for more info. '
'This option can be used multiple times'))

network = optparse.OptionGroup(parser, 'Network Options')
network.add_option(

@ -202,7 +202,7 @@ class UpdateInfo:
requested_version: str | None = None
commit: str | None = None

binary_name: str | None = _get_binary_name() # noqa: RUF009: Always returns the same value
binary_name: str | None = _get_binary_name() # noqa: RUF009 # Always returns the same value
checksum: str | None = None

@ -54,7 +54,7 @@
from ..dependencies import xattr
from ..globals import IN_CLI

__name__ = __name__.rsplit('.', 1)[0] # noqa: A001: Pretend to be the parent module
__name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module


class NO_DEFAULT:

@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py

__version__ = '2025.03.31'
__version__ = '2025.04.30'

RELEASE_GIT_HEAD = '5e457af57fae9645b1b8fa0ed689229c8fb9656b'
RELEASE_GIT_HEAD = '505b400795af557bdcfd9d4fa7e9133b26ef431c'

VARIANT = None

@ -12,4 +12,4 @@

ORIGIN = 'yt-dlp/yt-dlp'

_pkg_version = '2025.03.31'
_pkg_version = '2025.04.30'