mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-08-14 08:28:29 +00:00
Merge branch 'yt-dlp:master' into appleconnect
This commit is contained in:
commit
ff3ad1f10f
11
.github/workflows/build.yml
vendored
11
.github/workflows/build.yml
vendored
@ -256,7 +256,7 @@ jobs:
|
||||
with:
|
||||
path: |
|
||||
~/yt-dlp-build-venv
|
||||
key: cache-reqs-${{ github.job }}
|
||||
key: cache-reqs-${{ github.job }}-${{ github.ref }}
|
||||
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
@ -331,19 +331,16 @@ jobs:
|
||||
if: steps.restore-cache.outputs.cache-hit == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
cache_key: cache-reqs-${{ github.job }}
|
||||
repository: ${{ github.repository }}
|
||||
branch: ${{ github.ref }}
|
||||
cache_key: cache-reqs-${{ github.job }}-${{ github.ref }}
|
||||
run: |
|
||||
gh extension install actions/gh-actions-cache
|
||||
gh actions-cache delete "${cache_key}" -R "${repository}" -B "${branch}" --confirm
|
||||
gh cache delete "${cache_key}"
|
||||
|
||||
- name: Cache requirements
|
||||
uses: actions/cache/save@v4
|
||||
with:
|
||||
path: |
|
||||
~/yt-dlp-build-venv
|
||||
key: cache-reqs-${{ github.job }}
|
||||
key: cache-reqs-${{ github.job }}-${{ github.ref }}
|
||||
|
||||
macos_legacy:
|
||||
needs: process
|
||||
|
41
.github/workflows/signature-tests.yml
vendored
Normal file
41
.github/workflows/signature-tests.yml
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
name: Signature Tests
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- .github/workflows/signature-tests.yml
|
||||
- test/test_youtube_signature.py
|
||||
- yt_dlp/jsinterp.py
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/signature-tests.yml
|
||||
- test/test_youtube_signature.py
|
||||
- yt_dlp/jsinterp.py
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: signature-tests-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
name: Signature Tests
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest]
|
||||
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --only-optional --include test
|
||||
- name: Run tests
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
python3 -m yt_dlp -v || true # Print debug head
|
||||
python3 ./devscripts/run_tests.py test/test_youtube_signature.py
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -105,6 +105,8 @@ README.txt
|
||||
*.zsh
|
||||
*.spec
|
||||
test/testdata/sigs/player-*.js
|
||||
test/testdata/thumbnails/empty.webp
|
||||
test/testdata/thumbnails/foo\ %d\ bar/foo_%d.*
|
||||
|
||||
# Binary
|
||||
/youtube-dl
|
||||
|
@ -126,7 +126,7 @@ ### Are you willing to share account details if needed?
|
||||
While these steps won't necessarily ensure that no misuse of the account takes place, they are still good practices to follow.
|
||||
|
||||
- Look for people with the `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages.
|
||||
- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator).
|
||||
- Change the password before sharing the account to something random.
|
||||
- Change the password after receiving the account back.
|
||||
|
||||
### Is the website primarily used for piracy?
|
||||
|
23
CONTRIBUTORS
23
CONTRIBUTORS
@ -770,3 +770,26 @@ NeonMan
|
||||
pj47x
|
||||
troex
|
||||
WouterGordts
|
||||
baierjan
|
||||
GeoffreyFrogeye
|
||||
Pawka
|
||||
v3DJG6GL
|
||||
yozel
|
||||
brian6932
|
||||
iednod55
|
||||
maxbin123
|
||||
nullpos
|
||||
anlar
|
||||
eason1478
|
||||
ceandreasen
|
||||
chauhantirth
|
||||
helpimnotdrowning
|
||||
adamralph
|
||||
averageFOSSenjoyer
|
||||
bubo
|
||||
flanter21
|
||||
Georift
|
||||
moonshinerd
|
||||
R0hanW
|
||||
ShockedPlot7560
|
||||
swayll
|
||||
|
234
Changelog.md
234
Changelog.md
@ -4,6 +4,240 @@ # Changelog
|
||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2025.07.21
|
||||
|
||||
#### Important changes
|
||||
- **Default behaviour changed from `--mtime` to `--no-mtime`**
|
||||
yt-dlp no longer applies the server modified time to downloaded files by default. [Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)
|
||||
- Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)
|
||||
- When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped
|
||||
|
||||
#### Core changes
|
||||
- [Allow extractors to designate formats/subtitles for impersonation](https://github.com/yt-dlp/yt-dlp/commit/32809eb2da92c649e540a5b714f6235036026161) ([#13778](https://github.com/yt-dlp/yt-dlp/issues/13778)) by [bashonly](https://github.com/bashonly) (With fixes in [3e49bc8](https://github.com/yt-dlp/yt-dlp/commit/3e49bc8a1bdb4109b857f2c361c358e86fa63405), [2ac3eb9](https://github.com/yt-dlp/yt-dlp/commit/2ac3eb98373d1c31341c5e918c83872c7ff409c6))
|
||||
- [Don't let format testing alter the return code](https://github.com/yt-dlp/yt-dlp/commit/4919051e447c7f8ae9df8ba5c4208b6b5c04915a) ([#13767](https://github.com/yt-dlp/yt-dlp/issues/13767)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/commit/959ac99e98c3215437e573c22d64be42d361e863) by [Grub4K](https://github.com/Grub4K)
|
||||
- [No longer enable `--mtime` by default](https://github.com/yt-dlp/yt-dlp/commit/f3008bc5f89d2691f2f8dfc51b406ef4e25281c3) ([#12781](https://github.com/yt-dlp/yt-dlp/issues/12781)) by [seproDev](https://github.com/seproDev)
|
||||
- [Warn when skipping formats](https://github.com/yt-dlp/yt-dlp/commit/1f27a9f8baccb9105f2476154557540efe09a937) ([#13090](https://github.com/yt-dlp/yt-dlp/issues/13090)) by [bashonly](https://github.com/bashonly)
|
||||
- **jsinterp**
|
||||
- [Cache undefined variable names](https://github.com/yt-dlp/yt-dlp/commit/b342d27f3f82d913976509ddf5bff539ad8567ec) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly) (With fixes in [805519b](https://github.com/yt-dlp/yt-dlp/commit/805519bfaa7cb5443912dfe45ac774834ba65a16))
|
||||
- [Fix variable scoping](https://github.com/yt-dlp/yt-dlp/commit/b6328ca05030d815222b25d208cc59a964623bf9) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
- **utils**
|
||||
- `mimetype2ext`: [Always parse `flac` from `audio/flac`](https://github.com/yt-dlp/yt-dlp/commit/b8abd255e454acbe0023cdb946f9eb461ced7eeb) ([#13748](https://github.com/yt-dlp/yt-dlp/issues/13748)) by [bashonly](https://github.com/bashonly)
|
||||
- `unified_timestamp`: [Return `int` values](https://github.com/yt-dlp/yt-dlp/commit/6be26626f7cfa71d28e0fac2861eb04758810c5d) ([#13796](https://github.com/yt-dlp/yt-dlp/issues/13796)) by [doe1080](https://github.com/doe1080)
|
||||
- `urlhandle_detect_ext`: [Use `x-amz-meta-file-type` headers](https://github.com/yt-dlp/yt-dlp/commit/28bf46b7dafe2e241137763bf570a2f91ba8a53a) ([#13749](https://github.com/yt-dlp/yt-dlp/issues/13749)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Extractor changes
|
||||
- [Add `_search_nextjs_v13_data` helper](https://github.com/yt-dlp/yt-dlp/commit/5245231e4a39ecd5595d4337d46d85e150e2430a) ([#13398](https://github.com/yt-dlp/yt-dlp/issues/13398)) by [bashonly](https://github.com/bashonly) (With fixes in [b5fea53](https://github.com/yt-dlp/yt-dlp/commit/b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec))
|
||||
- [Detect invalid m3u8 playlist data](https://github.com/yt-dlp/yt-dlp/commit/e99c0b838a9c5feb40c0dcd291bd7b8620b8d36d) ([#13601](https://github.com/yt-dlp/yt-dlp/issues/13601)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **10play**: [Support new site domain](https://github.com/yt-dlp/yt-dlp/commit/790c286ce3e0b534ca2d8f6648ced220d888f139) ([#13611](https://github.com/yt-dlp/yt-dlp/issues/13611)) by [Georift](https://github.com/Georift)
|
||||
- **9gag**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/0b359b184dee0c7052be482857bf562de67e4928) ([#13678](https://github.com/yt-dlp/yt-dlp/issues/13678)) by [bashonly](https://github.com/bashonly)
|
||||
- **aenetworks**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/5f951ce929b56a822514f1a02cc06af030855ec7) ([#13747](https://github.com/yt-dlp/yt-dlp/issues/13747)) by [bashonly](https://github.com/bashonly)
|
||||
- **archive.org**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf) ([#13706](https://github.com/yt-dlp/yt-dlp/issues/13706)) by [rdamas](https://github.com/rdamas)
|
||||
- **bandaichannel**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/23e9389f936ec5236a87815b8576e5ce567b2f77) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080)
|
||||
- **bandcamp**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/f9dff95cb1c138913011417b3bba020c0a691bba) ([#13480](https://github.com/yt-dlp/yt-dlp/issues/13480)) by [WouterGordts](https://github.com/WouterGordts)
|
||||
- **bellmedia**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6fb3947c0dc6d0e3eab5077c5bada8402f47a277) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080)
|
||||
- **bilibili**: [Pass newer user-agent with API requests](https://github.com/yt-dlp/yt-dlp/commit/d3edc5d52a7159eda2331dbc7e14bf40a6585c81) ([#13736](https://github.com/yt-dlp/yt-dlp/issues/13736)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **bilibilibangumi**
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7) ([#13800](https://github.com/yt-dlp/yt-dlp/issues/13800)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix geo-block detection](https://github.com/yt-dlp/yt-dlp/commit/884f35d54a64f1e6e7be49459842f573fc3a2701) ([#13667](https://github.com/yt-dlp/yt-dlp/issues/13667)) by [bashonly](https://github.com/bashonly)
|
||||
- **blackboardcollaborate**: [Support subtitles and authwalled videos](https://github.com/yt-dlp/yt-dlp/commit/dcc4cba39e2a79d3efce16afa28dbe245468489f) ([#12473](https://github.com/yt-dlp/yt-dlp/issues/12473)) by [flanter21](https://github.com/flanter21)
|
||||
- **btvplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae61e0f313dd03a09060abc7a212775c3717818) ([#13541](https://github.com/yt-dlp/yt-dlp/issues/13541)) by [bubo](https://github.com/bubo)
|
||||
- **ctv**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9f54ea38984788811773ca2ceaca73864acf0e8a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080)
|
||||
- **dangalplay**: [Support other login regions](https://github.com/yt-dlp/yt-dlp/commit/09982bc33e2f1f9a1ff66e6738df44f15b36f6a6) ([#13768](https://github.com/yt-dlp/yt-dlp/issues/13768)) by [bashonly](https://github.com/bashonly)
|
||||
- **francetv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/ade876efb31d55d3394185ffc56942fdc8d325cc) ([#13726](https://github.com/yt-dlp/yt-dlp/issues/13726)) by [bashonly](https://github.com/bashonly)
|
||||
- **hotstar**
|
||||
- [Fix support for free accounts](https://github.com/yt-dlp/yt-dlp/commit/07d1d85f6387e4bdb107096f0131c7054f078bb9) ([#13700](https://github.com/yt-dlp/yt-dlp/issues/13700)) by [chauhantirth](https://github.com/chauhantirth)
|
||||
- [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9) ([#13727](https://github.com/yt-dlp/yt-dlp/issues/13727)) by [bashonly](https://github.com/bashonly) (With fixes in [ef103b2](https://github.com/yt-dlp/yt-dlp/commit/ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d))
|
||||
- **joqrag**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6d39c420f7774562a106d90253e2ed5b75036321) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080)
|
||||
- **limelight**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/5d693446e882931618c40c99bb593f0b87b30eb9) ([#13267](https://github.com/yt-dlp/yt-dlp/issues/13267)) by [doe1080](https://github.com/doe1080)
|
||||
- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b4b4486effdcb96bb6b8148171a49ff579b69a4a) ([#13717](https://github.com/yt-dlp/yt-dlp/issues/13717)) by [Pawka](https://github.com/Pawka)
|
||||
- **mir24.tv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7b4c96e0898db048259ef5fdf12ed14e3605dce3) ([#13651](https://github.com/yt-dlp/yt-dlp/issues/13651)) by [swayll](https://github.com/swayll)
|
||||
- **mixlr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0f33950c778331bf4803c76e8b0ba1862df93431) ([#13561](https://github.com/yt-dlp/yt-dlp/issues/13561)) by [seproDev](https://github.com/seproDev), [ShockedPlot7560](https://github.com/ShockedPlot7560)
|
||||
- **mlbtv**: [Make formats downloadable with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226) ([#13761](https://github.com/yt-dlp/yt-dlp/issues/13761)) by [bashonly](https://github.com/bashonly)
|
||||
- **newspicks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2aaf1aa71d174700859c9ec1a81109b78e34961c) ([#13612](https://github.com/yt-dlp/yt-dlp/issues/13612)) by [doe1080](https://github.com/doe1080)
|
||||
- **nhkradiru**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7c49a937887756efcfa162abdcf17e48c244cb0c) ([#12708](https://github.com/yt-dlp/yt-dlp/issues/12708)) by [garret1317](https://github.com/garret1317)
|
||||
- **noovo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/d57a0b5aa78d59324b037d37492fe86aa4fbf58a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080)
|
||||
- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d88b304d44c599d81acfa4231502270c8b9fe2f8) ([#13712](https://github.com/yt-dlp/yt-dlp/issues/13712)) by [bashonly](https://github.com/bashonly)
|
||||
- **playerfm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3) ([#13016](https://github.com/yt-dlp/yt-dlp/issues/13016)) by [R0hanW](https://github.com/R0hanW)
|
||||
- **rai**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c8329fc572903eeed7edad1642773b2268b71a62) ([#13572](https://github.com/yt-dlp/yt-dlp/issues/13572)) by [moonshinerd](https://github.com/moonshinerd), [seproDev](https://github.com/seproDev)
|
||||
- **raisudtirol**: [Support alternative domain](https://github.com/yt-dlp/yt-dlp/commit/85c3fa1925a9057ef4ae8af682686d5b3eb8e568) ([#13718](https://github.com/yt-dlp/yt-dlp/issues/13718)) by [barsnick](https://github.com/barsnick)
|
||||
- **skeb**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/060c6a4501a0b8a92f1b9c12788f556d902c83c6) ([#13593](https://github.com/yt-dlp/yt-dlp/issues/13593)) by [doe1080](https://github.com/doe1080)
|
||||
- **soundcloud**: [Always extract original format extension](https://github.com/yt-dlp/yt-dlp/commit/c1ac543c8166ff031d62e340b3244ca8556e3fb9) ([#13746](https://github.com/yt-dlp/yt-dlp/issues/13746)) by [bashonly](https://github.com/bashonly)
|
||||
- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0b41746964e1d0470ac286ce09408940a3a51147) ([#13610](https://github.com/yt-dlp/yt-dlp/issues/13610)) by [bashonly](https://github.com/bashonly)
|
||||
- **thehighwire**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3a84be9d1660ef798ea28f929a20391bef6afda4) ([#13505](https://github.com/yt-dlp/yt-dlp/issues/13505)) by [swayll](https://github.com/swayll)
|
||||
- **twitch**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/422cc8cb2ff2bd3b4c2bc64e23507b7e6f522c35) ([#13618](https://github.com/yt-dlp/yt-dlp/issues/13618)) by [bashonly](https://github.com/bashonly)
|
||||
- **unitednationswebtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/630f3389c33f0f7f6ec97e8917d20aeb4e4078da) ([#13538](https://github.com/yt-dlp/yt-dlp/issues/13538)) by [averageFOSSenjoyer](https://github.com/averageFOSSenjoyer)
|
||||
- **vimeo**
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a5d697f62d8be78ffd472acb2f52c8bc32833003) ([#13692](https://github.com/yt-dlp/yt-dlp/issues/13692)) by [bashonly](https://github.com/bashonly)
|
||||
- [Handle age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/a6db1d297ab40cc346de24aacbeab93112b2f4e1) ([#13719](https://github.com/yt-dlp/yt-dlp/issues/13719)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**
|
||||
- [Do not require PO Token for premium accounts](https://github.com/yt-dlp/yt-dlp/commit/5b57b72c1a7c6bd249ffcebdf5630761ec664c10) ([#13640](https://github.com/yt-dlp/yt-dlp/issues/13640)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Ensure context params are consistent for web clients](https://github.com/yt-dlp/yt-dlp/commit/6e5bee418bc108565108153fd745c8e7a59f16dd) ([#13701](https://github.com/yt-dlp/yt-dlp/issues/13701)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Extract global nsig helper functions](https://github.com/yt-dlp/yt-dlp/commit/fca94ac5d63ed6578b5cd9c8129d97a8a713c39a) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
- [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/0e68332bcb9fba87c42805b7a051eeb2bed36206) ([#13659](https://github.com/yt-dlp/yt-dlp/issues/13659)) by [bashonly](https://github.com/bashonly)
|
||||
- [Log bad playability statuses of player responses](https://github.com/yt-dlp/yt-dlp/commit/aa9f1f4d577e99897ac16cd19d4e217d688ea75d) ([#13647](https://github.com/yt-dlp/yt-dlp/issues/13647)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Use impersonation for downloading subtitles](https://github.com/yt-dlp/yt-dlp/commit/8820101aa3152e5f4811541c645f8b5de231ba8c) ([#13786](https://github.com/yt-dlp/yt-dlp/issues/13786)) by [bashonly](https://github.com/bashonly)
|
||||
- tab: [Fix subscriptions feed extraction](https://github.com/yt-dlp/yt-dlp/commit/c23d837b6524d1e7a4595948871ba1708cba4dfa) ([#13665](https://github.com/yt-dlp/yt-dlp/issues/13665)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Downloader changes
|
||||
- **hls**: [Do not fall back to ffmpeg when native is required](https://github.com/yt-dlp/yt-dlp/commit/a7113722ec33f30fc898caee9242af2b82188a53) ([#13655](https://github.com/yt-dlp/yt-dlp/issues/13655)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Networking changes
|
||||
- **Request Handler**
|
||||
- requests
|
||||
- [Refactor default headers](https://github.com/yt-dlp/yt-dlp/commit/a4561c7a66c39d88efe7ae51e7fa1986faf093fb) ([#13785](https://github.com/yt-dlp/yt-dlp/issues/13785)) by [bashonly](https://github.com/bashonly)
|
||||
- [Work around partial read dropping data](https://github.com/yt-dlp/yt-dlp/commit/c2ff2dbaec7929015373fe002e9bd4849931a4ce) ([#13599](https://github.com/yt-dlp/yt-dlp/issues/13599)) by [Grub4K](https://github.com/Grub4K) (With fixes in [c316416](https://github.com/yt-dlp/yt-dlp/commit/c316416b972d1b05e58fbcc21e80428b900ce102))
|
||||
|
||||
#### Misc. changes
|
||||
- **cleanup**
|
||||
- [Bump ruff to 0.12.x](https://github.com/yt-dlp/yt-dlp/commit/ca5cce5b07d51efe7310b449cdefeca8d873e9df) ([#13596](https://github.com/yt-dlp/yt-dlp/issues/13596)) by [seproDev](https://github.com/seproDev)
|
||||
- Miscellaneous: [9951fdd](https://github.com/yt-dlp/yt-dlp/commit/9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e) by [adamralph](https://github.com/adamralph), [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080), [hseg](https://github.com/hseg), [InvalidUsernameException](https://github.com/InvalidUsernameException), [seproDev](https://github.com/seproDev)
|
||||
- **devscripts**: [Fix filename/directory Bash completions](https://github.com/yt-dlp/yt-dlp/commit/99093e96fd6a26dea9d6e4bd1e4b16283b6ad1ee) ([#13620](https://github.com/yt-dlp/yt-dlp/issues/13620)) by [barsnick](https://github.com/barsnick)
|
||||
- **test**: download: [Support `playlist_maxcount`](https://github.com/yt-dlp/yt-dlp/commit/fd36b8f31bafbd8096bdb92a446a0c9c6081209c) ([#13433](https://github.com/yt-dlp/yt-dlp/issues/13433)) by [InvalidUsernameException](https://github.com/InvalidUsernameException)
|
||||
|
||||
### 2025.06.30
|
||||
|
||||
#### Core changes
|
||||
- **jsinterp**: [Fix `extract_object`](https://github.com/yt-dlp/yt-dlp/commit/958153a226214c86879e36211ac191bf78289578) ([#13580](https://github.com/yt-dlp/yt-dlp/issues/13580)) by [seproDev](https://github.com/seproDev)
|
||||
|
||||
#### Extractor changes
|
||||
- **bilibilispacevideo**: [Extract hidden-mode collections as playlists](https://github.com/yt-dlp/yt-dlp/commit/99b85ac102047446e6adf5b62bfc3c8d80b53778) ([#13533](https://github.com/yt-dlp/yt-dlp/issues/13533)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **hotstar**
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b5bd057fe86550f3aa67f2fc8790d1c6a251c57b) ([#13530](https://github.com/yt-dlp/yt-dlp/issues/13530)) by [bashonly](https://github.com/bashonly), [chauhantirth](https://github.com/chauhantirth) (With fixes in [e9f1576](https://github.com/yt-dlp/yt-dlp/commit/e9f157669e24953a88d15ce22053649db7a8e81e) by [bashonly](https://github.com/bashonly))
|
||||
- [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0a6b1044899f452cd10b6c7a6b00fa985a9a8b97) ([#13560](https://github.com/yt-dlp/yt-dlp/issues/13560)) by [bashonly](https://github.com/bashonly)
|
||||
- [Raise for login required](https://github.com/yt-dlp/yt-dlp/commit/5e292baad62c749b6c340621ab2d0f904165ddfb) ([#10405](https://github.com/yt-dlp/yt-dlp/issues/10405)) by [bashonly](https://github.com/bashonly)
|
||||
- series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4bd9a7ade7e0508b9795b3e72a69eeb40788b62b) ([#13564](https://github.com/yt-dlp/yt-dlp/issues/13564)) by [bashonly](https://github.com/bashonly)
|
||||
- **jiocinema**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/7e2504f941a11ea2b0dba00de3f0295cdc253e79) ([#13565](https://github.com/yt-dlp/yt-dlp/issues/13565)) by [bashonly](https://github.com/bashonly)
|
||||
- **kick**: [Support subscriber-only content](https://github.com/yt-dlp/yt-dlp/commit/b16722ede83377f77ea8352dcd0a6ca8e83b8f0f) ([#13550](https://github.com/yt-dlp/yt-dlp/issues/13550)) by [helpimnotdrowning](https://github.com/helpimnotdrowning)
|
||||
- **niconico**: live: [Fix extractor and downloader](https://github.com/yt-dlp/yt-dlp/commit/06c1a8cdffe14050206683253726875144192ef5) ([#13158](https://github.com/yt-dlp/yt-dlp/issues/13158)) by [doe1080](https://github.com/doe1080)
|
||||
- **sauceplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/35fc33fbc51c7f5392fb2300f65abf6cf107ef90) ([#13567](https://github.com/yt-dlp/yt-dlp/issues/13567)) by [bashonly](https://github.com/bashonly), [ceandreasen](https://github.com/ceandreasen)
|
||||
- **sproutvideo**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/11b9416e10cff7513167d76d6c47774fcdd3e26a) ([#13589](https://github.com/yt-dlp/yt-dlp/issues/13589)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**: [Fix premium formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2ba5391cd68ed4f2415c827d2cecbcbc75ace10b) ([#13586](https://github.com/yt-dlp/yt-dlp/issues/13586)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Misc. changes
|
||||
- **ci**: [Add signature tests](https://github.com/yt-dlp/yt-dlp/commit/1b883846347addeab12663fd74317fd544341a1c) ([#13582](https://github.com/yt-dlp/yt-dlp/issues/13582)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**: Miscellaneous: [b018784](https://github.com/yt-dlp/yt-dlp/commit/b0187844988e557c7e1e6bb1aabd4c1176768d86) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2025.06.25
|
||||
|
||||
#### Extractor changes
|
||||
- [Add `_search_nuxt_json` helper](https://github.com/yt-dlp/yt-dlp/commit/51887484e46ab6015c041cb1ab626a55f25a03bd) ([#13386](https://github.com/yt-dlp/yt-dlp/issues/13386)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
|
||||
- **brightcove**: new: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e6bd4a3da295b760ab20b39c18ce8934d312c2bf) ([#13461](https://github.com/yt-dlp/yt-dlp/issues/13461)) by [doe1080](https://github.com/doe1080)
|
||||
- **huya**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2600849badb0d08c55b58dcc77a13af6ba423da6) ([#13520](https://github.com/yt-dlp/yt-dlp/issues/13520)) by [doe1080](https://github.com/doe1080)
|
||||
- **hypergryph**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1722c55400ff30bb5aee5dd7a262f0b7e9ce2f0e) ([#13415](https://github.com/yt-dlp/yt-dlp/issues/13415)) by [doe1080](https://github.com/doe1080), [eason1478](https://github.com/eason1478)
|
||||
- **lsm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c57412d1f9cf0124adc972a47858ac42b740c61d) ([#13126](https://github.com/yt-dlp/yt-dlp/issues/13126)) by [Caesim404](https://github.com/Caesim404)
|
||||
- **mave**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1838a1ce5d4ade80770ba9162eaffc9a1607dc70) ([#13380](https://github.com/yt-dlp/yt-dlp/issues/13380)) by [anlar](https://github.com/anlar)
|
||||
- **sportdeutschland**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ce4327c9836691d3b6b00e44a90b6741601ed8) ([#13519](https://github.com/yt-dlp/yt-dlp/issues/13519)) by [DTrombett](https://github.com/DTrombett)
|
||||
- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5b559d0072b7164daf06bacdc41c6f11283452c8) ([#13544](https://github.com/yt-dlp/yt-dlp/issues/13544)) by [bashonly](https://github.com/bashonly)
|
||||
- **tv8.it**: [Support slugless URLs](https://github.com/yt-dlp/yt-dlp/commit/3bd30291601c47fa4a257983473884103ecab0c7) ([#13478](https://github.com/yt-dlp/yt-dlp/issues/13478)) by [DTrombett](https://github.com/DTrombett)
|
||||
- **youtube**
|
||||
- [Check any `ios` m3u8 formats prior to download](https://github.com/yt-dlp/yt-dlp/commit/8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f) ([#13524](https://github.com/yt-dlp/yt-dlp/issues/13524)) by [bashonly](https://github.com/bashonly)
|
||||
- [Improve player context payloads](https://github.com/yt-dlp/yt-dlp/commit/ff6f94041aeee19c5559e1c1cd693960a1c1dd14) ([#13539](https://github.com/yt-dlp/yt-dlp/issues/13539)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Misc. changes
|
||||
- **test**: `traversal`: [Fix morsel tests for Python 3.14](https://github.com/yt-dlp/yt-dlp/commit/73bf10211668e4a59ccafd790e06ee82d9fea9ea) ([#13471](https://github.com/yt-dlp/yt-dlp/issues/13471)) by [Grub4K](https://github.com/Grub4K)
|
||||
|
||||
### 2025.06.09
|
||||
|
||||
#### Extractor changes
|
||||
- [Improve JSON LD thumbnails extraction](https://github.com/yt-dlp/yt-dlp/commit/85c8a405e3651dc041b758f4744d4fb3c4c55e01) ([#13368](https://github.com/yt-dlp/yt-dlp/issues/13368)) by [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080)
|
||||
- **10play**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6d265388c6e943419ac99e9151cf75a3265f980f) ([#13349](https://github.com/yt-dlp/yt-dlp/issues/13349)) by [bashonly](https://github.com/bashonly)
|
||||
- **adobepass**
|
||||
- [Add Fubo MSO](https://github.com/yt-dlp/yt-dlp/commit/eee90acc47d7f8de24afaa8b0271ccaefdf6e88c) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
|
||||
- [Always add newer user-agent when required](https://github.com/yt-dlp/yt-dlp/commit/0ee1102268cf31b07f8a8318a47424c66b2f7378) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix Philo MSO authentication](https://github.com/yt-dlp/yt-dlp/commit/943083edcd3df45aaa597a6967bc6c95b720f54c) ([#13335](https://github.com/yt-dlp/yt-dlp/issues/13335)) by [Sipherdrakon](https://github.com/Sipherdrakon)
|
||||
- [Rework to require software statement](https://github.com/yt-dlp/yt-dlp/commit/711c5d5d098fee2992a1a624b1c4b30364b91426) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123)
|
||||
- [Validate login URL before sending credentials](https://github.com/yt-dlp/yt-dlp/commit/89c1b349ad81318d9d3bea76c01c891696e58d38) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **aenetworks**
|
||||
- [Fix playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/f37d599a697e82fe68b423865897d55bae34f373) ([#13408](https://github.com/yt-dlp/yt-dlp/issues/13408)) by [Sipherdrakon](https://github.com/Sipherdrakon)
|
||||
- [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/6693d6603358ae6beca834dbd822a7917498b813) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
|
||||
- **bilibilibangumi**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/13e55162719528d42d2133e16b65ff59a667a6e4) ([#13416](https://github.com/yt-dlp/yt-dlp/issues/13416)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **brightcove**: new: [Adapt to new AdobePass requirement](https://github.com/yt-dlp/yt-dlp/commit/98f8eec956e3b16cb66a3d49cc71af3807db795e) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **cu.ntv.co.jp**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/aa863ddab9b1d104678e9cf39bb76f5b14fca660) ([#13302](https://github.com/yt-dlp/yt-dlp/issues/13302)) by [doe1080](https://github.com/doe1080), [nullpos](https://github.com/nullpos)
|
||||
- **go**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5bf002dad16f5ce35aa2023d392c9e518fcd8f) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123)
|
||||
- **nbc**: [Rework and adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/2d7949d5642bc37d1e71bf00c9a55260e5505d58) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **nobelprize**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/97ddfefeb4faba6e61cd80996c16952b8eab16f3) ([#13205](https://github.com/yt-dlp/yt-dlp/issues/13205)) by [doe1080](https://github.com/doe1080)
|
||||
- **odnoklassniki**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/148a1eb4c59e127965396c7a6e6acf1979de459e) ([#13361](https://github.com/yt-dlp/yt-dlp/issues/13361)) by [bashonly](https://github.com/bashonly)
|
||||
- **patreon**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e0d6c0822930f6e63f574d46d946a58b73ecd10c) ([#13266](https://github.com/yt-dlp/yt-dlp/issues/13266)) by [bashonly](https://github.com/bashonly) (With fixes in [1a8a03e](https://github.com/yt-dlp/yt-dlp/commit/1a8a03ea8d827107319a18076ee3505090667c5a))
|
||||
- **podchaser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/538eb305673c26bff6a2b12f1c96375fe02ce41a) ([#13271](https://github.com/yt-dlp/yt-dlp/issues/13271)) by [bashonly](https://github.com/bashonly)
|
||||
- **sr**: mediathek: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e3c605a61f4cc2de9059f37434fa108c3c20f58e) ([#13294](https://github.com/yt-dlp/yt-dlp/issues/13294)) by [doe1080](https://github.com/doe1080)
|
||||
- **stacommu**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/5d96527be80dc1ed1702d9cd548ff86de570ad70) ([#13412](https://github.com/yt-dlp/yt-dlp/issues/13412)) by [bashonly](https://github.com/bashonly)
|
||||
- **startrek**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a8bf0011bde92b3f1324a98bfbd38932fd3ebe18) ([#13188](https://github.com/yt-dlp/yt-dlp/issues/13188)) by [doe1080](https://github.com/doe1080)
|
||||
- **svt**: play: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e1b6062f8c4a3fa33c65269d48d09ec78de765a2) ([#13329](https://github.com/yt-dlp/yt-dlp/issues/13329)) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly)
|
||||
- **telecinco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03dba2012d9bd3f402fa8c2f122afba89bbd22a4) ([#13379](https://github.com/yt-dlp/yt-dlp/issues/13379)) by [bashonly](https://github.com/bashonly)
|
||||
- **theplatform**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/ed108b3ea481c6a4b5215a9302ba92d74baa2425) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **toutiao**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8051e3a61686c5db1de5f5746366ecfbc3ad20c) ([#13246](https://github.com/yt-dlp/yt-dlp/issues/13246)) by [doe1080](https://github.com/doe1080)
|
||||
- **turner**: [Adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/0daddc780d3ac5bebc3a3ec5b884d9243cbc0745) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **twitcasting**: [Fix password-protected livestream support](https://github.com/yt-dlp/yt-dlp/commit/52f9729c9a92ad4656d746ff0b1acecb87b3e96d) ([#13097](https://github.com/yt-dlp/yt-dlp/issues/13097)) by [bashonly](https://github.com/bashonly)
|
||||
- **twitter**: broadcast: [Support events URLs](https://github.com/yt-dlp/yt-dlp/commit/7794374de8afb20499b023107e2abfd4e6b93ee4) ([#13248](https://github.com/yt-dlp/yt-dlp/issues/13248)) by [doe1080](https://github.com/doe1080)
|
||||
- **umg**: de: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/4e7c1ea346b510280218b47e8653dbbca3a69870) ([#13373](https://github.com/yt-dlp/yt-dlp/issues/13373)) by [doe1080](https://github.com/doe1080)
|
||||
- **vice**: [Mark extractors as broken](https://github.com/yt-dlp/yt-dlp/commit/6121559e027a04574690799c1776bc42bb51af31) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **vimeo**: [Extract subtitles from player subdomain](https://github.com/yt-dlp/yt-dlp/commit/c723c4e5e78263df178dbe69844a3d05f3ef9e35) ([#13350](https://github.com/yt-dlp/yt-dlp/issues/13350)) by [bashonly](https://github.com/bashonly)
|
||||
- **watchespn**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/b094747e93cfb0a2c53007120e37d0d84d41f030) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
|
||||
- **weverse**: [Support login with oauth refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/3fe72e9eea38d9a58211cde42cfaa577ce020e2c) ([#13284](https://github.com/yt-dlp/yt-dlp/issues/13284)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**
|
||||
- [Add `tv_simply` player client](https://github.com/yt-dlp/yt-dlp/commit/1fd0e88b67db53ad163393d6965f68e908fa70e3) ([#13389](https://github.com/yt-dlp/yt-dlp/issues/13389)) by [gamer191](https://github.com/gamer191)
|
||||
- [Extract srt subtitles](https://github.com/yt-dlp/yt-dlp/commit/231349786e8c42089c2e079ec94c0ea866c37999) ([#13411](https://github.com/yt-dlp/yt-dlp/issues/13411)) by [gamer191](https://github.com/gamer191)
|
||||
- [Fix `--mark-watched` support](https://github.com/yt-dlp/yt-dlp/commit/b5be29fa58ec98226e11621fd9c58585bcff6879) ([#13222](https://github.com/yt-dlp/yt-dlp/issues/13222)) by [brian6932](https://github.com/brian6932), [iednod55](https://github.com/iednod55)
|
||||
- [Fix automatic captions for some client combinations](https://github.com/yt-dlp/yt-dlp/commit/53ea743a9c158f8ca2d75a09ca44ba68606042d8) ([#13268](https://github.com/yt-dlp/yt-dlp/issues/13268)) by [bashonly](https://github.com/bashonly)
|
||||
- [Improve signature extraction debug output](https://github.com/yt-dlp/yt-dlp/commit/d30a49742cfa22e61c47df4ac0e7334d648fb85d) ([#13327](https://github.com/yt-dlp/yt-dlp/issues/13327)) by [bashonly](https://github.com/bashonly)
|
||||
- [Rework nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/9e38b273b7ac942e7e9fc05a651ed810ab7d30ba) ([#13403](https://github.com/yt-dlp/yt-dlp/issues/13403)) by [Grub4K](https://github.com/Grub4K)
|
||||
- [nsig code improvements and cleanup](https://github.com/yt-dlp/yt-dlp/commit/f7bbf5a617f9ab54ef51eaef99be36e175b5e9c3) ([#13280](https://github.com/yt-dlp/yt-dlp/issues/13280)) by [bashonly](https://github.com/bashonly)
|
||||
- **zdf**: [Fix language extraction and format sorting](https://github.com/yt-dlp/yt-dlp/commit/db162b76f6bdece50babe2e0cacfe56888c2e125) ([#13313](https://github.com/yt-dlp/yt-dlp/issues/13313)) by [InvalidUsernameException](https://github.com/InvalidUsernameException)
|
||||
|
||||
#### Misc. changes
|
||||
- **build**
|
||||
- [Exclude `pkg_resources` from being collected](https://github.com/yt-dlp/yt-dlp/commit/cc749a8a3b8b6e5c05318868c72a403f376a1b38) ([#13320](https://github.com/yt-dlp/yt-dlp/issues/13320)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix macOS requirements caching](https://github.com/yt-dlp/yt-dlp/commit/201812100f315c6727a4418698d5b4e8a79863d4) ([#13328](https://github.com/yt-dlp/yt-dlp/issues/13328)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**: Miscellaneous: [339614a](https://github.com/yt-dlp/yt-dlp/commit/339614a173c74b42d63e858c446a9cae262a13af) by [bashonly](https://github.com/bashonly)
|
||||
- **test**: postprocessors: [Remove binary thumbnail test data](https://github.com/yt-dlp/yt-dlp/commit/a9b370069838e84d44ac7ad095d657003665885a) ([#13341](https://github.com/yt-dlp/yt-dlp/issues/13341)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2025.05.22
|
||||
|
||||
#### Core changes
|
||||
- **cookies**: [Fix Linux desktop environment detection](https://github.com/yt-dlp/yt-dlp/commit/e491fd4d090db3af52a82863fb0553dd5e17fb85) ([#13197](https://github.com/yt-dlp/yt-dlp/issues/13197)) by [mbway](https://github.com/mbway)
|
||||
- **jsinterp**: [Fix increment/decrement evaluation](https://github.com/yt-dlp/yt-dlp/commit/167d7a9f0ffd1b4fe600193441bdb7358db2740b) ([#13238](https://github.com/yt-dlp/yt-dlp/issues/13238)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
|
||||
#### Extractor changes
|
||||
- **1tv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/41c0a1fb89628696f8bb88e2b9f3a68f355b8c26) ([#13168](https://github.com/yt-dlp/yt-dlp/issues/13168)) by [bashonly](https://github.com/bashonly)
|
||||
- **amcnetworks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/464c84fedf78eef822a431361155f108b5df96d7) ([#13147](https://github.com/yt-dlp/yt-dlp/issues/13147)) by [bashonly](https://github.com/bashonly)
|
||||
- **bitchute**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1d0f6539c47e5d5c68c3c47cdb7075339e2885ac) ([#13081](https://github.com/yt-dlp/yt-dlp/issues/13081)) by [bashonly](https://github.com/bashonly)
|
||||
- **cartoonnetwork**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/7dbb47f84f0ee1266a3a01f58c9bc4c76d76794a) ([#13148](https://github.com/yt-dlp/yt-dlp/issues/13148)) by [bashonly](https://github.com/bashonly)
|
||||
- **iprima**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/a7d9a5eb79ceeecb851389f3f2c88597871ca3f2) ([#12937](https://github.com/yt-dlp/yt-dlp/issues/12937)) by [baierjan](https://github.com/baierjan)
|
||||
- **jiosaavn**
|
||||
- artist: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/586b557b124f954d3f625360ebe970989022ad97) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- playlist, show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/317f4b8006c2c0f0f64f095b1485163ad97c9053) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6839276496d8814cf16f58b637e45663467928e6) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/abf58dcd6a09e14eec4ea82ae12f79a0337cb383) ([#13200](https://github.com/yt-dlp/yt-dlp/issues/13200)) by [Pawka](https://github.com/Pawka)
|
||||
- **nebula**: [Support `--mark-watched`](https://github.com/yt-dlp/yt-dlp/commit/20f288bdc2173c7cc58d709d25ca193c1f6001e7) ([#13120](https://github.com/yt-dlp/yt-dlp/issues/13120)) by [GeoffreyFrogeye](https://github.com/GeoffreyFrogeye)
|
||||
- **niconico**
|
||||
- [Fix error handling](https://github.com/yt-dlp/yt-dlp/commit/f569be4602c2a857087e495d5d7ed6060cd97abe) ([#13236](https://github.com/yt-dlp/yt-dlp/issues/13236)) by [bashonly](https://github.com/bashonly)
|
||||
- live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7a7b85c9014d96421e18aa7ea5f4c1bee5ceece0) ([#13045](https://github.com/yt-dlp/yt-dlp/issues/13045)) by [doe1080](https://github.com/doe1080)
|
||||
- **nytimesarticle**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/b26bc32579c00ef579d75a835807ccc87d20ee0a) ([#13104](https://github.com/yt-dlp/yt-dlp/issues/13104)) by [bashonly](https://github.com/bashonly)
|
||||
- **once**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f475e8b529d18efdad603ffda02a56e707fe0e2c) ([#13164](https://github.com/yt-dlp/yt-dlp/issues/13164)) by [bashonly](https://github.com/bashonly)
|
||||
- **picarto**: vod: [Support `/profile/` video URLs](https://github.com/yt-dlp/yt-dlp/commit/31e090cb787f3504ec25485adff9a2a51d056734) ([#13227](https://github.com/yt-dlp/yt-dlp/issues/13227)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- **playsuisse**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/d880e060803ae8ed5a047e578cca01e1f0e630ce) ([#12466](https://github.com/yt-dlp/yt-dlp/issues/12466)) by [v3DJG6GL](https://github.com/v3DJG6GL)
|
||||
- **sprout**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/cbcfe6378dde33a650e3852ab17ad4503b8e008d) ([#13149](https://github.com/yt-dlp/yt-dlp/issues/13149)) by [bashonly](https://github.com/bashonly)
|
||||
- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ea8498ed534642dd7e925961b97b934987142fd3) ([#12957](https://github.com/yt-dlp/yt-dlp/issues/12957)) by [diman8](https://github.com/diman8)
|
||||
- **twitch**: [Support `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/00b1bec55249cf2ad6271d36492c51b34b6459d1) ([#13202](https://github.com/yt-dlp/yt-dlp/issues/13202)) by [bashonly](https://github.com/bashonly)
|
||||
- **vimeo**: event: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/545c1a5b6f2fe88722b41aef0e7485bf3be3f3f9) ([#13216](https://github.com/yt-dlp/yt-dlp/issues/13216)) by [bashonly](https://github.com/bashonly)
|
||||
- **wat.tv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/f123cc83b3aea45053f5fa1d9141048b01fc2774) ([#13111](https://github.com/yt-dlp/yt-dlp/issues/13111)) by [bashonly](https://github.com/bashonly)
|
||||
- **weverse**: [Fix live extraction](https://github.com/yt-dlp/yt-dlp/commit/5328eda8820cc5f21dcf917684d23fbdca41831d) ([#13084](https://github.com/yt-dlp/yt-dlp/issues/13084)) by [bashonly](https://github.com/bashonly)
|
||||
- **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/83fabf352489d52843f67e6e9cc752db86d27e6e) ([#13245](https://github.com/yt-dlp/yt-dlp/issues/13245)) by [garret1317](https://github.com/garret1317)
|
||||
- **youtube**
|
||||
- [Add PO token support for subtitles](https://github.com/yt-dlp/yt-dlp/commit/32ed5f107c6c641958d1cd2752e130de4db55a13) ([#13234](https://github.com/yt-dlp/yt-dlp/issues/13234)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Add `web_embedded` client for age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/0feec6dc131f488428bf881519e7c69766fbb9ae) ([#13089](https://github.com/yt-dlp/yt-dlp/issues/13089)) by [bashonly](https://github.com/bashonly)
|
||||
- [Add a PO Token Provider Framework](https://github.com/yt-dlp/yt-dlp/commit/2685654a37141cca63eda3a92da0e2706e23ccfd) ([#12840](https://github.com/yt-dlp/yt-dlp/issues/12840)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Extract `media_type` for all videos](https://github.com/yt-dlp/yt-dlp/commit/ded11ebc9afba6ba33923375103e9be2d7c804e7) ([#13136](https://github.com/yt-dlp/yt-dlp/issues/13136)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix `--live-from-start` support for premieres](https://github.com/yt-dlp/yt-dlp/commit/8f303afb43395be360cafd7ad4ce2b6e2eedfb8a) ([#13079](https://github.com/yt-dlp/yt-dlp/issues/13079)) by [arabcoders](https://github.com/arabcoders)
|
||||
- [Fix geo-restriction error handling](https://github.com/yt-dlp/yt-dlp/commit/c7e575e31608c19c5b26c10a4229db89db5fc9a8) ([#13217](https://github.com/yt-dlp/yt-dlp/issues/13217)) by [yozel](https://github.com/yozel)
|
||||
|
||||
#### Misc. changes
|
||||
- **build**
|
||||
- [Bump PyInstaller to v6.13.0](https://github.com/yt-dlp/yt-dlp/commit/17cf9088d0d535e4a7feffbf02bd49cd9dae5ab9) ([#13082](https://github.com/yt-dlp/yt-dlp/issues/13082)) by [bashonly](https://github.com/bashonly)
|
||||
- [Bump run-on-arch-action to v3](https://github.com/yt-dlp/yt-dlp/commit/9064d2482d1fe722bbb4a49731fe0711c410d1c8) ([#13088](https://github.com/yt-dlp/yt-dlp/issues/13088)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**: Miscellaneous: [7977b32](https://github.com/yt-dlp/yt-dlp/commit/7977b329ed97b216e37bd402f4935f28c00eac9e) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2025.04.30
|
||||
|
||||
#### Important changes
|
||||
|
5
Makefile
5
Makefile
@ -18,10 +18,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
|
||||
tar pypi-files lazy-extractors install uninstall
|
||||
|
||||
clean-test:
|
||||
rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
||||
rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
||||
*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
|
||||
*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \
|
||||
*.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
|
||||
*.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp \
|
||||
test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."*
|
||||
clean-dist:
|
||||
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
|
||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
|
||||
|
47
README.md
47
README.md
@ -44,6 +44,7 @@
|
||||
* [Post-processing Options](#post-processing-options)
|
||||
* [SponsorBlock Options](#sponsorblock-options)
|
||||
* [Extractor Options](#extractor-options)
|
||||
* [Preset Aliases](#preset-aliases)
|
||||
* [CONFIGURATION](#configuration)
|
||||
* [Configuration file encoding](#configuration-file-encoding)
|
||||
* [Authentication with netrc](#authentication-with-netrc)
|
||||
@ -276,7 +277,7 @@ # USAGE AND OPTIONS
|
||||
<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
|
||||
yt-dlp [OPTIONS] [--] URL [URL...]
|
||||
|
||||
`Ctrl+F` is your friend :D
|
||||
Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords
|
||||
<!-- MANPAGE: END EXCLUDED SECTION -->
|
||||
|
||||
<!-- Auto generated -->
|
||||
@ -348,8 +349,8 @@ ## General Options:
|
||||
--no-flat-playlist Fully extract the videos of a playlist
|
||||
(default)
|
||||
--live-from-start Download livestreams from the start.
|
||||
Currently only supported for YouTube
|
||||
(Experimental)
|
||||
Currently experimental and only supported
|
||||
for YouTube and Twitch
|
||||
--no-live-from-start Download livestreams from the current time
|
||||
(default)
|
||||
--wait-for-video MIN[-MAX] Wait for scheduled streams to become
|
||||
@ -375,12 +376,12 @@ ## General Options:
|
||||
an alias starts with a dash "-", it is
|
||||
prefixed with "--". Arguments are parsed
|
||||
according to the Python string formatting
|
||||
mini-language. E.g. --alias get-audio,-X
|
||||
"-S=aext:{0},abr -x --audio-format {0}"
|
||||
creates options "--get-audio" and "-X" that
|
||||
takes an argument (ARG0) and expands to
|
||||
"-S=aext:ARG0,abr -x --audio-format ARG0".
|
||||
All defined aliases are listed in the --help
|
||||
mini-language. E.g. --alias get-audio,-X "-S
|
||||
aext:{0},abr -x --audio-format {0}" creates
|
||||
options "--get-audio" and "-X" that take an
|
||||
argument (ARG0) and expands to "-S
|
||||
aext:ARG0,abr -x --audio-format ARG0". All
|
||||
defined aliases are listed in the --help
|
||||
output. Alias options can trigger more
|
||||
aliases; so be careful to avoid defining
|
||||
recursive options. As a safety measure, each
|
||||
@ -638,9 +639,9 @@ ## Filesystem Options:
|
||||
--no-part Do not use .part files - write directly into
|
||||
output file
|
||||
--mtime Use the Last-modified header to set the file
|
||||
modification time (default)
|
||||
modification time
|
||||
--no-mtime Do not use the Last-modified header to set
|
||||
the file modification time
|
||||
the file modification time (default)
|
||||
--write-description Write video description to a .description file
|
||||
--no-write-description Do not write video description (default)
|
||||
--write-info-json Write video metadata to a .info.json file
|
||||
@ -1105,6 +1106,10 @@ ## Extractor Options:
|
||||
arguments for different extractors
|
||||
|
||||
## Preset Aliases:
|
||||
Predefined aliases for convenience and ease of use. Note that future
|
||||
versions of yt-dlp may add or adjust presets, but the existing preset
|
||||
names will not be changed or removed
|
||||
|
||||
-t mp3 -f 'ba[acodec^=mp3]/ba/b' -x --audio-format
|
||||
mp3
|
||||
|
||||
@ -1151,15 +1156,15 @@ # CONFIGURATION
|
||||
* `/etc/yt-dlp/config`
|
||||
* `/etc/yt-dlp/config.txt`
|
||||
|
||||
E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
|
||||
E.g. with the following configuration file, yt-dlp will always extract the audio, copy the mtime, use a proxy and save all videos under the `YouTube` directory in your home directory:
|
||||
```
|
||||
# Lines starting with # are comments
|
||||
|
||||
# Always extract audio
|
||||
-x
|
||||
|
||||
# Do not copy the mtime
|
||||
--no-mtime
|
||||
# Copy the mtime
|
||||
--mtime
|
||||
|
||||
# Use this proxy
|
||||
--proxy 127.0.0.1:3128
|
||||
@ -1790,10 +1795,11 @@ # EXTRACTOR ARGUMENTS
|
||||
The following extractors use this feature:
|
||||
|
||||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
||||
* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
@ -1805,7 +1811,7 @@ #### youtube
|
||||
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
||||
* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
|
||||
* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
|
||||
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be either `gvs` (Google Video Server URLs) or `player` (Innertube player request)
|
||||
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma-separated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player+XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
|
||||
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
|
||||
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try to fetch a PO Token regardless of whether the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
|
||||
|
||||
@ -1895,6 +1901,10 @@ #### sonylivseries
|
||||
#### tver
|
||||
* `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated)
|
||||
|
||||
#### vimeo
|
||||
* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens
|
||||
* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability
|
||||
|
||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
|
||||
@ -2257,6 +2267,7 @@ ### Differences in default behavior
|
||||
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
|
||||
* The sub-modules `swfinterp`, `casefold` are removed.
|
||||
* Passing `--simulate` (or calling `extract_info` with `download=False`) no longer alters the default format selection. See [#9843](https://github.com/yt-dlp/yt-dlp/issues/9843) for details.
|
||||
* yt-dlp no longer applies the server modified time to downloaded files by default. Use `--mtime` or `--compat-options mtime-by-default` to revert this.
|
||||
|
||||
For ease of use, a few more compat options are available:
|
||||
|
||||
@ -2266,7 +2277,7 @@ ### Differences in default behavior
|
||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization`
|
||||
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
||||
* `--compat-options 2023`: Same as `--compat-options 2024,prefer-vp9-sort`
|
||||
* `--compat-options 2024`: Currently does nothing. Use this to enable all future compat options
|
||||
* `--compat-options 2024`: Same as `--compat-options mtime-by-default`. Use this to enable all future compat options
|
||||
|
||||
The following compat options restore vulnerable behavior from before security patches:
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
set -e
|
||||
|
||||
source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
|
||||
python -m devscripts.install_deps -o --include build
|
||||
python -m devscripts.install_deps --include secretstorage --include curl-cffi
|
||||
python -m devscripts.make_lazy_extractors
|
||||
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
|
||||
|
@ -36,6 +36,9 @@ def main():
|
||||
f'--name={name}',
|
||||
'--icon=devscripts/logo.ico',
|
||||
'--upx-exclude=vcruntime140.dll',
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13311
|
||||
# https://github.com/pyinstaller/pyinstaller/issues/9149
|
||||
'--exclude-module=pkg_resources',
|
||||
'--noconfirm',
|
||||
'--additional-hooks-dir=yt_dlp/__pyinstaller',
|
||||
*opts,
|
||||
|
@ -10,9 +10,13 @@ __yt_dlp()
|
||||
diropts="--cache-dir"
|
||||
|
||||
if [[ ${prev} =~ ${fileopts} ]]; then
|
||||
local IFS=$'\n'
|
||||
type compopt &>/dev/null && compopt -o filenames
|
||||
COMPREPLY=( $(compgen -f -- ${cur}) )
|
||||
return 0
|
||||
elif [[ ${prev} =~ ${diropts} ]]; then
|
||||
local IFS=$'\n'
|
||||
type compopt &>/dev/null && compopt -o dirnames
|
||||
COMPREPLY=( $(compgen -d -- ${cur}) )
|
||||
return 0
|
||||
fi
|
||||
|
@ -254,5 +254,23 @@
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "d596824c2f8428362c072518856065070616e348"
|
||||
},
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "7b81634fb1d15999757e7a9883daa6ef09ea785b"
|
||||
},
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "500761e41acb96953a5064e951d41d190c287e46"
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "f3008bc5f89d2691f2f8dfc51b406ef4e25281c3",
|
||||
"short": "[priority] **Default behaviour changed from `--mtime` to `--no-mtime`**\nyt-dlp no longer applies the server modified time to downloaded files by default. [Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)"
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "959ac99e98c3215437e573c22d64be42d361e863",
|
||||
"short": "[priority] Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)\n - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped"
|
||||
}
|
||||
]
|
||||
|
@ -65,7 +65,7 @@ build = [
|
||||
"build",
|
||||
"hatchling",
|
||||
"pip",
|
||||
"setuptools>=71.0.2", # 71.0.0 broke pyinstaller
|
||||
"setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149
|
||||
"wheel",
|
||||
]
|
||||
dev = [
|
||||
@ -75,7 +75,7 @@ dev = [
|
||||
]
|
||||
static-analysis = [
|
||||
"autopep8~=2.0",
|
||||
"ruff~=0.11.0",
|
||||
"ruff~=0.12.0",
|
||||
]
|
||||
test = [
|
||||
"pytest~=8.1",
|
||||
@ -210,10 +210,12 @@ ignore = [
|
||||
"TD001", # invalid-todo-tag
|
||||
"TD002", # missing-todo-author
|
||||
"TD003", # missing-todo-link
|
||||
"PLC0415", # import-outside-top-level
|
||||
"PLE0604", # invalid-all-object (false positives)
|
||||
"PLE0643", # potential-index-error (false positives)
|
||||
"PLW0603", # global-statement
|
||||
"PLW1510", # subprocess-run-without-check
|
||||
"PLW1641", # eq-without-hash
|
||||
"PLW2901", # redefined-loop-name
|
||||
"RUF001", # ambiguous-unicode-character-string
|
||||
"RUF012", # mutable-class-default
|
||||
|
@ -5,6 +5,8 @@ # Supported sites
|
||||
Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them.
|
||||
The only reliable way to check if a site is supported is to try it.
|
||||
|
||||
- **10play**: [*10play*](## "netrc machine")
|
||||
- **10play:season**
|
||||
- **17live**
|
||||
- **17live:clip**
|
||||
- **17live:vod**
|
||||
@ -131,7 +133,6 @@ # Supported sites
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **BanBye**
|
||||
- **BanByeChannel**
|
||||
- **bandaichannel**
|
||||
- **Bandcamp**
|
||||
- **Bandcamp:album**
|
||||
- **Bandcamp:user**
|
||||
@ -155,7 +156,6 @@ # Supported sites
|
||||
- **Beeg**
|
||||
- **BehindKink**: (**Currently broken**)
|
||||
- **Bellator**
|
||||
- **BellMedia**
|
||||
- **BerufeTV**
|
||||
- **Bet**: (**Currently broken**)
|
||||
- **bfi:player**: (**Currently broken**)
|
||||
@ -195,6 +195,7 @@ # Supported sites
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
- **BlackboardCollaborate**
|
||||
- **BlackboardCollaborateLaunch**
|
||||
- **BleacherReport**: (**Currently broken**)
|
||||
- **BleacherReportCMS**: (**Currently broken**)
|
||||
- **blerp**
|
||||
@ -223,6 +224,7 @@ # Supported sites
|
||||
- **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org
|
||||
- **bt:article**: Bergens Tidende Articles
|
||||
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen
|
||||
- **BTVPlus**
|
||||
- **Bundesliga**
|
||||
- **Bundestag**
|
||||
- **BunnyCdn**
|
||||
@ -246,7 +248,6 @@ # Supported sites
|
||||
- **Canalplus**: mycanal.fr and piwiplus.fr
|
||||
- **Canalsurmas**
|
||||
- **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
|
||||
- **CartoonNetwork**
|
||||
- **cbc.ca**
|
||||
- **cbc.ca:player**
|
||||
- **cbc.ca:player:playlist**
|
||||
@ -296,7 +297,7 @@ # Supported sites
|
||||
- **CNNIndonesia**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralTV**
|
||||
- **ConanClassic**
|
||||
- **ConanClassic**: (**Currently broken**)
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CONtv**
|
||||
- **CookingChannel**
|
||||
@ -316,9 +317,8 @@ # Supported sites
|
||||
- **CSpan**: C-SPAN
|
||||
- **CSpanCongress**
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **cu.ntv.co.jp**: Nippon Television Network
|
||||
- **cu.ntv.co.jp**: 日テレ無料TADA!
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**: [*curiositystream*](## "netrc machine")
|
||||
- **curiositystream:collections**: [*curiositystream*](## "netrc machine")
|
||||
@ -574,9 +574,7 @@ # Supported sites
|
||||
- **HollywoodReporterPlaylist**
|
||||
- **Holodex**
|
||||
- **HotNewHipHop**: (**Currently broken**)
|
||||
- **hotstar**
|
||||
- **hotstar:playlist**
|
||||
- **hotstar:season**
|
||||
- **hotstar**: JioHotstar
|
||||
- **hotstar:series**
|
||||
- **hrfernsehen**
|
||||
- **HRTi**: [*hrti*](## "netrc machine")
|
||||
@ -589,7 +587,7 @@ # Supported sites
|
||||
- **Hungama**
|
||||
- **HungamaAlbumPlaylist**
|
||||
- **HungamaSong**
|
||||
- **huya:live**: huya.com
|
||||
- **huya:live**: 虎牙直播
|
||||
- **huya:video**: 虎牙视频
|
||||
- **Hypem**
|
||||
- **Hytale**
|
||||
@ -646,13 +644,13 @@ # Supported sites
|
||||
- **Jamendo**
|
||||
- **JamendoAlbum**
|
||||
- **JeuxVideo**: (**Currently broken**)
|
||||
- **jiocinema**: [*jiocinema*](## "netrc machine")
|
||||
- **jiocinema:series**: [*jiocinema*](## "netrc machine")
|
||||
- **jiosaavn:album**
|
||||
- **jiosaavn:artist**
|
||||
- **jiosaavn:playlist**
|
||||
- **jiosaavn:show**
|
||||
- **jiosaavn:show:playlist**
|
||||
- **jiosaavn:song**
|
||||
- **Joj**
|
||||
- **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
|
||||
- **Jove**
|
||||
- **JStream**
|
||||
- **JTBC**: jtbc.co.kr
|
||||
@ -723,9 +721,6 @@ # Supported sites
|
||||
- **life:embed**
|
||||
- **likee**
|
||||
- **likee:user**
|
||||
- **limelight**
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LinkedIn**: [*linkedin*](## "netrc machine")
|
||||
- **linkedin:events**: [*linkedin*](## "netrc machine")
|
||||
- **linkedin:learning**: [*linkedin*](## "netrc machine")
|
||||
@ -772,6 +767,7 @@ # Supported sites
|
||||
- **massengeschmack.tv**
|
||||
- **Masters**
|
||||
- **MatchTV**
|
||||
- **Mave**
|
||||
- **MBN**: mbn.co.kr (매일방송)
|
||||
- **MDR**: MDR.DE
|
||||
- **MedalTV**
|
||||
@ -806,6 +802,7 @@ # Supported sites
|
||||
- **minds:channel**
|
||||
- **minds:group**
|
||||
- **Minoto**
|
||||
- **mir24.tv**
|
||||
- **mirrativ**
|
||||
- **mirrativ:user**
|
||||
- **MirrorCoUK**
|
||||
@ -816,6 +813,8 @@ # Supported sites
|
||||
- **mixcloud**
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:user**
|
||||
- **Mixlr**
|
||||
- **MixlrRecoring**
|
||||
- **MLB**
|
||||
- **MLBArticle**
|
||||
- **MLBTV**: [*mlb*](## "netrc machine")
|
||||
@ -828,7 +827,7 @@ # Supported sites
|
||||
- **Mojevideo**: mojevideo.sk
|
||||
- **Mojvideo**
|
||||
- **Monstercat**
|
||||
- **MonsterSirenHypergryphMusic**
|
||||
- **monstersiren**: 塞壬唱片
|
||||
- **Motherless**
|
||||
- **MotherlessGallery**
|
||||
- **MotherlessGroup**
|
||||
@ -880,19 +879,19 @@ # Supported sites
|
||||
- **Naver**
|
||||
- **Naver:live**
|
||||
- **navernow**
|
||||
- **nba**
|
||||
- **nba:channel**
|
||||
- **nba:embed**
|
||||
- **nba:watch**
|
||||
- **nba:watch:collection**
|
||||
- **nba:watch:embed**
|
||||
- **nba**: (**Currently broken**)
|
||||
- **nba:channel**: (**Currently broken**)
|
||||
- **nba:embed**: (**Currently broken**)
|
||||
- **nba:watch**: (**Currently broken**)
|
||||
- **nba:watch:collection**: (**Currently broken**)
|
||||
- **nba:watch:embed**: (**Currently broken**)
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **nbcolympics**
|
||||
- **nbcolympics:stream**
|
||||
- **NBCSports**
|
||||
- **NBCSportsStream**
|
||||
- **NBCSportsVPlayer**
|
||||
- **nbcolympics:stream**: (**Currently broken**)
|
||||
- **NBCSports**: (**Currently broken**)
|
||||
- **NBCSportsStream**: (**Currently broken**)
|
||||
- **NBCSportsVPlayer**: (**Currently broken**)
|
||||
- **NBCStations**
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
- **ndr:embed**
|
||||
@ -968,11 +967,10 @@ # Supported sites
|
||||
- **Nitter**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **NobelPrize**: (**Currently broken**)
|
||||
- **NobelPrize**
|
||||
- **NoicePodcast**
|
||||
- **NonkTube**
|
||||
- **NoodleMagazine**
|
||||
- **Noovo**
|
||||
- **NOSNLArticle**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
- **NovaEmbed**
|
||||
@ -1081,8 +1079,8 @@ # Supported sites
|
||||
- **Photobucket**
|
||||
- **PiaLive**
|
||||
- **Piapro**: [*piapro*](## "netrc machine")
|
||||
- **Picarto**
|
||||
- **PicartoVod**
|
||||
- **picarto**
|
||||
- **picarto:vod**
|
||||
- **Piksel**
|
||||
- **Pinkbike**
|
||||
- **Pinterest**
|
||||
@ -1096,6 +1094,7 @@ # Supported sites
|
||||
- **Platzi**: [*platzi*](## "netrc machine")
|
||||
- **PlatziCourse**: [*platzi*](## "netrc machine")
|
||||
- **player.sky.it**
|
||||
- **PlayerFm**
|
||||
- **playeur**
|
||||
- **PlayPlusTV**: [*playplustv*](## "netrc machine")
|
||||
- **PlaySuisse**: [*playsuisse*](## "netrc machine")
|
||||
@ -1294,6 +1293,7 @@ # Supported sites
|
||||
- **SampleFocus**
|
||||
- **Sangiin**: 参議院インターネット審議中継 (archive)
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **SaucePlus**: Sauce+
|
||||
- **SBS**: sbs.com.au
|
||||
- **sbs.co.kr**
|
||||
- **sbs.co.kr:allvod_program**
|
||||
@ -1390,16 +1390,15 @@ # Supported sites
|
||||
- **Spreaker**
|
||||
- **SpreakerShow**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
- **SproutVideo**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **StacommuLive**: [*stacommu*](## "netrc machine")
|
||||
- **StacommuVOD**: [*stacommu*](## "netrc machine")
|
||||
- **StagePlusVODConcert**: [*stageplus*](## "netrc machine")
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **StarTrek**: (**Currently broken**)
|
||||
- **startrek**: STAR TREK
|
||||
- **startv**
|
||||
- **Steam**
|
||||
- **SteamCommunityBroadcast**
|
||||
@ -1422,12 +1421,11 @@ # Supported sites
|
||||
- **SunPorno**
|
||||
- **sverigesradio:episode**
|
||||
- **sverigesradio:publication**
|
||||
- **SVT**
|
||||
- **SVTPage**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SVTSeries**
|
||||
- **svt:page**
|
||||
- **svt:play**: SVT Play and Öppet arkiv
|
||||
- **svt:play:series**
|
||||
- **SwearnetEpisode**
|
||||
- **Syfy**: (**Currently broken**)
|
||||
- **Syfy**
|
||||
- **SYVDK**
|
||||
- **SztvHu**
|
||||
- **t-online.de**: (**Currently broken**)
|
||||
@ -1471,14 +1469,13 @@ # Supported sites
|
||||
- **Telewebion**: (**Currently broken**)
|
||||
- **Tempo**
|
||||
- **TennisTV**: [*tennistv*](## "netrc machine")
|
||||
- **TenPlay**: [*10play*](## "netrc machine")
|
||||
- **TenPlaySeason**
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **TFO**: (**Currently broken**)
|
||||
- **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine")
|
||||
- **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine")
|
||||
- **TheGuardianPodcast**
|
||||
- **TheGuardianPodcastPlaylist**
|
||||
- **TheHighWire**
|
||||
- **TheHoleTv**
|
||||
- **TheIntercept**
|
||||
- **ThePlatform**
|
||||
@ -1510,6 +1507,7 @@ # Supported sites
|
||||
- **tokfm:podcast**
|
||||
- **ToonGoggles**
|
||||
- **tou.tv**: [*toutv*](## "netrc machine")
|
||||
- **toutiao**: 今日头条
|
||||
- **Toypics**: Toypics video (**Currently broken**)
|
||||
- **ToypicsUser**: Toypics user profile (**Currently broken**)
|
||||
- **TrailerAddict**: (**Currently broken**)
|
||||
@ -1545,8 +1543,8 @@ # Supported sites
|
||||
- **tv2playseries.hu**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MONDE**
|
||||
- **tv5unis**
|
||||
- **tv5unis:video**
|
||||
- **tv5unis**: (**Currently broken**)
|
||||
- **tv5unis:video**: (**Currently broken**)
|
||||
- **tv8.it**
|
||||
- **tv8.it:live**: TV8 Live
|
||||
- **tv8.it:playlist**: TV8 Playlist
|
||||
@ -1599,8 +1597,9 @@ # Supported sites
|
||||
- **UKTVPlay**
|
||||
- **UlizaPlayer**
|
||||
- **UlizaPortal**: ulizaportal.jp
|
||||
- **umg:de**: Universal Music Deutschland (**Currently broken**)
|
||||
- **umg:de**: Universal Music Deutschland
|
||||
- **Unistra**
|
||||
- **UnitedNationsWebTv**
|
||||
- **Unity**: (**Currently broken**)
|
||||
- **uol.com.br**
|
||||
- **uplynk**
|
||||
@ -1622,9 +1621,9 @@ # Supported sites
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **vhx:embed**: [*vimeo*](## "netrc machine")
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
- **vice:show**
|
||||
- **vice**: (**Currently broken**)
|
||||
- **vice:article**: (**Currently broken**)
|
||||
- **vice:show**: (**Currently broken**)
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.arnes.si**: Arnes Video
|
||||
@ -1656,6 +1655,7 @@ # Supported sites
|
||||
- **vimeo**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:album**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:channel**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:event**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:group**: [*vimeo*](## "netrc machine")
|
||||
- **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes
|
||||
- **vimeo:ondemand**: [*vimeo*](## "netrc machine")
|
||||
|
@ -36,6 +36,18 @@ def do_GET(self):
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
|
||||
elif self.path == '/fake.m3u8':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Length', '1024')
|
||||
self.end_headers()
|
||||
self.wfile.write(1024 * b'\x00')
|
||||
elif self.path == '/bipbop.m3u8':
|
||||
with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f:
|
||||
data = f.read()
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Length', str(len(data)))
|
||||
self.end_headers()
|
||||
self.wfile.write(data)
|
||||
else:
|
||||
assert False
|
||||
|
||||
@ -314,6 +326,20 @@ def test_search_json_ld_realworld(self):
|
||||
},
|
||||
{},
|
||||
),
|
||||
(
|
||||
# test thumbnail_url key without URL scheme
|
||||
r'''
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "VideoObject",
|
||||
"thumbnail_url": "//www.nobelprize.org/images/12693-landscape-medium-gallery.jpg"
|
||||
}</script>''',
|
||||
{
|
||||
'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}],
|
||||
},
|
||||
{},
|
||||
),
|
||||
]
|
||||
for html, expected_dict, search_json_ld_kwargs in _TESTS:
|
||||
expect_dict(
|
||||
@ -1933,6 +1959,208 @@ def test_search_nextjs_data(self):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
|
||||
|
||||
def test_search_nextjs_v13_data(self):
    """Check `_search_nextjs_v13_data` against a page of `self.__next_f.push` scripts.

    The fixture mixes several kinds of push calls. EXPECTED pins what the
    extractor is required to return for it: a dict keyed by the numeric part
    of each `$L<id>` reference, mapped to the props object that accompanies
    that reference. Empty or missing HTML with `fatal=False` must yield `{}`.
    """
    # NOTE: raw string -- the backslash escapes below are part of the page
    # source handed to the extractor, not Python escapes.
    HTML = R'''
        <script>(self.__next_f=self.__next_f||[]).push([0])</script>
        <script>self.__next_f.push([2,"0:[\"$\",\"$L0\",null,{\"do_not_add_this\":\"fail\"}]\n"])</script>
        <script>self.__next_f.push([1,"1:I[46975,[],\"HTTPAccessFallbackBoundary\"]\n2:I[32630,[\"8183\",\"static/chunks/8183-768193f6a9e33cdd.js\"]]\n"])</script>
        <script nonce="abc123">self.__next_f.push([1,"e:[false,[\"$\",\"div\",null,{\"children\":[\"$\",\"$L18\",null,{\"foo\":\"bar\"}]}],false]\n "])</script>
        <script>self.__next_f.push([1,"2a:[[\"$\",\"div\",null,{\"className\":\"flex flex-col\",\"children\":[]}],[\"$\",\"$L16\",null,{\"meta\":{\"dateCreated\":1730489700,\"uuid\":\"40cac41d-8d29-4ef5-aa11-75047b9f0907\"}}]]\n"])</script>
        <script>self.__next_f.push([1,"df:[\"$undefined\",[\"$\",\"div\",null,{\"children\":[\"$\",\"$L17\",null,{}],\"do_not_include_this_field\":\"fail\"}],[\"$\",\"div\",null,{\"children\":[[\"$\",\"$L19\",null,{\"duplicated_field_name\":{\"x\":1}}],[\"$\",\"$L20\",null,{\"duplicated_field_name\":{\"y\":2}}]]}],\"$undefined\"]\n"])</script>
        <script>self.__next_f.push([3,"MzM6WyIkIiwiJEwzMiIsbnVsbCx7ImRlY29kZWQiOiJzdWNjZXNzIn1d"])</script>
    '''
    # Only the props of `$L18`, `$L16`, `$L19` and `$L20` are expected.
    # Deliberately absent: the `do_not_add_this` entry of the `[2, ...]` push,
    # the `do_not_include_this_field` sibling key, `$L17` (its props are `{}`)
    # and anything from the final `[3, ...]` push (presumably a base64
    # payload -- TODO confirm against the extractor implementation).
    EXPECTED = {
        '18': {
            'foo': 'bar',
        },
        '16': {
            'meta': {
                'dateCreated': 1730489700,
                'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
            },
        },
        '19': {
            'duplicated_field_name': {'x': 1},
        },
        '20': {
            'duplicated_field_name': {'y': 2},
        },
    }
    self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
    # Non-fatal lookups on empty / missing HTML fall back to an empty dict
    self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {})
    self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {})
|
||||
|
||||
def test_search_nuxt_json(self):
    """Check `_search_nuxt_json` on valid, partially invalid and invalid payloads.

    Each fixture is spliced into a `<script id="__NUXT_DATA__">` element as
    the contents of one JSON array, in which integers inside containers are
    indices into that same array. The assertions cover:

    * a valid payload resolving to the nested PAYLOAD dict,
    * empty HTML returning `{}` with `fatal=False`, or the caller's `default`,
    * partially invalid payloads still resolving, with `None` in place of
      whatever could not be resolved,
    * invalid payloads returning the caller's `default`.
    """
    # `{}` is the `str.format` slot that receives the array contents
    HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>'
    VALID_DATA = '''
        ["ShallowReactive",1],
        {"data":2,"state":21,"once":25,"_errors":28,"_server_errors":30},
        ["ShallowReactive",3],
        {"$abcdef123456":4},
        {"podcast":5,"activeEpisodeData":7},
        {"podcast":6,"seasons":14},
        {"title":10,"id":11},
        ["Reactive",8],
        {"episode":9,"creators":18,"empty_list":20},
        {"title":12,"id":13,"refs":34,"empty_refs":35},
        "Series Title",
        "podcast-id-01",
        "Episode Title",
        "episode-id-99",
        [15,16,17],
        1,
        2,
        3,
        [19],
        "Podcast Creator",
        [],
        {"$ssite-config":22},
        {"env":23,"name":24,"map":26,"numbers":14},
        "production",
        "podcast-website",
        ["Set"],
        ["Reactive",27],
        ["Map"],
        ["ShallowReactive",29],
        {},
        ["NuxtError",31],
        {"status":32,"message":33},
        503,
        "Service Unavailable",
        [36,37],
        [38,39],
        ["Ref",40],
        ["ShallowRef",41],
        ["EmptyRef",42],
        ["EmptyShallowRef",43],
        "ref",
        "shallow_ref",
        "{\\"ref\\":1}",
        "{\\"shallow_ref\\":2}"
    '''
    # Expected result for VALID_DATA: wrapper entries ("ShallowReactive",
    # "Reactive", "NuxtError", "Ref", ...) are replaced by the value they
    # point at, "Set"/"Map" become lists, and the "Empty*Ref" entries yield
    # the JSON-decoded content of the string they reference.
    PAYLOAD = {
        'data': {
            '$abcdef123456': {
                'podcast': {
                    'podcast': {
                        'title': 'Series Title',
                        'id': 'podcast-id-01',
                    },
                    'seasons': [1, 2, 3],
                },
                'activeEpisodeData': {
                    'episode': {
                        'title': 'Episode Title',
                        'id': 'episode-id-99',
                        'refs': ['ref', 'shallow_ref'],
                        'empty_refs': [{'ref': 1}, {'shallow_ref': 2}],
                    },
                    'creators': ['Podcast Creator'],
                    'empty_list': [],
                },
            },
        },
        'state': {
            '$ssite-config': {
                'env': 'production',
                'name': 'podcast-website',
                'map': [],
                'numbers': [1, 2, 3],
            },
        },
        'once': [],
        '_errors': {},
        '_server_errors': {
            'status': 503,
            'message': 'Service Unavailable',
        },
    }
    # (payload, expected) pairs: resolvable overall, but with pieces that
    # cannot be resolved (out-of-range indices, undecodable ref content)
    PARTIALLY_INVALID = [(
        '''
        {"data":1},
        {"invalid_raw_list":2},
        [15,16,17]
        ''',
        {'data': {'invalid_raw_list': [None, None, None]}},
    ), (
        '''
        {"data":1},
        ["EmptyRef",2],
        "not valid JSON"
        ''',
        {'data': None},
    ), (
        '''
        {"data":1},
        ["EmptyShallowRef",2],
        "not valid JSON"
        ''',
        {'data': None},
    )]
    # Payloads for which the caller-supplied default must be returned
    INVALID = [
        '''
            []
        ''',
        '''
            ["unsupported",1],
            {"data":2},
            {}
        ''',
    ]
    # Unique sentinel so that identity (assertIs) proves the default was returned
    DEFAULT = object()

    self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD)
    self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
    self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT)

    for data, expected in PARTIALLY_INVALID:
        self.assertEqual(
            self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected)

    for data in INVALID:
        self.assertIs(
            self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
|
||||
|
||||
|
||||
class TestInfoExtractorNetwork(unittest.TestCase):
    """Extractor tests that fetch their input from a throwaway local HTTP server."""

    def setUp(self, /):
        """Start the local test server and build an extractor whose warnings are recorded."""
        # Port 0 lets the OS choose a free port; the real one is read back below
        self.httpd = http.server.HTTPServer(
            ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
        self.port = http_server_port(self.httpd)

        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

        # Flipped to True as soon as the extractor reports any warning
        self.called = False

        def require_warning(*args, **kwargs):
            self.called = True

        self.ydl = FakeYDL()
        self.ydl.report_warning = require_warning
        self.ie = DummyIE(self.ydl)

    def tearDown(self, /):
        """Close the downloader and stop the server started in `setUp`."""
        self.ydl.close()
        self.httpd.shutdown()
        self.httpd.server_close()
        # Bounded wait so a stuck server thread cannot hang the test run
        self.server_thread.join(1)

    def test_extract_m3u8_formats(self):
        """A real m3u8 playlist yields formats and subtitles without any warning."""
        formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
            f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False)
        self.assertFalse(self.called)
        self.assertTrue(formats)
        self.assertTrue(subtitles)

    def test_extract_m3u8_formats_warning(self):
        """A binary (non-playlist) response triggers a warning and yields nothing."""
        formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
            f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False)
        self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file')
        self.assertFalse(formats)
        self.assertFalse(subtitles)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -21,9 +21,6 @@ def test_compat_passthrough(self):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
_ = compat.compat_basestring
|
||||
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
_ = compat.WINDOWS_VT_MODE
|
||||
|
||||
self.assertEqual(urllib.request.getproxies, getproxies)
|
||||
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
|
@ -58,6 +58,14 @@ def test_get_desktop_environment(self):
|
||||
({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
|
||||
({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
|
||||
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME),
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME),
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
|
||||
|
||||
({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'my_custom_de', 'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
|
||||
|
||||
({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
|
||||
({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3),
|
||||
({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
|
||||
|
235
test/test_devalue.py
Normal file
235
test/test_devalue.py
Normal file
@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import datetime as dt
|
||||
import json
|
||||
import math
|
||||
import re
|
||||
import unittest
|
||||
|
||||
from yt_dlp.utils.jslib import devalue
|
||||
|
||||
|
||||
# Cases checked with `assertEqual`: `devalue.parse(unparsed)` must equal `parsed`.
# A list input is the flattened value table (element 0 is the root and integers
# nested inside containers are indices into the table); a bare negative integer
# is one of the special sentinel values named by the case.
TEST_CASES_EQUALS = [{
    'name': 'int',
    'unparsed': [-42],
    'parsed': -42,
}, {
    'name': 'str',
    'unparsed': ['woo!!!'],
    'parsed': 'woo!!!',
}, {
    'name': 'Number',
    'unparsed': [['Object', 42]],
    'parsed': 42,
}, {
    'name': 'String',
    'unparsed': [['Object', 'yar']],
    'parsed': 'yar',
}, {
    'name': 'Infinity',
    'unparsed': -4,
    'parsed': math.inf,
}, {
    'name': 'negative Infinity',
    'unparsed': -5,
    'parsed': -math.inf,
}, {
    # NOTE: -0.0 == 0.0 in Python, so an equality check cannot verify the sign
    'name': 'negative zero',
    'unparsed': -6,
    'parsed': -0.0,
}, {
    'name': 'RegExp',
    'unparsed': [['RegExp', 'regexp', 'gim']],  # XXX: flags are ignored
    'parsed': re.compile('regexp'),
}, {
    'name': 'Date',
    'unparsed': [['Date', '2001-09-09T01:46:40.000Z']],
    'parsed': dt.datetime.fromtimestamp(1e9, tz=dt.timezone.utc),
}, {
    'name': 'Array',
    'unparsed': [[1, 2, 3], 'a', 'b', 'c'],
    'parsed': ['a', 'b', 'c'],
}, {
    'name': 'Array (empty)',
    'unparsed': [[]],
    'parsed': [],
}, {
    'name': 'Array (sparse)',
    'unparsed': [[-2, 1, -2], 'b'],
    'parsed': [None, 'b', None],
}, {
    'name': 'Object',
    'unparsed': [{'foo': 1, 'x-y': 2}, 'bar', 'z'],
    'parsed': {'foo': 'bar', 'x-y': 'z'},
}, {
    'name': 'Set',
    'unparsed': [['Set', 1, 2, 3], 1, 2, 3],
    'parsed': [1, 2, 3],
}, {
    'name': 'Map',
    'unparsed': [['Map', 1, 2], 'a', 'b'],
    'parsed': [['a', 'b']],
}, {
    'name': 'BigInt',
    'unparsed': [['BigInt', '1']],
    'parsed': 1,
}, {
    'name': 'Uint8Array',
    'unparsed': [['Uint8Array', 'AQID']],
    'parsed': [1, 2, 3],
}, {
    'name': 'ArrayBuffer',
    'unparsed': [['ArrayBuffer', 'AQID']],
    'parsed': [1, 2, 3],
}, {
    'name': 'str (repetition)',
    'unparsed': [[1, 1], 'a string'],
    'parsed': ['a string', 'a string'],
}, {
    'name': 'None (repetition)',
    'unparsed': [[1, 1], None],
    'parsed': [None, None],
}, {
    'name': 'dict (repetition)',
    'unparsed': [[1, 1], {}],
    'parsed': [{}, {}],
}, {
    'name': 'Object without prototype',
    'unparsed': [['null']],
    'parsed': {},
}, {
    'name': 'cross-realm POJO',
    'unparsed': [{}],
    'parsed': {},
}]
|
||||
|
||||
# Cases checked with `assertIs`: the parsed result must be the very same object
# as `parsed`. These are singletons (True/False/None) plus `math.nan`, which
# can never be matched by equality because NaN != NaN.
TEST_CASES_IS = [{
    'name': 'bool',
    'unparsed': [True],
    'parsed': True,
}, {
    'name': 'Boolean',
    'unparsed': [['Object', False]],
    'parsed': False,
}, {
    'name': 'undefined',
    'unparsed': -1,
    'parsed': None,
}, {
    'name': 'null',
    'unparsed': [None],
    'parsed': None,
}, {
    'name': 'NaN',
    'unparsed': -3,
    'parsed': math.nan,
}]
|
||||
|
||||
# Inputs that `devalue.parse` must reject: `error` is the exception type to be
# raised and `pattern` a regular expression its message has to match.
TEST_CASES_INVALID = [{
    'name': 'empty string',
    'unparsed': '',
    'error': ValueError,
    'pattern': r'expected int or list as input',
}, {
    'name': 'hole',
    'unparsed': -2,
    'error': ValueError,
    'pattern': r'invalid integer input',
}, {
    'name': 'string',
    'unparsed': 'hello',
    'error': ValueError,
    'pattern': r'expected int or list as input',
}, {
    'name': 'number',
    'unparsed': 42,
    'error': ValueError,
    'pattern': r'invalid integer input',
}, {
    'name': 'boolean',
    'unparsed': True,
    'error': ValueError,
    'pattern': r'expected int or list as input',
}, {
    'name': 'null',
    'unparsed': None,
    'error': ValueError,
    'pattern': r'expected int or list as input',
}, {
    'name': 'object',
    'unparsed': {},
    'error': ValueError,
    'pattern': r'expected int or list as input',
}, {
    'name': 'empty array',
    'unparsed': [],
    'error': ValueError,
    'pattern': r'expected a non-empty list as input',
}, {
    # -7 would be a valid Python index into this 8-element table; the parser
    # is required to reject it instead of wrapping around
    'name': 'Python negative indexing',
    'unparsed': [[1, 2, 3, 4, 5, 6, 7, -7], 1, 2, 3, 4, 5, 6, 7],
    'error': IndexError,
    'pattern': r'invalid index: -7',
}]
|
||||
|
||||
|
||||
class TestDevalue(unittest.TestCase):
    """Tests for `devalue.parse`, driven mostly by the module-level case tables."""

    def test_devalue_parse_equals(self):
        """Every TEST_CASES_EQUALS input parses to a value equal to the expected one."""
        for tc in TEST_CASES_EQUALS:
            self.assertEqual(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])

    def test_devalue_parse_is(self):
        """Every TEST_CASES_IS input parses to the identical expected object."""
        for tc in TEST_CASES_IS:
            self.assertIs(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])

    def test_devalue_parse_invalid(self):
        """Every TEST_CASES_INVALID input raises the listed error with a matching message."""
        for tc in TEST_CASES_INVALID:
            with self.assertRaisesRegex(tc['error'], tc['pattern'], msg=tc['name']):
                devalue.parse(tc['unparsed'])

    def test_devalue_parse_cyclical(self):
        """Self-referencing inputs resolve to containers that contain themselves."""
        # Index 0 is the root, so a 0 inside the root refers back to the root
        name = 'Map (cyclical)'
        result = devalue.parse([['Map', 1, 0], 'self'])
        self.assertEqual(result[0][0], 'self', name)
        self.assertIs(result, result[0][1], name)

        name = 'Set (cyclical)'
        result = devalue.parse([['Set', 0, 1], 42])
        self.assertEqual(result[1], 42, name)
        self.assertIs(result, result[0], name)

        result = devalue.parse([[0]])
        self.assertIs(result, result[0], 'Array (cyclical)')

        name = 'Object (cyclical)'
        result = devalue.parse([{'self': 0}])
        self.assertIs(result, result['self'], name)

        name = 'Object with null prototype (cyclical)'
        result = devalue.parse([['null', 'self', 0]])
        self.assertIs(result, result['self'], name)

        # Two objects referring to each other rather than to themselves
        name = 'Objects (cyclical)'
        result = devalue.parse([[1, 2], {'second': 2}, {'first': 1}])
        self.assertIs(result[0], result[1]['first'], name)
        self.assertIs(result[1], result[0]['second'], name)

    def test_devalue_parse_revivers(self):
        """Custom `revivers` are applied to entries tagged with their name."""
        self.assertEqual(
            devalue.parse([['indirect', 1], {'a': 2}, 'b'], revivers={'indirect': lambda x: x}),
            {'a': 'b'}, 'revivers (indirect)')

        self.assertEqual(
            devalue.parse([['parse', 1], '{"a":0}'], revivers={'parse': lambda x: json.loads(x)}),
            {'a': 0}, 'revivers (parse)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -14,6 +14,7 @@
|
||||
|
||||
from test.helper import (
|
||||
assertGreaterEqual,
|
||||
assertLessEqual,
|
||||
expect_info_dict,
|
||||
expect_warnings,
|
||||
get_params,
|
||||
@ -65,10 +66,6 @@ def _file_md5(fn):
|
||||
|
||||
@is_download_test
|
||||
class TestDownload(unittest.TestCase):
|
||||
# Parallel testing in nosetests. See
|
||||
# http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
|
||||
_multiprocess_shared_ = True
|
||||
|
||||
maxDiff = None
|
||||
|
||||
COMPLETED_TESTS = {}
|
||||
@ -121,10 +118,13 @@ def print_skipping(reason):
|
||||
params = get_params(test_case.get('params', {}))
|
||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||
if is_playlist and 'playlist' not in test_case:
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
params.setdefault('playlistend', test_case.get(
|
||||
'playlist_mincount', test_case.get('playlist_count', -2) + 1))
|
||||
params.setdefault('playlistend', max(
|
||||
test_case.get('playlist_mincount', -1),
|
||||
test_case.get('playlist_count', -2) + 1,
|
||||
test_case.get('playlist_maxcount', -2) + 1))
|
||||
params.setdefault('skip_download', True)
|
||||
if 'playlist_duration_sum' not in test_case:
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
|
||||
ydl = YoutubeDL(params, auto_init=False)
|
||||
ydl.add_default_info_extractors()
|
||||
@ -159,6 +159,7 @@ def try_rm_tcs_files(tcs=None):
|
||||
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
|
||||
try_rm_tcs_files()
|
||||
try:
|
||||
test_url = test_case['url']
|
||||
try_num = 1
|
||||
while True:
|
||||
try:
|
||||
@ -166,7 +167,7 @@ def try_rm_tcs_files(tcs=None):
|
||||
# for outside error handling, and returns the exit code
|
||||
# instead of the result dict.
|
||||
res_dict = ydl.extract_info(
|
||||
test_case['url'],
|
||||
test_url,
|
||||
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
@ -194,23 +195,23 @@ def try_rm_tcs_files(tcs=None):
|
||||
self.assertTrue('entries' in res_dict)
|
||||
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
||||
|
||||
num_entries = len(res_dict.get('entries', []))
|
||||
if 'playlist_mincount' in test_case:
|
||||
mincount = test_case['playlist_mincount']
|
||||
assertGreaterEqual(
|
||||
self,
|
||||
len(res_dict['entries']),
|
||||
test_case['playlist_mincount'],
|
||||
'Expected at least %d in playlist %s, but got only %d' % (
|
||||
test_case['playlist_mincount'], test_case['url'],
|
||||
len(res_dict['entries'])))
|
||||
self, num_entries, mincount,
|
||||
f'Expected at least {mincount} entries in playlist {test_url}, but got only {num_entries}')
|
||||
if 'playlist_count' in test_case:
|
||||
count = test_case['playlist_count']
|
||||
got = num_entries if num_entries <= count else 'more'
|
||||
self.assertEqual(
|
||||
len(res_dict['entries']),
|
||||
test_case['playlist_count'],
|
||||
'Expected %d entries in playlist %s, but got %d.' % (
|
||||
test_case['playlist_count'],
|
||||
test_case['url'],
|
||||
len(res_dict['entries']),
|
||||
))
|
||||
num_entries, count,
|
||||
f'Expected exactly {count} entries in playlist {test_url}, but got {got}')
|
||||
if 'playlist_maxcount' in test_case:
|
||||
maxcount = test_case['playlist_maxcount']
|
||||
assertLessEqual(
|
||||
self, num_entries, maxcount,
|
||||
f'Expected at most {maxcount} entries in playlist {test_url}, but got more')
|
||||
if 'playlist_duration_sum' in test_case:
|
||||
got_duration = sum(e['duration'] for e in res_dict['entries'])
|
||||
self.assertEqual(
|
||||
|
@ -478,6 +478,69 @@ def test_extract_function_with_global_stack(self):
|
||||
func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000})
|
||||
self.assertEqual(func([1]), 1111)
|
||||
|
||||
def test_extract_object(self):
    """Check that extract_object('xy') yields an object exposing the key 'z'.

    The script also mentions `a.xy` and `zxy`, and declares `xy` before
    assigning it the object literal that defines `z`.
    """
    source = 'var a={};a.xy={};var xy;var zxy={};xy={z:function(){return "abc"}};'
    extracted = JSInterpreter(source).extract_object('xy', None)
    self.assertTrue('z' in extracted)
|
||||
|
||||
def test_increment_decrement(self):
    """Run the `++` / `--` cases: prefix, postfix, comma chains, and string literals.

    The last two cases put `--` inside a string literal; the expected value
    is the literal itself, unchanged.
    """
    cases = (
        ('function f() { var x = 1; return ++x; }', 2),
        ('function f() { var x = 1; return x++; }', 1),
        ('function f() { var x = 1; x--; return x }', 0),
        ('function f() { var y; var x = 1; x++, --x, x--, x--, y="z", "abc", x++; return --x }', -1),
        ('function f() { var a = "test--"; return a; }', 'test--'),
        ('function f() { var b = 1; var a = "b--"; return a; }', 'b--'),
    )
    # Same order as before; the first failing case aborts the test.
    for source, expected in cases:
        self._test(source, expected)
|
||||
|
||||
def test_nested_function_scoping(self):
    """Check that `var` declarations in an inner function do not leak outward.

    Three scripts are run in order: a shadowed scalar, shadowed loop
    variables in a `for`/`switch`, and a mix of shadowed locals (P, Q)
    with an outer variable (z) that the inner function assigns without
    redeclaring.
    """
    shadowed_scalar = R'''
function f() {
var g = function() {
var P = 2;
return P;
};
var P = 1;
g();
return P;
}
'''
    shadowed_loop_vars = R'''
function f() {
var x = function() {
for (var w = 1, M = []; w < 2; w++) switch (w) {
case 1:
M.push("a");
case 2:
M.push("b");
}
return M
};
var w = "c";
var M = "d";
var y = x();
y.push(w);
y.push(M);
return y;
}
'''
    shadowed_and_shared = R'''
function f() {
var P, Q;
var z = 100;
var g = function() {
var P, Q; P = 2; Q = 15;
z = 0;
return P+Q;
};
P = 1; Q = 10;
var x = g(), y = 3;
return P+Q+x+y+z;
}
'''
    cases = (
        (shadowed_scalar, 1),
        (shadowed_loop_vars, ['a', 'b', 'c', 'd']),
        (shadowed_and_shared, 31),
    )
    for source, expected in cases:
        self._test(source, expected)
|
||||
|
||||
def test_undefined_varnames(self):
    """Check that reading an undeclared name gives JS_Undefined and is recorded.

    `a` is declared but never assigned; `b` is never declared. Both are
    expected to evaluate to JS_Undefined, and only `b` is expected in
    `_undefined_varnames` afterwards.
    """
    interpreter = JSInterpreter('function f(){ var a; return [a, b]; }')
    expected_result = [JS_Undefined, JS_Undefined]
    self._test(interpreter, expected_result)
    self.assertEqual(interpreter._undefined_varnames, {'b'})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -22,7 +22,6 @@
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import warnings
|
||||
import zlib
|
||||
@ -223,10 +222,7 @@ def do_GET(self):
|
||||
if encoding == 'br' and brotli:
|
||||
payload = brotli.compress(payload)
|
||||
elif encoding == 'gzip':
|
||||
buf = io.BytesIO()
|
||||
with gzip.GzipFile(fileobj=buf, mode='wb') as f:
|
||||
f.write(payload)
|
||||
payload = buf.getvalue()
|
||||
payload = gzip.compress(payload, mtime=0)
|
||||
elif encoding == 'deflate':
|
||||
payload = zlib.compress(payload)
|
||||
elif encoding == 'unsupported':
|
||||
@ -729,6 +725,17 @@ def test_keep_header_casing(self, handler):
|
||||
|
||||
assert 'X-test-heaDer: test' in res
|
||||
|
||||
def test_partial_read_then_full_read(self, handler):
    """Check that a sized read, a zero-length read and a full read compose correctly.

    For each content encoding, the first six bytes are read, then zero
    bytes, then the remainder; together they must reproduce the body.
    """
    endpoint = f'http://127.0.0.1:{self.http_port}/content-encoding'
    with handler() as rh:
        for encoding in ('', 'gzip', 'deflate'):
            request = Request(endpoint, headers={'ytdl-encoding': encoding})
            res = validate_and_send(rh, request)
            assert res.headers.get('Content-Encoding') == encoding
            assert res.read(6) == b'<html>'
            # A zero-length read must return nothing and consume nothing.
            assert res.read(0) == b''
            assert res.read() == b'<video src="/vid.mp4" /></html>'
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
class TestClientCertificate:
|
||||
|
@ -8,6 +8,8 @@
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import subprocess
|
||||
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.utils import shell_quote
|
||||
from yt_dlp.postprocessor import (
|
||||
@ -47,7 +49,18 @@ def test_escaping(self):
|
||||
print('Skipping: ffmpeg not found')
|
||||
return
|
||||
|
||||
file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}'
|
||||
test_data_dir = 'test/testdata/thumbnails'
|
||||
generated_file = f'{test_data_dir}/empty.webp'
|
||||
|
||||
subprocess.check_call([
|
||||
pp.executable, '-y', '-f', 'lavfi', '-i', 'color=c=black:s=320x320',
|
||||
'-c:v', 'libwebp', '-pix_fmt', 'yuv420p', '-vframes', '1', generated_file,
|
||||
], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
|
||||
file = test_data_dir + '/foo %d bar/foo_%d.{}'
|
||||
initial_file = file.format('webp')
|
||||
os.replace(generated_file, initial_file)
|
||||
|
||||
tests = (('webp', 'png'), ('png', 'jpg'))
|
||||
|
||||
for inp, out in tests:
|
||||
@ -55,11 +68,13 @@ def test_escaping(self):
|
||||
if os.path.exists(out_file):
|
||||
os.remove(out_file)
|
||||
pp.convert_thumbnail(file.format(inp), out)
|
||||
assert os.path.exists(out_file)
|
||||
self.assertTrue(os.path.exists(out_file))
|
||||
|
||||
for _, out in tests:
|
||||
os.remove(file.format(out))
|
||||
|
||||
os.remove(initial_file)
|
||||
|
||||
|
||||
class TestExec(unittest.TestCase):
|
||||
def test_parse_cmd(self):
|
||||
@ -610,3 +625,7 @@ def test_quote_for_concat_QuotesAtEnd(self):
|
||||
self.assertEqual(
|
||||
r"'special '\'' characters '\'' galore'\'\'\'",
|
||||
self._pp._quote_for_ffmpeg("special ' characters ' galore'''"))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -11,10 +11,11 @@ class TestGetWebPoContentBinding:
|
||||
|
||||
@pytest.mark.parametrize('client_name, context, is_authenticated, expected', [
|
||||
*[(client, context, is_authenticated, expected) for client in [
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY']
|
||||
for context, is_authenticated, expected in [
|
||||
(PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
|
||||
(PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
|
||||
(PoTokenContext.SUBS, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
|
||||
(PoTokenContext.GVS, True, ('example-data-sync-id', ContentBindingType.DATASYNC_ID)),
|
||||
]],
|
||||
('WEB_REMIX', PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
|
||||
|
@ -49,7 +49,7 @@ def test_not_supports(self, ie, logger, pot_request, client_name, context, is_au
|
||||
|
||||
@pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [
|
||||
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY']
|
||||
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
||||
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
|
||||
|
@ -416,18 +416,8 @@ def test_traversal_unbranching(self):
|
||||
'`any` should allow further branching'
|
||||
|
||||
def test_traversal_morsel(self):
|
||||
values = {
|
||||
'expires': 'a',
|
||||
'path': 'b',
|
||||
'comment': 'c',
|
||||
'domain': 'd',
|
||||
'max-age': 'e',
|
||||
'secure': 'f',
|
||||
'httponly': 'g',
|
||||
'version': 'h',
|
||||
'samesite': 'i',
|
||||
}
|
||||
morsel = http.cookies.Morsel()
|
||||
values = dict(zip(morsel, 'abcdefghijklmnop'))
|
||||
morsel.set('item_key', 'item_value', 'coded_value')
|
||||
morsel.update(values)
|
||||
values['key'] = 'item_key'
|
||||
|
@ -1373,6 +1373,7 @@ def test_parse_resolution(self):
|
||||
self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
|
||||
self.assertEqual(parse_resolution('ep1x2'), {})
|
||||
self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
|
||||
self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920})
|
||||
|
||||
def test_parse_bitrate(self):
|
||||
self.assertEqual(parse_bitrate(None), None)
|
||||
|
@ -133,6 +133,11 @@
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e12fbea4/player_ias.vflset/en_US/base.js',
|
||||
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
|
||||
'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a',
|
||||
),
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
@ -316,6 +321,62 @@
|
||||
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
|
||||
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js',
|
||||
'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js',
|
||||
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js',
|
||||
'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
BIN
test/testdata/thumbnails/foo %d bar/foo_%d.webp
vendored
BIN
test/testdata/thumbnails/foo %d bar/foo_%d.webp
vendored
Binary file not shown.
Before Width: | Height: | Size: 3.8 KiB |
0
test/testdata/thumbnails/foo %d bar/placeholder
vendored
Normal file
0
test/testdata/thumbnails/foo %d bar/placeholder
vendored
Normal file
@ -36,6 +36,7 @@
|
||||
from .globals import (
|
||||
IN_CLI,
|
||||
LAZY_EXTRACTORS,
|
||||
WINDOWS_VT_MODE,
|
||||
plugin_ies,
|
||||
plugin_ies_overrides,
|
||||
plugin_pps,
|
||||
@ -52,7 +53,7 @@
|
||||
SSLError,
|
||||
network_exceptions,
|
||||
)
|
||||
from .networking.impersonate import ImpersonateRequestHandler
|
||||
from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget
|
||||
from .plugins import directories as plugin_directories, load_all_plugins
|
||||
from .postprocessor import (
|
||||
EmbedThumbnailPP,
|
||||
@ -482,7 +483,8 @@ class YoutubeDL:
|
||||
The following options do not work when used through the API:
|
||||
filename, abort-on-error, multistreams, no-live-chat,
|
||||
format-sort, no-clean-infojson, no-playlist-metafiles,
|
||||
no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
|
||||
no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort,
|
||||
mtime-by-default.
|
||||
Refer __init__.py for their implementation
|
||||
progress_template: Dictionary of templates for progress outputs.
|
||||
Allowed keys are 'download', 'postprocess',
|
||||
@ -490,7 +492,7 @@ class YoutubeDL:
|
||||
The template is mapped on a dictionary with keys 'progress' and 'info'
|
||||
retry_sleep_functions: Dictionary of functions that takes the number of attempts
|
||||
as argument and returns the time to sleep in seconds.
|
||||
Allowed keys are 'http', 'fragment', 'file_access'
|
||||
Allowed keys are 'http', 'fragment', 'file_access', 'extractor'
|
||||
download_ranges: A callback function that gets called for every video with
|
||||
the signature (info_dict, ydl) -> Iterable[Section].
|
||||
Only the returned sections will be downloaded.
|
||||
@ -528,6 +530,7 @@ class YoutubeDL:
|
||||
discontinuities such as ad breaks (default: False)
|
||||
extractor_args: A dictionary of arguments to be passed to the extractors.
|
||||
See "EXTRACTOR ARGUMENTS" for details.
|
||||
Argument values must always be a list of string(s).
|
||||
E.g. {'youtube': {'skip': ['dash', 'hls']}}
|
||||
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
|
||||
|
||||
@ -2194,7 +2197,7 @@ def _filter(f):
|
||||
return op(actual_value, comparison_value)
|
||||
return _filter
|
||||
|
||||
def _check_formats(self, formats):
|
||||
def _check_formats(self, formats, warning=True):
|
||||
for f in formats:
|
||||
working = f.get('__working')
|
||||
if working is not None:
|
||||
@ -2207,6 +2210,9 @@ def _check_formats(self, formats):
|
||||
continue
|
||||
temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
|
||||
temp_file.close()
|
||||
# If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code.
|
||||
# Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750
|
||||
original_retcode = self._download_retcode
|
||||
try:
|
||||
success, _ = self.dl(temp_file.name, f, test=True)
|
||||
except (DownloadError, OSError, ValueError, *network_exceptions):
|
||||
@ -2217,11 +2223,18 @@ def _check_formats(self, formats):
|
||||
os.remove(temp_file.name)
|
||||
except OSError:
|
||||
self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
|
||||
# Restore the actual return code
|
||||
self._download_retcode = original_retcode
|
||||
f['__working'] = success
|
||||
if success:
|
||||
f.pop('__needs_testing', None)
|
||||
yield f
|
||||
else:
|
||||
self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
|
||||
msg = f'Unable to download format {f["format_id"]}. Skipping...'
|
||||
if warning:
|
||||
self.report_warning(msg)
|
||||
else:
|
||||
self.to_screen(f'[info] {msg}')
|
||||
|
||||
def _select_formats(self, formats, selector):
|
||||
return list(selector({
|
||||
@ -2947,7 +2960,7 @@ def is_wellformed(f):
|
||||
)
|
||||
|
||||
if self.params.get('check_formats') is True:
|
||||
formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
|
||||
formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True)
|
||||
|
||||
if not formats or formats[0] is not info_dict:
|
||||
# only set the 'formats' fields if the original info_dict list them
|
||||
@ -3220,6 +3233,7 @@ def dl(self, name, info, subtitle=False, test=False):
|
||||
}
|
||||
else:
|
||||
params = self.params
|
||||
|
||||
fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
|
||||
if not test:
|
||||
for ph in self._progress_hooks:
|
||||
@ -3695,6 +3709,8 @@ def filter_fn(obj):
|
||||
return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
|
||||
elif isinstance(obj, (list, tuple, set, LazyList)):
|
||||
return list(map(filter_fn, obj))
|
||||
elif isinstance(obj, ImpersonateTarget):
|
||||
return str(obj)
|
||||
elif obj is None or isinstance(obj, (str, int, float, bool)):
|
||||
return obj
|
||||
else:
|
||||
@ -3963,6 +3979,7 @@ def simplified_codec(f, field):
|
||||
self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
|
||||
(self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
|
||||
else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
|
||||
self._format_out('Untested', self.Styles.WARNING) if f.get('__needs_testing') else None,
|
||||
format_field(f, 'format_note'),
|
||||
format_field(f, 'container', ignore=(None, f.get('ext'))),
|
||||
delim=', '), delim=' '),
|
||||
@ -4024,8 +4041,7 @@ def get_encoding(stream):
|
||||
if os.environ.get('TERM', '').lower() == 'dumb':
|
||||
additional_info.append('dumb')
|
||||
if not supports_terminal_sequences(stream):
|
||||
from .utils import WINDOWS_VT_MODE # Must be imported locally
|
||||
additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
|
||||
additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI')
|
||||
if additional_info:
|
||||
ret = f'{ret} ({",".join(additional_info)})'
|
||||
return ret
|
||||
@ -4171,6 +4187,31 @@ def _impersonate_target_available(self, target):
|
||||
for rh in self._request_director.handlers.values()
|
||||
if isinstance(rh, ImpersonateRequestHandler))
|
||||
|
||||
def _parse_impersonate_targets(self, impersonate):
|
||||
if impersonate in (True, ''):
|
||||
impersonate = ImpersonateTarget()
|
||||
|
||||
requested_targets = [
|
||||
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
|
||||
for t in variadic(impersonate)
|
||||
] if impersonate else []
|
||||
|
||||
available_target = next(filter(self._impersonate_target_available, requested_targets), None)
|
||||
|
||||
return available_target, requested_targets
|
||||
|
||||
@staticmethod
|
||||
def _unavailable_targets_message(requested_targets, note=None, is_error=False):
|
||||
note = note or 'The extractor specified to use impersonation for this download'
|
||||
specific_targets = ', '.join(filter(None, map(str, requested_targets)))
|
||||
message = (
|
||||
'no impersonate target is available' if not specific_targets
|
||||
else f'none of these impersonate targets are available: {specific_targets}')
|
||||
return (
|
||||
f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}'
|
||||
f' https://github.com/yt-dlp/yt-dlp#impersonation '
|
||||
f'for information on installing the required dependencies')
|
||||
|
||||
def urlopen(self, req):
|
||||
""" Start an HTTP download """
|
||||
if isinstance(req, str):
|
||||
|
@ -159,6 +159,12 @@ def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
|
||||
elif 'prefer-vp9-sort' in opts.compat_opts:
|
||||
opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
|
||||
|
||||
if 'mtime-by-default' in opts.compat_opts:
|
||||
if opts.updatetime is None:
|
||||
opts.updatetime = True
|
||||
else:
|
||||
_unused_compat_opt('mtime-by-default')
|
||||
|
||||
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
|
||||
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
|
||||
if _video_multistreams_set is False and _audio_multistreams_set is False:
|
||||
|
@ -435,7 +435,7 @@ def sub_bytes_inv(data):
|
||||
|
||||
|
||||
def rotate(data):
    """Rotate a sequence left by one position.

    The first element moves to the end. The input is not modified.

    @param data  Non-empty sequence supporting slicing and indexing
                 (list, tuple, bytes, ...).
    @returns     A new list holding data[1:] followed by data[0].
    @raises IndexError  If data is empty.
    """
    # Unpacking works for any sequence type, whereas `data[1:] + [data[0]]`
    # only works when data is a list.
    return [*data[1:], data[0]]
|
||||
|
||||
|
||||
def key_schedule_core(data, rcon_iteration):
|
||||
|
@ -37,7 +37,7 @@
|
||||
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
|
||||
from ..networking.exceptions import HTTPError as compat_HTTPError
|
||||
|
||||
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
|
||||
passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',))
|
||||
|
||||
|
||||
# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
|
||||
|
@ -764,11 +764,11 @@ def _get_linux_desktop_environment(env, logger):
|
||||
GetDesktopEnvironment
|
||||
"""
|
||||
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
|
||||
desktop_session = env.get('DESKTOP_SESSION', None)
|
||||
desktop_session = env.get('DESKTOP_SESSION', '')
|
||||
if xdg_current_desktop is not None:
|
||||
for part in map(str.strip, xdg_current_desktop.split(':')):
|
||||
if part == 'Unity':
|
||||
if desktop_session is not None and 'gnome-fallback' in desktop_session:
|
||||
if 'gnome-fallback' in desktop_session:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.UNITY
|
||||
@ -797,35 +797,34 @@ def _get_linux_desktop_environment(env, logger):
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
elif part == 'LXQt':
|
||||
return _LinuxDesktopEnvironment.LXQT
|
||||
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
logger.debug(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
|
||||
|
||||
elif desktop_session is not None:
|
||||
if desktop_session == 'deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif desktop_session in ('mate', 'gnome'):
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif desktop_session in ('kde4', 'kde-plasma'):
|
||||
if desktop_session == 'deepin':
|
||||
return _LinuxDesktopEnvironment.DEEPIN
|
||||
elif desktop_session in ('mate', 'gnome'):
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif desktop_session in ('kde4', 'kde-plasma'):
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif desktop_session == 'kde':
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
elif desktop_session == 'kde':
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif desktop_session == 'ukui':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
else:
|
||||
logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
|
||||
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
|
||||
return _LinuxDesktopEnvironment.XFCE
|
||||
elif desktop_session == 'ukui':
|
||||
return _LinuxDesktopEnvironment.UKUI
|
||||
else:
|
||||
if 'GNOME_DESKTOP_SESSION_ID' in env:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif 'KDE_FULL_SESSION' in env:
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
logger.debug(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
|
||||
|
||||
if 'GNOME_DESKTOP_SESSION_ID' in env:
|
||||
return _LinuxDesktopEnvironment.GNOME
|
||||
elif 'KDE_FULL_SESSION' in env:
|
||||
if 'KDE_SESSION_VERSION' in env:
|
||||
return _LinuxDesktopEnvironment.KDE4
|
||||
else:
|
||||
return _LinuxDesktopEnvironment.KDE3
|
||||
|
||||
return _LinuxDesktopEnvironment.OTHER
|
||||
|
||||
|
||||
|
@ -99,7 +99,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
|
||||
if external_downloader is None:
|
||||
if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
|
||||
return FFmpegFD
|
||||
elif external_downloader.lower() != 'native':
|
||||
elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None:
|
||||
ed = get_external_downloader(external_downloader)
|
||||
if ed.can_download(info_dict, external_downloader):
|
||||
return ed
|
||||
|
@ -495,3 +495,14 @@ def _debug_cmd(self, args, exe=None):
|
||||
exe = os.path.basename(args[0])
|
||||
|
||||
self.write_debug(f'{exe} command line: {shell_quote(args)}')
|
||||
|
||||
def _get_impersonate_target(self, info_dict):
|
||||
impersonate = info_dict.get('impersonate')
|
||||
if impersonate is None:
|
||||
return None
|
||||
available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate)
|
||||
if available_target:
|
||||
return available_target
|
||||
elif requested_targets:
|
||||
self.report_warning(self.ydl._unavailable_targets_message(requested_targets))
|
||||
return None
|
||||
|
@ -572,7 +572,21 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
if end_time:
|
||||
args += ['-t', str(end_time - start_time)]
|
||||
|
||||
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
|
||||
url = fmt['url']
|
||||
if self.params.get('enable_file_urls') and url.startswith('file:'):
|
||||
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
|
||||
# so only local segments can be read unless we also include 'http,https,tcp,tls'
|
||||
args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls']
|
||||
# ffmpeg incorrectly handles 'file:' URLs by only removing the
|
||||
# 'file:' prefix and treating the rest as if it's a normal filepath.
|
||||
# FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs:
|
||||
# - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:'
|
||||
# - On *nix, replace 'file://localhost/' with 'file:/'
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13781
|
||||
# https://trac.ffmpeg.org/ticket/2702
|
||||
url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
|
||||
|
||||
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
|
||||
|
||||
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
|
||||
args += ['-c', 'copy']
|
||||
|
@ -302,7 +302,7 @@ def _finish_frag_download(self, ctx, info_dict):
|
||||
elif to_file:
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
filetime = ctx.get('fragment_filetime')
|
||||
if self.params.get('updatetime', True) and filetime:
|
||||
if self.params.get('updatetime') and filetime:
|
||||
with contextlib.suppress(Exception):
|
||||
os.utime(ctx['filename'], (time.time(), filetime))
|
||||
|
||||
|
@ -94,12 +94,19 @@ def real_download(self, filename, info_dict):
|
||||
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
||||
if can_download:
|
||||
has_ffmpeg = FFmpegFD.available()
|
||||
no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
|
||||
if no_crypto and has_ffmpeg:
|
||||
can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
|
||||
elif no_crypto:
|
||||
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
|
||||
'Decryption will be performed natively, but will be extremely slow')
|
||||
if not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s:
|
||||
# Even if pycryptodomex isn't available, force HlsFD for m3u8s that won't work with ffmpeg
|
||||
ffmpeg_can_dl = not traverse_obj(info_dict, ((
|
||||
'extra_param_to_segment_url', 'extra_param_to_key_url',
|
||||
'hls_media_playlist_data', ('hls_aes', ('uri', 'key', 'iv')),
|
||||
), any))
|
||||
message = 'The stream has AES-128 encryption and {} available'.format(
|
||||
'neither ffmpeg nor pycryptodomex are' if ffmpeg_can_dl and not has_ffmpeg else
|
||||
'pycryptodomex is not')
|
||||
if has_ffmpeg and ffmpeg_can_dl:
|
||||
can_download = False
|
||||
else:
|
||||
message += '; decryption will be performed natively, but will be extremely slow'
|
||||
elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
|
||||
install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
|
||||
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
|
||||
|
@ -27,6 +27,10 @@ class HttpFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
url = info_dict['url']
|
||||
request_data = info_dict.get('request_data', None)
|
||||
request_extensions = {}
|
||||
impersonate_target = self._get_impersonate_target(info_dict)
|
||||
if impersonate_target is not None:
|
||||
request_extensions['impersonate'] = impersonate_target
|
||||
|
||||
class DownloadContext(dict):
|
||||
__getattr__ = dict.get
|
||||
@ -109,7 +113,7 @@ def establish_connection():
|
||||
if try_call(lambda: range_end >= ctx.content_len):
|
||||
range_end = ctx.content_len - 1
|
||||
|
||||
request = Request(url, request_data, headers)
|
||||
request = Request(url, request_data, headers, extensions=request_extensions)
|
||||
has_range = range_start is not None
|
||||
if has_range:
|
||||
request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
|
||||
@ -348,7 +352,7 @@ def retry(e):
|
||||
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
if self.params.get('updatetime'):
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
|
@ -5,47 +5,46 @@
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
from ..networking import Request
|
||||
from ..utils import DownloadError, str_or_none, try_get
|
||||
from ..networking.websocket import WebSocketResponse
|
||||
from ..utils import DownloadError, str_or_none, truncate_string
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NiconicoLiveFD(FileDownloader):
|
||||
""" Downloads niconico live without being stopped """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
ws_url = info_dict['url']
|
||||
ws_extractor = info_dict['ws']
|
||||
ws_origin_host = info_dict['origin']
|
||||
live_quality = info_dict.get('live_quality', 'high')
|
||||
live_latency = info_dict.get('live_latency', 'high')
|
||||
video_id = info_dict['id']
|
||||
opts = info_dict['downloader_options']
|
||||
quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url']
|
||||
dl = FFmpegFD(self.ydl, self.params or {})
|
||||
|
||||
new_info_dict = info_dict.copy()
|
||||
new_info_dict.update({
|
||||
'protocol': 'm3u8',
|
||||
})
|
||||
new_info_dict['protocol'] = 'm3u8'
|
||||
|
||||
def communicate_ws(reconnect):
|
||||
if reconnect:
|
||||
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
|
||||
# Support --load-info-json as if it is a reconnect attempt
|
||||
if reconnect or not isinstance(ws_extractor, WebSocketResponse):
|
||||
ws = self.ydl.urlopen(Request(
|
||||
ws_url, headers={'Origin': 'https://live.nicovideo.jp'}))
|
||||
if self.ydl.params.get('verbose', False):
|
||||
self.to_screen('[debug] Sending startWatching request')
|
||||
self.write_debug('Sending startWatching request')
|
||||
ws.send(json.dumps({
|
||||
'type': 'startWatching',
|
||||
'data': {
|
||||
'reconnect': True,
|
||||
'room': {
|
||||
'commentable': True,
|
||||
'protocol': 'webSocket',
|
||||
},
|
||||
'stream': {
|
||||
'quality': live_quality,
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': live_latency,
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
'latency': 'high',
|
||||
'protocol': 'hls',
|
||||
'quality': quality,
|
||||
},
|
||||
'room': {
|
||||
'protocol': 'webSocket',
|
||||
'commentable': True,
|
||||
},
|
||||
'reconnect': True,
|
||||
},
|
||||
'type': 'startWatching',
|
||||
}))
|
||||
else:
|
||||
ws = ws_extractor
|
||||
@ -58,7 +57,6 @@ def communicate_ws(reconnect):
|
||||
if not data or not isinstance(data, dict):
|
||||
continue
|
||||
if data.get('type') == 'ping':
|
||||
# pong back
|
||||
ws.send(r'{"type":"pong"}')
|
||||
ws.send(r'{"type":"keepSeat"}')
|
||||
elif data.get('type') == 'disconnect':
|
||||
@ -66,12 +64,10 @@ def communicate_ws(reconnect):
|
||||
return True
|
||||
elif data.get('type') == 'error':
|
||||
self.write_debug(data)
|
||||
message = try_get(data, lambda x: x['body']['code'], str) or recv
|
||||
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
|
||||
return DownloadError(message)
|
||||
elif self.ydl.params.get('verbose', False):
|
||||
if len(recv) > 100:
|
||||
recv = recv[:100] + '...'
|
||||
self.to_screen(f'[debug] Server said: {recv}')
|
||||
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
|
||||
|
||||
def ws_main():
|
||||
reconnect = False
|
||||
@ -81,7 +77,8 @@ def ws_main():
|
||||
if ret is True:
|
||||
return
|
||||
except BaseException as e:
|
||||
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
|
||||
self.to_screen(
|
||||
f'[niconico:live] {video_id}: Connection error occured, reconnecting after 10 seconds: {e}')
|
||||
time.sleep(10)
|
||||
continue
|
||||
finally:
|
||||
|
@ -201,7 +201,6 @@
|
||||
BanByeChannelIE,
|
||||
BanByeIE,
|
||||
)
|
||||
from .bandaichannel import BandaiChannelIE
|
||||
from .bandcamp import (
|
||||
BandcampAlbumIE,
|
||||
BandcampIE,
|
||||
@ -229,7 +228,6 @@
|
||||
from .beatport import BeatportIE
|
||||
from .beeg import BeegIE
|
||||
from .behindkink import BehindKinkIE
|
||||
from .bellmedia import BellMediaIE
|
||||
from .berufetv import BerufeTVIE
|
||||
from .bet import BetIE
|
||||
from .bfi import BFIPlayerIE
|
||||
@ -275,7 +273,10 @@
|
||||
BitChuteChannelIE,
|
||||
BitChuteIE,
|
||||
)
|
||||
from .blackboardcollaborate import BlackboardCollaborateIE
|
||||
from .blackboardcollaborate import (
|
||||
BlackboardCollaborateIE,
|
||||
BlackboardCollaborateLaunchIE,
|
||||
)
|
||||
from .bleacherreport import (
|
||||
BleacherReportCMSIE,
|
||||
BleacherReportIE,
|
||||
@ -300,7 +301,6 @@
|
||||
BrainPOPIlIE,
|
||||
BrainPOPJrIE,
|
||||
)
|
||||
from .bravotv import BravoTVIE
|
||||
from .breitbart import BreitBartIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
@ -310,6 +310,7 @@
|
||||
BrilliantpalaClassesIE,
|
||||
BrilliantpalaElearnIE,
|
||||
)
|
||||
from .btvplus import BTVPlusIE
|
||||
from .bundesliga import BundesligaIE
|
||||
from .bundestag import BundestagIE
|
||||
from .bunnycdn import BunnyCdnIE
|
||||
@ -447,7 +448,6 @@
|
||||
CSpanIE,
|
||||
)
|
||||
from .ctsnews import CtsNewsIE
|
||||
from .ctv import CTVIE
|
||||
from .ctvnews import CTVNewsIE
|
||||
from .cultureunplugged import CultureUnpluggedIE
|
||||
from .curiositystream import (
|
||||
@ -640,6 +640,7 @@
|
||||
FancodeVodIE,
|
||||
)
|
||||
from .fathom import FathomIE
|
||||
from .faulio import FaulioLiveIE
|
||||
from .faz import FazIE
|
||||
from .fc2 import (
|
||||
FC2IE,
|
||||
@ -806,9 +807,7 @@
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .hotstar import (
|
||||
HotStarIE,
|
||||
HotStarPlaylistIE,
|
||||
HotStarPrefixIE,
|
||||
HotStarSeasonIE,
|
||||
HotStarSeriesIE,
|
||||
)
|
||||
from .hrefli import HrefLiRedirectIE
|
||||
@ -922,10 +921,6 @@
|
||||
ShugiinItvVodIE,
|
||||
)
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jiocinema import (
|
||||
JioCinemaIE,
|
||||
JioCinemaSeriesIE,
|
||||
)
|
||||
from .jiosaavn import (
|
||||
JioSaavnAlbumIE,
|
||||
JioSaavnArtistIE,
|
||||
@ -935,7 +930,6 @@
|
||||
JioSaavnSongIE,
|
||||
)
|
||||
from .joj import JojIE
|
||||
from .joqrag import JoqrAgIE
|
||||
from .jove import JoveIE
|
||||
from .jstream import JStreamIE
|
||||
from .jtbc import (
|
||||
@ -1038,11 +1032,6 @@
|
||||
LikeeIE,
|
||||
LikeeUserIE,
|
||||
)
|
||||
from .limelight import (
|
||||
LimelightChannelIE,
|
||||
LimelightChannelListIE,
|
||||
LimelightMediaIE,
|
||||
)
|
||||
from .linkedin import (
|
||||
LinkedInEventsIE,
|
||||
LinkedInIE,
|
||||
@ -1108,6 +1097,7 @@
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .masters import MastersIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mave import MaveIE
|
||||
from .mbn import MBNIE
|
||||
from .mdr import MDRIE
|
||||
from .medaltv import MedalTVIE
|
||||
@ -1153,6 +1143,7 @@
|
||||
MindsIE,
|
||||
)
|
||||
from .minoto import MinotoIE
|
||||
from .mir24tv import Mir24TvIE
|
||||
from .mirrativ import (
|
||||
MirrativIE,
|
||||
MirrativUserIE,
|
||||
@ -1173,6 +1164,10 @@
|
||||
MixcloudPlaylistIE,
|
||||
MixcloudUserIE,
|
||||
)
|
||||
from .mixlr import (
|
||||
MixlrIE,
|
||||
MixlrRecoringIE,
|
||||
)
|
||||
from .mlb import (
|
||||
MLBIE,
|
||||
MLBTVIE,
|
||||
@ -1262,6 +1257,7 @@
|
||||
)
|
||||
from .nbc import (
|
||||
NBCIE,
|
||||
BravoTVIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
NBCOlympicsStreamIE,
|
||||
@ -1269,6 +1265,7 @@
|
||||
NBCSportsStreamIE,
|
||||
NBCSportsVPlayerIE,
|
||||
NBCStationsIE,
|
||||
SyfyIE,
|
||||
)
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
@ -1381,7 +1378,6 @@
|
||||
from .noice import NoicePodcastIE
|
||||
from .nonktube import NonkTubeIE
|
||||
from .noodlemagazine import NoodleMagazineIE
|
||||
from .noovo import NoovoIE
|
||||
from .nosnl import NOSNLArticleIE
|
||||
from .nova import (
|
||||
NovaEmbedIE,
|
||||
@ -1562,6 +1558,7 @@
|
||||
PlatziCourseIE,
|
||||
PlatziIE,
|
||||
)
|
||||
from .playerfm import PlayerFmIE
|
||||
from .playplustv import PlayPlusTVIE
|
||||
from .playsuisse import PlaySuisseIE
|
||||
from .playtvak import PlaytvakIE
|
||||
@ -1572,6 +1569,7 @@
|
||||
)
|
||||
from .plutotv import PlutoTVIE
|
||||
from .plvideo import PlVideoIE
|
||||
from .plyr import PlyrEmbedIE
|
||||
from .podbayfm import (
|
||||
PodbayFMChannelIE,
|
||||
PodbayFMIE,
|
||||
@ -1828,6 +1826,7 @@
|
||||
from .saitosan import SaitosanIE
|
||||
from .samplefocus import SampleFocusIE
|
||||
from .sapo import SapoIE
|
||||
from .sauceplus import SaucePlusIE
|
||||
from .sbs import SBSIE
|
||||
from .sbscokr import (
|
||||
SBSCoKrAllvodProgramIE,
|
||||
@ -2016,13 +2015,11 @@
|
||||
SverigesRadioPublicationIE,
|
||||
)
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPageIE,
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
from .swearnet import SwearnetEpisodeIE
|
||||
from .syfy import SyfyIE
|
||||
from .syvdk import SYVDKIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
@ -2101,6 +2098,7 @@
|
||||
TheGuardianPodcastIE,
|
||||
TheGuardianPodcastPlaylistIE,
|
||||
)
|
||||
from .thehighwire import TheHighWireIE
|
||||
from .theholetv import TheHoleTvIE
|
||||
from .theintercept import TheInterceptIE
|
||||
from .theplatform import (
|
||||
@ -2147,6 +2145,7 @@
|
||||
from .toggo import ToggoIE
|
||||
from .tonline import TOnlineIE
|
||||
from .toongoggles import ToonGogglesIE
|
||||
from .toutiao import ToutiaoIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import (
|
||||
ToypicsIE,
|
||||
@ -2169,7 +2168,6 @@
|
||||
from .trueid import TrueIDIE
|
||||
from .trunews import TruNewsIE
|
||||
from .truth import TruthIE
|
||||
from .trutv import TruTVIE
|
||||
from .tube8 import Tube8IE
|
||||
from .tubetugraz import (
|
||||
TubeTuGrazIE,
|
||||
@ -2288,6 +2286,7 @@
|
||||
)
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unitednations import UnitedNationsWebTvIE
|
||||
from .unity import UnityIE
|
||||
from .unsupported import (
|
||||
KnownDRMIE,
|
||||
@ -2369,6 +2368,7 @@
|
||||
VHXEmbedIE,
|
||||
VimeoAlbumIE,
|
||||
VimeoChannelIE,
|
||||
VimeoEventIE,
|
||||
VimeoGroupsIE,
|
||||
VimeoIE,
|
||||
VimeoLikesIE,
|
||||
|
@ -3,6 +3,7 @@
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -10,6 +11,7 @@
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
parse_qs,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
@ -45,6 +47,7 @@
|
||||
'name': 'Comcast XFINITY',
|
||||
'username_field': 'user',
|
||||
'password_field': 'passwd',
|
||||
'login_hostname': 'login.xfinity.com',
|
||||
},
|
||||
'TWC': {
|
||||
'name': 'Time Warner Cable | Spectrum',
|
||||
@ -74,6 +77,12 @@
|
||||
'name': 'Verizon FiOS',
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
'login_hostname': 'ssoauth.verizon.com',
|
||||
},
|
||||
'Fubo': {
|
||||
'name': 'Fubo',
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'Cablevision': {
|
||||
'name': 'Optimum/Cablevision',
|
||||
@ -1338,6 +1347,7 @@
|
||||
'name': 'Sling TV',
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
'login_hostname': 'identity.sling.com',
|
||||
},
|
||||
'Suddenlink': {
|
||||
'name': 'Suddenlink',
|
||||
@ -1355,7 +1365,6 @@
|
||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
@ -1367,6 +1376,11 @@ def _download_webpage_handle(self, *args, **kwargs):
|
||||
return super()._download_webpage_handle(
|
||||
*args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _get_mso_headers(mso_info):
|
||||
# Not needed currently
|
||||
return {}
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
channel = etree.Element('channel')
|
||||
@ -1382,7 +1396,13 @@ def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
resource_rating.text = rating
|
||||
return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
|
||||
|
||||
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
|
||||
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource, software_statement):
|
||||
mso_id = self.get_param('ap_mso')
|
||||
if mso_id:
|
||||
mso_info = MSO_INFO[mso_id]
|
||||
else:
|
||||
mso_info = {}
|
||||
|
||||
def xml_text(xml_str, tag):
|
||||
return self._search_regex(
|
||||
f'<{tag}>(.+?)</{tag}>', xml_str, tag)
|
||||
@ -1391,15 +1411,27 @@ def is_expired(token, date_ele):
|
||||
token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
|
||||
return token_expires and token_expires <= int(time.time())
|
||||
|
||||
def post_form(form_page_res, note, data={}):
|
||||
def post_form(form_page_res, note, data={}, validate_url=False):
|
||||
form_page, urlh = form_page_res
|
||||
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
|
||||
if not re.match(r'https?://', post_url):
|
||||
post_url = urllib.parse.urljoin(urlh.url, post_url)
|
||||
if validate_url:
|
||||
# This request is submitting credentials so we should validate it when possible
|
||||
url_parsed = urllib.parse.urlparse(post_url)
|
||||
expected_hostname = mso_info.get('login_hostname')
|
||||
if expected_hostname and expected_hostname != url_parsed.hostname:
|
||||
raise ExtractorError(
|
||||
f'Unexpected login URL hostname; expected "{expected_hostname}" but got '
|
||||
f'"{url_parsed.hostname}". Aborting before submitting credentials')
|
||||
if url_parsed.scheme != 'https':
|
||||
self.write_debug('Upgrading login URL scheme to https')
|
||||
post_url = urllib.parse.urlunparse(url_parsed._replace(scheme='https'))
|
||||
form_data = self._hidden_inputs(form_page)
|
||||
form_data.update(data)
|
||||
return self._download_webpage_handle(
|
||||
post_url, video_id, note, data=urlencode_postdata(form_data), headers={
|
||||
**self._get_mso_headers(mso_info),
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
@ -1432,40 +1464,72 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
}
|
||||
|
||||
guid = xml_text(resource, 'guid') if '<' in resource else resource
|
||||
count = 0
|
||||
while count < 2:
|
||||
for _ in range(2):
|
||||
requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
|
||||
authn_token = requestor_info.get('authn_token')
|
||||
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
|
||||
authn_token = None
|
||||
if not authn_token:
|
||||
mso_id = self.get_param('ap_mso')
|
||||
if mso_id:
|
||||
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
|
||||
if not username or not password:
|
||||
raise_mvpd_required()
|
||||
mso_info = MSO_INFO[mso_id]
|
||||
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
}, headers={
|
||||
# yt-dlp's default user-agent is usually too old for Comcast_SSO
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||
'User-Agent': self._MODERN_USER_AGENT,
|
||||
} if mso_id == 'Comcast_SSO' else None)
|
||||
elif not self._cookies_passed:
|
||||
if not mso_id:
|
||||
raise_mvpd_required()
|
||||
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
|
||||
if not username or not password:
|
||||
raise_mvpd_required()
|
||||
|
||||
if not mso_id:
|
||||
pass
|
||||
elif mso_id == 'Comcast_SSO':
|
||||
device_info, urlh = self._download_json_handle(
|
||||
'https://sp.auth.adobe.com/indiv/devices',
|
||||
video_id, 'Registering device with Adobe',
|
||||
data=json.dumps({'fingerprint': uuid.uuid4().hex}).encode(),
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'})
|
||||
|
||||
device_id = device_info['deviceId']
|
||||
mvpd_headers['pass_sfp'] = urlh.get_header('pass_sfp')
|
||||
mvpd_headers['Ap_21'] = device_id
|
||||
|
||||
registration = self._download_json(
|
||||
'https://sp.auth.adobe.com/o/client/register',
|
||||
video_id, 'Registering client with Adobe',
|
||||
data=json.dumps({'software_statement': software_statement}).encode(),
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'})
|
||||
|
||||
access_token = self._download_json(
|
||||
'https://sp.auth.adobe.com/o/client/token', video_id,
|
||||
'Obtaining access token', data=urlencode_postdata({
|
||||
'grant_type': 'client_credentials',
|
||||
'client_id': registration['client_id'],
|
||||
'client_secret': registration['client_secret'],
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
})['access_token']
|
||||
mvpd_headers['Authorization'] = f'Bearer {access_token}'
|
||||
|
||||
reg_code = self._download_json(
|
||||
f'https://sp.auth.adobe.com/reggie/v1/{requestor_id}/regcode',
|
||||
video_id, 'Obtaining registration code',
|
||||
data=urlencode_postdata({
|
||||
'requestor': requestor_id,
|
||||
'deviceId': device_id,
|
||||
'format': 'json',
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Authorization': f'Bearer {access_token}',
|
||||
})['code']
|
||||
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
'reg_code': reg_code,
|
||||
}, headers=self._get_mso_headers(mso_info))
|
||||
|
||||
if mso_id == 'Comcast_SSO':
|
||||
# Comcast page flow varies by video site and whether you
|
||||
# are on Comcast's network.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
@ -1489,8 +1553,8 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
oauth_redirect_url = extract_redirect_url(
|
||||
provider_redirect_page, fatal=True)
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
oauth_redirect_url, video_id,
|
||||
self._DOWNLOADING_LOGIN_PAGE)
|
||||
oauth_redirect_url, video_id, self._DOWNLOADING_LOGIN_PAGE,
|
||||
headers=self._get_mso_headers(mso_info))
|
||||
else:
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res,
|
||||
@ -1500,24 +1564,35 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
mvpd_confirm_page, urlh = mvpd_confirm_page_res
|
||||
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
elif mso_id == 'Philo':
|
||||
# Philo has very unique authentication method
|
||||
self._download_webpage(
|
||||
'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({
|
||||
self._request_webpage(
|
||||
'https://idp.philo.com/auth/init/login_code', video_id,
|
||||
'Requesting Philo auth code', data=json.dumps({
|
||||
'ident': username,
|
||||
'device': 'web',
|
||||
'send_confirm_link': False,
|
||||
'send_token': True,
|
||||
}))
|
||||
'device_ident': f'web-{uuid.uuid4().hex}',
|
||||
'include_login_link': True,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
|
||||
philo_code = getpass.getpass('Type auth code you have received [Return]: ')
|
||||
self._download_webpage(
|
||||
'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
|
||||
'token': philo_code,
|
||||
}))
|
||||
self._request_webpage(
|
||||
'https://idp.philo.com/auth/update/login_code', video_id,
|
||||
'Submitting token', data=json.dumps({'token': philo_code}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login')
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
elif mso_id == 'Verizon':
|
||||
@ -1539,7 +1614,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
provider_redirect_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
saml_login_page, urlh = saml_login_page_res
|
||||
if 'Please try again.' in saml_login_page:
|
||||
raise ExtractorError(
|
||||
@ -1560,7 +1635,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
[saml_login_page, saml_redirect_url], 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
if 'Please try again.' in saml_login_page:
|
||||
raise ExtractorError(
|
||||
'Failed to login, incorrect User ID or Password.')
|
||||
@ -1631,7 +1706,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.url)
|
||||
@ -1682,7 +1757,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.url)
|
||||
@ -1699,6 +1774,27 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
elif mso_id == 'Fubo':
|
||||
_, urlh = provider_redirect_page_res
|
||||
|
||||
fubo_response = self._download_json(
|
||||
'https://api.fubo.tv/partners/tve/connect', video_id,
|
||||
'Authenticating with Fubo', 'Unable to authenticate with Fubo',
|
||||
query=parse_qs(urlh.url), data=json.dumps({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}).encode(), headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
self._request_webpage(
|
||||
'https://sp.auth.adobe.com/adobe-services/oauth2', video_id,
|
||||
'Authenticating with Adobe', 'Failed to authenticate with Adobe',
|
||||
query={
|
||||
'code': fubo_response['code'],
|
||||
'state': fubo_response['state'],
|
||||
})
|
||||
else:
|
||||
# Some providers (e.g. DIRECTV NOW) have another meta refresh
|
||||
# based redirect that should be followed.
|
||||
@ -1717,7 +1813,8 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
}
|
||||
if mso_id in ('Cablevision', 'AlticeOne'):
|
||||
form_data['_eventId_proceed'] = ''
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
|
||||
mvpd_confirm_page_res = post_form(
|
||||
provider_login_page_res, 'Logging in', form_data, validate_url=True)
|
||||
if mso_id != 'Rogers':
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
@ -1727,6 +1824,7 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
'Retrieving Session', data=urlencode_postdata({
|
||||
'_method': 'GET',
|
||||
'requestor_id': requestor_id,
|
||||
'reg_code': reg_code,
|
||||
}), headers=mvpd_headers)
|
||||
except ExtractorError as e:
|
||||
if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
@ -1734,7 +1832,6 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
raise
|
||||
if '<pendingLogout' in session:
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
|
||||
requestor_info['authn_token'] = authn_token
|
||||
@ -1755,7 +1852,6 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in authorize:
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
if '<error' in authorize:
|
||||
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
|
||||
@ -1778,6 +1874,5 @@ def extract_redirect_url(html, url=None, fatal=False):
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in short_authorize:
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
return short_authorize
|
||||
|
@ -84,6 +84,8 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwNjg5ZmU2My00OTc5LTQxZmQtYWYxNC1hYjVlNmJjNWVkZWIiLCJuYmYiOjE1MzcxOTA2NzQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwNjc0fQ.Xl3AEduM0s1TxDQ6-XssdKIiLm261hhsEv1C1yo_nitIajZThSI9rXILqtIzO0aujoHhdzUnu_dUCq9ffiSBzEG632tTa1la-5tegHtce80cMhewBN4n2t8n9O5tiaPx8MPY8ALdm5wS7QzWE6DO_LTJKgE8Bl7Yv-CWJT4q4SywtNiQWLVOuhBRnDyfsRezxRwptw8qTn9dv5ZzUrVJaby5fDZ_nOncMKvegOgaKd5KEuCAGQ-mg-PSuValMjGuf6FwDguGaK7IyI5Y2oOrzXmD4Dj7q4WBg8w9QoZhtLeAU56mcsGILolku2R5FHlVLO9xhjResyt-pfmegOkpSw'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_path, episode_path = self._match_valid_url(url).groups()
|
||||
display_id = episode_path or show_path
|
||||
@ -152,7 +154,7 @@ def _real_extract(self, url):
|
||||
# CDN_TOKEN_APP_ID from:
|
||||
# https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
|
||||
'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
|
||||
}, {
|
||||
}, self._SOFTWARE_STATEMENT, {
|
||||
'url': url,
|
||||
'site_name': 'AdultSwim',
|
||||
'auth_required': auth,
|
||||
|
@ -1,3 +1,5 @@
|
||||
import json
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -6,7 +8,6 @@
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@ -20,13 +21,13 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
_THEPLATFORM_KEY = '43jXaGRQud'
|
||||
_THEPLATFORM_SECRET = 'S10BPXHMlb'
|
||||
_DOMAIN_MAP = {
|
||||
'history.com': ('HISTORY', 'history'),
|
||||
'aetv.com': ('AETV', 'aetv'),
|
||||
'mylifetime.com': ('LIFETIME', 'lifetime'),
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||
'fyi.tv': ('FYI', 'fyi'),
|
||||
'historyvault.com': (None, 'historyvault'),
|
||||
'biography.com': (None, 'biography'),
|
||||
'history.com': ('HISTORY', 'history', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzZlMTQ3ZS0zMzFhLTQxY2YtYTMwNC01MDA2NzNlOGYwYjYiLCJuYmYiOjE1Mzg2NjMzMDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM4NjYzMzA5fQ.n24-FVHLGXJe2D4atIQZ700aiXKIajKh5PWFoHJ40Az4itjtwwSFHnvufnoal3T8lYkwNLxce7H-IEGxIykRkZEdwq09pMKMT-ft9ASzE4vQ8fAWbf5ZgDME86x4Jq_YaxkRc9Ne0eShGhl8fgTJHvk07sfWcol61HJ7kU7K8FzzcHR0ucFQgA5VNd8RyjoGWY7c6VxnXR214LOpXsywmit04-vGJC102b_WA2EQfqI93UzG6M6l0EeV4n0_ijP3s8_i8WMJZ_uwnTafCIY6G_731i01dKXDLSFzG1vYglAwDa8DTcdrAAuIFFDF6QNGItCCmwbhjufjmoeVb7R1Gg'),
|
||||
'aetv.com': ('AETV', 'aetv', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI5Y2IwNjg2Yy03ODUxLTRiZDUtODcyMC00MjNlZTg1YTQ1NzMiLCJuYmYiOjE1Mzg2NjMyOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM4NjYzMjkwfQ.T5Elf0X4TndO4NEgqBas1gDxNHGPVk_daO2Ha5FBzVO6xi3zM7eavdAKfYMCN7gpWYJx03iADaVPtczO_t_aGZczDjpwJHgTUzDgvcLZAVsVDqtDIAMy3S846rPgT6UDbVoxurA7B2VTPm9phjrSXhejvd0LBO8MQL4AZ3sy2VmiPJ2noT1ily5PuHCYlkrT1fheO064duR__Cd9DQ5VTMnKjzY3Cx345CEwKDkUk5gwgxhXM-aY0eblehrq8VD81_aRM_O3tvh7nbTydHOnUpV-k_iKVi49gqz7Sf8zb6Zh5z2Uftn3vYCfE5NQuesitoRMnsH17nW7o_D59hkRgg'),
|
||||
'mylifetime.com': ('LIFETIME', 'lifetime', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJmODg0MDM1ZC1mZGRmLTRmYjgtYmRkMC05MzRhZDdiYTAwYTciLCJuYmYiOjE1NDkzOTI2NDQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQ5MzkyNjQ0fQ.vkTIaCpheKdKQd__2-3ec4qkcpbAhyCTvwe5iTl922ItSQfVhpEJG4wseVSNmBTrpBi0hvLedcw6Hj1_UuzBMVuVcCqLprU-pI8recEwL0u7G-eVkylsxe1OTUm1o3V6OykXQ9KlA-QQLL1neUhdhR1n5B1LZ4cmtBmiEpfgf4rFwXD1ScFylIcaWKLBqHoRBNUmxyTmoXXvn_A-GGSj9eCizFzY8W5uBwUcsoiw2Cr1skx7PbB2RSP1I5DsoIJKG-8XV1KS7MWl-fNLjE-hVAsI9znqfEEFcPBiv3LhCP4Nf4OIs7xAselMn0M0c8igRUZhURWX_hdygUAxkbKFtQ'),
|
||||
'fyi.tv': ('FYI', 'fyi', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxOGZiOWM3Ny1mYmMzLTQxYTktYmE1Yi1lMzM0ZmUzNzU4NjEiLCJuYmYiOjE1ODc1ODAzNzcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NTgwMzc3fQ.AYDuipKswmIfLBfOjHRsfc5fMV5NmJUmiJnkpiep4VEw9QiXkygFj4bN06Si5tFc5Mee5TDrGzDpV6iuKbVpLT5kuqXhAn-Wozf5zKPsg_IpdEKO7gsiCq4calt72ct44KTqtKD_hVcoxQU24_HaJsRgXzu3B-6Ff6UrmsXkyvYifYVC9v2DSkdCuA02_IrlllzVT2kRuefUXgL4vQRtTFf77uYa0RKSTG7uVkiQ_AU41eXevKlO2qgtc14Hk5cZ7-ZNrDyMCXYA5ngdIHP7Gs9PWaFXT36PFHI_rC4EfxUABPzjQFxjpP75aX5qn8SH__HbM9q3hoPWgaEaf76qIQ'),
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc', None),
|
||||
'historyvault.com': (None, 'historyvault', None),
|
||||
'biography.com': (None, 'biography', None),
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
@ -71,7 +72,7 @@ def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
}
|
||||
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
requestor_id, brand, software_statement = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
|
||||
filter_value, query={f'filter[{filter_key}]': filter_value})
|
||||
@ -95,7 +96,7 @@ def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
url, video_id, requestor_id, resource, software_statement)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
@ -110,11 +111,9 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
IE_NAME = 'aenetworks'
|
||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
|
||||
shows/[^/]+/season-\d+/episode-\d+|
|
||||
(?:
|
||||
(?:movie|special)s/[^/]+|
|
||||
(?:shows/[^/]+/)?videos
|
||||
)/[^/?#&]+
|
||||
shows/[^/?#]+/season-\d+/episode-\d+|
|
||||
(?P<type>movie|special)s/[^/?#]+(?P<extra>/[^/?#]+)?|
|
||||
(?:shows/[^/?#]+/)?videos/[^/?#]+
|
||||
)'''
|
||||
_TESTS = [{
|
||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||
@ -127,20 +126,18 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'upload_date': '20120529',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 2592.0,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'chapters': 'count:5',
|
||||
'tags': 'count:14',
|
||||
'categories': ['Mountain Men'],
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode': 'Winter Is Coming',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Mountain Men',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Geo-restricted - This content is not available in your location.',
|
||||
}, {
|
||||
@ -154,21 +151,64 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'upload_date': '20160112',
|
||||
'uploader': 'AENE-NEW',
|
||||
'duration': 1277.695,
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'chapters': 'count:4',
|
||||
'tags': 'count:23',
|
||||
'episode': 'Episode 1',
|
||||
'episode': 'Inlawful Entry',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 9',
|
||||
'season_number': 9,
|
||||
'series': 'Duck Dynasty',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams',
|
||||
'info_dict': {
|
||||
'id': '1590627395981',
|
||||
'ext': 'mp4',
|
||||
'title': 'VC Andrews\' Web of Dreams',
|
||||
'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce',
|
||||
'uploader': 'AENE-NEW',
|
||||
'age_limit': 14,
|
||||
'duration': 5253.665,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'chapters': 'count:8',
|
||||
'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"],
|
||||
'series': '',
|
||||
'season': 'Season 0',
|
||||
'season_number': 0,
|
||||
'episode': 'VC Andrews\' Web of Dreams',
|
||||
'episode_number': 0,
|
||||
'timestamp': 1566489703.0,
|
||||
'upload_date': '20190822',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'add_ie': ['ThePlatform'],
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story',
|
||||
'info_dict': {
|
||||
'id': '1488235587551',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hunting JonBenet\'s Killer: The Untold Story',
|
||||
'description': 'md5:209869425ee392d74fe29201821e48b4',
|
||||
'uploader': 'AENE-NEW',
|
||||
'age_limit': 14,
|
||||
'duration': 5003.903,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'chapters': 'count:10',
|
||||
'tags': 'count:11',
|
||||
'series': '',
|
||||
'season': 'Season 0',
|
||||
'season_number': 0,
|
||||
'episode': 'Hunting JonBenet\'s Killer: The Untold Story',
|
||||
'episode_number': 0,
|
||||
'timestamp': 1554987697.0,
|
||||
'upload_date': '20190411',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True,
|
||||
@ -196,25 +236,28 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, canonical = self._match_valid_url(url).groups()
|
||||
domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra')
|
||||
if url_type in ('movie', 'special') and not extra:
|
||||
canonical += f'/full-{url_type}'
|
||||
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
|
||||
|
||||
|
||||
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
def _call_api(self, resource, slug, brand, fields):
|
||||
return self._download_json(
|
||||
'https://yoga.appsvcs.aetnd.com/graphql',
|
||||
slug, query={'brand': brand}, data=urlencode_postdata({
|
||||
'https://yoga.appsvcs.aetnd.com/graphql', slug,
|
||||
query={'brand': brand}, headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps({
|
||||
'query': '''{
|
||||
%s(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}''' % (resource, slug, fields), # noqa: UP031
|
||||
}))['data'][resource]
|
||||
}).encode())['data'][resource]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, slug = self._match_valid_url(url).groups()
|
||||
_, brand = self._DOMAIN_MAP[domain]
|
||||
_, brand, _ = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = f'http://watch.{domain}'
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
dict_get,
|
||||
extract_attributes,
|
||||
get_element_by_id,
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
@ -72,6 +73,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'display_id': 'Cops-v2.mp4',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'duration': 1091.96,
|
||||
'track': 'Cops-v2',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||
@ -86,6 +88,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'duration': 59.77,
|
||||
'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
|
||||
'track': 'Commercial-JFK1960ElectionAdCampaignJingle',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
|
||||
@ -102,6 +105,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'duration': 59.51,
|
||||
'license': 'http://creativecommons.org/licenses/publicdomain/',
|
||||
'thumbnail': r're:https://archive\.org/download/.*\.jpg',
|
||||
'track': 'Commercial-Nixon1960ElectionAdToughonDefense',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16',
|
||||
@ -182,6 +186,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'duration': 130.46,
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg',
|
||||
'display_id': 'irelandthemakingofarepublicreel1_01.mov',
|
||||
'track': 'irelandthemakingofarepublicreel1 01',
|
||||
},
|
||||
}, {
|
||||
'md5': '67335ee3b23a0da930841981c1e79b02',
|
||||
@ -192,6 +197,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'irelandthemakingofarepublicreel1_02.mov',
|
||||
'display_id': 'irelandthemakingofarepublicreel1_02.mov',
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg',
|
||||
'track': 'irelandthemakingofarepublicreel1 02',
|
||||
},
|
||||
}, {
|
||||
'md5': 'e470e86787893603f4a341a16c281eb5',
|
||||
@ -202,6 +208,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'irelandthemakingofarepublicreel2.mov',
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
|
||||
'display_id': 'irelandthemakingofarepublicreel2.mov',
|
||||
'track': 'irelandthemakingofarepublicreel2',
|
||||
},
|
||||
},
|
||||
],
|
||||
@ -229,15 +236,8 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _playlist_data(webpage):
|
||||
element = re.findall(r'''(?xs)
|
||||
<input
|
||||
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
|
||||
\s+class=['"]?js-play8-playlist['"]?
|
||||
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
|
||||
\s*/>
|
||||
''', webpage)[0]
|
||||
|
||||
return json.loads(extract_attributes(element)['value'])
|
||||
element = get_element_text_and_html_by_tag('play-av', webpage)[1]
|
||||
return json.loads(extract_attributes(element)['playlist'])
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = urllib.parse.unquote_plus(self._match_id(url))
|
||||
|
@ -1,33 +0,0 @@
|
||||
from .brightcove import BrightcoveNewBaseIE
|
||||
from ..utils import extract_attributes
|
||||
|
||||
|
||||
class BandaiChannelIE(BrightcoveNewBaseIE):
|
||||
IE_NAME = 'bandaichannel'
|
||||
_VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.b-ch.com/titles/514/001',
|
||||
'md5': 'a0f2d787baa5729bed71108257f613a4',
|
||||
'info_dict': {
|
||||
'id': '6128044564001',
|
||||
'ext': 'mp4',
|
||||
'title': 'メタルファイターMIKU 第1話',
|
||||
'timestamp': 1580354056,
|
||||
'uploader_id': '5797077852001',
|
||||
'upload_date': '20200130',
|
||||
'duration': 1387.733,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
attrs = extract_attributes(self._search_regex(
|
||||
r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
|
||||
bc = self._download_json(
|
||||
'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
|
||||
video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
|
||||
return self._parse_brightcove_metadata(bc, bc['id'])
|
@ -7,6 +7,7 @@
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@ -19,7 +20,7 @@
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
from ..utils.traversal import find_element, find_elements, traverse_obj
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
@ -70,6 +71,9 @@ class BandcampIE(InfoExtractor):
|
||||
'album': 'FTL: Advanced Edition Soundtrack',
|
||||
'uploader_url': 'https://benprunty.bandcamp.com',
|
||||
'uploader_id': 'benprunty',
|
||||
'tags': ['soundtrack', 'chiptunes', 'cinematic', 'electronic', 'video game music', 'California'],
|
||||
'artists': ['Ben Prunty'],
|
||||
'album_artists': ['Ben Prunty'],
|
||||
},
|
||||
}, {
|
||||
# no free download, mp3 128
|
||||
@ -94,6 +98,9 @@ class BandcampIE(InfoExtractor):
|
||||
'album': 'Call of the Mastodon',
|
||||
'uploader_url': 'https://relapsealumni.bandcamp.com',
|
||||
'uploader_id': 'relapsealumni',
|
||||
'tags': ['Philadelphia'],
|
||||
'artists': ['Mastodon'],
|
||||
'album_artists': ['Mastodon'],
|
||||
},
|
||||
}, {
|
||||
# track from compilation album (artist/album_artist difference)
|
||||
@ -118,6 +125,9 @@ class BandcampIE(InfoExtractor):
|
||||
'album': 'DSK F/W 2016-2017 Free Compilation',
|
||||
'uploader_url': 'https://diskotopia.bandcamp.com',
|
||||
'uploader_id': 'diskotopia',
|
||||
'tags': ['Japan'],
|
||||
'artists': ['submerse'],
|
||||
'album_artists': ['Diskotopia'],
|
||||
},
|
||||
}]
|
||||
|
||||
@ -252,6 +262,7 @@ def _real_extract(self, url):
|
||||
'album': embed.get('album_title'),
|
||||
'album_artist': album_artist,
|
||||
'formats': formats,
|
||||
'tags': traverse_obj(webpage, ({find_elements(cls='tag')}, ..., {clean_html})),
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,91 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BellMediaIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)https?://(?:www\.)?
|
||||
(?P<domain>
|
||||
(?:
|
||||
ctv|
|
||||
tsn|
|
||||
bnn(?:bloomberg)?|
|
||||
thecomedynetwork|
|
||||
discovery|
|
||||
discoveryvelocity|
|
||||
sciencechannel|
|
||||
investigationdiscovery|
|
||||
animalplanet|
|
||||
bravo|
|
||||
mtv|
|
||||
space|
|
||||
etalk|
|
||||
marilyn
|
||||
)\.ca|
|
||||
(?:much|cp24)\.com
|
||||
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
|
||||
'md5': '3e5b8e38370741d5089da79161646635',
|
||||
'info_dict': {
|
||||
'id': '1403070',
|
||||
'ext': 'flv',
|
||||
'title': 'David Cockfield\'s Top Picks',
|
||||
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
|
||||
'upload_date': '20180525',
|
||||
'timestamp': 1527288600,
|
||||
'season_id': '73997',
|
||||
'season': '2018',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
|
||||
'tags': [],
|
||||
'categories': ['ETFs'],
|
||||
'season_number': 8,
|
||||
'duration': 272.038,
|
||||
'series': 'Market Call Tonight',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.etalk.ca/video?videoid=663455',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cp24.com/video?clipId=1982548',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_DOMAINS = {
|
||||
'thecomedynetwork': 'comedy',
|
||||
'discoveryvelocity': 'discvel',
|
||||
'sciencechannel': 'discsci',
|
||||
'investigationdiscovery': 'invdisc',
|
||||
'animalplanet': 'aniplan',
|
||||
'etalk': 'ctv',
|
||||
'bnnbloomberg': 'bnn',
|
||||
'marilyn': 'ctv_marilyn',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, video_id = self._match_valid_url(url).groups()
|
||||
domain = domain.split('.')[0]
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}',
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
@ -353,7 +353,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'id': 'BV1bK411W797',
|
||||
'title': '物语中的人物是如何吐槽自己的OP的',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
'playlist_count': 23,
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'BV1bK411W797_p1',
|
||||
@ -373,6 +373,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'_old_archive_ids': ['bilibili 498159642_part1'],
|
||||
},
|
||||
}],
|
||||
'params': {'playlist_items': '2'},
|
||||
}, {
|
||||
'note': 'Specific page of Anthology',
|
||||
'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
|
||||
@ -816,6 +817,26 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'upload_date': '20111104',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'note': 'new playurlSSRData scheme',
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep678060',
|
||||
'info_dict': {
|
||||
'id': '678060',
|
||||
'ext': 'mp4',
|
||||
'series': '去你家吃饭好吗',
|
||||
'series_id': '6198',
|
||||
'season': '第二季',
|
||||
'season_id': '42542',
|
||||
'season_number': 2,
|
||||
'episode': '吴老二:你家大公鸡养不熟,能煮熟吗…',
|
||||
'episode_id': '678060',
|
||||
'episode_number': 61,
|
||||
'title': '一只小九九丫 吴老二:你家大公鸡养不熟,能煮熟吗…',
|
||||
'duration': 266.123,
|
||||
'timestamp': 1663315904,
|
||||
'upload_date': '20220916',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
|
||||
'info_dict': {
|
||||
@ -879,12 +900,41 @@ def _real_extract(self, url):
|
||||
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
|
||||
headers=headers))
|
||||
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
# play_info can be structured in at least three different ways, e.g.:
|
||||
# 1.) play_info['result']['video_info'] and play_info['code']
|
||||
# 2.) play_info['raw']['data']['video_info'] and play_info['code']
|
||||
# 3.) play_info['data']['result']['video_info'] and play_info['data']['code']
|
||||
# So we need to transform any of the above into a common structure
|
||||
status_code = play_info.get('code')
|
||||
if 'raw' in play_info:
|
||||
play_info = play_info['raw']
|
||||
if 'data' in play_info:
|
||||
play_info = play_info['data']
|
||||
if status_code is None:
|
||||
status_code = play_info.get('code')
|
||||
if 'result' in play_info:
|
||||
play_info = play_info['result']
|
||||
|
||||
formats = self.extract_formats(play_info)
|
||||
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
geo_blocked = traverse_obj(play_info, (
|
||||
'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
|
||||
premium_only = status_code == -10403
|
||||
|
||||
video_info = traverse_obj(play_info, ('video_info', {dict})) or {}
|
||||
formats = self.extract_formats(video_info)
|
||||
|
||||
if not formats:
|
||||
if geo_blocked:
|
||||
self.raise_geo_restricted()
|
||||
elif premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
if traverse_obj(play_info, ((
|
||||
('play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' vs 'PLAY_NONE'
|
||||
'play_video_type', # 'preview' vs 'whole' vs 'none'
|
||||
), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
|
||||
self.report_warning(
|
||||
'Only preview format is available, '
|
||||
f'you have to become a premium member to access full video. {self._login_hint()}')
|
||||
|
||||
bangumi_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
|
||||
@ -922,7 +972,7 @@ def _real_extract(self, url):
|
||||
'season': str_or_none(season_title),
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_number': season_number,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': {'Referer': url},
|
||||
@ -966,6 +1016,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}],
|
||||
'params': {'playlist_items': '2'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -1021,6 +1072,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}],
|
||||
'params': {'playlist_items': '2'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -1192,6 +1244,26 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
}, {
|
||||
# Hidden-mode collection
|
||||
'url': 'https://space.bilibili.com/3669403/video',
|
||||
'info_dict': {
|
||||
'id': '3669403',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '3669403_3958082',
|
||||
'title': '合集·直播回放',
|
||||
'description': '',
|
||||
'uploader': '月路Yuel',
|
||||
'uploader_id': '3669403',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': str,
|
||||
},
|
||||
}],
|
||||
'params': {'playlist_items': '7'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -1248,8 +1320,14 @@ def get_metadata(page_data):
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
|
||||
for entry in traverse_obj(page_data, ('list', 'vlist', ..., {dict})):
|
||||
if traverse_obj(entry, ('meta', 'attribute')) == 156:
|
||||
# hidden-mode collection doesn't show its videos in uploads; extract as playlist instead
|
||||
yield self.url_result(
|
||||
f'https://space.bilibili.com/{entry["mid"]}/lists/{entry["meta"]["id"]}?type=season',
|
||||
BilibiliCollectionListIE, f'{entry["mid"]}_{entry["meta"]["id"]}')
|
||||
else:
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id)
|
||||
@ -1785,7 +1863,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
'thumbnail': r're:^https?://.+\.jpg',
|
||||
'timestamp': 1564836614,
|
||||
'upload_date': '20190803',
|
||||
'uploader': 'tsukimi-つきみぐー',
|
||||
'uploader': '十六夜tsukimiつきみぐ',
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
@ -1840,10 +1918,10 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
|
||||
'url': 'https://www.bilibili.com/audio/am10624',
|
||||
'info_dict': {
|
||||
'id': '10624',
|
||||
'title': '每日新曲推荐(每日11:00更新)',
|
||||
'title': '新曲推荐',
|
||||
'description': '每天11:00更新,为你推送最新音乐',
|
||||
},
|
||||
'playlist_count': 19,
|
||||
'playlist_count': 16,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -1,16 +1,27 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601
|
||||
from ..utils import (
|
||||
UnsupportedError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BlackboardCollaborateIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<region>[a-z-]+)\.bbcollab\.com/
|
||||
(?P<region>[a-z]+)(?:-lti)?\.bbcollab\.com/
|
||||
(?:
|
||||
collab/ui/session/playback/load|
|
||||
recording
|
||||
)/
|
||||
(?P<id>[^/]+)'''
|
||||
(?P<id>[^/?#]+)'''
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256',
|
||||
@ -19,9 +30,55 @@ class BlackboardCollaborateIE(InfoExtractor):
|
||||
'id': '0a633b6a88824deb8c918f470b22b256',
|
||||
'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1',
|
||||
'ext': 'mp4',
|
||||
'duration': 1896000,
|
||||
'timestamp': 1620331399,
|
||||
'duration': 1896,
|
||||
'timestamp': 1620333295,
|
||||
'upload_date': '20210506',
|
||||
'subtitles': {
|
||||
'live_chat': 'mincount:1',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://eu.bbcollab.com/collab/ui/session/playback/load/4bde2dee104f40289a10f8e554270600',
|
||||
'md5': '108db6a8f83dcb0c2a07793649581865',
|
||||
'info_dict': {
|
||||
'id': '4bde2dee104f40289a10f8e554270600',
|
||||
'title': 'Meeting - Azerbaycanca erize formasi',
|
||||
'ext': 'mp4',
|
||||
'duration': 880,
|
||||
'timestamp': 1671176868,
|
||||
'upload_date': '20221216',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://eu.bbcollab.com/recording/f83be390ecff46c0bf7dccb9dddcf5f6',
|
||||
'md5': 'e3b0b88ddf7847eae4b4c0e2d40b83a5',
|
||||
'info_dict': {
|
||||
'id': 'f83be390ecff46c0bf7dccb9dddcf5f6',
|
||||
'title': 'Keynote lecture by Laura Carvalho - recording_1',
|
||||
'ext': 'mp4',
|
||||
'duration': 5506,
|
||||
'timestamp': 1662721705,
|
||||
'upload_date': '20220909',
|
||||
'subtitles': {
|
||||
'live_chat': 'mincount:1',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://eu.bbcollab.com/recording/c3e1e7c9e83d4cd9981c93c74888d496',
|
||||
'md5': 'fdb2d8c43d66fbc0b0b74ef5e604eb1f',
|
||||
'info_dict': {
|
||||
'id': 'c3e1e7c9e83d4cd9981c93c74888d496',
|
||||
'title': 'International Ally User Group - recording_18',
|
||||
'ext': 'mp4',
|
||||
'duration': 3479,
|
||||
'timestamp': 1721919621,
|
||||
'upload_date': '20240725',
|
||||
'subtitles': {
|
||||
'en': 'mincount:1',
|
||||
'live_chat': 'mincount:1',
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
@ -42,22 +99,81 @@ class BlackboardCollaborateIE(InfoExtractor):
|
||||
},
|
||||
]
|
||||
|
||||
def _call_api(self, region, video_id, path=None, token=None, note=None, fatal=False):
|
||||
# Ref: https://github.com/blackboard/BBDN-Collab-Postman-REST
|
||||
return self._download_json(
|
||||
join_nonempty(f'https://{region}.bbcollab.com/collab/api/csa/recordings', video_id, path, delim='/'),
|
||||
video_id, note or 'Downloading JSON metadata', fatal=fatal,
|
||||
headers={'Authorization': f'Bearer {token}'} if token else None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
region = mobj.group('region')
|
||||
video_id = mobj.group('id')
|
||||
info = self._download_json(
|
||||
f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
|
||||
duration = info.get('duration')
|
||||
title = info['name']
|
||||
upload_date = info.get('created')
|
||||
streams = info['streams']
|
||||
formats = [{'format_id': k, 'url': url} for k, url in streams.items()]
|
||||
token = parse_qs(url).get('authToken', [None])[-1]
|
||||
|
||||
video_info = self._call_api(region, video_id, path='data/secure', token=token, note='Trying auth token')
|
||||
if video_info:
|
||||
video_extra = self._call_api(region, video_id, token=token, note='Retrieving extra attributes')
|
||||
else:
|
||||
video_info = self._call_api(region, video_id, path='data', note='Trying fallback', fatal=True)
|
||||
video_extra = {}
|
||||
|
||||
formats = traverse_obj(video_info, ('extStreams', lambda _, v: url_or_none(v['streamUrl']), {
|
||||
'url': 'streamUrl',
|
||||
'ext': ('contentType', {mimetype2ext}),
|
||||
'aspect_ratio': ('aspectRatio', {float_or_none}),
|
||||
}))
|
||||
|
||||
if filesize := traverse_obj(video_extra, ('storageSize', {int_or_none})):
|
||||
for fmt in formats:
|
||||
fmt['filesize'] = filesize
|
||||
|
||||
subtitles = {}
|
||||
for subs in traverse_obj(video_info, ('subtitles', lambda _, v: url_or_none(v['url']))):
|
||||
subtitles.setdefault(subs.get('lang') or 'und', []).append({
|
||||
'name': traverse_obj(subs, ('label', {str})),
|
||||
'url': subs['url'],
|
||||
})
|
||||
|
||||
for live_chat_url in traverse_obj(video_info, ('chats', ..., 'url', {url_or_none})):
|
||||
subtitles.setdefault('live_chat', []).append({'url': live_chat_url})
|
||||
|
||||
return {
|
||||
'duration': duration,
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('name', {str}),
|
||||
'timestamp': ('created', {parse_iso8601}),
|
||||
'duration': ('duration', {int_or_none(scale=1000)}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'id': video_id,
|
||||
'timestamp': parse_iso8601(upload_date),
|
||||
'title': title,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class BlackboardCollaborateLaunchIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://[a-z]+\.bbcollab\.com/launch/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://au.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzQwNDE2NDgzLCJpYXQiOjE3NDA0MTYxODMsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3MzI4YzRjZTNmM2U0ZTcwYmY3MTY3N2RkZTgzMzk2NSIsImNvbnN1bWVySWQiOiJhM2Q3NGM0Y2QyZGU0MGJmODFkMjFlODNlMmEzNzM5MCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.xuELw4EafEwUMoYcCHidGn4Tw9O1QCbYHzYGJUl0kKk',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://us.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNjk0NDgxOTc3LCJpYXQiOjE2OTQ0ODE2NzcsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3YWU0MTFhNTU3NjU0OWFiOTZlYjVmMTM1YmY3MWU5MCIsImNvbnN1bWVySWQiOiJBRUU2MEI4MDI2QzM3ODU2RjMwMzNEN0ZEOTQzMTFFNSIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.yOhRZNaIjXYoMYMpcTzgjZJCnIFaYf2cAzbco8OAxlY',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://eu.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzUyNjgyODYwLCJpYXQiOjE3NTI2ODI1NjAsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI4MjQzYjFiODg2Nzk0NTZkYjkwN2NmNDZmZmE1MmFhZiIsImNvbnN1bWVySWQiOiI5ZTY4NzYwZWJiNzM0MzRiYWY3NTQyZjA1YmJkOTMzMCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.Xj4ymojYLwZ1vKPKZ-KxjpqQvFXoJekjRaG0npngwWs',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
token = self._match_id(url)
|
||||
video_id = jwt_decode_hs256(token)['resourceAccessTicket']['resourceId']
|
||||
|
||||
redirect_url = self._request_webpage(url, video_id).url
|
||||
if self.suitable(redirect_url):
|
||||
raise UnsupportedError(redirect_url)
|
||||
return self.url_result(redirect_url, BlackboardCollaborateIE, video_id)
|
||||
|
@ -1,188 +0,0 @@
|
||||
from .adobepass import AdobePassIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||
'info_dict': {
|
||||
'id': '3923059',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Top Chef Season 16 Winner Is...',
|
||||
'description': 'Find out who takes the title of Top Chef!',
|
||||
'upload_date': '20190314',
|
||||
'timestamp': 1552591860,
|
||||
'season_number': 16,
|
||||
'episode_number': 15,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'The Top Chef Season 16 Winner Is...',
|
||||
'duration': 190.357,
|
||||
'season': 'Season 16',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
|
||||
'info_dict': {
|
||||
'id': '9000234570',
|
||||
'ext': 'mp4',
|
||||
'title': 'London Calling',
|
||||
'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
|
||||
'upload_date': '20230310',
|
||||
'timestamp': 1678410000,
|
||||
'season_number': 20,
|
||||
'episode_number': 1,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'London Calling',
|
||||
'duration': 3266.03,
|
||||
'season': 'Season 20',
|
||||
'chapters': 'count:7',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
|
||||
'info_dict': {
|
||||
'id': '3692045',
|
||||
'ext': 'mp4',
|
||||
'title': 'Closing Night',
|
||||
'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
|
||||
'upload_date': '20180401',
|
||||
'timestamp': 1522623600,
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': 'Closing Night',
|
||||
'duration': 2629.051,
|
||||
'season': 'Season 1',
|
||||
'chapters': 'count:6',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||
'info_dict': {
|
||||
'id': '3974019',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
|
||||
'upload_date': '20190617',
|
||||
'timestamp': 1560790800,
|
||||
'season_number': 2,
|
||||
'episode_number': 16,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'duration': 68.235,
|
||||
'season': 'Season 2',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve the page to a thePlatform media GUID and build its info dict.

    The account and video identifiers are read from the attributes of the
    `tve-video-deck-app` element when the page has one; otherwise they come
    from the `ls_playlist` entry of the Drupal settings JSON. Formats are
    extracted from the URL that a HEAD request to `stream.m3u8` ends up at.
    """
    site, display_id = self._match_valid_url(url).group('site', 'id')
    webpage = self._download_webpage(url, display_id)
    settings = self._search_json(
        r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>',
        webpage, 'settings', display_id)
    tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
    query = {
        'manifest': 'm3u',
        'formats': 'm3u,mpeg4',
    }

    if not tve:
        # No TVE player element: everything comes from the settings playlist
        ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
        account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
        account_id = ls_playlist['mpxMediaAccountId']
        video_id = ls_playlist['defaultGuid']
        metadata = traverse_obj(
            ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)
    else:
        account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
        account_id = tve['data-mpx-media-account-id']
        metadata = self._parse_json(
            tve.get('data-normalized-video', ''), display_id,
            fatal=False, transform_source=unescapeHTML)
        video_id = tve.get('data-guid') or metadata['guid']

        if tve.get('data-entitlement') == 'auth':
            # Entitled content: request an MVPD auth token and add it to the query
            auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
            site = remove_end(site, 'tv')
            release_pid = tve['data-release-pid']
            resource_id = (
                tve.get('data-adobe-pass-resource-id')
                or auth.get('adobePassResourceId')
                or site)
            resource = self._get_mvpd_resource(
                resource_id, tve['data-title'], release_pid, tve.get('data-rating'))
            requestor_id = auth.get('adobePassRequestorId') or site
            query['switch'] = 'HLSServiceSecure'
            query['auth'] = self._extract_mvpd_auth(url, release_pid, requestor_id, resource)

    tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
    tp_metadata = self._download_json(
        update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)

    chapters = traverse_obj(tp_metadata, ('chapters', ..., {
        'start_time': ('startTime', {float_or_none(scale=1000)}),
        'end_time': ('endTime', {float_or_none(scale=1000)}),
    }))
    # prune pointless single chapters that span the entire duration from short videos
    is_lone_open_chapter = len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time'))
    if is_lone_open_chapter:
        chapters = None

    manifest_request = HEADRequest(update_url_query(f'{tp_url}/stream.m3u8', query))
    m3u8_url = self._request_webpage(manifest_request, video_id, 'Checking m3u8 URL').url
    if 'mpeg_cenc' in m3u8_url:
        self.report_drm(video_id)
    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        m3u8_url, video_id, 'mp4', m3u8_id='hls')

    # Fields from the thePlatform preview JSON; these are passed first to merge_dicts
    platform_info = traverse_obj(tp_metadata, {
        'title': 'title',
        'description': 'description',
        'duration': ('duration', {float_or_none(scale=1000)}),
        'timestamp': ('pubDate', {float_or_none(scale=1000)}),
        'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
        'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
        'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
        'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
        'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
    }, get_all=False)
    # Fields from the metadata embedded in the webpage
    page_info = traverse_obj(metadata, {
        'title': 'title',
        'description': 'description',
        'duration': ('durationInSeconds', {int_or_none}),
        'timestamp': ('airDate', {unified_timestamp}),
        'thumbnail': ('thumbnailUrl', {url_or_none}),
        'season_number': ('seasonNumber', {int_or_none}),
        'episode_number': ('episodeNumber', {int_or_none}),
        'episode': 'episodeTitle',
        'series': 'show',
    })

    return {
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        'chapters': chapters,
        **merge_dicts(platform_info, page_info),
    }
|
@ -495,8 +495,6 @@ def _real_extract(self, url):
|
||||
|
||||
class BrightcoveNewBaseIE(AdobePassIE):
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
formats, subtitles = [], {}
|
||||
sources = json_data.get('sources') or []
|
||||
for source in sources:
|
||||
@ -600,16 +598,18 @@ def build_format_id(kind):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': json_data.get('account_id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
'is_live': is_live,
|
||||
**traverse_obj(json_data, {
|
||||
'title': ('name', {clean_html}),
|
||||
'description': ('description', {clean_html}),
|
||||
'tags': ('tags', ..., {str}, filter, all, filter),
|
||||
'timestamp': ('published_at', {parse_iso8601}),
|
||||
'uploader_id': ('account_id', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
@ -645,10 +645,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
'uploader_id': '4036320279001',
|
||||
'formats': 'mincount:39',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# playlist stream
|
||||
'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
|
||||
@ -709,7 +706,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'TGD_01-032_5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'tags': [],
|
||||
'timestamp': 1646078943,
|
||||
'uploader_id': '1569565978001',
|
||||
'upload_date': '20220228',
|
||||
@ -721,7 +717,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'tags': [],
|
||||
'timestamp': 1651604591,
|
||||
'uploader_id': '1569565978001',
|
||||
'upload_date': '20220503',
|
||||
@ -923,10 +918,18 @@ def extract_policy_key():
|
||||
errors = json_data.get('errors')
|
||||
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
||||
custom_fields = json_data['custom_fields']
|
||||
missing_fields = ', '.join(
|
||||
key for key in ('source_url', 'software_statement') if not smuggled_data.get(key))
|
||||
if missing_fields:
|
||||
raise ExtractorError(
|
||||
f'Missing fields in smuggled data: {missing_fields}. '
|
||||
f'This video can be only extracted from the webpage where it is embedded. '
|
||||
f'Pass the URL of the embedding webpage instead of the Brightcove URL', expected=True)
|
||||
tve_token = self._extract_mvpd_auth(
|
||||
smuggled_data['source_url'], video_id,
|
||||
custom_fields['bcadobepassrequestorid'],
|
||||
custom_fields['bcadobepassresourceid'])
|
||||
custom_fields['bcadobepassresourceid'],
|
||||
smuggled_data['software_statement'])
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Accept': f'application/json;pk={policy_key}',
|
||||
|
73
yt_dlp/extractor/btvplus.py
Normal file
73
yt_dlp/extractor/btvplus.py
Normal file
@ -0,0 +1,73 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
bug_reports_message,
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
strip_or_none,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BTVPlusIE(InfoExtractor):
    """Extractor for btvplus.bg product pages (predavaniya, seriali, novini)."""

    _VALID_URL = r'https?://(?:www\.)?btvplus\.bg/produkt/(?:predavaniya|seriali|novini)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://btvplus.bg/produkt/predavaniya/67271/btv-reporterite/btv-reporterite-12-07-2025-g',
        'info_dict': {
            'ext': 'mp4',
            'id': '67271',
            'title': 'bTV Репортерите - 12.07.2025 г.',
            'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jul2025/2113606319.jpg',
        },
    }, {
        'url': 'https://btvplus.bg/produkt/seriali/66942/sezon-2/plen-sezon-2-epizod-55',
        'info_dict': {
            'ext': 'mp4',
            'id': '66942',
            'title': 'Плен - сезон 2, епизод 55',
            'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jun2025/2113595104.jpg',
        },
    }, {
        'url': 'https://btvplus.bg/produkt/novini/67270/btv-novinite-centralna-emisija-12-07-2025',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the player config referenced by the page and collect its HLS sources."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page stores the player endpoint in a JS variable named videoUrl
        player_url = self._search_regex(
            r'var\s+videoUrl\s*=\s*[\'"]([^\'"]+)[\'"]',
            webpage, 'player URL')
        player_response = self._download_json(
            urljoin('https://btvplus.bg', player_url), video_id)
        player_config = player_response['config']

        # The config is searched for the options object passed to videojs(...)
        videojs_data = self._search_json(
            r'videojs\(["\'][^"\']+["\'],', player_config, 'videojs data',
            video_id, transform_source=js_to_json)

        formats, subtitles = [], {}
        sources = traverse_obj(videojs_data, ('sources', lambda _, v: url_or_none(v['src'])))
        for source in sources:
            ext = mimetype2ext(source.get('type'))
            if ext != 'm3u8':
                self.report_warning(f'Unknown format type {ext}{bug_reports_message()}')
                continue
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                source['src'], video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        # Prefer the OpenGraph title; fall back to the product-title element
        title = strip_or_none(self._og_search_title(webpage, default=None))
        if not title:
            title = clean_html(get_element_by_class('product-title', webpage))

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'description': self._og_search_description(webpage, default=None),
        }
|
@ -11,7 +11,7 @@
|
||||
|
||||
|
||||
class CloudyCDNIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'(?:https?:)?//embed\.(?P<domain>cloudycdn\.services|backscreen\.com)/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
|
||||
@ -23,7 +23,7 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': 'D23-6000-105_cetstud',
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
|
||||
@ -33,7 +33,7 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'LV-8-5-1',
|
||||
'timestamp': 1669767167,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
|
||||
'duration': 1205,
|
||||
'upload_date': '20221130',
|
||||
},
|
||||
@ -48,9 +48,21 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'duration': 1673,
|
||||
'title': 'D24-6000-074-cetstud',
|
||||
'timestamp': 1718902233,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}, {
|
||||
'url': 'https://embed.backscreen.com/ltv/media/32j_z25-0600-127?',
|
||||
'md5': '9b6fa09ac1a4de53d4f42b94affc3b42',
|
||||
'info_dict': {
|
||||
'id': '32j_z25-0600-127',
|
||||
'ext': 'mp4',
|
||||
'title': 'Z25-0600-127-DZ',
|
||||
'duration': 1906,
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/977427/placeholder1746633646.jpg',
|
||||
'timestamp': 1746632402,
|
||||
'upload_date': '20250507',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
|
||||
@ -60,17 +72,17 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230223',
|
||||
'duration': 629,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
|
||||
'timestamp': 1677181513,
|
||||
'title': 'LIB-2',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
site_id, video_id = self._match_valid_url(url).group('site_id', 'id')
|
||||
domain, site_id, video_id = self._match_valid_url(url).group('domain', 'site_id', 'id')
|
||||
|
||||
data = self._download_json(
|
||||
f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
|
||||
f'https://player.{domain}/player/{site_id}/media/{video_id}/',
|
||||
video_id, data=urlencode_postdata({
|
||||
'version': '6.4.0',
|
||||
'referer': url,
|
||||
|
@ -1,5 +1,6 @@
|
||||
import base64
|
||||
import collections
|
||||
import contextlib
|
||||
import functools
|
||||
import getpass
|
||||
import http.client
|
||||
@ -37,7 +38,6 @@
|
||||
TransportError,
|
||||
network_exceptions,
|
||||
)
|
||||
from ..networking.impersonate import ImpersonateTarget
|
||||
from ..utils import (
|
||||
IDENTITY,
|
||||
JSON_LD_RE,
|
||||
@ -101,6 +101,7 @@
|
||||
xpath_with_ns,
|
||||
)
|
||||
from ..utils._utils import _request_dump_filename
|
||||
from ..utils.jslib import devalue
|
||||
|
||||
|
||||
class InfoExtractor:
|
||||
@ -257,11 +258,19 @@ class InfoExtractor:
|
||||
* key The key (as hex) used to decrypt fragments.
|
||||
If `key` is given, any key URI will be ignored
|
||||
* iv The IV (as hex) used to decrypt fragments
|
||||
* impersonate Impersonate target(s). Can be any of the following entities:
|
||||
* an instance of yt_dlp.networking.impersonate.ImpersonateTarget
|
||||
* a string in the format of CLIENT[:OS]
|
||||
* a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
|
||||
* a boolean value; True means any impersonate target is sufficient
|
||||
* downloader_options A dictionary of downloader options
|
||||
(For internal use only)
|
||||
* http_chunk_size Chunk size for HTTP downloads
|
||||
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
|
||||
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
|
||||
* ws (NiconicoLiveFD only) WebSocketResponse
|
||||
* ws_url (NiconicoLiveFD only) Websockets URL
|
||||
* max_quality (NiconicoLiveFD only) Max stream quality string
|
||||
* is_dash_periods Whether the format is a result of merging
|
||||
multiple DASH periods.
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
@ -331,6 +340,7 @@ class InfoExtractor:
|
||||
* "name": Name or description of the subtitles
|
||||
* "http_headers": A dictionary of additional HTTP headers
|
||||
to add to the request.
|
||||
* "impersonate": Impersonate target(s); same as the "formats" field
|
||||
"ext" will be calculated from URL if missing
|
||||
automatic_captions: Like 'subtitles'; contains automatically generated
|
||||
captions instead of normal subtitles
|
||||
@ -387,6 +397,8 @@ class InfoExtractor:
|
||||
chapters: A list of dictionaries, with the following entries:
|
||||
* "start_time" - The start time of the chapter in seconds
|
||||
* "end_time" - The end time of the chapter in seconds
|
||||
(optional: core code can determine this value from
|
||||
the next chapter's start_time or the video's duration)
|
||||
* "title" (optional, string)
|
||||
heatmap: A list of dictionaries, with the following entries:
|
||||
* "start_time" - The start time of the data point in seconds
|
||||
@ -401,7 +413,8 @@ class InfoExtractor:
|
||||
'unlisted' or 'public'. Use 'InfoExtractor._availability'
|
||||
to set it
|
||||
media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer"
|
||||
_old_archive_ids: A list of old archive ids needed for backward compatibility
|
||||
_old_archive_ids: A list of old archive ids needed for backward
|
||||
compatibility. Use yt_dlp.utils.make_archive_id to generate ids
|
||||
_format_sort_fields: A list of fields to use for sorting formats
|
||||
__post_extractor: A function to be called just before the metadata is
|
||||
written to either disk, logger or console. The function
|
||||
@ -879,26 +892,17 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
|
||||
|
||||
extensions = {}
|
||||
|
||||
if impersonate in (True, ''):
|
||||
impersonate = ImpersonateTarget()
|
||||
requested_targets = [
|
||||
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
|
||||
for t in variadic(impersonate)
|
||||
] if impersonate else []
|
||||
|
||||
available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
|
||||
available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate)
|
||||
if available_target:
|
||||
extensions['impersonate'] = available_target
|
||||
elif requested_targets:
|
||||
message = 'The extractor is attempting impersonation, but '
|
||||
message += (
|
||||
'no impersonate target is available' if not str(impersonate)
|
||||
else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
|
||||
info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation '
|
||||
'for information on installing the required dependencies')
|
||||
msg = 'The extractor is attempting impersonation'
|
||||
if require_impersonation:
|
||||
raise ExtractorError(f'{message}; {info_msg}', expected=True)
|
||||
self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True)
|
||||
raise ExtractorError(
|
||||
self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True),
|
||||
expected=True)
|
||||
self.report_warning(
|
||||
self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True)
|
||||
|
||||
try:
|
||||
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))
|
||||
@ -1675,9 +1679,9 @@ def extract_video_object(e):
|
||||
'ext': mimetype2ext(e.get('encodingFormat')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnails': [{'url': unescapeHTML(url)}
|
||||
for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
|
||||
if url_or_none(url)],
|
||||
'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), {
|
||||
'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
|
||||
})),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
# author can be an instance of 'Organization' or 'Person' types.
|
||||
@ -1778,6 +1782,59 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU
|
||||
r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
|
||||
video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
|
||||
|
||||
def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
    """Parses Next.js app router flight data that was introduced in Next.js v13

    @param webpage   HTML text scanned for `self.__next_f.push([...])` script tags
    @param video_id  ID passed on to JSON parsing and used in debug messages
    @param fatal     Passed to the parsing of each push() argument; when False,
                     a non-string webpage yields an empty dict
    @returns         A dict of hydration data. JSON object rows are keyed by their
                     hex row ID; dicts found inside `["$", "$L<hex>", _, {...}]`
                     elements are keyed by the `<hex>` part of the reference
    """
    nextjs_data = {}
    if not fatal and not isinstance(webpage, str):
        return nextjs_data

    def flatten(flight_data):
        # Walk nested lists and collect the data dict of every "$" element
        if not isinstance(flight_data, list):
            return
        if len(flight_data) == 4 and flight_data[0] == '$':
            _, name, _, data = flight_data
            if not isinstance(data, dict):
                return
            # Children are traversed separately and not stored with the parent
            children = data.pop('children', None)
            if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name):
                # It is useful hydration JSON data
                nextjs_data[name[2:]] = data
            flatten(children)
            return
        for f in flight_data:
            flatten(f)

    flight_chunks = []
    # The pattern for the surrounding JS/tag should be strict as it's a hardcoded string in the next.js source
    # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L189
    for flight_segment in re.findall(r'<script\b[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
        segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
        # Some earlier versions of next.js "optimized" away this array structure; this is unsupported
        # Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761
        if not isinstance(segment, list) or len(segment) != 2:
            self.write_debug(
                f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
            continue
        # Only use the relevant payload type (1 == data)
        # Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L11-L14
        payload_type, chunk = segment
        if payload_type != 1:
            continue
        # A data payload that is not text cannot be concatenated; skip it
        # instead of failing with a bare TypeError
        if not isinstance(chunk, str):
            self.write_debug(
                f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
            continue
        flight_chunks.append(chunk)

    # Rows may be split across several push() calls, so join before splitting into lines
    flight_text = ''.join(flight_chunks)

    for f in flight_text.splitlines():
        prefix, _, body = f.lstrip().partition(':')
        if not re.fullmatch(r'[0-9a-f]+', prefix):
            continue
        # The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal
        if body.startswith('[') and body.endswith(']'):
            flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
        elif body.startswith('{') and body.endswith('}'):
            data = self._parse_json(body, video_id, fatal=False, errnote=False)
            if data is not None:
                nextjs_data[prefix] = data

    return nextjs_data
|
||||
|
||||
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
||||
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||
rectx = re.escape(context_name)
|
||||
@ -1795,6 +1852,63 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
|
||||
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
|
||||
return traverse_obj(ret, traverse) or {}
|
||||
|
||||
def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
    """Resolves Nuxt rich JSON payload arrays

    Passing a `default` makes resolution non-fatal. Problems are then only
    written to the debug log, and `default` is returned when the input is
    invalid or the resolved value is falsy. Without a `default`, the
    fallback value is an empty dict.
    """
    # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
    #      https://github.com/nuxt/nuxt/pull/19205
    has_default = default is not NO_DEFAULT
    if has_default:
        fatal = False
    fallback = default if has_default else {}

    if not (isinstance(array, list) and array):
        error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
        if fatal:
            raise ExtractorError(error_msg, video_id=video_id)
        if not has_default:
            self.report_warning(error_msg, video_id=video_id)
        return fallback

    def indirect_reviver(data):
        return data

    def json_reviver(data):
        return json.loads(data)

    gen = devalue.parse_iter(array, revivers={
        'NuxtError': indirect_reviver,
        'EmptyShallowRef': json_reviver,
        'EmptyRef': json_reviver,
        'ShallowRef': indirect_reviver,
        'ShallowReactive': indirect_reviver,
        'Ref': indirect_reviver,
        'Reactive': indirect_reviver,
    })

    # Each value sent back by the iterator is reported as a resolution error;
    # the resolved data arrives as the StopIteration value
    while True:
        try:
            problem = gen.send(None)
        except StopIteration as error:
            return error.value or fallback

        error_msg = f'Error resolving Nuxt JSON: {problem}'
        if fatal:
            raise ExtractorError(error_msg, video_id=video_id)
        if has_default:
            self.write_debug(f'{video_id}: {error_msg}', only_once=True)
        else:
            self.report_warning(error_msg, video_id=video_id, only_once=True)
|
||||
|
||||
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
    """Parses metadata from Nuxt rich JSON payloads embedded in HTML

    Finds the JSON array inside the `__NUXT_DATA__` script tag and passes it
    to `_resolve_nuxt_array`. When no array is found, returns `default` if one
    was given, otherwise an empty dict.
    """
    passed_default = default is not NO_DEFAULT
    # When the caller gave a default, search with None so a miss is detectable below
    search_default = None if passed_default else NO_DEFAULT

    array = self._search_json(
        r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage,
        'Nuxt JSON data', video_id, contains_pattern=r'\[(?s:.+)\]',
        fatal=fatal, default=search_default)

    if array:
        return self._resolve_nuxt_array(array, video_id, fatal=fatal, default=default)
    if passed_default:
        return default
    return {}
|
||||
|
||||
@staticmethod
|
||||
def _hidden_inputs(html):
|
||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||
@ -2068,21 +2182,33 @@ def _extract_m3u8_formats_and_subtitles(
|
||||
raise ExtractorError(errnote, video_id=video_id)
|
||||
self.report_warning(f'{errnote}{bug_reports_message()}')
|
||||
return [], {}
|
||||
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note='Downloading m3u8 information' if note is None else note,
|
||||
errnote='Failed to download m3u8 information' if errnote is None else errnote,
|
||||
if note is None:
|
||||
note = 'Downloading m3u8 information'
|
||||
if errnote is None:
|
||||
errnote = 'Failed to download m3u8 information'
|
||||
response = self._request_webpage(
|
||||
m3u8_url, video_id, note=note, errnote=errnote,
|
||||
fatal=fatal, data=data, headers=headers, query=query)
|
||||
|
||||
if res is False:
|
||||
if response is False:
|
||||
return [], {}
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.url
|
||||
with contextlib.closing(response):
|
||||
prefix = response.read(512)
|
||||
if not prefix.startswith(b'#EXTM3U'):
|
||||
msg = 'Response data has no m3u header'
|
||||
if fatal:
|
||||
raise ExtractorError(msg, video_id=video_id)
|
||||
self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id)
|
||||
return [], {}
|
||||
|
||||
content = self._webpage_read_content(
|
||||
response, m3u8_url, video_id, note=note, errnote=errnote,
|
||||
fatal=fatal, prefix=prefix, data=data)
|
||||
if content is False:
|
||||
return [], {}
|
||||
|
||||
return self._parse_m3u8_formats_and_subtitles(
|
||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||
content, response.url, ext=ext, entry_protocol=entry_protocol,
|
||||
preference=preference, quality=quality, m3u8_id=m3u8_id,
|
||||
note=note, errnote=errnote, fatal=fatal, live=live, data=data,
|
||||
headers=headers, query=query, video_id=video_id)
|
||||
|
@ -1,49 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class CTVIE(InfoExtractor):
    """Extractor for ctv.ca show and movie pages; delegates to NineCNineMedia."""

    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
        'info_dict': {
            'id': '2102249',
            'ext': 'flv',
            'title': 'Wednesday, December 23, 2020',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
            'timestamp': 1608732000,
            'upload_date': '20201223',
            'series': 'Your Morning',
            'season': '2020-2021',
            'season_number': 5,
            'episode_number': 88,
            'tags': ['Your Morning'],
            'categories': ['Talk Show'],
            'duration': 7467.126,
        },
    }, {
        'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look up the page path via GraphQL and hand the result to NineCNineMedia."""
        display_id = self._match_id(url)
        # The GraphQL document contains literal braces, so %-formatting is used
        graphql_query = '''{
resolvedPath(path: "/%s") {
lastSegment {
content {
... on AxisContent {
axisId
videoPlayerDestCode
}
}
}
}
}''' % display_id  # noqa: UP031
        response = self._download_json(
            'https://www.ctv.ca/space-graphql/graphql', display_id,
            query={'query': graphql_query})
        content = response['data']['resolvedPath']['lastSegment']['content']

        video_id = content['axisId']
        dest_code = content['videoPlayerDestCode']
        return self.url_result(
            f'9c9media:{dest_code}:{video_id}', 'NineCNineMedia', video_id)
|
@ -11,8 +11,14 @@
|
||||
|
||||
class DangalPlayBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'dangalplay'
|
||||
_REGION = 'IN'
|
||||
_OTV_USER_ID = None
|
||||
_LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage'
|
||||
_LOGIN_HINT = (
|
||||
'Pass credentials as -u "token" -p "USER_ID" '
|
||||
'(where USER_ID is the value of "otv_user_id" in your browser local storage). '
|
||||
'Your login region can be optionally suffixed to the username as @REGION '
|
||||
'(where REGION is the two-letter "region" code found in your browser local storage), '
|
||||
'e.g.: -u "token@IN" -p "USER_ID"')
|
||||
_API_BASE = 'https://ottapi.dangalplay.com'
|
||||
_AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
|
||||
_SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above
|
||||
@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor):
|
||||
def _perform_login(self, username, password):
|
||||
if self._OTV_USER_ID:
|
||||
return
|
||||
if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password):
|
||||
mobj = re.fullmatch(r'token(?:@(?P<region>[A-Z]{2}))?', username)
|
||||
if not mobj or not re.fullmatch(r'[\da-f]{32}', password):
|
||||
raise ExtractorError(self._LOGIN_HINT, expected=True)
|
||||
if region := mobj.group('region'):
|
||||
self._REGION = region
|
||||
self.write_debug(f'Setting login region to "{self._REGION}"')
|
||||
self._OTV_USER_ID = password
|
||||
|
||||
def _real_initialize(self):
|
||||
@ -52,7 +62,7 @@ def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=Tr
|
||||
f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
|
||||
headers={'Accept': 'application/json'}, query={
|
||||
'auth_token': self._AUTH_TOKEN,
|
||||
'region': 'IN',
|
||||
'region': self._REGION,
|
||||
**query,
|
||||
})
|
||||
|
||||
@ -106,7 +116,7 @@ def _generate_api_data(self, data):
|
||||
'catalog_id': catalog_id,
|
||||
'content_id': content_id,
|
||||
'category': '',
|
||||
'region': 'IN',
|
||||
'region': self._REGION,
|
||||
'auth_token': self._AUTH_TOKEN,
|
||||
'id': self._OTV_USER_ID,
|
||||
'md5': hashlib.md5(unhashed.encode()).hexdigest(),
|
||||
@ -129,11 +139,14 @@ def _real_extract(self, url):
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 422:
|
||||
error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
|
||||
if error_info.get('code') == '1016':
|
||||
error_code = error_info.get('code')
|
||||
if error_code == '1016':
|
||||
self.raise_login_required(
|
||||
f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
|
||||
elif msg := error_info.get('message'):
|
||||
raise ExtractorError(msg)
|
||||
elif error_code == '4028':
|
||||
self.raise_login_required(
|
||||
f'Your login region is unspecified or incorrect. {self._LOGIN_HINT}', method=None)
|
||||
raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': '))
|
||||
raise
|
||||
|
||||
m3u8_url = traverse_obj(details, (
|
||||
|
@ -206,7 +206,7 @@ def _real_extract(self, url):
|
||||
'is_live': True,
|
||||
**traverse_obj(room, {
|
||||
'display_id': ('url', {str}, {lambda i: i[1:]}),
|
||||
'title': ('room_name', {unescapeHTML}),
|
||||
'title': ('room_name', {str}, {unescapeHTML}),
|
||||
'description': ('show_details', {str}),
|
||||
'uploader': ('nickname', {str}),
|
||||
'thumbnail': ('room_src', {url_or_none}),
|
||||
|
@ -64,7 +64,7 @@ class DreiSatIE(ZDFBaseIE):
|
||||
'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
|
||||
'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844',
|
||||
'duration': 5399.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903',
|
||||
'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1747256996338',
|
||||
'chapters': 'count:24',
|
||||
'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
|
||||
'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb',
|
||||
|
@ -329,6 +329,7 @@ class WatchESPNIE(AdobePassIE):
|
||||
}]
|
||||
|
||||
_API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'
|
||||
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyZGJmZWM4My03OWE1LTQyNzEtYTVmZC04NTZjYTMxMjRjNjMiLCJuYmYiOjE1NDAyMTI3NjEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQwMjEyNzYxfQ.yaK3r4AI2uLVvsyN1GLzqzgzRlxMPtasSaiYYBV0wIstqih5tvjTmeoLmi8Xy9Kp_U7Md-bOffwiyK3srHkpUkhhwXLH2x6RPjmS1tPmhaG7-3LBcHTf2ySPvXhVf7cN4ngldawK4tdtLtsw6rF_JoZE2yaC6XbS2F51nXSFEDDnOQWIHEQRG3aYAj-38P2CLGf7g-Yfhbp5cKXeksHHQ90u3eOO4WH0EAjc9oO47h33U8KMEXxJbvjV5J8Va2G2fQSgLDZ013NBI3kQnE313qgqQh2feQILkyCENpB7g-TVBreAjOaH1fU471htSoGGYepcAXv-UDtpgitDiLy7CQ'
|
||||
|
||||
def _call_bamgrid_api(self, path, video_id, payload=None, headers={}):
|
||||
if 'Authorization' not in headers:
|
||||
@ -405,8 +406,8 @@ def _real_extract(self, url):
|
||||
|
||||
# TV Provider required
|
||||
else:
|
||||
resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
|
||||
auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()
|
||||
resource = self._get_mvpd_resource('espn1', video_data['name'], video_id, None)
|
||||
auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource, self._SOFTWARE_STATEMENT).encode()
|
||||
|
||||
asset = self._download_json(
|
||||
f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
|
||||
|
92
yt_dlp/extractor/faulio.py
Normal file
92
yt_dlp/extractor/faulio.py
Normal file
@ -0,0 +1,92 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FaulioLiveIE(InfoExtractor):
    """Extract live channels from the sites listed in ``_DOMAINS``.

    The channel list is fetched from the API whose base URL is published in
    the page's inlined Nuxt config; the resulting video ID is prefixed with
    that API's hostname.
    """

    _DOMAINS = (
        'aloula.sba.sa',
        'maraya.sba.net.ae',
        'sat7plus.org',
    )
    _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P<id>[a-zA-Z0-9-]+)'
    _TESTS = [{
        'url': 'https://aloula.sba.sa/live/saudiatv',
        'info_dict': {
            'id': 'aloula.faulio.com_saudiatv',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://maraya.sba.net.ae/live/1',
        'info_dict': {
            'id': 'maraya.faulio.com_1',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://sat7plus.org/live/pars',
        'info_dict': {
            'id': 'sat7.faulio.com_pars',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://sat7plus.org/fa/live/arabic',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the live channel addressed by ``url``."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The API base URL is read from the Nuxt config object inlined in the page
        nuxt_config = self._search_json(
            r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json)
        api_base = nuxt_config['public']['TRANSLATIONS_API_URL']

        # Pick the first channel entry whose 'url' field equals the ID from the page URL
        all_channels = self._download_json(f'{api_base}/channels', video_id)
        channel = traverse_obj(all_channels, (lambda _, v: v['url'] == video_id, any))

        formats, subtitles = [], {}

        hls_url = traverse_obj(channel, ('streams', 'hls', {url_or_none}))
        if hls_url:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        mpd_url = traverse_obj(channel, ('streams', 'mpd', {url_or_none}))
        if mpd_url:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                mpd_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subtitles, target=subtitles)

        # Build the result incrementally; key order matches id -> metadata -> media fields
        info = {'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}'}
        info.update(traverse_obj(channel, {
            'title': ('title', {str}),
            'description': ('description', {str}),
        }))
        info['formats'] = formats
        info['subtitles'] = subtitles
        info['is_live'] = True
        return info
|
@ -17,8 +17,140 @@
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FloatplaneIE(InfoExtractor):
|
||||
class FloatplaneBaseIE(InfoExtractor):
    """Shared post extraction logic for Floatplane extractors.

    Subclasses are expected to provide the attributes read below:
    ``_BASE_URL`` (site origin used to build API and channel URLs),
    ``_IMPERSONATE_TARGET`` (passed as ``impersonate=`` to every request; when
    it is not ``None`` the HLS playlist and AES key are downloaded up front)
    and ``_HEADERS`` (sent with those media requests and returned as
    ``http_headers``).
    """

    def _real_extract(self, url):
        """Extract every video/audio attachment of the post addressed by ``url``.

        Returns a playlist result when the post has more than one attachment,
        otherwise a single info dict combining post and attachment metadata.

        Raises ExtractorError if the post metadata reports neither video nor
        audio, or if no attachment could be found.
        """
        post_id = self._match_id(url)

        post_data = self._download_json(
            f'{self._BASE_URL}/api/v3/content/post', post_id, query={'id': post_id},
            note='Downloading post data', errnote='Unable to download post data',
            impersonate=self._IMPERSONATE_TARGET)

        if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
            raise ExtractorError('Post does not contain a video or audio track', expected=True)

        uploader_url = format_field(
            post_data, [('creator', 'urlname')], f'{self._BASE_URL}/channel/%s/home') or None

        # Metadata shared by the post-level result and every attachment entry
        common_info = {
            'uploader_url': uploader_url,
            'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
            'availability': self._availability(needs_subscription=True),
            **traverse_obj(post_data, {
                'uploader': ('creator', 'title', {str}),
                'uploader_id': ('creator', 'id', {str}),
                'channel': ('channel', 'title', {str}),
                'channel_id': ('channel', 'id', {str}),
                'release_timestamp': ('releaseDate', {parse_iso8601}),
            }),
        }

        items = []
        for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
            media_id = media['id']
            media_typ = media.get('type') or 'video'

            metadata = self._download_json(
                f'{self._BASE_URL}/api/v3/content/{media_typ}', media_id, query={'id': media_id},
                note=f'Downloading {media_typ} metadata', impersonate=self._IMPERSONATE_TARGET)

            stream = self._download_json(
                f'{self._BASE_URL}/api/v2/cdn/delivery', media_id, query={
                    'type': 'vod' if media_typ == 'video' else 'aod',
                    'guid': metadata['guid'],
                }, note=f'Downloading {media_typ} stream data',
                impersonate=self._IMPERSONATE_TARGET)

            path_template = traverse_obj(stream, ('resource', 'uri', {str}))

            def format_path(params):
                """Fill the ``{qualityLevelParams.<key>}`` placeholders of the URI template."""
                if path_template is None:
                    # No usable template in the response: let the caller skip this format
                    return None
                path = path_template
                for key, val in (params or {}).items():
                    path = path.replace(f'{{qualityLevelParams.{key}}}', val)
                return path

            formats = []
            for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
                # Named format_url so the method's ``url`` parameter is not shadowed
                format_url = urljoin(stream['cdn'], format_path(traverse_obj(
                    stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
                if not format_url:
                    # Without a URL there is nothing to download; skip instead of crashing below
                    continue
                format_id = traverse_obj(quality, ('name', {str}))
                hls_aes = {}
                m3u8_data = None

                # If we need impersonation for the API, then we need it for HLS keys too: extract in advance
                if self._IMPERSONATE_TARGET is not None:
                    m3u8_data = self._download_webpage(
                        format_url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS,
                        note=join_nonempty('Downloading', format_id, 'm3u8 information', delim=' '),
                        errnote=join_nonempty('Failed to download', format_id, 'm3u8 information', delim=' '))
                    if not m3u8_data:
                        continue

                    key_url = self._search_regex(
                        r'#EXT-X-KEY:METHOD=AES-128,URI="(https?://[^"]+)"',
                        m3u8_data, 'HLS AES key URI', default=None)
                    if key_url:
                        urlh = self._request_webpage(
                            key_url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS,
                            note=join_nonempty('Downloading', format_id, 'HLS AES key', delim=' '),
                            errnote=join_nonempty('Failed to download', format_id, 'HLS AES key', delim=' '))
                        if urlh:
                            hls_aes['key'] = urlh.read().hex()

                formats.append({
                    **traverse_obj(quality, {
                        'format_note': ('label', {str}),
                        'width': ('width', {int}),
                        'height': ('height', {int}),
                    }),
                    **parse_codecs(quality.get('codecs')),
                    'url': format_url,
                    # Determine the extension from the part of the URL before '/chunk.m3u8'
                    'ext': determine_ext(format_url.partition('/chunk.m3u8')[0], 'mp4'),
                    'format_id': format_id,
                    'hls_media_playlist_data': m3u8_data,
                    'hls_aes': hls_aes or None,
                })
            items.append({
                **common_info,
                'id': media_id,
                **traverse_obj(metadata, {
                    'title': ('title', {str}),
                    'duration': ('duration', {int_or_none}),
                    'thumbnail': ('thumbnail', 'path', {url_or_none}),
                }),
                'formats': formats,
            })

        if not items:
            # The metadata flags claimed media but no attachment was listed:
            # fail explicitly rather than with an IndexError on items[0]
            raise ExtractorError('Unable to find any video or audio attachments for this post')

        post_info = {
            **common_info,
            'id': post_id,
            'display_id': post_id,
            **traverse_obj(post_data, {
                'title': ('title', {str}),
                'description': ('text', {clean_html}),
                'like_count': ('likes', {int_or_none}),
                'dislike_count': ('dislikes', {int_or_none}),
                'comment_count': ('comments', {int_or_none}),
                'thumbnail': ('thumbnail', 'path', {url_or_none}),
            }),
            'http_headers': self._HEADERS,
        }

        if len(items) > 1:
            return self.playlist_result(items, **post_info)

        # Single attachment: its fields (id, title, formats, ...) override the post-level ones
        post_info.update(items[0])
        return post_info
|
||||
|
||||
|
||||
class FloatplaneIE(FloatplaneBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P<id>\w+)'
|
||||
_BASE_URL = 'https://www.floatplane.com'
|
||||
_IMPERSONATE_TARGET = None
|
||||
_HEADERS = {
|
||||
'Origin': _BASE_URL,
|
||||
'Referer': f'{_BASE_URL}/',
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'https://www.floatplane.com/post/2Yf3UedF7C',
|
||||
'info_dict': {
|
||||
@ -170,105 +302,9 @@ class FloatplaneIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://www.floatplane.com').get('sails.sid'):
|
||||
if not self._get_cookies(self._BASE_URL).get('sails.sid'):
|
||||
self.raise_login_required()
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
|
||||
post_data = self._download_json(
|
||||
'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id},
|
||||
note='Downloading post data', errnote='Unable to download post data')
|
||||
|
||||
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
|
||||
raise ExtractorError('Post does not contain a video or audio track', expected=True)
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
|
||||
common_info = {
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
**traverse_obj(post_data, {
|
||||
'uploader': ('creator', 'title', {str}),
|
||||
'uploader_id': ('creator', 'id', {str}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
'channel_id': ('channel', 'id', {str}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
items = []
|
||||
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
|
||||
media_id = media['id']
|
||||
media_typ = media.get('type') or 'video'
|
||||
|
||||
metadata = self._download_json(
|
||||
f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id},
|
||||
note=f'Downloading {media_typ} metadata')
|
||||
|
||||
stream = self._download_json(
|
||||
'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={
|
||||
'type': 'vod' if media_typ == 'video' else 'aod',
|
||||
'guid': metadata['guid'],
|
||||
}, note=f'Downloading {media_typ} stream data')
|
||||
|
||||
path_template = traverse_obj(stream, ('resource', 'uri', {str}))
|
||||
|
||||
def format_path(params):
|
||||
path = path_template
|
||||
for i, val in (params or {}).items():
|
||||
path = path.replace(f'{{qualityLevelParams.{i}}}', val)
|
||||
return path
|
||||
|
||||
formats = []
|
||||
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
|
||||
url = urljoin(stream['cdn'], format_path(traverse_obj(
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
|
||||
formats.append({
|
||||
**traverse_obj(quality, {
|
||||
'format_id': ('name', {str}),
|
||||
'format_note': ('label', {str}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
**parse_codecs(quality.get('codecs')),
|
||||
'url': url,
|
||||
'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'),
|
||||
})
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
post_info = {
|
||||
**common_info,
|
||||
'id': post_id,
|
||||
'display_id': post_id,
|
||||
**traverse_obj(post_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('text', {clean_html}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'dislike_count': ('dislikes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
if len(items) > 1:
|
||||
return self.playlist_result(items, **post_info)
|
||||
|
||||
post_info.update(items[0])
|
||||
return post_info
|
||||
|
||||
|
||||
class FloatplaneChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
|
||||
|
@ -1,9 +1,7 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = urllib.parse.unquote(self._match_id(url))
|
||||
|
||||
try: # yt-dlp's default user-agents are too old and blocked by the site
|
||||
webpage = self._download_webpage(url, display_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
||||
raise
|
||||
# Retry with impersonation if hardcoded UA is insufficient
|
||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
data = self._search_json(
|
||||
r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>',
|
||||
|
@ -1,4 +1,3 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
@ -19,7 +18,11 @@
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
get_first,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class FranceTVBaseInfoExtractor(InfoExtractor):
|
||||
@ -121,9 +124,10 @@ def _extract_video(self, video_id, hostname=None):
|
||||
elif code := traverse_obj(dinfo, ('code', {int})):
|
||||
if code == 2009:
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
elif code in (2015, 2017):
|
||||
elif code in (2015, 2017, 2019):
|
||||
# 2015: L'accès à cette vidéo est impossible. (DRM-only)
|
||||
# 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM)
|
||||
# 2019: L'accès à cette vidéo est incompatible avec votre configuration. (DRM-only)
|
||||
drm_formats = True
|
||||
continue
|
||||
self.report_warning(
|
||||
@ -258,7 +262,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
_TESTS = [{
|
||||
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
|
||||
'info_dict': {
|
||||
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba'
|
||||
'id': 'b2cf9fd8-e971-4757-8651-848f2772df61', # old: ec217ecc-0733-48cf-ac06-af1347b849d1
|
||||
'ext': 'mp4',
|
||||
'title': '13h15, le dimanche... - Les mystères de Jésus',
|
||||
'timestamp': 1502623500,
|
||||
@ -269,7 +273,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [FranceTVIE.ie_key()],
|
||||
'skip': 'Unfortunately, this video is no longer available',
|
||||
}, {
|
||||
# geo-restricted
|
||||
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
|
||||
@ -287,7 +291,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 1441,
|
||||
},
|
||||
'skip': 'No longer available',
|
||||
'skip': 'Unfortunately, this video is no longer available',
|
||||
}, {
|
||||
# geo-restricted livestream (workflow == 'token-akamai')
|
||||
'url': 'https://www.france.tv/france-4/direct.html',
|
||||
@ -308,6 +312,19 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'params': {'skip_download': 'livestream'},
|
||||
}, {
|
||||
# Not geo-restricted
|
||||
'url': 'https://www.france.tv/france-2/la-maison-des-maternelles/5574051-nous-sommes-amis-et-nous-avons-fait-un-enfant-ensemble.html',
|
||||
'info_dict': {
|
||||
'id': 'b448bfe4-9fe7-11ee-97d8-2ba3426fa3df',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nous sommes amis et nous avons fait un enfant ensemble - Émission du jeudi 21 décembre 2023',
|
||||
'duration': 1065,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1703147921,
|
||||
'upload_date': '20231221',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# france3
|
||||
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
|
||||
@ -342,30 +359,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
|
||||
|
||||
nextjs_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children'))
|
||||
|
||||
if traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)):
|
||||
if get_first(nextjs_data, ('isLive', {bool})):
|
||||
# For livestreams we need the id of the stream instead of the currently airing episode id
|
||||
video_id = traverse_obj(nextjs_data, (
|
||||
..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ...,
|
||||
'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)),
|
||||
'options', 'id', {str}, any))
|
||||
video_id = get_first(nextjs_data, ('options', 'id', {str}))
|
||||
else:
|
||||
video_id = traverse_obj(nextjs_data, (
|
||||
..., ..., ..., 'children',
|
||||
lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path,
|
||||
'video', ('playerReplayId', 'siId'), {str}, any))
|
||||
video_id = get_first(nextjs_data, ('video', ('playerReplayId', 'siId'), {str}))
|
||||
|
||||
if not video_id:
|
||||
raise ExtractorError('Unable to extract video ID')
|
||||
|
@ -1481,30 +1481,6 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['SenateISVP'],
|
||||
},
|
||||
{
|
||||
# Limelight embeds (1 channel embed + 4 media embeds)
|
||||
'url': 'http://www.sedona.com/FacilitatorTraining2017',
|
||||
'info_dict': {
|
||||
'id': 'FacilitatorTraining2017',
|
||||
'title': 'Facilitator Training 2017',
|
||||
},
|
||||
'playlist_mincount': 5,
|
||||
},
|
||||
{
|
||||
# Limelight embed (LimelightPlayerUtil.embed)
|
||||
'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
|
||||
'info_dict': {
|
||||
'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
|
||||
'ext': 'mp4',
|
||||
'title': '07448641',
|
||||
'timestamp': 1499890639,
|
||||
'upload_date': '20170712',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['LimelightMedia'],
|
||||
},
|
||||
{
|
||||
'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
|
||||
'info_dict': {
|
||||
|
@ -7,161 +7,157 @@
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GoIE(AdobePassIE):
|
||||
_SITE_INFO = {
|
||||
'abc': {
|
||||
'brand': '001',
|
||||
'requestor_id': 'ABC',
|
||||
'requestor_id': 'dtci',
|
||||
'provider_id': 'ABC',
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI4OTcwMjlkYS0yYjM1LTQyOWUtYWQ0NS02ZjZiZjVkZTdhOTUiLCJuYmYiOjE2MjAxNzM5NjksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNjIwMTczOTY5fQ.SC69DVJWSL8sIe-vVUrP6xS_kzHKqwz9PdKYexs_y-f7Vin6mM-7S-W1TE_-K55O0pyf-TL4xYgvm6LIye8CckG-nZfVwNPV4huduov0jmIcxCQFeUwkHULG2IaA44wfBVUBdaHgkhPweZ2amjycO_IXtez-gBXOLbE3B7Gx9j_5ISCFtyVUblThKfoGyQv6KT6t8Vpmc4ZSKCCQp74KWFFypydb9ucego1taW_nQD06Cdf4yByLd6NaTBceMcIKbug9b9gxFm3XBgJ5q3z7KGo1Kr6XalAV5j4m-fQ91wczlTilX8FM4AljMupyRM9mA_aEADILQ4hS79q4SM0w6w',
|
||||
},
|
||||
'freeform': {
|
||||
'brand': '002',
|
||||
'requestor_id': 'ABCFamily',
|
||||
},
|
||||
'watchdisneychannel': {
|
||||
'brand': '004',
|
||||
'resource_id': 'Disney',
|
||||
},
|
||||
'watchdisneyjunior': {
|
||||
'brand': '008',
|
||||
'resource_id': 'DisneyJunior',
|
||||
},
|
||||
'watchdisneyxd': {
|
||||
'brand': '009',
|
||||
'resource_id': 'DisneyXD',
|
||||
'provider_id': 'ABCFamily',
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZWM2MGYyNC0xYzRjLTQ1NzQtYjc0Zi03ZmM4N2E5YWMzMzgiLCJuYmYiOjE1ODc2NjU5MjMsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NjY1OTIzfQ.flCn3dhvmvPnWmV0JV8Fm0YFyj07yPez9-n1GFEwVIm_S2wQVWbWyJhqsAyLZVFrhOMZYTqmPS3OHxGwTwXkEYn6PD7o_vIVG3oqi-Xn1m5jRt_Gazw5qEtpat6VE7bvKGSD3ZhcidOrsCk8NcYyq75u61NHDvSl81pcedJjVRVUpsqrEwmo0aVbA0C8PX3ri0mEbGvkMKvHn8E60xp-PSE-VK8SDT0plwPu_TwUszkZ6-_I8_2xcv_WBqcXFkAVg7Q-iNJXgQvmNsrpcrYuLvi6hEH4ZLtoDcXU6MhwTQAJTiHSo8x9aHX1_qFP09CzlNOFQbC2ZEJdP9SvA53SLQ',
|
||||
},
|
||||
'disneynow': {
|
||||
'brand': '011',
|
||||
'brand': '011', # also: '004', '008', '009'
|
||||
'requestor_id': 'DisneyChannels',
|
||||
'provider_id': 'DisneyChannels',
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzAzNTRiOS04NDNiLTRkNjAtYTQ3ZS0yNzk1MzlkOTIyNTciLCJuYmYiOjE1NTg5ODc0NDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTU4OTg3NDQ5fQ.Jud6YS6-J2h0h6po0oMheDym0qRTJQGj4kzacrz4DFuEwhcBkkykW6pF5pKuAUJy9HCZ40oDAHe2KcTlDJjCZF5tDaUEfdihakZ9cC_rG7MU-QoRne8qaB_dPDKwGuk-ZyWD8eV3zwTJmbGo8hDxYTEU81YNCxwhyc_BPDr5TYiubbmpP3_pTnXmSpuL58isJ2peSKWlX9BacuXtBY25c_QnPFKk-_EETm7IHkTpDazde1QfHWGu4s4yJpKGk8RVVujVG6h6ELlL-ZeYLilBm7iS7h1TYG1u7fJhyZRL7isaom6NvAzsvN3ngss1fLwt8decP8wzdFHrbYTdTjW8qw',
|
||||
'resource_id': 'Disney',
|
||||
},
|
||||
'fxnow.fxnetworks': {
|
||||
'brand': '025',
|
||||
'fxnetworks': {
|
||||
'brand': '025', # also: '020'
|
||||
'requestor_id': 'dtci',
|
||||
'provider_id': 'fx', # also 'fxx', 'fxm'
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIzYWRhYWZiNC02OTAxLTRlYzktOTdmNy1lYWZkZTJkODJkN2EiLCJuYmYiOjE1NjIwMjQwNzYsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDI0MDc2fQ.dhKMpZK50AObbZYrMiYPSfWtzXHUaeMP3jrIY4Cgfvh0GaEgk0Mns_zp78jypFeZgRtPVleQMQDNq2YEloRLcAGqP1aa6WVDglnK77ZWUm4IKai14Rwf3A6YBhSRoO2_lMmUGkuTf6gZY-kMIPqBYKqzTQiQl4HbniPFodIzFRiuI9QJVrkoyTGrJL4oqiX08PoFI3Z-TOti1Heu3EbFC-GveQHhlinYrzU7rbiAqLEz7FImtfBDsnXX1Y3uJDLYM3Bq4Oh0nrzTv1Fd62wNsCNErHHIbELidh1zZF0ujvt7ReuZUwAitm0UhEJ7OxNOUbEQWtae6pVNscvdvTFMpg',
|
||||
},
|
||||
'nationalgeographic': {
|
||||
'brand': '026', # also '023'
|
||||
'requestor_id': 'dtci',
|
||||
'provider_id': 'ngc', # also 'ngw'
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxMzE4YTM1Ni05Mjc4LTQ4NjEtYTFmNi1jMTIzMzg1ZWMzYzMiLCJuYmYiOjE1NjIwMjM4MjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDIzODI4fQ.Le-2OzF9-jrhJ7ZfWtLWk5iSHGVZoxeU1w0_fO--Heli0OwRZsRq2slSmx-oZTzxuWmAgDEiBkWSDcDK6sM25DrCLsdsJa3MBuZ-slBRtH8aq3HpNoqqLkU-vg6gRUEKMtwBUtwCu_9aKUCayYtndWv4b1DjVQeSrteOW5NNudWVYleAe0kxeNJQHo5If9SCzDudKVJktFUjhNks4QPOC_uONPkRRlL9D0fNvtOY-LRFckfcHhf5z9l1iZjeukV0YhdKnuw1wyiaWrQXBUDiBfbkCRd2DM-KnelqPxfiXCaTjGKDURRBO3pz33ebge3IFXSiU5vl4qHQ8xvunzGpFw',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<sub_domain>
|
||||
(?:{}\.)?go|fxnow\.fxnetworks|
|
||||
(?:www\.)?(?:abc|freeform|disneynow)
|
||||
)\.com/
|
||||
(?:
|
||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
|
||||
)
|
||||
'''.format(r'\.|'.join(list(_SITE_INFO.keys())))
|
||||
_URL_PATH_RE = r'(?:video|episode|movies-and-specials)/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_VALID_URL = [
|
||||
fr'https?://(?:www\.)?(?P<site>abc)\.com/{_URL_PATH_RE}',
|
||||
fr'https?://(?:www\.)?(?P<site>freeform)\.com/{_URL_PATH_RE}',
|
||||
fr'https?://(?:www\.)?(?P<site>disneynow)\.com/{_URL_PATH_RE}',
|
||||
fr'https?://fxnow\.(?P<site>fxnetworks)\.com/{_URL_PATH_RE}',
|
||||
fr'https?://(?:www\.)?(?P<site>nationalgeographic)\.com/tv/{_URL_PATH_RE}',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'url': 'https://abc.com/episode/4192c0e6-26e5-47a8-817b-ce8272b9e440/playlist/PL551127435',
|
||||
'info_dict': {
|
||||
'id': 'VDKA3807643',
|
||||
'id': 'VDKA10805898',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Traitor in the White House',
|
||||
'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This content is no longer available.',
|
||||
}, {
|
||||
'url': 'https://disneynow.com/shows/big-hero-6-the-series',
|
||||
'info_dict': {
|
||||
'title': 'Doraemon',
|
||||
'id': 'SH55574025',
|
||||
},
|
||||
'playlist_mincount': 51,
|
||||
}, {
|
||||
'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
|
||||
'info_dict': {
|
||||
'id': 'VDKA3609139',
|
||||
'title': 'This Guilty Blood',
|
||||
'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
|
||||
'title': 'Switch the Flip',
|
||||
'description': 'To help get Brian’s life in order, Stewie and Brian swap bodies using a machine that Stewie invents.',
|
||||
'age_limit': 14,
|
||||
'duration': 1297,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'Family Guy',
|
||||
'season': 'Season 16',
|
||||
'season_number': 16,
|
||||
'episode': 'Episode 17',
|
||||
'episode_number': 17,
|
||||
'timestamp': 1746082800.0,
|
||||
'upload_date': '20250501',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://disneynow.com/episode/21029660-ba06-4406-adb0-a9a78f6e265e/playlist/PL553044961',
|
||||
'info_dict': {
|
||||
'id': 'VDKA39546942',
|
||||
'ext': 'mp4',
|
||||
'title': 'Zero Friends Again',
|
||||
'description': 'Relationships fray under the pressures of a difficult journey.',
|
||||
'age_limit': 0,
|
||||
'duration': 1721,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'Star Wars: Skeleton Crew',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 6',
|
||||
'episode_number': 6,
|
||||
'timestamp': 1746946800.0,
|
||||
'upload_date': '20250511',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://fxnow.fxnetworks.com/episode/09f4fa6f-c293-469e-aebe-32c9ca5842a7/playlist/PL554408064',
|
||||
'info_dict': {
|
||||
'id': 'VDKA38112033',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Return of Jerry',
|
||||
'description': 'The vampires’ long-lost fifth roommate returns. Written by Paul Simms; directed by Kyle Newacheck.',
|
||||
'age_limit': 17,
|
||||
'duration': 1493,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'What We Do in the Shadows',
|
||||
'season': 'Season 6',
|
||||
'season_number': 6,
|
||||
'episode': 'Episode 1',
|
||||
'upload_date': '20170102',
|
||||
'season': 'Season 2',
|
||||
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abcf/Shadowhunters/video/201/ae5f75608d86bf88aa4f9f4aa76ab1b7/579x325-Q100_ae5f75608d86bf88aa4f9f4aa76ab1b7.jpg',
|
||||
'duration': 2544,
|
||||
'season_number': 2,
|
||||
'series': 'Shadowhunters',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1483387200,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'timestamp': 1729573200.0,
|
||||
'upload_date': '20241022',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://abc.com/shows/the-rookie/episode-guide/season-04/12-the-knock',
|
||||
'url': 'https://www.freeform.com/episode/bda0eaf7-761a-4838-aa44-96f794000844/playlist/PL553044961',
|
||||
'info_dict': {
|
||||
'id': 'VDKA26050359',
|
||||
'title': 'The Knock',
|
||||
'description': 'md5:0c2947e3ada4c31f28296db7db14aa64',
|
||||
'age_limit': 14,
|
||||
'id': 'VDKA39007340',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abc/TheRookie/video/412/daf830d06e83b11eaf5c0a299d993ae3/1556x876-Q75_daf830d06e83b11eaf5c0a299d993ae3.jpg',
|
||||
'episode': 'Episode 12',
|
||||
'season_number': 4,
|
||||
'season': 'Season 4',
|
||||
'timestamp': 1642975200,
|
||||
'episode_number': 12,
|
||||
'upload_date': '20220123',
|
||||
'series': 'The Rookie',
|
||||
'duration': 2572,
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'title': 'Angel\'s Landing',
|
||||
'description': 'md5:91bf084e785c968fab16734df7313446',
|
||||
'age_limit': 14,
|
||||
'duration': 2523,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'How I Escaped My Cult',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 2',
|
||||
'episode_number': 2,
|
||||
'timestamp': 1740038400.0,
|
||||
'upload_date': '20250220',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
|
||||
'url': 'https://www.nationalgeographic.com/tv/episode/ca694661-1186-41ae-8089-82f64d69b16d/playlist/PL554408064',
|
||||
'info_dict': {
|
||||
'id': 'VDKA12782841',
|
||||
'title': 'First Look: Better Things - Season 2',
|
||||
'description': 'md5:fa73584a95761c605d9d54904e35b407',
|
||||
'id': 'VDKA39492078',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 14,
|
||||
'upload_date': '20170825',
|
||||
'duration': 161,
|
||||
'series': 'Better Things',
|
||||
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/fx/BetterThings/video/12782841/b6b05e58264121cc2c98811318e6d507/1556x876-Q75_b6b05e58264121cc2c98811318e6d507.jpg',
|
||||
'timestamp': 1503661074,
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'title': 'Heart of the Emperors',
|
||||
'description': 'md5:4fc50a2878f030bb3a7eac9124dca677',
|
||||
'age_limit': 0,
|
||||
'duration': 2775,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'Secrets of the Penguins',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1745204400.0,
|
||||
'upload_date': '20250421',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'url': 'https://www.freeform.com/movies-and-specials/c38281fc-9f8f-47c7-8220-22394f9df2e1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 004
|
||||
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 008
|
||||
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner',
|
||||
'url': 'https://abc.com/video/219a454a-172c-41bf-878a-d169e6bc0bdc/playlist/PL5523098420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -171,58 +167,29 @@ def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
f'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/{brand}/001/-1/{show_id}/-1/{video_id}/-1/-1.json',
|
||||
display_id)['video']
|
||||
|
||||
def _extract_global_var(self, name, webpage, video_id):
|
||||
return self._search_json(
|
||||
fr'window\[["\']{re.escape(name)}["\']\]\s*=',
|
||||
webpage, f'{name.strip("_")} JSON', video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
|
||||
'data', default='{}'),
|
||||
display_id or video_id, fatal=False)
|
||||
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
|
||||
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
|
||||
video_id = None
|
||||
if layout:
|
||||
video_id = try_get(
|
||||
layout,
|
||||
(lambda x: x['videoid'], lambda x: x['video']['id']),
|
||||
str)
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# page.analytics.videoIdCode
|
||||
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)',
|
||||
), webpage, 'video id', default=video_id)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
|
||||
default='004')
|
||||
site_info = next(
|
||||
si for _, si in self._SITE_INFO.items()
|
||||
if si.get('brand') == brand)
|
||||
if not video_id:
|
||||
# show extraction works for Disney, DisneyJunior and DisneyXD
|
||||
# ABC and Freeform has different layout
|
||||
show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
|
||||
videos = self._extract_videos(brand, show_id=show_id)
|
||||
show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
|
||||
entries = []
|
||||
for video in videos:
|
||||
entries.append(self.url_result(
|
||||
video['url'], 'Go', video.get('id'), video.get('title')))
|
||||
entries.reverse()
|
||||
return self.playlist_result(entries, show_id, show_title)
|
||||
site, display_id = self._match_valid_url(url).group('site', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
config = self._extract_global_var('__CONFIG__', webpage, display_id)
|
||||
data = self._extract_global_var(config['globalVar'], webpage, display_id)
|
||||
video_id = traverse_obj(data, (
|
||||
'page', 'content', 'video', 'layout', (('video', 'id'), 'videoid'), {str}, any))
|
||||
if not video_id:
|
||||
video_id = self._search_regex([
|
||||
# data-track-video_id="VDKA39492078"
|
||||
# data-track-video_id_code="vdka39492078"
|
||||
# data-video-id="'VDKA3609139'"
|
||||
r'data-(?:track-)?video[_-]id(?:_code)?=["\']*((?:vdka|VDKA)\d+)',
|
||||
# page.analytics.videoIdCode
|
||||
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\d+)'], webpage, 'video ID')
|
||||
|
||||
site_info = self._SITE_INFO[site]
|
||||
brand = site_info['brand']
|
||||
video_data = self._extract_videos(brand, video_id)[0]
|
||||
video_id = video_data['id']
|
||||
title = video_data['title']
|
||||
@ -238,26 +205,31 @@ def _real_extract(self, url):
|
||||
if ext == 'm3u8':
|
||||
video_type = video_data.get('type')
|
||||
data = {
|
||||
'video_id': video_data['id'],
|
||||
'video_id': video_id,
|
||||
'video_type': video_type,
|
||||
'brand': brand,
|
||||
'device': '001',
|
||||
'app_name': 'webplayer-abc',
|
||||
}
|
||||
if video_data.get('accesslevel') == '1':
|
||||
requestor_id = site_info.get('requestor_id', 'DisneyChannels')
|
||||
provider_id = site_info['provider_id']
|
||||
software_statement = traverse_obj(data, ('app', 'config', (
|
||||
('features', 'auth', 'softwareStatement'),
|
||||
('tvAuth', 'SOFTWARE_STATEMENTS', 'PRODUCTION'),
|
||||
), {str}, any)) or site_info['software_statement']
|
||||
resource = site_info.get('resource_id') or self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, None)
|
||||
provider_id, title, video_id, None)
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
url, video_id, site_info['requestor_id'], resource, software_statement)
|
||||
data.update({
|
||||
'token': auth,
|
||||
'token_type': 'ap',
|
||||
'adobe_requestor_id': requestor_id,
|
||||
'adobe_requestor_id': provider_id,
|
||||
})
|
||||
else:
|
||||
self._initialize_geo_bypass({'countries': ['US']})
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
'https://prod.gatekeeper.us-abc.symphony.edgedatg.go.com/vp2/ws-secure/entitlement/2020/playmanifest_secure.json',
|
||||
video_id, data=urlencode_postdata(data))
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
@ -267,7 +239,7 @@ def _real_extract(self, url):
|
||||
error['message'], countries=['US'])
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True)
|
||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||
asset_url += '?' + entitlement['entitlement']['uplynkData']['sessionKey']
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
|
@ -5,16 +5,11 @@
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
)
|
||||
from ..utils import ExtractorError, int_or_none
|
||||
from ..utils.traversal import get_first, traverse_obj
|
||||
|
||||
|
||||
class GoPlayIE(InfoExtractor):
|
||||
@ -27,10 +22,10 @@ class GoPlayIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '2baa4560-87a0-421b-bffc-359914e3c387',
|
||||
'ext': 'mp4',
|
||||
'title': 'S22 - Aflevering 1',
|
||||
'title': 'De Slimste Mens ter Wereld - S22 - Aflevering 1',
|
||||
'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
|
||||
'series': 'De Slimste Mens ter Wereld',
|
||||
'episode': 'Episode 1',
|
||||
'episode': 'Wordt aangekondigd',
|
||||
'season_number': 22,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 22',
|
||||
@ -52,7 +47,7 @@ class GoPlayIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
|
||||
'ext': 'mp4',
|
||||
'title': 'S11 - Aflevering 1',
|
||||
'title': 'De Mol - S11 - Aflevering 1',
|
||||
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
|
||||
'episode': 'Episode 1',
|
||||
'series': 'De Mol',
|
||||
@ -75,21 +70,13 @@ def _real_initialize(self):
|
||||
if not self._id_token:
|
||||
raise self.raise_login_required(method='password')
|
||||
|
||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nextjs_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {json.loads}, ..., {self._find_json}, ...))
|
||||
meta = traverse_obj(nextjs_data, (
|
||||
..., ..., 'children', ..., ..., 'children',
|
||||
lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any))
|
||||
nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
|
||||
meta = get_first(nextjs_data, (
|
||||
lambda k, v: k in ('video', 'meta') and v['path'] == urllib.parse.urlparse(url).path))
|
||||
|
||||
video_id = meta['uuid']
|
||||
info_dict = traverse_obj(meta, {
|
||||
@ -98,19 +85,18 @@ def _real_extract(self, url):
|
||||
})
|
||||
|
||||
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
|
||||
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
|
||||
episode_data = traverse_obj(
|
||||
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
|
||||
for season_data in traverse_obj(nextjs_data, (..., 'playlists', ..., {dict})):
|
||||
episode_data = traverse_obj(season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
|
||||
if not episode_data:
|
||||
continue
|
||||
|
||||
episode_title = traverse_obj(
|
||||
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
|
||||
season_number = traverse_obj(season_data, ('season', {int_or_none}))
|
||||
info_dict.update({
|
||||
'title': episode_title or info_dict.get('title'),
|
||||
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
|
||||
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
|
||||
'episode': traverse_obj(episode_data, ('episodeTitle', {str})),
|
||||
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
|
||||
'season_number': season_number,
|
||||
'series': self._search_regex(
|
||||
fr'^(.+)? - S{season_number} - ', info_dict.get('title'), 'series', default=None),
|
||||
})
|
||||
break
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
import functools
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
@ -9,77 +10,126 @@
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class HotStarBaseIE(InfoExtractor):
|
||||
_TOKEN_NAME = 'userUP'
|
||||
_BASE_URL = 'https://www.hotstar.com'
|
||||
_API_URL = 'https://api.hotstar.com'
|
||||
_API_URL_V2 = 'https://apix.hotstar.com/v2'
|
||||
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
||||
|
||||
_FREE_HEADERS = {
|
||||
'user-agent': 'Hotstar;in.startv.hotstar/25.06.30.0.11580 (Android/12)',
|
||||
'x-hs-client': 'platform:android;app_id:in.startv.hotstar;app_version:25.06.30.0;os:Android;os_version:12;schema_version:0.0.1523',
|
||||
'x-hs-platform': 'android',
|
||||
}
|
||||
_SUB_HEADERS = {
|
||||
'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)',
|
||||
'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970',
|
||||
'x-hs-platform': 'androidtv',
|
||||
}
|
||||
|
||||
def _has_active_subscription(self, cookies, server_time):
|
||||
server_time = int_or_none(server_time) or int(time.time())
|
||||
expiry = traverse_obj(cookies, (
|
||||
self._TOKEN_NAME, 'value', {jwt_decode_hs256}, 'sub', {json.loads},
|
||||
'subscriptions', 'in', ..., 'expiry', {parse_iso8601}, all, {max})) or 0
|
||||
return expiry > server_time
|
||||
|
||||
def _call_api_v1(self, path, *args, **kwargs):
|
||||
return self._download_json(
|
||||
f'{self._API_URL}/o/v1/{path}', *args, **kwargs,
|
||||
headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'})
|
||||
|
||||
def _call_api_impl(self, path, video_id, query, st=None, cookies=None):
|
||||
def _call_api_impl(self, path, video_id, query, cookies=None, st=None):
|
||||
st = int_or_none(st) or int(time.time())
|
||||
exp = st + 6000
|
||||
auth = f'st={st}~exp={exp}~acl=/*'
|
||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||
|
||||
if cookies and cookies.get('userUP'):
|
||||
token = cookies.get('userUP').value
|
||||
else:
|
||||
token = self._download_json(
|
||||
f'{self._API_URL}/um/v3/users',
|
||||
video_id, note='Downloading token',
|
||||
data=json.dumps({'device_ids': [{'id': str(uuid.uuid4()), 'type': 'device_id'}]}).encode(),
|
||||
headers={
|
||||
'hotstarauth': auth,
|
||||
'x-hs-platform': 'PCTV', # or 'web'
|
||||
'Content-Type': 'application/json',
|
||||
})['user_identity']
|
||||
|
||||
response = self._download_json(
|
||||
f'{self._API_URL}/{path}', video_id, query=query,
|
||||
headers={
|
||||
f'{self._API_URL_V2}/{path}', video_id, query=query,
|
||||
headers=filter_dict({
|
||||
**(self._SUB_HEADERS if self._has_active_subscription(cookies, st) else self._FREE_HEADERS),
|
||||
'hotstarauth': auth,
|
||||
'x-hs-appversion': '6.72.2',
|
||||
'x-hs-platform': 'web',
|
||||
'x-hs-usertoken': token,
|
||||
})
|
||||
'x-hs-usertoken': traverse_obj(cookies, (self._TOKEN_NAME, 'value')),
|
||||
'x-hs-device-id': traverse_obj(cookies, ('deviceId', 'value')) or str(uuid.uuid4()),
|
||||
'content-type': 'application/json',
|
||||
}))
|
||||
|
||||
if response['message'] != "Playback URL's fetched successfully":
|
||||
raise ExtractorError(
|
||||
response['message'], expected=True)
|
||||
return response['data']
|
||||
if not traverse_obj(response, ('success', {dict})):
|
||||
raise ExtractorError('API call was unsuccessful')
|
||||
return response['success']
|
||||
|
||||
def _call_api_v2(self, path, video_id, st=None, cookies=None):
|
||||
return self._call_api_impl(
|
||||
f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={
|
||||
'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265',
|
||||
'device-id': cookies.get('device_id').value if cookies.get('device_id') else str(uuid.uuid4()),
|
||||
'os-name': 'Windows',
|
||||
'os-version': '10',
|
||||
})
|
||||
def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None):
|
||||
return self._call_api_impl(f'{path}', video_id, query={
|
||||
'content_id': video_id,
|
||||
'filters': f'content_type={content_type}',
|
||||
'client_capabilities': json.dumps({
|
||||
'package': ['dash', 'hls'],
|
||||
'container': ['fmp4', 'fmp4br', 'ts'],
|
||||
'ads': ['non_ssai', 'ssai'],
|
||||
'audio_channel': ['stereo', 'dolby51', 'atmos'],
|
||||
'encryption': ['plain', 'widevine'], # wv only so we can raise appropriate error
|
||||
'video_codec': ['h264', 'h265'],
|
||||
'video_codec_non_secure': ['h264', 'h265', 'vp9'],
|
||||
'ladder': ['phone', 'tv', 'full'],
|
||||
'resolution': ['hd', '4k'],
|
||||
'true_resolution': ['hd', '4k'],
|
||||
'dynamic_range': ['sdr', 'hdr'],
|
||||
}, separators=(',', ':')),
|
||||
'drm_parameters': json.dumps({
|
||||
'widevine_security_level': ['SW_SECURE_DECODE', 'SW_SECURE_CRYPTO'],
|
||||
'hdcp_version': ['HDCP_V2_2', 'HDCP_V2_1', 'HDCP_V2', 'HDCP_V1'],
|
||||
}, separators=(',', ':')),
|
||||
}, cookies=cookies, st=st)
|
||||
|
||||
def _playlist_entries(self, path, item_id, root=None, **kwargs):
|
||||
results = self._call_api_v1(path, item_id, **kwargs)['body']['results']
|
||||
for video in traverse_obj(results, (('assets', None), 'items', ...)):
|
||||
if video.get('contentId'):
|
||||
yield self.url_result(
|
||||
HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId'])
|
||||
@staticmethod
|
||||
def _parse_metadata_v1(video_data):
|
||||
return traverse_obj(video_data, {
|
||||
'id': ('contentId', {str}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': (('broadcastDate', 'startDate'), {int_or_none}, any),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
'channel': ('channelName', {str}),
|
||||
'channel_id': ('channelId', {int}, {str_or_none}),
|
||||
'series': ('showName', {str}),
|
||||
'season': ('seasonName', {str}),
|
||||
'season_number': ('seasonNo', {int_or_none}),
|
||||
'season_id': ('seasonId', {int}, {str_or_none}),
|
||||
'episode': ('title', {str}),
|
||||
'episode_number': ('episodeNo', {int_or_none}),
|
||||
})
|
||||
|
||||
def _fetch_page(self, path, item_id, name, query, root, page):
|
||||
results = self._call_api_v1(
|
||||
path, item_id, note=f'Downloading {name} page {page + 1} JSON', query={
|
||||
**query,
|
||||
'tao': page * self._PAGE_SIZE,
|
||||
'tas': self._PAGE_SIZE,
|
||||
})['body']['results']
|
||||
|
||||
for video in traverse_obj(results, (('assets', None), 'items', lambda _, v: v['contentId'])):
|
||||
yield self.url_result(
|
||||
HotStarIE._video_url(video['contentId'], root=root), HotStarIE, **self._parse_metadata_v1(video))
|
||||
|
||||
|
||||
class HotStarIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar'
|
||||
IE_DESC = 'JioHotstar'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
|
||||
(?:
|
||||
@ -114,15 +164,16 @@ class HotStarIE(HotStarBaseIE):
|
||||
'upload_date': '20190501',
|
||||
'duration': 1219,
|
||||
'channel': 'StarPlus',
|
||||
'channel_id': '3',
|
||||
'channel_id': '821',
|
||||
'series': 'Ek Bhram - Sarvagun Sampanna',
|
||||
'season': 'Chapter 1',
|
||||
'season_number': 1,
|
||||
'season_id': '6771',
|
||||
'season_id': '1260004607',
|
||||
'episode': 'Janhvi Targets Suman',
|
||||
'episode_number': 8,
|
||||
},
|
||||
}, {
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, { # Metadata call gets HTTP Error 504 with tas=10000
|
||||
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843',
|
||||
'info_dict': {
|
||||
'id': '1000282843',
|
||||
@ -134,14 +185,14 @@ class HotStarIE(HotStarBaseIE):
|
||||
'channel': 'StarPlus',
|
||||
'series': 'Anupama',
|
||||
'season_number': 1,
|
||||
'season_id': '7399',
|
||||
'season_id': '1260022018',
|
||||
'upload_date': '20230307',
|
||||
'episode': 'Anupama, Anuj Share a Moment',
|
||||
'episode_number': 853,
|
||||
'duration': 1272,
|
||||
'channel_id': '3',
|
||||
'duration': 1266,
|
||||
'channel_id': '821',
|
||||
},
|
||||
'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320',
|
||||
'info_dict': {
|
||||
@ -154,14 +205,15 @@ class HotStarIE(HotStarBaseIE):
|
||||
'channel': 'Hotstar Specials',
|
||||
'series': 'Kana Kaanum Kaalangal',
|
||||
'season_number': 1,
|
||||
'season_id': '9441',
|
||||
'season_id': '1260097089',
|
||||
'upload_date': '20220421',
|
||||
'episode': 'Back To School',
|
||||
'episode_number': 1,
|
||||
'duration': 1810,
|
||||
'channel_id': '54',
|
||||
'channel_id': '1260003991',
|
||||
},
|
||||
}, {
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, { # Metadata call gets HTTP Error 504 with tas=10000
|
||||
'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
|
||||
'info_dict': {
|
||||
'id': '1000262286',
|
||||
@ -173,6 +225,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
'timestamp': 1622943900,
|
||||
'duration': 5395,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/movies/premam/1000091195',
|
||||
'info_dict': {
|
||||
@ -180,12 +233,13 @@ class HotStarIE(HotStarBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Premam',
|
||||
'release_year': 2015,
|
||||
'description': 'md5:d833c654e4187b5e34757eafb5b72d7f',
|
||||
'description': 'md5:096cd8aaae8dab56524823dc19dfa9f7',
|
||||
'timestamp': 1462149000,
|
||||
'upload_date': '20160502',
|
||||
'episode': 'Premam',
|
||||
'duration': 8994,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
|
||||
'only_matching': True,
|
||||
@ -208,6 +262,13 @@ class HotStarIE(HotStarBaseIE):
|
||||
None: 'content',
|
||||
}
|
||||
|
||||
_CONTENT_TYPE = {
|
||||
'movie': 'MOVIE',
|
||||
'episode': 'EPISODE',
|
||||
'match': 'SPORT',
|
||||
'content': 'CLIPS',
|
||||
}
|
||||
|
||||
_IGNORE_MAP = {
|
||||
'res': 'resolution',
|
||||
'vcodec': 'video_codec',
|
||||
@ -229,38 +290,50 @@ def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||
video_type = self._TYPE.get(video_type, video_type)
|
||||
video_type = self._TYPE[video_type]
|
||||
cookies = self._get_cookies(url) # Cookies before any request
|
||||
if not cookies or not cookies.get(self._TOKEN_NAME):
|
||||
self.raise_login_required()
|
||||
|
||||
video_data = traverse_obj(
|
||||
self._call_api_v1(
|
||||
f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}),
|
||||
('body', 'results', 'item', {dict})) or {}
|
||||
if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
|
||||
self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query={
|
||||
'tas': 5, # See https://github.com/yt-dlp/yt-dlp/issues/7946
|
||||
'contentId': video_id,
|
||||
}), ('body', 'results', 'item', {dict})) or {}
|
||||
|
||||
if video_data.get('drmProtected'):
|
||||
self.report_drm(video_id)
|
||||
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/396
|
||||
st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date')
|
||||
|
||||
geo_restricted = False
|
||||
formats, subs = [], {}
|
||||
formats, subs, has_drm = [], {}, False
|
||||
headers = {'Referer': f'{self._BASE_URL}/in'}
|
||||
content_type = traverse_obj(video_data, ('contentType', {str})) or self._CONTENT_TYPE[video_type]
|
||||
|
||||
# change to v2 in the future
|
||||
playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
|
||||
for playback_set in playback_sets:
|
||||
if not isinstance(playback_set, dict):
|
||||
continue
|
||||
tags = str_or_none(playback_set.get('tagsCombination')) or ''
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/396
|
||||
st = self._request_webpage(
|
||||
f'{self._BASE_URL}/in', video_id, 'Fetching server time').get_header('x-origin-date')
|
||||
watch = self._call_api_v2('pages/watch', video_id, content_type, cookies, st)
|
||||
player_config = traverse_obj(watch, (
|
||||
'page', 'spaces', 'player', 'widget_wrappers', lambda _, v: v['template'] == 'PlayerWidget',
|
||||
'widget', 'data', 'player_config', {dict}, any, {require('player config')}))
|
||||
|
||||
for playback_set in traverse_obj(player_config, (
|
||||
('media_asset', 'media_asset_v2'),
|
||||
('primary', 'fallback'),
|
||||
all, lambda _, v: url_or_none(v['content_url']),
|
||||
)):
|
||||
tags = str_or_none(playback_set.get('playback_tags')) or ''
|
||||
if any(f'{prefix}:{ignore}' in tags
|
||||
for key, prefix in self._IGNORE_MAP.items()
|
||||
for ignore in self._configuration_arg(key)):
|
||||
continue
|
||||
|
||||
format_url = url_or_none(playback_set.get('playbackUrl'))
|
||||
if not format_url:
|
||||
tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';'))
|
||||
if tag_dict.get('encryption') not in ('plain', None):
|
||||
has_drm = True
|
||||
continue
|
||||
format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url)
|
||||
|
||||
format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', playback_set['content_url'])
|
||||
ext = determine_ext(format_url)
|
||||
|
||||
current_formats, current_subs = [], {}
|
||||
@ -280,14 +353,12 @@ def _real_extract(self, url):
|
||||
'height': int_or_none(playback_set.get('height')),
|
||||
}]
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (403, 474):
|
||||
geo_restricted = True
|
||||
else:
|
||||
self.write_debug(e)
|
||||
continue
|
||||
|
||||
tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';'))
|
||||
if tag_dict.get('encryption') not in ('plain', None):
|
||||
for f in current_formats:
|
||||
f['has_drm'] = True
|
||||
for f in current_formats:
|
||||
for k, v in self._TAG_FIELDS.items():
|
||||
if not f.get(k):
|
||||
@ -299,6 +370,11 @@ def _real_extract(self, url):
|
||||
'stereo': 2,
|
||||
'dolby51': 6,
|
||||
}.get(tag_dict.get('audio_channel'))
|
||||
if (
|
||||
'Audio_Description' in f['format_id']
|
||||
or 'Audio Description' in (f.get('format_note') or '')
|
||||
):
|
||||
f['source_preference'] = -99 + (f.get('source_preference') or -1)
|
||||
f['format_note'] = join_nonempty(
|
||||
tag_dict.get('ladder'),
|
||||
tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None,
|
||||
@ -308,29 +384,22 @@ def _real_extract(self, url):
|
||||
formats.extend(current_formats)
|
||||
subs = self._merge_subtitles(subs, current_subs)
|
||||
|
||||
if not formats and geo_restricted:
|
||||
self.raise_geo_restricted(countries=['IN'], metadata_available=True)
|
||||
if not formats:
|
||||
if geo_restricted:
|
||||
self.raise_geo_restricted(countries=['IN'], metadata_available=True)
|
||||
elif has_drm:
|
||||
self.report_drm(video_id)
|
||||
elif not self._has_active_subscription(cookies, st):
|
||||
self.raise_no_formats('Your account does not have access to this content', expected=True)
|
||||
self._remove_duplicate_formats(formats)
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
return {
|
||||
**self._parse_metadata_v1(video_data),
|
||||
'id': video_id,
|
||||
'title': video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
|
||||
'release_year': int_or_none(video_data.get('year')),
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': str_or_none(video_data.get('channelId')),
|
||||
'series': video_data.get('showName'),
|
||||
'season': video_data.get('seasonName'),
|
||||
'season_number': int_or_none(video_data.get('seasonNo')),
|
||||
'season_id': str_or_none(video_data.get('seasonId')),
|
||||
'episode': video_data.get('title'),
|
||||
'episode_number': int_or_none(video_data.get('episodeNo')),
|
||||
}
|
||||
|
||||
|
||||
@ -371,64 +440,6 @@ def _real_extract(self, url):
|
||||
return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id)
|
||||
|
||||
|
||||
class HotStarPlaylistIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
|
||||
'info_dict': {
|
||||
'id': '3_2_26',
|
||||
},
|
||||
'playlist_mincount': 20,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id_ = self._match_id(url)
|
||||
return self.playlist_result(
|
||||
self._playlist_entries('tray/find', id_, query={'tas': 10000, 'uqId': id_}), id_)
|
||||
|
||||
|
||||
class HotStarSeasonIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:season'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
|
||||
'info_dict': {
|
||||
'id': '8028',
|
||||
},
|
||||
'playlist_mincount': 35,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357',
|
||||
'info_dict': {
|
||||
'id': '4357',
|
||||
},
|
||||
'playlist_mincount': 30,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/',
|
||||
'info_dict': {
|
||||
'id': '8208',
|
||||
},
|
||||
'playlist_mincount': 19,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, season_id = self._match_valid_url(url).groups()
|
||||
return self.playlist_result(self._playlist_entries(
|
||||
'season/asset', season_id, url, query={'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}), season_id)
|
||||
|
||||
|
||||
class HotStarSeriesIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:series'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
|
||||
@ -443,25 +454,29 @@ class HotStarSeriesIE(HotStarBaseIE):
|
||||
'info_dict': {
|
||||
'id': '1260050431',
|
||||
},
|
||||
'playlist_mincount': 43,
|
||||
'playlist_mincount': 42,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/tv/mahabharat/435/',
|
||||
'info_dict': {
|
||||
'id': '435',
|
||||
},
|
||||
'playlist_mincount': 267,
|
||||
}, {
|
||||
}, { # HTTP Error 504 with tas=10000 (possibly because total size is over 1000 items?)
|
||||
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/',
|
||||
'info_dict': {
|
||||
'id': '1260022017',
|
||||
},
|
||||
'playlist_mincount': 940,
|
||||
'playlist_mincount': 1601,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, series_id = self._match_valid_url(url).groups()
|
||||
id_ = self._call_api_v1(
|
||||
url, series_id = self._match_valid_url(url).group('url', 'id')
|
||||
eid = self._call_api_v1(
|
||||
'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id']
|
||||
|
||||
return self.playlist_result(self._playlist_entries(
|
||||
'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, 'tray/g/1/items', series_id,
|
||||
'series', {'etid': 0, 'eid': eid}, url), self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, series_id)
|
||||
|
@ -7,12 +7,13 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
@ -22,8 +23,8 @@
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
IE_NAME = 'huya:live'
|
||||
IE_DESC = 'huya.com'
|
||||
TESTS = [{
|
||||
IE_DESC = '虎牙直播'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.huya.com/572329',
|
||||
'info_dict': {
|
||||
'id': '572329',
|
||||
@ -149,63 +150,94 @@ class HuyaVideoIE(InfoExtractor):
|
||||
'id': '1002412640',
|
||||
'ext': 'mp4',
|
||||
'title': '8月3日',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 14,
|
||||
'categories': ['主机游戏'],
|
||||
'duration': 14.0,
|
||||
'uploader': '虎牙-ATS欧卡车队青木',
|
||||
'uploader_id': '1564376151',
|
||||
'upload_date': '20240803',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1722675433,
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'https://www.huya.com/video/play/556054543.html',
|
||||
'info_dict': {
|
||||
'id': '556054543',
|
||||
'ext': 'mp4',
|
||||
'title': '我不挑事 也不怕事',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 1864,
|
||||
'categories': ['英雄联盟'],
|
||||
'description': 'md5:58184869687d18ce62dc7b4b2ad21201',
|
||||
'duration': 1864.0,
|
||||
'uploader': '卡尔',
|
||||
'uploader_id': '367138632',
|
||||
'upload_date': '20210811',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': 'count:4',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1628675950,
|
||||
},
|
||||
}, {
|
||||
# Only m3u8 available
|
||||
'url': 'https://www.huya.com/video/play/1063345618.html',
|
||||
'info_dict': {
|
||||
'id': '1063345618',
|
||||
'ext': 'mp4',
|
||||
'title': '峡谷第一中!黑铁上钻石顶级教学对抗elo',
|
||||
'categories': ['英雄联盟'],
|
||||
'comment_count': int,
|
||||
'duration': 21603.0,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1749668803,
|
||||
'upload_date': '20250611',
|
||||
'uploader': '北枫CC',
|
||||
'uploader_id': '2183525275',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url: str):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://liveapi.huya.com/moment/getMomentContent', video_id,
|
||||
query={'videoId': video_id})['data']['moment']['videoInfo']
|
||||
moment = self._download_json(
|
||||
'https://liveapi.huya.com/moment/getMomentContent',
|
||||
video_id, query={'videoId': video_id})['data']['moment']
|
||||
|
||||
formats = []
|
||||
for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append({
|
||||
'url': definition['url'],
|
||||
**traverse_obj(definition, {
|
||||
'format_id': ('defName', {str}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
for definition in traverse_obj(moment, (
|
||||
'videoInfo', 'definitions', lambda _, v: url_or_none(v['m3u8']),
|
||||
)):
|
||||
fmts = self._extract_m3u8_formats(definition['m3u8'], video_id, 'mp4', fatal=False)
|
||||
for fmt in fmts:
|
||||
fmt.update(**traverse_obj(definition, {
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
'format_id': ('defName', {str}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'quality': ('definition', {int_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
}))
|
||||
formats.extend(fmts)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
**traverse_obj(moment, {
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
'description': ('content', {clean_html}, filter),
|
||||
'like_count': ('favorCount', {int_or_none}),
|
||||
'timestamp': ('cTime', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(moment, ('videoInfo', {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('videoCover', {url_or_none}),
|
||||
'categories': ('category', {str}, filter, all, filter),
|
||||
'duration': ('videoDuration', {parse_duration}),
|
||||
'tags': ('tags', ..., {str}, filter, all, filter),
|
||||
'thumbnail': (('videoBigCover', 'videoCover'), {url_or_none}, {update_url(query=None)}, any),
|
||||
'uploader': ('nickName', {str}),
|
||||
'uploader_id': ('uid', {str_or_none}),
|
||||
'upload_date': ('videoUploadTime', {unified_strdate}),
|
||||
'view_count': ('videoPlayNum', {int_or_none}),
|
||||
'comment_count': ('videoCommentNum', {int_or_none}),
|
||||
'like_count': ('favorCount', {int_or_none}),
|
||||
}),
|
||||
})),
|
||||
}
|
||||
|
@ -1,32 +1,66 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, traverse_obj
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class MonsterSirenHypergryphMusicIE(InfoExtractor):
|
||||
IE_NAME = 'monstersiren'
|
||||
IE_DESC = '塞壬唱片'
|
||||
_API_BASE = 'https://monster-siren.hypergryph.com/api'
|
||||
_VALID_URL = r'https?://monster-siren\.hypergryph\.com/music/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://monster-siren.hypergryph.com/music/514562',
|
||||
'info_dict': {
|
||||
'id': '514562',
|
||||
'ext': 'wav',
|
||||
'artists': ['塞壬唱片-MSR'],
|
||||
'album': 'Flame Shadow',
|
||||
'title': 'Flame Shadow',
|
||||
'album': 'Flame Shadow',
|
||||
'artists': ['塞壬唱片-MSR'],
|
||||
'description': 'md5:19e2acfcd1b65b41b29e8079ab948053',
|
||||
'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://monster-siren.hypergryph.com/music/514518',
|
||||
'info_dict': {
|
||||
'id': '514518',
|
||||
'ext': 'wav',
|
||||
'title': 'Heavenly Me (Instrumental)',
|
||||
'album': 'Heavenly Me',
|
||||
'artists': ['塞壬唱片-MSR', 'AIYUE blessed : 理名'],
|
||||
'description': 'md5:ce790b41c932d1ad72eb791d1d8ae598',
|
||||
'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
audio_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, audio_id)
|
||||
json_data = self._search_json(
|
||||
r'window\.g_initialProps\s*=', webpage, 'data', audio_id, transform_source=js_to_json)
|
||||
song = self._download_json(f'{self._API_BASE}/song/{audio_id}', audio_id)
|
||||
if traverse_obj(song, 'code') != 0:
|
||||
msg = traverse_obj(song, ('msg', {str}, filter))
|
||||
raise ExtractorError(
|
||||
msg or 'API returned an error response', expected=bool(msg))
|
||||
|
||||
album = None
|
||||
if album_id := traverse_obj(song, ('data', 'albumCid', {str})):
|
||||
album = self._download_json(
|
||||
f'{self._API_BASE}/album/{album_id}/detail', album_id, fatal=False)
|
||||
|
||||
return {
|
||||
'id': audio_id,
|
||||
'title': traverse_obj(json_data, ('player', 'songDetail', 'name')),
|
||||
'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')),
|
||||
'ext': 'wav',
|
||||
'vcodec': 'none',
|
||||
'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)),
|
||||
'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')),
|
||||
**traverse_obj(song, ('data', {
|
||||
'title': ('name', {str}),
|
||||
'artists': ('artists', ..., {str}),
|
||||
'subtitles': ({'url': 'lyricUrl'}, all, {subs_list_to_dict(lang='en')}),
|
||||
'url': ('sourceUrl', {url_or_none}),
|
||||
})),
|
||||
**traverse_obj(album, ('data', {
|
||||
'album': ('name', {str}),
|
||||
'description': ('intro', {clean_html}),
|
||||
'thumbnail': ('coverUrl', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
|
@ -1,408 +0,0 @@
|
||||
import base64
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class JioCinemaBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'jiocinema'
|
||||
_GEO_BYPASS = False
|
||||
_ACCESS_TOKEN = None
|
||||
_REFRESH_TOKEN = None
|
||||
_GUEST_TOKEN = None
|
||||
_USER_ID = None
|
||||
_DEVICE_ID = None
|
||||
_API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
|
||||
_APP_NAME = {'appName': 'RJIL_JioCinema'}
|
||||
_APP_VERSION = {'appVersion': '5.0.0'}
|
||||
_API_SIGNATURES = 'o668nxgzwff'
|
||||
_METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
|
||||
_ACCESS_HINT = 'the `accessToken` from your browser local storage'
|
||||
_LOGIN_HINT = (
|
||||
'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
|
||||
f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
|
||||
'If you have previously logged in with yt-dlp and your session '
|
||||
'has been cached, you can use "-u device -p <DEVICE_ID>"')
|
||||
|
||||
def _cache_token(self, token_type):
|
||||
assert token_type in ('access', 'refresh', 'all')
|
||||
if token_type in ('access', 'all'):
|
||||
self.cache.store(
|
||||
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
|
||||
if token_type in ('refresh', 'all'):
|
||||
self.cache.store(
|
||||
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)
|
||||
|
||||
def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}):
|
||||
return self._download_json(
|
||||
url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
**self._API_HEADERS,
|
||||
**headers,
|
||||
}, expected_status=(400, 403, 474))
|
||||
|
||||
def _call_auth_api(self, service, endpoint, note, headers={}, data={}):
|
||||
return self._call_api(
|
||||
f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
|
||||
None, note=note, headers=headers, data=data)
|
||||
|
||||
def _refresh_token(self):
|
||||
if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
|
||||
raise ExtractorError('User token has expired', expected=True)
|
||||
response = self._call_auth_api(
|
||||
'token', 'refreshtoken', 'Refreshing token',
|
||||
headers={'accesstoken': self._ACCESS_TOKEN}, data={
|
||||
**self._APP_NAME,
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'refreshToken': self._REFRESH_TOKEN,
|
||||
**self._APP_VERSION,
|
||||
})
|
||||
refresh_token = response.get('refreshTokenId')
|
||||
if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
|
||||
self._cache_token('refresh')
|
||||
JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
|
||||
self._cache_token('access')
|
||||
|
||||
def _fetch_guest_token(self):
|
||||
JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
|
||||
guest_token = self._call_auth_api(
|
||||
'token', 'guest', 'Downloading guest token', data={
|
||||
**self._APP_NAME,
|
||||
'deviceType': 'phone',
|
||||
'os': 'ios',
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'freshLaunch': False,
|
||||
'adId': self._DEVICE_ID,
|
||||
**self._APP_VERSION,
|
||||
})
|
||||
self._GUEST_TOKEN = guest_token['authToken']
|
||||
self._USER_ID = guest_token['userId']
|
||||
|
||||
def _call_login_api(self, endpoint, guest_token, data, note):
|
||||
return self._call_auth_api(
|
||||
'user', f'loginotp/{endpoint}', note, headers={
|
||||
**self.geo_verification_headers(),
|
||||
'accesstoken': self._GUEST_TOKEN,
|
||||
**self._APP_NAME,
|
||||
**traverse_obj(guest_token, 'data', {
|
||||
'deviceType': ('deviceType', {str}),
|
||||
'os': ('os', {str}),
|
||||
})}, data=data)
|
||||
|
||||
def _is_token_expired(self, token):
|
||||
return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
|
||||
return
|
||||
|
||||
UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
|
||||
if username.lower() == 'token':
|
||||
if try_call(lambda: jwt_decode_hs256(password)):
|
||||
JioCinemaBaseIE._ACCESS_TOKEN = password
|
||||
refresh_hint = 'the `refreshToken` UUID from your browser local storage'
|
||||
refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
|
||||
if not refresh_token:
|
||||
self.to_screen(
|
||||
'To extend the life of your login session, in addition to your access token, '
|
||||
'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
|
||||
f'where REFRESH_TOKEN is {refresh_hint}')
|
||||
elif re.fullmatch(UUID_RE, refresh_token):
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
|
||||
else:
|
||||
self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
|
||||
else:
|
||||
raise ExtractorError(
|
||||
f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)
|
||||
|
||||
elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
|
||||
JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
|
||||
if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
|
||||
raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)
|
||||
|
||||
elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password):
|
||||
self._fetch_guest_token()
|
||||
guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
|
||||
initial_data = {
|
||||
'number': base64.b64encode(password.encode()).decode(),
|
||||
**self._APP_VERSION,
|
||||
}
|
||||
response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
|
||||
if not traverse_obj(response, ('OTPInfo', {dict})):
|
||||
raise ExtractorError('There was a problem with the phone number login attempt')
|
||||
|
||||
is_iphone = guest_token.get('os') == 'ios'
|
||||
response = self._call_login_api('verify', guest_token, {
|
||||
'deviceInfo': {
|
||||
'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
|
||||
'info': {
|
||||
'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
|
||||
'androidId': self._DEVICE_ID,
|
||||
'type': 'iOS' if is_iphone else 'Android',
|
||||
},
|
||||
},
|
||||
**initial_data,
|
||||
'otp': self._get_tfa_info('the one-time password sent to your phone'),
|
||||
}, 'Submitting OTP')
|
||||
if traverse_obj(response, 'code') == 1043:
|
||||
raise ExtractorError('Wrong OTP', expected=True)
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
|
||||
JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
|
||||
|
||||
else:
|
||||
raise ExtractorError(self._LOGIN_HINT, expected=True)
|
||||
|
||||
user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
|
||||
JioCinemaBaseIE._USER_ID = user_token['userId']
|
||||
JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
|
||||
if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device':
|
||||
self._cache_token('all')
|
||||
if self.get_param('cachedir') is not False:
|
||||
self.to_screen(
|
||||
f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
|
||||
elif not JioCinemaBaseIE._REFRESH_TOKEN:
|
||||
JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
|
||||
JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
|
||||
if JioCinemaBaseIE._REFRESH_TOKEN:
|
||||
self._cache_token('access')
|
||||
self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
|
||||
if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
|
||||
self._refresh_token()
|
||||
|
||||
|
||||
class JioCinemaIE(JioCinemaBaseIE):
|
||||
IE_NAME = 'jiocinema'
|
||||
_VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
|
||||
'info_dict': {
|
||||
'id': '3759931',
|
||||
'ext': 'mp4',
|
||||
'title': 'Pradeep to stop the wedding?',
|
||||
'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
|
||||
'episode': 'Pradeep to stop the wedding?',
|
||||
'episode_number': 89,
|
||||
'season': 'Agnisakshi…Ek Samjhauta-S1',
|
||||
'season_number': 1,
|
||||
'series': 'Agnisakshi Ek Samjhauta',
|
||||
'duration': 1238.0,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'age_limit': 13,
|
||||
'season_id': '3698031',
|
||||
'upload_date': '20230606',
|
||||
'timestamp': 1686009600,
|
||||
'release_date': '20230607',
|
||||
'genres': ['Drama'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
|
||||
'info_dict': {
|
||||
'id': '3754021',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bhediya',
|
||||
'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
|
||||
'episode': 'Bhediya',
|
||||
'duration': 8500.0,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'age_limit': 13,
|
||||
'upload_date': '20230525',
|
||||
'timestamp': 1685026200,
|
||||
'release_date': '20230524',
|
||||
'genres': ['Comedy'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _extract_formats_and_subtitles(self, playback, video_id):
|
||||
m3u8_url = traverse_obj(playback, (
|
||||
'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
|
||||
if not m3u8_url: # DRM-only content only serves dash urls
|
||||
self.report_drm(video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls')
|
||||
self._remove_duplicate_formats(formats)
|
||||
|
||||
return {
|
||||
# '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
|
||||
'formats': traverse_obj(formats, (
|
||||
lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
|
||||
self._fetch_guest_token()
|
||||
elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
|
||||
self._refresh_token()
|
||||
|
||||
playback = self._call_api(
|
||||
f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
|
||||
'Downloading playback JSON', headers={
|
||||
**self.geo_verification_headers(),
|
||||
'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
|
||||
**self._APP_NAME,
|
||||
'deviceid': self._DEVICE_ID,
|
||||
'uniqueid': self._USER_ID,
|
||||
'x-apisignatures': self._API_SIGNATURES,
|
||||
'x-platform': 'androidweb',
|
||||
'x-platform-token': 'web',
|
||||
}, data={
|
||||
'4k': False,
|
||||
'ageGroup': '18+',
|
||||
'appVersion': '3.4.0',
|
||||
'bitrateProfile': 'xhdpi',
|
||||
'capability': {
|
||||
'drmCapability': {
|
||||
'aesSupport': 'yes',
|
||||
'fairPlayDrmSupport': 'none',
|
||||
'playreadyDrmSupport': 'none',
|
||||
'widevineDRMSupport': 'none',
|
||||
},
|
||||
'frameRateCapability': [{
|
||||
'frameRateSupport': '30fps',
|
||||
'videoQuality': '1440p',
|
||||
}],
|
||||
},
|
||||
'continueWatchingRequired': False,
|
||||
'dolby': False,
|
||||
'downloadRequest': False,
|
||||
'hevc': False,
|
||||
'kidsSafe': False,
|
||||
'manufacturer': 'Windows',
|
||||
'model': 'Windows',
|
||||
'multiAudioRequired': True,
|
||||
'osVersion': '10',
|
||||
'parentalPinValid': True,
|
||||
'x-apisignatures': self._API_SIGNATURES,
|
||||
})
|
||||
|
||||
status_code = traverse_obj(playback, ('code', {int}))
|
||||
if status_code == 474:
|
||||
self.raise_geo_restricted(countries=['IN'])
|
||||
elif status_code == 1008:
|
||||
error_msg = 'This content is only available for premium users'
|
||||
if self._ACCESS_TOKEN:
|
||||
raise ExtractorError(error_msg, expected=True)
|
||||
self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
|
||||
elif status_code == 400:
|
||||
raise ExtractorError('The requested content is not available', expected=True)
|
||||
elif status_code is not None and status_code != 200:
|
||||
raise ExtractorError(
|
||||
f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')
|
||||
|
||||
metadata = self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
|
||||
video_id, fatal=False, query={
|
||||
'ids': f'include:{video_id}',
|
||||
'responseType': 'common',
|
||||
'devicePlatformType': 'desktop',
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'http_headers': self._API_HEADERS,
|
||||
**self._extract_formats_and_subtitles(playback, video_id),
|
||||
**traverse_obj(playback, ('data', {
|
||||
# fallback metadata
|
||||
'title': ('name', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('show', 'name', {str}, filter),
|
||||
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
|
||||
'season_number': ('episode', 'season', {int_or_none}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
|
||||
'age_limit': ('ageNemonic', {parse_age_limit}),
|
||||
'duration': ('totalDuration', {float_or_none}),
|
||||
'thumbnail': ('images', {url_or_none}),
|
||||
})),
|
||||
**traverse_obj(metadata, ('result', 0, {
|
||||
'title': ('fullTitle', {str}),
|
||||
'description': ('fullSynopsis', {str}),
|
||||
'series': ('showName', {str}, filter),
|
||||
'season': ('seasonName', {str}, filter),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'season_id': ('seasonId', {str}, filter),
|
||||
'episode': ('fullTitle', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
'timestamp': ('uploadTime', {int_or_none}),
|
||||
'release_date': ('telecastDate', {str}),
|
||||
'age_limit': ('ageNemonic', {parse_age_limit}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'genres': ('genres', ..., {str}),
|
||||
'thumbnail': ('seo', 'ogImage', {url_or_none}),
|
||||
})),
|
||||
}
|
||||
|
||||
|
||||
class JioCinemaSeriesIE(JioCinemaBaseIE):
|
||||
IE_NAME = 'jiocinema:series'
|
||||
_VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
|
||||
'info_dict': {
|
||||
'id': '3499917',
|
||||
'title': 'naagin',
|
||||
},
|
||||
'playlist_mincount': 120,
|
||||
}, {
|
||||
'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
|
||||
'info_dict': {
|
||||
'id': '3499820',
|
||||
'title': 'mtv-splitsvilla-x5',
|
||||
},
|
||||
'playlist_mincount': 310,
|
||||
}]
|
||||
|
||||
def _entries(self, series_id):
|
||||
seasons = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
|
||||
'Downloading series metadata JSON', query={'responseType': 'common'}), (
|
||||
'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
|
||||
'trayTabs', lambda _, v: v['id']))
|
||||
|
||||
for season_num, season in enumerate(seasons, start=1):
|
||||
season_id = season['id']
|
||||
label = season.get('label') or season_num
|
||||
for page_num in itertools.count(1):
|
||||
episodes = traverse_obj(self._download_json(
|
||||
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
|
||||
season_id, f'Downloading season {label} page {page_num} JSON', query={
|
||||
'sort': 'episode:asc',
|
||||
'id': season_id,
|
||||
'responseType': 'common',
|
||||
'page': page_num,
|
||||
}), ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
|
||||
if not episodes:
|
||||
break
|
||||
for episode in episodes:
|
||||
yield self.url_result(
|
||||
episode['slug'], JioCinemaIE, **traverse_obj(episode, {
|
||||
'video_id': 'id',
|
||||
'video_title': ('fullTitle', {str}),
|
||||
'season_number': ('season', {int_or_none}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
}))
|
||||
|
||||
def _real_extract(self, url):
|
||||
slug, series_id = self._match_valid_url(url).group('slug', 'id')
|
||||
return self.playlist_result(self._entries(series_id), series_id, slug)
|
@ -1,112 +0,0 @@
|
||||
import datetime as dt
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
datetime_from_str,
|
||||
unified_timestamp,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class JoqrAgIE(InfoExtractor):
|
||||
IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
|
||||
_VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
|
||||
r'https?://(?:www\.)?joqr\.co\.jp/ag/',
|
||||
r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
|
||||
'info_dict': {
|
||||
'id': 'live',
|
||||
'title': str,
|
||||
'channel': '超!A&G+',
|
||||
'description': str,
|
||||
'live_status': 'is_live',
|
||||
'release_timestamp': int,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.joqr.co.jp/ag/article/103760/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_metadata(self, variable, html):
|
||||
return clean_html(urllib.parse.unquote_plus(self._search_regex(
|
||||
rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
html, 'metadata', group='value', default=''))) or None
|
||||
|
||||
def _extract_start_timestamp(self, video_id, is_live):
|
||||
def extract_start_time_from(date_str):
|
||||
dt_ = datetime_from_str(date_str) + dt.timedelta(hours=9)
|
||||
date = dt_.strftime('%Y%m%d')
|
||||
start_time = self._search_regex(
|
||||
r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
|
||||
self._download_webpage(
|
||||
f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
|
||||
note=f'Downloading program list of {date}', fatal=False,
|
||||
errnote=f'Failed to download program list of {date}') or '',
|
||||
'start time', default=None)
|
||||
if start_time:
|
||||
return unified_timestamp(f'{dt_.strftime("%Y/%m/%d")} {start_time} +09:00')
|
||||
return None
|
||||
|
||||
start_timestamp = extract_start_time_from('today')
|
||||
if not start_timestamp:
|
||||
return None
|
||||
|
||||
if not is_live or start_timestamp < datetime_from_str('now').timestamp():
|
||||
return start_timestamp
|
||||
else:
|
||||
return extract_start_time_from('yesterday')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = 'live'
|
||||
|
||||
metadata = self._download_webpage(
|
||||
'https://www.uniqueradio.jp/aandg', video_id,
|
||||
note='Downloading metadata', errnote='Failed to download metadata')
|
||||
title = self._extract_metadata('Program_name', metadata)
|
||||
|
||||
if not title or title == '放送休止':
|
||||
formats = []
|
||||
live_status = 'is_upcoming'
|
||||
release_timestamp = self._extract_start_timestamp(video_id, False)
|
||||
msg = 'This stream is not currently live'
|
||||
if release_timestamp:
|
||||
msg += (' and will start at '
|
||||
+ dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
|
||||
self.raise_no_formats(msg, expected=True)
|
||||
else:
|
||||
m3u8_path = self._search_regex(
|
||||
r'<source\s[^>]*\bsrc="([^"]+)"',
|
||||
self._download_webpage(
|
||||
'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
|
||||
note='Downloading player data', errnote='Failed to download player data'),
|
||||
'm3u8 url')
|
||||
formats = self._extract_m3u8_formats(
|
||||
urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
|
||||
live_status = 'is_live'
|
||||
release_timestamp = self._extract_start_timestamp(video_id, True)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'channel': '超!A&G+',
|
||||
'description': self._extract_metadata('Program_text', metadata),
|
||||
'formats': formats,
|
||||
'live_status': live_status,
|
||||
'release_timestamp': release_timestamp,
|
||||
}
|
@ -1,12 +1,12 @@
|
||||
import functools
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
@ -16,21 +16,17 @@
|
||||
|
||||
|
||||
class KickBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._request_webpage(
|
||||
HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False, impersonate=True)
|
||||
xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
|
||||
if not xsrf_token:
|
||||
self.write_debug('kick.com did not set XSRF-TOKEN cookie')
|
||||
KickBaseIE._API_HEADERS = {
|
||||
'Authorization': f'Bearer {xsrf_token.value}',
|
||||
'X-XSRF-TOKEN': xsrf_token.value,
|
||||
} if xsrf_token else {}
|
||||
@functools.cached_property
|
||||
def _api_headers(self):
|
||||
token = traverse_obj(
|
||||
self._get_cookies('https://kick.com/'),
|
||||
('session_token', 'value', {urllib.parse.unquote}))
|
||||
return {'Authorization': f'Bearer {token}'} if token else {}
|
||||
|
||||
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
|
||||
return self._download_json(
|
||||
f'https://kick.com/api/{path}', display_id, note=note,
|
||||
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
|
||||
headers={**self._api_headers, **headers}, impersonate=True, **kwargs)
|
||||
|
||||
|
||||
class KickIE(KickBaseIE):
|
||||
|
@ -1,358 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
unsmuggle_url,
|
||||
)
|
||||
|
||||
|
||||
class LimelightBaseIE(InfoExtractor):
|
||||
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
|
||||
|
||||
@classmethod
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
lm = {
|
||||
'Media': 'media',
|
||||
'Channel': 'channel',
|
||||
'ChannelList': 'channel_list',
|
||||
}
|
||||
|
||||
def smuggle(url):
|
||||
return smuggle_url(url, {'source_url': url})
|
||||
|
||||
entries = []
|
||||
for kind, video_id in re.findall(
|
||||
r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
|
||||
webpage):
|
||||
entries.append(cls.url_result(
|
||||
smuggle(f'limelight:{lm[kind]}:{video_id}'),
|
||||
f'Limelight{kind}', video_id))
|
||||
for mobj in re.finditer(
|
||||
# As per [1] class attribute should be exactly equal to
|
||||
# LimelightEmbeddedPlayerFlash but numerous examples seen
|
||||
# that don't exactly match it (e.g. [2]).
|
||||
# 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
|
||||
# 2. http://www.sedona.com/FacilitatorTraining2017
|
||||
r'''(?sx)
|
||||
<object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
|
||||
<param[^>]+
|
||||
name=(["\'])flashVars\2[^>]+
|
||||
value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
|
||||
''', webpage):
|
||||
kind, video_id = mobj.group('kind'), mobj.group('id')
|
||||
entries.append(cls.url_result(
|
||||
smuggle(f'limelight:{kind}:{video_id}'),
|
||||
f'Limelight{kind.capitalize()}', video_id))
|
||||
# http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
|
||||
for video_id in re.findall(
|
||||
r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
|
||||
webpage):
|
||||
entries.append(cls.url_result(
|
||||
smuggle(f'limelight:media:{video_id}'),
|
||||
LimelightMediaIE.ie_key(), video_id))
|
||||
return entries
|
||||
|
||||
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
|
||||
headers = {}
|
||||
if referer:
|
||||
headers['Referer'] = referer
|
||||
try:
|
||||
return self._download_json(
|
||||
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
|
||||
item_id, f'Downloading PlaylistService {method} JSON',
|
||||
fatal=fatal, headers=headers)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
error = self._parse_json(e.cause.response.read().decode(), item_id)['detail']['contentAccessPermission']
|
||||
if error == 'CountryDisabled':
|
||||
self.raise_geo_restricted()
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise
|
||||
|
||||
def _extract(self, item_id, pc_method, mobile_method, referer=None):
|
||||
pc = self._call_playlist_service(item_id, pc_method, referer=referer)
|
||||
mobile = self._call_playlist_service(
|
||||
item_id, mobile_method, fatal=False, referer=referer)
|
||||
return pc, mobile
|
||||
|
||||
def _extract_info(self, pc, mobile, i, referer):
    """Build the info dict for the ``i``-th item of a PC/mobile playlist pair.

    ``pc`` is indexed as ``pc['playlistItems'][i]`` and ``mobile`` as
    ``mobile['mediaList'][i]``; a missing or non-dict item is treated as
    an empty dict.  ``referer`` is forwarded to the closed-captions request.

    Returns a dict with id, title, description, formats, duration,
    thumbnail and subtitles.  Raises KeyError if neither item provides a
    ``mediaId`` / ``title``.
    """
    get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
    pc_item = get_item(pc, 'playlistItems')
    mobile_item = get_item(mobile, 'mediaList')
    video_id = pc_item.get('mediaId') or mobile_item['mediaId']
    title = pc_item.get('title') or mobile_item['title']

    formats = []
    urls = []  # every URL already handled, used for de-duplication
    for stream in pc_item.get('streams') or []:
        stream_url = stream.get('url')
        if not stream_url or stream_url in urls:
            continue
        if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'):
            continue
        urls.append(stream_url)
        ext = determine_ext(stream_url)
        if ext == 'f4m':
            formats.extend(self._extract_f4m_formats(
                stream_url, video_id, f4m_id='hds', fatal=False))
        else:
            fmt = {
                'url': stream_url,
                'abr': float_or_none(stream.get('audioBitRate')),
                'fps': float_or_none(stream.get('videoFrameRate')),
                'ext': ext,
            }
            width = int_or_none(stream.get('videoWidthInPixels'))
            height = int_or_none(stream.get('videoHeightInPixels'))
            vbr = float_or_none(stream.get('videoBitRate'))
            if width or height or vbr:
                fmt.update({
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                })
            else:
                # No video metadata at all: treat the stream as audio-only
                fmt['vcodec'] = 'none'
            rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
            if rtmp:
                format_id = 'rtmp'
                if stream.get('videoBitRate'):
                    # int_or_none() yields None for values it cannot convert;
                    # feeding that to '%d' would raise TypeError.
                    bitrate = int_or_none(stream['videoBitRate'])
                    if bitrate is not None:
                        format_id += '-%d' % bitrate
                http_format_id = format_id.replace('rtmp', 'http')

                # (substring of the RTMP host, HTTP host serving the same file)
                CDN_HOSTS = (
                    ('delvenetworks.com', 'cpl.delvenetworks.com'),
                    ('video.llnw.net', 's2.content.video.llnw.net'),
                )
                for cdn_host, http_host in CDN_HOSTS:
                    if cdn_host not in rtmp.group('host').lower():
                        continue
                    # Drop the 4-character 'mp3:'/'mp4:' playpath prefix
                    http_url = 'http://{}/{}'.format(http_host, rtmp.group('playpath')[4:])
                    urls.append(http_url)
                    if self._is_valid_url(http_url, video_id, http_format_id):
                        http_fmt = fmt.copy()
                        http_fmt.update({
                            'url': http_url,
                            'format_id': http_format_id,
                        })
                        formats.append(http_fmt)
                        break

                fmt.update({
                    'url': rtmp.group('url'),
                    'play_path': rtmp.group('playpath'),
                    'app': rtmp.group('app'),
                    'ext': 'flv',
                    'format_id': format_id,
                })
            formats.append(fmt)

    for mobile_url in mobile_item.get('mobileUrls') or []:
        media_url = mobile_url.get('mobileUrl')
        format_id = mobile_url.get('targetMediaPlatform')
        if not media_url or media_url in urls:
            continue
        if (format_id in ('Widevine', 'SmoothStreaming')
                and not self.get_param('allow_unplayable_formats', False)):
            continue
        urls.append(media_url)
        ext = determine_ext(media_url)
        if ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                media_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id=format_id, fatal=False))
        elif ext == 'f4m':
            # Must use this loop's media_url; ``stream_url`` belongs to the
            # PC loop above (and is undefined when that loop never ran).
            formats.extend(self._extract_f4m_formats(
                media_url, video_id, f4m_id=format_id, fatal=False))
        else:
            formats.append({
                'url': media_url,
                'format_id': format_id,
                'quality': -10,
                'ext': ext,
            })

    subtitles = {}
    # 'flags' is absent whenever mobile_item is the empty fallback dict
    if 'ClosedCaptions' in (mobile_item.get('flags') or []):
        closed_captions = self._call_playlist_service(
            video_id, 'getClosedCaptionsDetailsByMediaId',
            False, referer) or []
        for cc in closed_captions:
            cc_url = cc.get('webvttFileUrl')
            if not cc_url:
                continue
            lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
            subtitles.setdefault(lang, []).append({
                'url': cc_url,
            })

    get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)

    return {
        'id': video_id,
        'title': title,
        'description': get_meta('description'),
        'formats': formats,
        'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
        'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
        'subtitles': subtitles,
    }
|
||||
|
||||
|
||||
class LimelightMediaIE(LimelightBaseIE):
    """Extractor for a single Limelight media item (``limelight:media:<id>`` or a ``mediaId=`` player URL)."""

    IE_NAME = 'limelight'
    _PLAYLIST_SERVICE_PATH = 'media'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:media:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bmediaId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
        'info_dict': {
            'id': '3ffd040b522b4485b6d84effc750cd86',
            'ext': 'mp4',
            'title': 'HaP and the HB Prince Trailer',
            'description': 'md5:8005b944181778e313d95c1237ddb640',
            'thumbnail': r're:^https?://.*\.jpeg$',
            'duration': 144.23,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # video with subtitles
        'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
        'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
        'info_dict': {
            'id': 'a3e00274d4564ec4a9b29b9466432335',
            'ext': 'mp4',
            'title': '3Play Media Overview Video',
            'thumbnail': r're:^https?://.*\.jpeg$',
            'duration': 78.101,
            # TODO: extract all languages that were accessible via API
            # 'subtitles': 'mincount:9',
            'subtitles': 'mincount:1',
        },
    }, {
        'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Smuggled data may carry the embedding page URL and geo-bypass countries
        plain_url, smuggled = unsmuggle_url(url, {})
        media_id = self._match_id(plain_url)
        referer = smuggled.get('source_url')
        self._initialize_geo_bypass({'countries': smuggled.get('geo_countries')})

        pc_playlist, mobile_playlist = self._extract(
            media_id, 'getPlaylistByMediaId', 'getMobilePlaylistByMediaId', referer)

        # A media playlist describes exactly one item, at index 0
        return self._extract_info(pc_playlist, mobile_playlist, 0, referer)
|
||||
|
||||
|
||||
class LimelightChannelIE(LimelightBaseIE):
    """Extractor for a Limelight channel, returned as a playlist of its media items."""

    IE_NAME = 'limelight:channel'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:channel:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bchannelId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
        'info_dict': {
            'id': 'ab6a524c379342f9b23642917020c082',
            'title': 'Javascript Sample Code',
            'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
        'only_matching': True,
    }]
    _PLAYLIST_SERVICE_PATH = 'channel'

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        channel_id = self._match_id(url)
        source_url = smuggled_data.get('source_url')

        pc, mobile = self._extract(
            channel_id, 'getPlaylistByChannelId',
            'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
            source_url)

        # The mobile playlist is requested with fatal=False, so it is
        # presumably a non-dict placeholder when that request fails
        # (NOTE(review): confirm against _download_json). Normalise it so
        # the .get() below cannot raise AttributeError.
        if not isinstance(mobile, dict):
            mobile = {}

        entries = [
            self._extract_info(pc, mobile, i, source_url)
            for i in range(len(pc['playlistItems']))]

        return self.playlist_result(
            entries, channel_id, pc.get('title'), mobile.get('description'))
|
||||
|
||||
|
||||
class LimelightChannelListIE(LimelightBaseIE):
    """Extractor for a Limelight channel list, returned as a playlist of channel URLs."""

    IE_NAME = 'limelight:channel_list'
    _PLAYLIST_SERVICE_PATH = 'channel_list'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:channel_list:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bchannelListId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
        'info_dict': {
            'id': '301b117890c4465c8179ede21fd92e2b',
            'title': 'Website - Hero Player',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        list_id = self._match_id(url)
        listing = self._call_playlist_service(list_id, 'getMobileChannelListById')

        # Each channel is delegated to LimelightChannelIE via its internal URL
        channel_results = []
        for channel in listing['channelList']:
            channel_url = f'limelight:channel:{channel["id"]}'
            channel_results.append(self.url_result(channel_url, 'LimelightChannel'))

        return self.playlist_result(channel_results, list_id, listing['title'])
|
@ -134,7 +134,7 @@ class LRTRadioIE(LRTBaseIE):
|
||||
def _real_extract(self, url):
|
||||
video_id, path = self._match_valid_url(url).group('id', 'path')
|
||||
media = self._download_json(
|
||||
'https://www.lrt.lt/radioteka/api/media', video_id,
|
||||
'https://www.lrt.lt/rest-api/media', video_id,
|
||||
query={'url': f'/mediateka/irasas/{video_id}/{path}'})
|
||||
|
||||
return {
|
||||
|
@ -167,11 +167,11 @@ class LSMLTVEmbedIE(InfoExtractor):
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': 'D23-6000-105_cetstud',
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
|
||||
'md5': 'a1711e190fe680fdb68fd8413b378e87',
|
||||
'md5': 'f236cef2fd5953612754e4e66be51e7a',
|
||||
'info_dict': {
|
||||
'id': 'wUnFArIPDSY',
|
||||
'ext': 'mp4',
|
||||
@ -198,6 +198,8 @@ class LSMLTVEmbedIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/@LTV16plus',
|
||||
'like_count': int,
|
||||
'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
|
||||
'media_type': 'livestream',
|
||||
'timestamp': 1652550741,
|
||||
},
|
||||
}]
|
||||
|
||||
@ -208,7 +210,7 @@ def _real_extract(self, url):
|
||||
r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
|
||||
embed_type = traverse_obj(data, ('source', 'name', {str}))
|
||||
|
||||
if embed_type == 'telia':
|
||||
if embed_type in ('backscreen', 'telia'): # 'telia' only for backwards compat
|
||||
ie_key = 'CloudyCDN'
|
||||
embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
|
||||
elif embed_type == 'youtube':
|
||||
@ -226,9 +228,9 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class LSMReplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:skaties/|klausies/)?(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'url': 'https://replay.lsm.lv/lv/skaties/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'md5': '64f72a360ca530d5ed89c77646c9eee5',
|
||||
'info_dict': {
|
||||
'id': '46k_d23-6000-105',
|
||||
@ -241,20 +243,23 @@ class LSMReplayIE(InfoExtractor):
|
||||
'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
|
||||
'md5': '719b33875cd1429846eeeaeec6df2830',
|
||||
'url': 'https://replay.lsm.lv/lv/klausies/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
|
||||
'md5': '84feb80fd7e6ec07744726a9f01cda4d',
|
||||
'info_dict': {
|
||||
'id': 'a342781',
|
||||
'ext': 'mp3',
|
||||
'id': '183522',
|
||||
'ext': 'm4a',
|
||||
'duration': 1823,
|
||||
'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
|
||||
'upload_date': '20231102',
|
||||
'timestamp': 1698921060,
|
||||
'timestamp': 1698913860,
|
||||
'description': 'md5:7bac3b2dd41e44325032943251c357b1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'url': 'https://replay.lsm.lv/ru/skaties/statja/ltv/355067/v-kengaragse-nacalas-ukladka-relsov',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -267,12 +272,24 @@ def _real_extract(self, url):
|
||||
|
||||
data = self._search_nuxt_data(
|
||||
self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__')
|
||||
playback_type = data['playback']['type']
|
||||
|
||||
if playback_type == 'playable_audio_lr':
|
||||
playback_data = {
|
||||
'formats': self._extract_m3u8_formats(data['playback']['service']['hls_url'], video_id),
|
||||
}
|
||||
elif playback_type == 'embed':
|
||||
playback_data = {
|
||||
'_type': 'url_transparent',
|
||||
'url': data['playback']['service']['url'],
|
||||
}
|
||||
else:
|
||||
raise ExtractorError(f'Unsupported playback type "{playback_type}"')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
**playback_data,
|
||||
**traverse_obj(data, {
|
||||
'url': ('playback', 'service', 'url', {url_or_none}),
|
||||
'title': ('mediaItem', 'title'),
|
||||
'description': ('mediaItem', ('lead', 'body')),
|
||||
'duration': ('mediaItem', 'duration', {int_or_none}),
|
||||
|
107
yt_dlp/extractor/mave.py
Normal file
107
yt_dlp/extractor/mave.py
Normal file
@ -0,0 +1,107 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class MaveIE(InfoExtractor):
    """Extractor for podcast episodes hosted on ``<channel>.mave.digital``."""

    _API_BASE_URL = 'https://api.mave.digital/'
    _VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
    _TESTS = [{
        'url': 'https://ochenlichnoe.mave.digital/ep-25',
        'md5': 'aa3e513ef588b4366df1520657cbc10c',
        'info_dict': {
            'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
            'ext': 'mp3',
            'display_id': 'ochenlichnoe-ep-25',
            'title': 'Между мной и миром: психология самооценки',
            'description': 'md5:4b7463baaccb6982f326bce5c700382a',
            'uploader': 'Самарский университет',
            'channel': 'Очень личное',
            'channel_id': 'ochenlichnoe',
            'channel_url': 'https://ochenlichnoe.mave.digital/',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'duration': 3744,
            'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg',
            'series': 'Очень личное',
            'series_id': '2e0c3749-6df2-4946-82f4-50691419c065',
            'season': 'Season 3',
            'season_number': 3,
            'episode': 'Episode 3',
            'episode_number': 3,
            'timestamp': 1747817300,
            'upload_date': '20250521',
        },
    }, {
        'url': 'https://budem.mave.digital/ep-12',
        'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f',
        'info_dict': {
            'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
            'ext': 'mp3',
            'display_id': 'budem-ep-12',
            'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
            'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
            'uploader': 'Полина Цветкова+Евгения Акопова',
            'channel': 'Все там будем',
            'channel_id': 'budem',
            'channel_url': 'https://budem.mave.digital/',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
            'duration': 3664,
            'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg',
            'series': 'Все там будем',
            'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746',
            'season': 'Season 2',
            'season_number': 2,
            'episode': 'Episode 5',
            'episode_number': 5,
            'timestamp': 1735538400,
            'upload_date': '20241230',
        },
    }]

    def _real_extract(self, url):
        channel_id, slug = self._match_valid_url(url).group('channel', 'id')
        display_id = f'{channel_id}-{slug}'
        webpage = self._download_webpage(url, display_id)

        # Pick the first Nuxt data entry that carries the active episode
        nuxt_json = self._search_nuxt_json(webpage, display_id)
        data = traverse_obj(nuxt_json, (
            'data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))

        # Strip the trailing "_<digits>" suffix from the og:image file name
        og_thumbnail = self._og_search_thumbnail(webpage, default='')
        thumbnail = re.sub(r'_\d+(?=\.(?:jpg|png))', '', og_thumbnail) or None

        episode_meta = traverse_obj(data, ('activeEpisodeData', {
            'url': ('audio', {urljoin(self._API_BASE_URL)}),
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {clean_html}),
            'duration': ('duration', {int_or_none}),
            'season_number': ('season', {int_or_none}),
            'episode_number': ('number', {int_or_none}),
            'view_count': ('listenings', {int_or_none}),
            'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
            'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
            'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
            'timestamp': ('publish_date', {parse_iso8601}),
        }))
        podcast_meta = traverse_obj(data, ('podcast', 'podcast', {
            'series_id': ('id', {str}),
            'series': ('title', {str}),
            'channel': ('title', {str}),
            'uploader': ('author', {str}),
        }))

        return {
            'display_id': display_id,
            'channel_id': channel_id,
            'channel_url': f'https://{channel_id}.mave.digital/',
            'vcodec': 'none',
            'thumbnail': thumbnail,
            **episode_meta,
            **podcast_meta,
        }
|
37
yt_dlp/extractor/mir24tv.py
Normal file
37
yt_dlp/extractor/mir24tv.py
Normal file
@ -0,0 +1,37 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_qs, url_or_none
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class Mir24TvIE(InfoExtractor):
    """Extractor for news videos on mir24.tv."""

    IE_NAME = 'mir24.tv'
    _VALID_URL = r'https?://(?:www\.)?mir24\.tv/news/(?P<id>[0-9]+)/[^/?#]+'
    _TESTS = [{
        'url': 'https://mir24.tv/news/16635210/dni-kultury-rossii-otkrylis-v-uzbekistane.-na-prazdnichnom-koncerte-vystupili-zvezdy-rossijskoj-estrada',
        'info_dict': {
            'id': '16635210',
            'title': 'Дни культуры России открылись в Узбекистане. На праздничном концерте выступили звезды российской эстрады',
            'ext': 'mp4',
            'thumbnail': r're:https://images\.mir24\.tv/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, impersonate=True)

        player_url = self._search_regex(
            r'<iframe\b[^>]+\bsrc=["\'](https?://mir24\.tv/players/[^"\']+)',
            webpage, 'iframe URL')

        # The HLS manifest URL is taken from the last "source" query
        # parameter of the player iframe URL
        manifest_path = (
            {parse_qs}, 'source', -1, {self._proto_relative_url}, {url_or_none}, {require('m3u8 URL')})
        m3u8_url = traverse_obj(player_url, manifest_path)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, video_id, 'mp4', m3u8_id='hls')

        title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage, default=None)
        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'subtitles': subtitles,
        }
|
@ -18,7 +18,7 @@ class MirrativIE(MirrativBaseIE):
|
||||
IE_NAME = 'mirrativ'
|
||||
_VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)'
|
||||
|
||||
TESTS = [{
|
||||
_TESTS = [{
|
||||
'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw',
|
||||
'info_dict': {
|
||||
'id': 'UQomuS7EMgHoxRHjEhNiHw',
|
||||
|
@ -1,7 +1,5 @@
|
||||
from .telecinco import TelecincoBaseIE
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
@ -81,17 +79,7 @@ class MiTeleIE(TelecincoBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
try: # yt-dlp's default user-agents are too old and blocked by akamai
|
||||
webpage = self._download_webpage(url, display_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
||||
raise
|
||||
# Retry with impersonation if hardcoded UA is insufficient to bypass akamai
|
||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
pre_player = self._search_json(
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
||||
webpage, 'Pre Player', display_id)['prePlayer']
|
||||
|
134
yt_dlp/extractor/mixlr.py
Normal file
134
yt_dlp/extractor/mixlr.py
Normal file
@ -0,0 +1,134 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none, parse_iso8601, url_or_none, urlhandle_detect_ext
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class MixlrIE(InfoExtractor):
    """Extractor for Mixlr events (``<username>.mixlr.com/events/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?(?P<username>[\w-]+)\.mixlr\.com/events/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://suncity-104-9fm.mixlr.com/events/4387115',
        'info_dict': {
            'id': '4387115',
            'ext': 'mp3',
            'title': r're:SUNCITY 104.9FM\'s live audio \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'uploader': 'suncity-104-9fm',
            'like_count': int,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/cd5b34d05fa2cee72d80477724a2f02e.png',
            'timestamp': 1751943773,
            'upload_date': '20250708',
            'release_timestamp': 1751943764,
            'release_date': '20250708',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://brcountdown.mixlr.com/events/4395480',
        'info_dict': {
            'id': '4395480',
            'ext': 'aac',
            'title': r're:Beats Revolution Countdown Episodio 461 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'md5:5cacd089723f7add3f266bd588315bb3',
            'uploader': 'brcountdown',
            'like_count': int,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/c48727a59f690b87a55d47d123ba0d6d.jpg',
            'timestamp': 1752354007,
            'upload_date': '20250712',
            'release_timestamp': 1752354000,
            'release_date': '20250712',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://www.brcountdown.mixlr.com/events/4395480',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        username, event_id = self._match_valid_url(url).group('username', 'id')
        api_url = f'https://api.mixlr.com/v3/channels/{username}/events/{event_id}'
        broadcast_info = self._download_json(api_url, event_id)

        # Attribute locations inside the API response
        stream_attrs = ('included', 0, 'attributes')
        event_attrs = ('data', 'attributes')

        formats = []
        format_url = traverse_obj(
            broadcast_info, (*stream_attrs, 'progressive_stream_url', {url_or_none}))
        # Probe the stream with a HEAD request; it is only listed as a
        # format when that request answers 200
        urlh = format_url and self._request_webpage(
            HEADRequest(format_url), event_id, fatal=False, note='Checking stream')
        if urlh and urlh.status == 200:
            ext = urlhandle_detect_ext(urlh)
            if ext == 'octet-stream':
                self.report_warning(
                    'The server did not return a valid file extension for the stream URL. '
                    'Assuming an mp3 stream; postprocessing may fail if this is incorrect')
                ext = 'mp3'
            formats.append({
                'url': format_url,
                'ext': ext,
                'vcodec': 'none',
            })

        release_timestamp = traverse_obj(
            broadcast_info, (*event_attrs, 'starts_at', {str}))
        if release_timestamp and not formats:
            self.raise_no_formats(f'This event will start at {release_timestamp}', expected=True)

        stream_meta = traverse_obj(broadcast_info, (*stream_attrs, {
            'title': ('title', {str}),
            'timestamp': ('started_at', {parse_iso8601}),
            'concurrent_view_count': ('concurrent_view_count', {int_or_none}),
            'like_count': ('heart_count', {int_or_none}),
            'is_live': ('live', {bool}),
        }))
        event_meta = traverse_obj(broadcast_info, (*event_attrs, {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('started_at', {parse_iso8601}),
            'concurrent_view_count': ('concurrent_view_count', {int_or_none}),
            'like_count': ('heart_count', {int_or_none}),
            'thumbnail': ('artwork_url', {url_or_none}),
            'uploader_id': ('broadcaster_id', {str}),
        }))

        # event_meta is unpacked last, so its keys take precedence over
        # the same keys from stream_meta
        return {
            'id': event_id,
            'uploader': username,
            'formats': formats,
            'release_timestamp': parse_iso8601(release_timestamp),
            **stream_meta,
            **event_meta,
        }
|
||||
|
||||
|
||||
class MixlrRecoringIE(InfoExtractor):
    """Extractor for Mixlr recordings (``<username>.mixlr.com/recordings/<id>``)."""
    # NOTE(review): the class name reads like a typo of "MixlrRecordingIE";
    # it is kept as-is because code outside this block may reference it.

    _VALID_URL = r'https?://(?:www\.)?(?P<username>[\w-]+)\.mixlr\.com/recordings/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://biblewayng.mixlr.com/recordings/2375193',
        'info_dict': {
            'id': '2375193',
            'ext': 'mp3',
            'title': "God's Jewels and Their Resting Place Bro. Adeniji",
            'description': 'Preached February 21, 2024 in the evening',
            'uploader_id': '8659190',
            'duration': 10968,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/ceca120ef707f642abeea6e29cd74238.jpg',
            'timestamp': 1708544542,
            'upload_date': '20240221',
        },
    }]

    def _real_extract(self, url):
        username, recording_id = self._match_valid_url(url).group('username', 'id')
        api_url = f'https://api.mixlr.com/v3/channels/{username}/recordings/{recording_id}'
        recording_info = self._download_json(api_url, recording_id)

        # All metadata, including the media URL, lives under data.attributes
        recording_meta = traverse_obj(recording_info, ('data', 'attributes', {
            'ext': ('file_format', {str}),
            'url': ('url', {url_or_none}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'duration': ('duration', {int_or_none}),
            'thumbnail': ('artwork_url', {url_or_none}),
            'uploader_id': ('user_id', {str}),
        }))
        return {'id': recording_id, **recording_meta}
|
@ -457,12 +457,9 @@ def _extract_formats_and_subtitles(self, broadcast, video_id):
|
||||
self.report_warning(f'No formats available for {format_id} broadcast; skipping')
|
||||
return [], {}
|
||||
|
||||
cdn_headers = {'x-cdn-token': token}
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4',
|
||||
m3u8_id=format_id, fatal=False, headers=cdn_headers)
|
||||
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
||||
for fmt in fmts:
|
||||
fmt['http_headers'] = cdn_headers
|
||||
fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' '))
|
||||
fmt.setdefault('language', language)
|
||||
if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':
|
||||
|
@ -19,7 +19,8 @@
|
||||
class NBACVPBaseIE(TurnerBaseIE):
|
||||
def _extract_nba_cvp_info(self, path, video_id, fatal=False):
|
||||
return self._extract_cvp_info(
|
||||
f'http://secure.nba.com/{path}', video_id, {
|
||||
# XXX: The 3rd argument (None) needs to be the AdobePass software_statement
|
||||
f'http://secure.nba.com/{path}', video_id, None, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
@ -94,6 +95,7 @@ def _extract_video(self, filter_key, filter_value):
|
||||
|
||||
|
||||
class NBAWatchEmbedIE(NBAWatchBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:watch:embed'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
@ -115,6 +117,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class NBAWatchIE(NBAWatchBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:watch'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
@ -167,6 +170,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class NBAWatchCollectionIE(NBAWatchBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:watch:collection'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
@ -336,6 +340,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class NBAEmbedIE(NBABaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:embed'
|
||||
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
@ -358,6 +363,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class NBAIE(NBABaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba'
|
||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?!{NBABaseIE._CHANNEL_PATH_REGEX})video/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
@ -385,6 +391,7 @@ def _extract_url_results(self, team, content_id):
|
||||
|
||||
|
||||
class NBAChannelIE(NBABaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:channel'
|
||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?:{NBABaseIE._CHANNEL_PATH_REGEX})/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE, default_ns
|
||||
from .theplatform import ThePlatformBaseIE, ThePlatformIE, default_ns
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -14,26 +14,130 @@
|
||||
UserNotLive,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
mimetype2ext,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
|
||||
class NBCUniversalBaseIE(ThePlatformBaseIE):
    # Shared helpers for NBCUniversal sites whose media is served through
    # link.theplatform.com SMIL manifests that point at HLS (m3u8) streams.
    _GEO_COUNTRIES = ['US']
    _GEO_BYPASS = False
    # Matches the CMAF master playlist URLs (cbcs/cenc variants) and captures the
    # "folders" path segment that is substituted into the alternative m3u8 URL template
    _M3U8_RE = r'https?://[^/?#]+/prod/[\w-]+/(?P<folders>[^?#]+/)cmaf/mpeg_(?:cbcs|cenc)\w*/master_cmaf\w*\.m3u8'

    def _download_nbcu_smil_and_extract_m3u8_url(self, tp_path, video_id, query):
        """Download the SMIL manifest for tp_path and return the first m3u8 video URL in it.

        @param tp_path   Path appended to https://link.theplatform.com/s/
        @param video_id  ID used for reporting
        @param query     Extra query parameters; not modified by this method

        If no m3u8 URL is present, the manifest's "exception" param is inspected:
        'GeoLocationBlocked' is reported through raise_geo_restricted, anything else
        raises ExtractorError (marked as expected when the exception is 'Expired').
        """
        smil = self._download_xml(
            f'https://link.theplatform.com/s/{tp_path}', video_id,
            'Downloading SMIL manifest', 'Failed to download SMIL manifest', query={
                **query,
                'format': 'SMIL',  # XXX: Do not confuse "format" with "formats"
                'manifest': 'm3u',
                'switch': 'HLSServiceSecure',  # Or else we get broken mp4 http URLs instead of HLS
            }, headers=self.geo_verification_headers())

        ns = f'//{{{default_ns}}}'
        if url := traverse_obj(smil, (f'{ns}video/@src', lambda _, v: determine_ext(v) == 'm3u8', any)):
            return url

        exc = traverse_obj(smil, (f'{ns}param', lambda _, v: v.get('name') == 'exception', '@value', any))
        if exc == 'GeoLocationBlocked':
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        # The manifest does not necessarily carry an abstract; fall back to a
        # generic message instead of raising an error whose message is None
        message = traverse_obj(smil, (f'{ns}ref/@abstract', ..., any))
        raise ExtractorError(
            message or 'Unable to find an m3u8 URL in the SMIL manifest', expected=exc == 'Expired')

    def _extract_nbcu_formats_and_subtitles(self, tp_path, video_id, query):
        """Resolve the HLS manifest URL for tp_path and return (formats, subtitles).

        The passed query dict is copied, never modified, so callers can reuse it.
        """
        # formats='mpeg4' will return either a working m3u8 URL or an m3u8 template for non-DRM HLS
        # formats='m3u+none,mpeg4' may return DRM HLS but w/the "folders" needed for non-DRM template
        query = {**query, 'formats': 'm3u+none,mpeg4'}
        m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)

        if mobj := re.fullmatch(self._M3U8_RE, m3u8_url):
            m3u8_tmpl = self._download_nbcu_smil_and_extract_m3u8_url(
                tp_path, video_id, {**query, 'formats': 'mpeg4'})
            # Example: https://vod-lf-oneapp-prd.akamaized.net/prod/video/{folders}master_hls.m3u8
            if '{folders}' in m3u8_tmpl:
                self.write_debug('Found m3u8 URL template, formatting URL path')
                m3u8_url = m3u8_tmpl.format(folders=mobj.group('folders'))

        # Still pointing at a cenc/cbcs playlist: no non-DRM URL could be derived
        if '/mpeg_cenc' in m3u8_url or '/mpeg_cbcs' in m3u8_url:
            self.report_drm(video_id)

        return self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')

    def _extract_nbcu_video(self, url, display_id, old_ie_key=None):
        """Extract a single video from an NBCU page and return its info dict.

        @param url         Page URL to download
        @param display_id  Display ID, also used for reporting
        @param old_ie_key  If given, an '_old_archive_ids' entry is generated for it

        Account and video identifiers come from the 'tve-video-deck-app' element when
        the page has one, otherwise from the 'ls_playlist' entry of the Drupal settings.
        """
        webpage = self._download_webpage(url, display_id)
        settings = self._search_json(
            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>',
            webpage, 'settings', display_id)

        query = {}
        tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
        if tve:
            account_pid = tve.get('data-mpx-media-account-pid') or tve['data-mpx-account-pid']
            account_id = tve['data-mpx-media-account-id']
            # Parsing is non-fatal and may not produce a dict; normalize to {} so that
            # a missing GUID fails as a missing key like the other required attributes
            metadata = self._parse_json(
                tve.get('data-normalized-video') or '', display_id,
                fatal=False, transform_source=unescapeHTML) or {}
            video_id = tve.get('data-guid') or metadata['guid']
            if tve.get('data-entitlement') == 'auth':
                # Entitled content: obtain an auth token for the SMIL request
                auth = settings['tve_adobe_auth']
                release_pid = tve['data-release-pid']
                resource = self._get_mvpd_resource(
                    tve.get('data-adobe-pass-resource-id') or auth['adobePassResourceId'],
                    tve['data-title'], release_pid, tve.get('data-rating'))
                query['auth'] = self._extract_mvpd_auth(
                    url, release_pid, auth['adobePassRequestorId'],
                    resource, auth['adobePassSoftwareStatement'])
        else:
            ls_playlist = traverse_obj(settings, (
                'ls_playlist', lambda _, v: v['defaultGuid'], any, {require('LS playlist')}))
            video_id = ls_playlist['defaultGuid']
            account_pid = ls_playlist.get('mpxMediaAccountPid') or ls_playlist['mpxAccountPid']
            account_id = ls_playlist['mpxMediaAccountId']
            metadata = traverse_obj(ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, any)) or {}

        tp_path = f'{account_pid}/media/guid/{account_id}/{video_id}'
        formats, subtitles = self._extract_nbcu_formats_and_subtitles(tp_path, video_id, query)
        tp_metadata = self._download_theplatform_metadata(tp_path, video_id, fatal=False)
        parsed_info = self._parse_theplatform_metadata(tp_metadata)
        self._merge_subtitles(parsed_info['subtitles'], target=subtitles)

        return {
            **parsed_info,
            # Page metadata takes precedence over the parsed ThePlatform metadata
            **traverse_obj(metadata, {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'duration': ('durationInSeconds', {int_or_none}),
                'timestamp': ('airDate', {parse_iso8601}),
                'thumbnail': ('thumbnailUrl', {url_or_none}),
                'season_number': ('seasonNumber', {int_or_none}),
                'episode_number': ('episodeNumber', {int_or_none}),
                'episode': ('episodeTitle', {str}),
                'series': ('show', {str}),
            }),
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            '_old_archive_ids': [make_archive_id(old_ie_key, video_id)] if old_ie_key else None,
        }
|
||||
|
||||
|
||||
class NBCIE(NBCUniversalBaseIE):
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/?#]+/video/[^/?#]+/(?P<id>\w+))'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
|
||||
@ -49,47 +153,20 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'episode_number': 86,
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'series': 'Tonight Show: Jimmy Fallon',
|
||||
'duration': 237.0,
|
||||
'chapters': 'count:1',
|
||||
'tags': 'count:4',
|
||||
'series': 'Tonight',
|
||||
'duration': 236.504,
|
||||
'tags': 'count:2',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
|
||||
'media_type': 'Full Episode',
|
||||
'age_limit': 14,
|
||||
'_old_archive_ids': ['theplatform 2848237'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
||||
'info_dict': {
|
||||
'id': '2832821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Star Wars Teaser',
|
||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||
'timestamp': 1417852800,
|
||||
'upload_date': '20141206',
|
||||
'uploader': 'NBCU-COM',
|
||||
},
|
||||
'skip': 'page not found',
|
||||
},
|
||||
{
|
||||
# HLS streams requires the 'hdnea3' cookie
|
||||
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
|
||||
'info_dict': {
|
||||
'id': '101528f5a9e8127b107e98c5e6ce4638',
|
||||
'ext': 'mp4',
|
||||
'title': 'Goliath',
|
||||
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
|
||||
'timestamp': 1237100400,
|
||||
'upload_date': '20090315',
|
||||
'uploader': 'NBCU-COM',
|
||||
},
|
||||
'skip': 'page not found',
|
||||
},
|
||||
{
|
||||
# manifest url does not have extension
|
||||
'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
|
||||
'info_dict': {
|
||||
'id': '3646439',
|
||||
@ -99,48 +176,47 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'episode_number': 1,
|
||||
'season': 'Season 75',
|
||||
'season_number': 75,
|
||||
'series': 'The Golden Globe Awards',
|
||||
'series': 'Golden Globes',
|
||||
'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
|
||||
'uploader': 'NBCU-COM',
|
||||
'upload_date': '20180107',
|
||||
'timestamp': 1515312000,
|
||||
'duration': 570.0,
|
||||
'duration': 569.703,
|
||||
'tags': 'count:8',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'chapters': 'count:1',
|
||||
'media_type': 'Highlight',
|
||||
'age_limit': 0,
|
||||
'categories': ['Series/The Golden Globe Awards'],
|
||||
'_old_archive_ids': ['theplatform 3646439'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
# new video_id format
|
||||
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
|
||||
# Needs to be extracted from webpage instead of GraphQL
|
||||
'url': 'https://www.nbc.com/paris2024/video/ali-truwit-found-purpose-pool-after-her-life-changed/para24_sww_alitruwittodayshow_240823',
|
||||
'info_dict': {
|
||||
'id': 'NBCE125189978',
|
||||
'id': 'para24_sww_alitruwittodayshow_240823',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
|
||||
'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
|
||||
'uploader': 'NBCU-COM',
|
||||
'series': 'Quantum Leap',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
|
||||
'episode_number': 1,
|
||||
'duration': 170.171,
|
||||
'chapters': [],
|
||||
'timestamp': 1663956155,
|
||||
'upload_date': '20220923',
|
||||
'tags': 'count:10',
|
||||
'age_limit': 0,
|
||||
'title': 'Ali Truwit found purpose in the pool after her life changed',
|
||||
'description': 'md5:c16d7489e1516593de1cc5d3f39b9bdb',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'duration': 311.077,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': ['Series/Quantum Leap 2022'],
|
||||
'media_type': 'Highlight',
|
||||
'episode': 'Ali Truwit found purpose in the pool after her life changed',
|
||||
'timestamp': 1724435902.0,
|
||||
'upload_date': '20240823',
|
||||
'_old_archive_ids': ['theplatform para24_sww_alitruwittodayshow_240823'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
|
||||
'only_matching': True,
|
||||
@ -151,6 +227,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1Yzg2YjdkYy04NDI3LTRjNDUtOGQwZi1iNDkzYmE3MmQwYjQiLCJuYmYiOjE1Nzg3MDM2MzEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTc4NzAzNjMxfQ.QQKIsBhAjGQTMdAqRTqhcz2Cddr4Y2hEjnSiOeKKki4nLrkDOsjQMmqeTR0hSRarraxH54wBgLvsxI7LHwKMvr7G8QpynNAxylHlQD3yhN9tFhxt4KR5wW3as02B-W2TznK9bhNWPKIyHND95Uo2Mi6rEQoq8tM9O09WPWaanE5BX_-r6Llr6dPq5F0Lpx2QOn2xYRb1T4nFxdFTNoss8GBds8OvChTiKpXMLHegLTc1OS4H_1a8tO_37jDwSdJuZ8iTyRLV4kZ2cpL6OL5JPMObD4-HQiec_dfcYgMKPiIfP9ZqdXpec2SVaCLsWEk86ZYvD97hLIQrK5rrKd1y-A'
|
||||
|
||||
def _real_extract(self, url):
|
||||
permalink, video_id = self._match_valid_url(url).groups()
|
||||
@ -196,62 +273,50 @@ def _real_extract(self, url):
|
||||
'userId': '0',
|
||||
}),
|
||||
})['data']['bonanzaPage']['metadata']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
'switch': 'HLSServiceSecure',
|
||||
}
|
||||
|
||||
if not video_data:
|
||||
# Some videos are not available via GraphQL API
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = self._search_json(
|
||||
r'<script>\s*PRELOAD\s*=', webpage, 'video data',
|
||||
video_id)['pages'][urllib.parse.urlparse(url).path]['base']['metadata']
|
||||
|
||||
video_id = video_data['mpxGuid']
|
||||
tp_path = 'NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id)
|
||||
tpm = self._download_theplatform_metadata(tp_path, video_id)
|
||||
title = tpm.get('title') or video_data.get('secondaryTitle')
|
||||
tp_path = f'NnzsPC/media/guid/{video_data["mpxAccountId"]}/{video_id}'
|
||||
tpm = self._download_theplatform_metadata(tp_path, video_id, fatal=False)
|
||||
title = traverse_obj(tpm, ('title', {str})) or video_data.get('secondaryTitle')
|
||||
query = {}
|
||||
if video_data.get('locked'):
|
||||
resource = self._get_mvpd_resource(
|
||||
video_data.get('resourceId') or 'nbcentertainment',
|
||||
title, video_id, video_data.get('rating'))
|
||||
video_data['resourceId'], title, video_id, video_data.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'nbcentertainment', resource)
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id),
|
||||
query), {'force_smil_url': True})
|
||||
url, video_id, 'nbcentertainment', resource, self._SOFTWARE_STATEMENT)
|
||||
|
||||
# Empty string or 0 can be valid values for these. So the check must be `is None`
|
||||
description = video_data.get('description')
|
||||
if description is None:
|
||||
description = tpm.get('description')
|
||||
episode_number = int_or_none(video_data.get('episodeNumber'))
|
||||
if episode_number is None:
|
||||
episode_number = int_or_none(tpm.get('nbcu$airOrder'))
|
||||
rating = video_data.get('rating')
|
||||
if rating is None:
|
||||
try_get(tpm, lambda x: x['ratings'][0]['rating'])
|
||||
season_number = int_or_none(video_data.get('seasonNumber'))
|
||||
if season_number is None:
|
||||
season_number = int_or_none(tpm.get('nbcu$seasonNumber'))
|
||||
series = video_data.get('seriesShortTitle')
|
||||
if series is None:
|
||||
series = tpm.get('nbcu$seriesShortTitle')
|
||||
tags = video_data.get('keywords')
|
||||
if tags is None or len(tags) == 0:
|
||||
tags = tpm.get('keywords')
|
||||
formats, subtitles = self._extract_nbcu_formats_and_subtitles(tp_path, video_id, query)
|
||||
parsed_info = self._parse_theplatform_metadata(tpm)
|
||||
self._merge_subtitles(parsed_info['subtitles'], target=subtitles)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'description': description,
|
||||
'episode': title,
|
||||
'episode_number': episode_number,
|
||||
**traverse_obj(video_data, {
|
||||
'description': ('description', {str}, filter),
|
||||
'episode': ('secondaryTitle', {str}, filter),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'age_limit': ('rating', {parse_age_limit}),
|
||||
'tags': ('keywords', ..., {str}, filter, all, filter),
|
||||
'series': ('seriesShortTitle', {str}),
|
||||
}),
|
||||
**parsed_info,
|
||||
'id': video_id,
|
||||
'ie_key': 'ThePlatform',
|
||||
'season_number': season_number,
|
||||
'series': series,
|
||||
'tags': tags,
|
||||
'title': title,
|
||||
'url': theplatform_url,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'_old_archive_ids': [make_archive_id('ThePlatform', video_id)],
|
||||
}
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
_EMBED_REGEX = [rf'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>{_VALID_URL_BASE}[^\"]+)']
|
||||
@ -286,6 +351,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class NBCSportsIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@ -321,6 +387,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class NBCSportsStreamIE(AdobePassIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
|
||||
@ -354,7 +421,7 @@ def _real_extract(self, url):
|
||||
source_url = video_source['ottStreamUrl']
|
||||
is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
|
||||
resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
|
||||
token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
|
||||
token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource, None) # XXX: None arg needs to be software_statement
|
||||
tokenized_url = self._download_json(
|
||||
'https://token.playmakerservices.com/cdn',
|
||||
video_id, data=json.dumps({
|
||||
@ -534,22 +601,26 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
IE_NAME = 'nbcolympics'
|
||||
_VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# Geo-restricted to US
|
||||
'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
|
||||
'md5': '54fecf846d05429fbaa18af557ee523a',
|
||||
'url': 'https://www.nbcolympics.com/videos/watch-final-minutes-team-usas-mens-basketball-gold',
|
||||
'info_dict': {
|
||||
'id': 'WjTBzDXx5AUq',
|
||||
'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
|
||||
'id': 'SAwGfPlQ1q01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rose\'s son Leo was in tears after his dad won gold',
|
||||
'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
|
||||
'timestamp': 1471274964,
|
||||
'upload_date': '20160815',
|
||||
'display_id': 'watch-final-minutes-team-usas-mens-basketball-gold',
|
||||
'title': 'Watch the final minutes of Team USA\'s men\'s basketball gold',
|
||||
'description': 'md5:f704f591217305c9559b23b877aa8d31',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'duration': 387.053,
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'chapters': [],
|
||||
'timestamp': 1723346984,
|
||||
'upload_date': '20240811',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@ -578,6 +649,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class NBCOlympicsStreamIE(AdobePassIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nbcolympics:stream'
|
||||
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
|
||||
_TESTS = [
|
||||
@ -630,7 +702,8 @@ def _real_extract(self, url):
|
||||
event_config.get('resourceId', 'NBCOlympics'),
|
||||
re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid,
|
||||
event_config.get('ratingId', 'NO VALUE'))
|
||||
media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
|
||||
# XXX: The None arg below needs to be the software_statement for this requestor
|
||||
media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource, None)
|
||||
|
||||
source_url = self._download_json(
|
||||
'https://tokens.playmakerservices.com/', pid, 'Retrieving tokenized URL',
|
||||
@ -848,3 +921,178 @@ def _real_extract(self, url):
|
||||
'is_live': is_live,
|
||||
**info,
|
||||
}
|
||||
|
||||
|
||||
class BravoTVIE(NBCUniversalBaseIE):
    # Handles bravotv.com and oxygen.com pages; the last path segment of the
    # URL is the display ID and extraction is delegated to _extract_nbcu_video
    _VALID_URL = r'https?://(?:www\.)?(?:bravotv|oxygen)\.com/(?:[^/?#]+/)+(?P<id>[^/?#]+)'
    _TESTS = [
        {
            'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
            'info_dict': {
                'id': '3923059',
                'ext': 'mp4',
                'title': 'The Top Chef Season 16 Winner Is...',
                'display_id': 'the-top-chef-season-16-winner-is',
                'description': 'Find out who takes the title of Top Chef!',
                'upload_date': '20190315',
                'timestamp': 1552618860,
                'season_number': 16,
                'episode_number': 15,
                'series': 'Top Chef',
                'episode': 'Finale',
                'duration': 190,
                'season': 'Season 16',
                'thumbnail': r're:^https://.+\.jpg',
                'uploader': 'NBCU-BRAV',
                'categories': ['Series', 'Series/Top Chef'],
                'tags': 'count:10',
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
            'info_dict': {
                'id': '9000234570',
                'ext': 'mp4',
                'title': 'London Calling',
                'display_id': 'london-calling',
                'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
                'upload_date': '20230310',
                'timestamp': 1678418100,
                'season_number': 20,
                'episode_number': 1,
                'series': 'Top Chef',
                'episode': 'London Calling',
                'duration': 3266,
                'season': 'Season 20',
                'chapters': 'count:7',
                'thumbnail': r're:^https://.+\.jpg',
                'age_limit': 14,
                'media_type': 'Full Episode',
                'uploader': 'NBCU-MPAT',
                'categories': ['Series/Top Chef'],
                'tags': 'count:10',
            },
            'params': {'skip_download': 'm3u8'},
            'skip': 'This video requires AdobePass MSO credentials',
        },
        {
            'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
            'info_dict': {
                'id': '3692045',
                'ext': 'mp4',
                'title': 'Closing Night',
                'display_id': 'closing-night',
                'description': 'md5:c8a5bb523c8ef381f3328c6d9f1e4632',
                'upload_date': '20230126',
                'timestamp': 1674709200,
                'season_number': 1,
                'episode_number': 1,
                'series': 'In Ice Cold Blood',
                'episode': 'Closing Night',
                'duration': 2629,
                'season': 'Season 1',
                'chapters': 'count:6',
                'thumbnail': r're:^https://.+\.jpg',
                'age_limit': 14,
                'media_type': 'Full Episode',
                'uploader': 'NBCU-MPAT',
                'categories': ['Series/In Ice Cold Blood'],
                'tags': ['ice-t', 'in ice cold blood', 'law and order', 'oxygen', 'true crime'],
            },
            'params': {'skip_download': 'm3u8'},
            'skip': 'This video requires AdobePass MSO credentials',
        },
        {
            'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
            'info_dict': {
                'id': '3974019',
                'ext': 'mp4',
                'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
                'display_id': 'handling-the-horwitz-house-after-the-murder-season-2',
                'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
                'upload_date': '20190618',
                'timestamp': 1560819600,
                'season_number': 2,
                'episode_number': 16,
                'series': 'In Ice Cold Blood',
                'episode': 'Mother Vs Son',
                'duration': 68,
                'season': 'Season 2',
                'thumbnail': r're:^https://.+\.jpg',
                'age_limit': 14,
                'uploader': 'NBCU-OXY',
                'categories': ['Series/In Ice Cold Blood'],
                'tags': ['in ice cold blood', 'ice-t', 'law and order', 'true crime', 'oxygen'],
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # The ID matched by _VALID_URL is passed on as the display ID
        return self._extract_nbcu_video(url, self._match_id(url))
|
||||
|
||||
|
||||
class SyfyIE(NBCUniversalBaseIE):
    # Handles syfy.com episode and video pages; extraction is delegated to
    # _extract_nbcu_video, with 'ThePlatform' passed as the old IE key
    _VALID_URL = r'https?://(?:www\.)?syfy\.com/[^/?#]+/(?:season-\d+/episode-\d+/(?:videos/)?|videos/)(?P<id>[^/?#]+)'
    _TESTS = [
        {
            'url': 'https://www.syfy.com/face-off/season-13/episode-10/videos/keyed-up',
            'info_dict': {
                'id': '3774403',
                'ext': 'mp4',
                'display_id': 'keyed-up',
                'title': 'Keyed Up',
                'description': 'md5:feafd15bee449f212dcd3065bbe9a755',
                'age_limit': 14,
                'duration': 169,
                'thumbnail': r're:https://www\.syfy\.com/.+/.+\.jpg',
                'series': 'Face Off',
                'season': 'Season 13',
                'season_number': 13,
                'episode': 'Through the Looking Glass Part 2',
                'episode_number': 10,
                'timestamp': 1533711618,
                'upload_date': '20180808',
                'media_type': 'Excerpt',
                'uploader': 'NBCU-MPAT',
                'categories': ['Series/Face Off'],
                'tags': 'count:15',
                '_old_archive_ids': ['theplatform 3774403'],
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://www.syfy.com/face-off/season-13/episode-10/through-the-looking-glass-part-2',
            'info_dict': {
                'id': '3772391',
                'ext': 'mp4',
                'display_id': 'through-the-looking-glass-part-2',
                'title': 'Through the Looking Glass Pt.2',
                'description': 'md5:90bd5dcbf1059fe3296c263599af41d2',
                'age_limit': 0,
                'duration': 2599,
                'thumbnail': r're:https://www\.syfy\.com/.+/.+\.jpg',
                'chapters': [
                    {'start_time': 0.0, 'end_time': 679.0, 'title': '<Untitled Chapter 1>'},
                    {'start_time': 679.0, 'end_time': 1040.967, 'title': '<Untitled Chapter 2>'},
                    {'start_time': 1040.967, 'end_time': 1403.0, 'title': '<Untitled Chapter 3>'},
                    {'start_time': 1403.0, 'end_time': 1870.0, 'title': '<Untitled Chapter 4>'},
                    {'start_time': 1870.0, 'end_time': 2496.967, 'title': '<Untitled Chapter 5>'},
                    {'start_time': 2496.967, 'end_time': 2599, 'title': '<Untitled Chapter 6>'},
                ],
                'series': 'Face Off',
                'season': 'Season 13',
                'season_number': 13,
                'episode': 'Through the Looking Glass Part 2',
                'episode_number': 10,
                'timestamp': 1672570800,
                'upload_date': '20230101',
                'media_type': 'Full Episode',
                'uploader': 'NBCU-MPAT',
                'categories': ['Series/Face Off'],
                'tags': 'count:15',
                '_old_archive_ids': ['theplatform 3772391'],
            },
            'params': {'skip_download': 'm3u8'},
            'skip': 'This video requires AdobePass MSO credentials',
        },
    ]

    def _real_extract(self, url):
        # The ID matched by _VALID_URL is passed on as the display ID
        return self._extract_nbcu_video(url, self._match_id(url), old_ie_key='ThePlatform')
|
||||
|
@ -1,53 +1,70 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NewsPicksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
|
||||
|
||||
_VALID_URL = r'https?://newspicks\.com/movie-series/(?P<id>[^?/#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://newspicks.com/movie-series/11?movieId=1813',
|
||||
'url': 'https://newspicks.com/movie-series/11/?movieId=1813',
|
||||
'info_dict': {
|
||||
'id': '1813',
|
||||
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
|
||||
'description': 'md5:09397aad46d6ded6487ff13f138acadf',
|
||||
'channel': 'HORIE ONE',
|
||||
'channel_id': '11',
|
||||
'release_date': '20220117',
|
||||
'thumbnail': r're:https://.+jpg',
|
||||
'ext': 'mp4',
|
||||
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
|
||||
'cast': 'count:4',
|
||||
'description': 'md5:09397aad46d6ded6487ff13f138acadf',
|
||||
'release_date': '20220117',
|
||||
'release_timestamp': 1642424400,
|
||||
'series': 'HORIE ONE',
|
||||
'series_id': '11',
|
||||
'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
|
||||
'timestamp': 1642424420,
|
||||
'upload_date': '20220117',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://newspicks.com/movie-series/158/?movieId=3932',
|
||||
'info_dict': {
|
||||
'id': '3932',
|
||||
'ext': 'mp4',
|
||||
'title': '【検証】専門家は、KADOKAWAをどう見るか',
|
||||
'cast': 'count:3',
|
||||
'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d',
|
||||
'release_date': '20240622',
|
||||
'release_timestamp': 1719088080,
|
||||
'series': 'NPレポート',
|
||||
'series_id': '158',
|
||||
'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
|
||||
'timestamp': 1719086400,
|
||||
'upload_date': '20240622',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
|
||||
series_id = self._match_id(url)
|
||||
video_id = traverse_obj(parse_qs(url), ('movieId', -1, {str}, {require('movie ID')}))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
entries = self._parse_html5_media_entries(
|
||||
url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
|
||||
if not entries:
|
||||
raise ExtractorError('No HTML5 media elements found')
|
||||
info = entries[0]
|
||||
|
||||
title = self._html_search_meta('og:title', webpage, fatal=False)
|
||||
description = self._html_search_meta(
|
||||
('og:description', 'twitter:title'), webpage, fatal=False)
|
||||
channel = self._html_search_regex(
|
||||
r'value="11".+?<div\s+class="title">(.+?)</div', webpage, 'channel name', fatal=False)
|
||||
if not title or not channel:
|
||||
title, channel = re.split(r'\s*|\s*', self._html_extract_title(webpage))
|
||||
fragment = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['fragment']
|
||||
m3u8_url = traverse_obj(fragment, ('movie', 'movieUrl', {url_or_none}, {require('m3u8 URL')}))
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
||||
|
||||
release_date = self._search_regex(
|
||||
r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
|
||||
webpage, 'release date', fatal=False, group=(1, 2, 3))
|
||||
|
||||
info.update({
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'channel': channel,
|
||||
'channel_id': channel_id,
|
||||
'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
|
||||
})
|
||||
return info
|
||||
'formats': formats,
|
||||
'series': traverse_obj(fragment, ('series', 'title', {str})),
|
||||
'series_id': series_id,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(fragment, ('movie', {
|
||||
'title': ('title', {str}),
|
||||
'cast': ('relatedUsers', ..., 'displayName', {str}, filter, all, filter),
|
||||
'description': ('explanation', {clean_html}),
|
||||
'release_timestamp': ('onAirStartDate', {parse_iso8601}),
|
||||
'thumbnail': (('image', 'coverImageUrl'), {url_or_none}, any),
|
||||
'timestamp': ('published', {parse_iso8601}),
|
||||
})),
|
||||
}
|
||||
|
@ -8,6 +8,8 @@
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
@ -16,6 +18,7 @@
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@ -495,7 +498,7 @@ def _real_extract(self, url):
|
||||
chapters = None
|
||||
if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
|
||||
start_time = chapter_durations
|
||||
end_time = chapter_durations[1:] + [duration]
|
||||
end_time = [*chapter_durations[1:], duration]
|
||||
chapters = [{
|
||||
'start_time': s,
|
||||
'end_time': e,
|
||||
@ -591,102 +594,179 @@ class NhkRadiruIE(InfoExtractor):
|
||||
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
||||
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239',
|
||||
'skip': 'Episode expired on 2024-06-09',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=LG96ZW5KZ4_01_4251382',
|
||||
'skip': 'Episode expires on 2025-07-14',
|
||||
'info_dict': {
|
||||
'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集',
|
||||
'id': '0449_01_4003239',
|
||||
'title': 'クラシックの庭\u3000特集「ドボルザークを聴く」(1)交響曲を中心に',
|
||||
'id': 'LG96ZW5KZ4_01_4251382',
|
||||
'ext': 'm4a',
|
||||
'uploader': 'NHK FM 東京',
|
||||
'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc',
|
||||
'series': 'ジャズ・トゥナイト',
|
||||
'channel': 'NHK FM 東京',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
||||
'upload_date': '20240601',
|
||||
'series_id': '0449_01',
|
||||
'release_date': '20240601',
|
||||
'timestamp': 1717257600,
|
||||
'release_timestamp': 1717250400,
|
||||
'description': 'md5:652d3c38a25b77959c716421eba1617a',
|
||||
'uploader': 'NHK FM・東京',
|
||||
'channel': 'NHK FM・東京',
|
||||
'duration': 6597.0,
|
||||
'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/LG96ZW5KZ4/LG96ZW5KZ4-eyecatch_a67c6e949325016c0724f2ed3eec8a2f.jpg',
|
||||
'categories': ['音楽', 'クラシック・オペラ'],
|
||||
'cast': ['田添菜穂子'],
|
||||
'series': 'クラシックの庭',
|
||||
'series_id': 'LG96ZW5KZ4',
|
||||
'episode': '特集「ドボルザークを聴く」(1)交響曲を中心に',
|
||||
'episode_id': 'QP1Q2ZXZY3',
|
||||
'timestamp': 1751871000,
|
||||
'upload_date': '20250707',
|
||||
'release_timestamp': 1751864403,
|
||||
'release_date': '20250707',
|
||||
},
|
||||
}, {
|
||||
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
||||
'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
|
||||
'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=Z9L1V2M24L_01',
|
||||
'info_dict': {
|
||||
'id': '0458_01',
|
||||
'id': 'Z9L1V2M24L_01',
|
||||
'title': 'ベストオブクラシック',
|
||||
'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
|
||||
'series_id': '0458_01',
|
||||
'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/Z9L1V2M24L/Z9L1V2M24L-eyecatch_83ed28b4782907998875965fee60a351.jpg',
|
||||
'series_id': 'Z9L1V2M24L_01',
|
||||
'uploader': 'NHK FM',
|
||||
'channel': 'NHK FM',
|
||||
'series': 'ベストオブクラシック',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
# one with letters in the id
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688',
|
||||
'note': 'Expires on 2025-03-31',
|
||||
'info_dict': {
|
||||
'id': 'F683_01_3910688',
|
||||
'ext': 'm4a',
|
||||
'title': '夏目漱石「文鳥」第1回',
|
||||
'series': '【らじる文庫】夏目漱石「文鳥」(全4回)',
|
||||
'series_id': 'F683_01',
|
||||
'description': '朗読:浅井理アナウンサー',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg',
|
||||
'upload_date': '20240106',
|
||||
'release_date': '20240106',
|
||||
'uploader': 'NHK R1',
|
||||
'release_timestamp': 1704511800,
|
||||
'channel': 'NHK R1',
|
||||
'timestamp': 1704512700,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata',
|
||||
'Failed to get extended metadata. API returned Error 1: Invalid parameters'],
|
||||
}, {
|
||||
# news
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=18439M2W42_02_4251212',
|
||||
'skip': 'Expires on 2025-07-15',
|
||||
'info_dict': {
|
||||
'id': 'F261_01_4012173',
|
||||
'id': '18439M2W42_02_4251212',
|
||||
'ext': 'm4a',
|
||||
'channel': 'NHKラジオ第1',
|
||||
'title': 'マイあさ! 午前5時のNHKニュース 2025年7月8日',
|
||||
'uploader': 'NHKラジオ第1',
|
||||
'channel': 'NHKラジオ第1',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
|
||||
'series': 'NHKラジオニュース',
|
||||
'title': '午前0時のNHKニュース',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
|
||||
'release_timestamp': 1718290800,
|
||||
'release_date': '20240613',
|
||||
'timestamp': 1718291400,
|
||||
'upload_date': '20240613',
|
||||
'timestamp': 1751919420,
|
||||
'upload_date': '20250707',
|
||||
'release_timestamp': 1751918400,
|
||||
'release_date': '20250707',
|
||||
},
|
||||
}, {
|
||||
# fallback when extended metadata fails
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298',
|
||||
'skip': 'Expires on 2024-06-07',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=J8792PY43V_20_4253945',
|
||||
'skip': 'Expires on 2025-09-01',
|
||||
'info_dict': {
|
||||
'id': '2834_01_4009298',
|
||||
'title': 'まち☆キラ!開成町特集',
|
||||
'id': 'J8792PY43V_20_4253945',
|
||||
'ext': 'm4a',
|
||||
'release_date': '20240531',
|
||||
'upload_date': '20240531',
|
||||
'series': 'はま☆キラ!',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg',
|
||||
'channel': 'NHK R1,FM',
|
||||
'description': '',
|
||||
'timestamp': 1717123800,
|
||||
'uploader': 'NHK R1,FM',
|
||||
'release_timestamp': 1717120800,
|
||||
'series_id': '2834_01',
|
||||
'title': '「後絶たない筋肉増強剤の使用」ワールドリポート',
|
||||
'description': '大濱 敦(ソウル支局)',
|
||||
'uploader': 'NHK R1',
|
||||
'channel': 'NHK R1',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/J8792PY43V/img/corner/box_31_thumbnail.jpg',
|
||||
'series': 'マイあさ! ワールドリポート',
|
||||
'series_id': 'J8792PY43V_20',
|
||||
'timestamp': 1751837100,
|
||||
'upload_date': '20250706',
|
||||
'release_timestamp': 1751835600,
|
||||
'release_date': '20250706',
|
||||
|
||||
},
|
||||
'expected_warnings': ['Failed to get extended metadata. API returned empty list.'],
|
||||
'expected_warnings': ['Failed to download extended metadata: HTTP Error 404: Not Found'],
|
||||
}]
|
||||
|
||||
_API_URL_TMPL = None
|
||||
|
||||
# The `_format_*` and `_make_*` functions are ported from: https://www.nhk.or.jp/radio/assets/js/timetable_detail_new.js
|
||||
|
||||
def _format_act_list(self, act_list):
    """Render the cast ("act") list as one ','-delimited string.

    Dict entries of `act_list` are grouped by their 'role' value in first-seen
    order. The first entry of each group whose role is not None is prefixed
    with 【role】; a truthy 'title' is placed before the name, followed by '…'.
    """
    acts_by_role = {}
    for entry in traverse_obj(act_list, (..., {dict})):
        acts_by_role.setdefault(entry.get('role'), []).append(entry)

    pieces = []
    for role, members in acts_by_role.items():
        for index, member in enumerate(members):
            # Only the first member of a group carries the role label
            prefix = ''
            if index == 0 and role is not None:
                prefix = f'【{role}】'
            title = member.get('title')
            if title:
                prefix += f'{title}…'
            pieces.append(join_nonempty(prefix, member.get('name'), delim=''))
    return join_nonempty(*pieces, delim=',')
|
||||
|
||||
def _make_artists(self, track, key):
    """Build a '、'-separated credit string from the artist dicts under `key`.

    For each artist, the 'role' (suffixed with '…'), 'part' (wrapped in
    parentheses) and 'name' values are concatenated without a delimiter.
    Returns None when no artist produces a non-empty string.
    """
    fields = (
        ('role', filter, {'{}…'.format}),
        ('part', filter, {'({})'.format}),
        ('name', filter),
    )
    credits = [
        credit for artist in traverse_obj(track, (key, ..., {dict}))
        if (credit := join_nonempty(*traverse_obj(artist, (fields, {str})), delim=''))
    ]

    return '、'.join(credits) or None
|
||||
|
||||
def _make_duration(self, track, key):
    """Format the duration stored under `key` as '(H時間M分SS秒)'.

    The value is parsed with `parse_duration`; None is returned when that
    yields None. The hour and minute components are emitted only when they
    are greater than zero; seconds are always emitted, zero-padded to two
    digits.
    """
    total = traverse_obj(track, (key, {parse_duration}))
    if total is None:
        return None

    hours, rest = divmod(total, 3600)
    minutes, seconds = divmod(rest, 60)

    parts = ['(']
    if hours > 0:
        parts.append(f'{int(hours)}時間')
    if minutes > 0:
        parts.append(f'{int(minutes)}分')
    parts.append(f'{int(seconds):02}秒)')
    return ''.join(parts)
|
||||
|
||||
def _format_music_list(self, music_list):
    """Render the music list as text blocks separated by blank lines.

    Each dict entry of `music_list` becomes one block whose non-empty lines
    are, in order: name, lyricist, composer, arranger, artists, duration,
    '<label code>' and '~location~'.
    """
    blocks = []
    for track in traverse_obj(music_list, (..., {dict})):
        lines = traverse_obj(track, ((
            ('name', filter, {'「{}」'.format}),
            ('lyricist', filter, {'{}:作詞'.format}),
            ('composer', filter, {'{}:作曲'.format}),
            ('arranger', filter, {'{}:編曲'.format}),
        ), {str}))

        # Either helper may return None; join_nonempty() below drops those
        lines.extend((
            self._make_artists(track, 'byArtist'),
            self._make_duration(track, 'duration'),
        ))

        label = join_nonempty('label', 'code', delim=' ', from_dict=track)
        if label:
            lines.append(f'<{label}>')
        location = traverse_obj(track, ('location', {str}))
        if location:
            lines.append(f'~{location}~')

        blocks.append(join_nonempty(*lines, delim='\n'))
    return '\n\n'.join(blocks)
|
||||
|
||||
def _format_description(self, response):
    """Assemble the description from the API response.

    Joins, separated by blank lines and skipping empty parts: the
    'epg80'/'epg200' texts of `detailedDescription`, the formatted
    `misc.actList` and the formatted `misc.musicList`.
    """
    epg_texts = traverse_obj(response, ('detailedDescription', {dict})) or {}
    sections = (
        join_nonempty('epg80', 'epg200', delim='\n\n', from_dict=epg_texts),
        traverse_obj(response, ('misc', 'actList', {self._format_act_list})),
        traverse_obj(response, ('misc', 'musicList', {self._format_music_list})),
    )
    return join_nonempty(*sections, delim='\n\n')
|
||||
|
||||
def _get_thumbnails(self, data, keys, name=None, preference=-1):
    """Collect thumbnail dicts from the image mapping found at `keys` in `data`.

    Items keyed 'copyright' and items without a valid URL are skipped. The
    first thumbnail gets `preference`, and each following one gets a value
    one lower than the previous. Thumbnail ids are built from `name` and the
    item's key.
    """
    candidates = traverse_obj(data, (
        *variadic(keys, (str, bytes, dict, set)), {dict.items},
        lambda _, v: v[0] != 'copyright' and url_or_none(v[1]['url']),
    ))
    return [{
        'url': image['url'],
        'width': int_or_none(image.get('width')),
        'height': int_or_none(image.get('height')),
        'preference': preference - offset,
        'id': join_nonempty(name, size),
    } for offset, (size, image) in enumerate(candidates)]
|
||||
|
||||
def _extract_extended_metadata(self, episode_id, aa_vinfo):
|
||||
service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')}))
|
||||
date_id = aa_vinfo[3]
|
||||
|
||||
detail_url = try_call(
|
||||
lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3]))
|
||||
lambda: self._API_URL_TMPL.format(broadcastEventId=join_nonempty(service, area, date_id)))
|
||||
if not detail_url:
|
||||
return {}
|
||||
|
||||
@ -699,36 +779,37 @@ def _extract_extended_metadata(self, episode_id, aa_vinfo):
|
||||
if error := traverse_obj(response, ('error', {dict})):
|
||||
self.report_warning(
|
||||
'Failed to get extended metadata. API returned '
|
||||
f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
|
||||
f'Error {join_nonempty("statuscode", "message", from_dict=error, delim=": ")}')
|
||||
return {}
|
||||
|
||||
full_meta = traverse_obj(response, ('list', service, 0, {dict}))
|
||||
if not full_meta:
|
||||
self.report_warning('Failed to get extended metadata. API returned empty list.')
|
||||
return {}
|
||||
station = traverse_obj(response, ('publishedOn', 'broadcastDisplayName', {str}))
|
||||
|
||||
station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None
|
||||
thumbnails = [{
|
||||
'id': str(id_),
|
||||
'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1,
|
||||
**traverse_obj(thumb, {
|
||||
'url': 'url',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
} for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))]
|
||||
thumbnails = []
|
||||
thumbnails.extend(self._get_thumbnails(response, ('about', 'eyecatch')))
|
||||
for num, dct in enumerate(traverse_obj(response, ('about', 'eyecatchList', ...))):
|
||||
thumbnails.extend(self._get_thumbnails(dct, None, join_nonempty('list', num), -2))
|
||||
thumbnails.extend(
|
||||
self._get_thumbnails(response, ('about', 'partOfSeries', 'eyecatch'), 'series', -3))
|
||||
|
||||
return filter_dict({
|
||||
'description': self._format_description(response),
|
||||
'cast': traverse_obj(response, ('misc', 'actList', ..., 'name', {str})),
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(response, {
|
||||
'title': ('name', {str}),
|
||||
'timestamp': ('endDate', {unified_timestamp}),
|
||||
'release_timestamp': ('startDate', {unified_timestamp}),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
}),
|
||||
**traverse_obj(response, ('identifierGroup', {
|
||||
'series': ('radioSeriesName', {str}),
|
||||
'series_id': ('radioSeriesId', {str}),
|
||||
'episode': ('radioEpisodeName', {str}),
|
||||
'episode_id': ('radioEpisodeId', {str}),
|
||||
'categories': ('genre', ..., ['name1', 'name2'], {str}, all, {orderedSet}),
|
||||
})),
|
||||
'channel': station,
|
||||
'uploader': station,
|
||||
'description': join_nonempty(
|
||||
'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta),
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(full_meta, {
|
||||
'title': ('title', {str}),
|
||||
'timestamp': ('end_time', {unified_timestamp}),
|
||||
'release_timestamp': ('start_time', {unified_timestamp}),
|
||||
}),
|
||||
})
|
||||
|
||||
def _extract_episode_info(self, episode, programme_id, series_meta):
|
||||
@ -782,7 +863,9 @@ def _real_extract(self, url):
|
||||
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
||||
programme_id = f'{site_id}_{corner_id}'
|
||||
|
||||
if site_id == 'F261': # XXX: News programmes use old API (for now?)
|
||||
# XXX: News programmes use the old API
|
||||
# Can't move this to NhkRadioNewsPageIE because news items still use the normal URL format
|
||||
if site_id == '18439M2W42':
|
||||
meta = self._download_json(
|
||||
'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main']
|
||||
series_meta = traverse_obj(meta, {
|
||||
@ -843,8 +926,8 @@ class NhkRadioNewsPageIE(InfoExtractor):
|
||||
'url': 'https://www.nhk.or.jp/radionews/',
|
||||
'playlist_mincount': 5,
|
||||
'info_dict': {
|
||||
'id': 'F261_01',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
|
||||
'id': '18439M2W42_01',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
|
||||
'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
|
||||
'channel': 'NHKラジオ第1',
|
||||
'uploader': 'NHKラジオ第1',
|
||||
@ -853,7 +936,7 @@ class NhkRadioNewsPageIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
|
||||
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=18439M2W42_01', NhkRadiruIE)
|
||||
|
||||
|
||||
class NhkRadiruLiveIE(InfoExtractor):
|
||||
@ -863,11 +946,12 @@ class NhkRadiruLiveIE(InfoExtractor):
|
||||
# radio 1, no area specified
|
||||
'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
|
||||
'info_dict': {
|
||||
'id': 'r1-tokyo',
|
||||
'title': 're:^NHKネットラジオ第1 東京.+$',
|
||||
'id': 'bs-r1-130',
|
||||
'title': 're:^NHKラジオ第1・東京.+$',
|
||||
'ext': 'm4a',
|
||||
'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
|
||||
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r1/r1-logo.svg',
|
||||
'live_status': 'is_live',
|
||||
'_old_archive_ids': ['nhkradirulive r1-tokyo'],
|
||||
},
|
||||
}, {
|
||||
# radio 2, area specified
|
||||
@ -875,26 +959,28 @@ class NhkRadiruLiveIE(InfoExtractor):
|
||||
'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
|
||||
'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
|
||||
'info_dict': {
|
||||
'id': 'r2-fukuoka',
|
||||
'title': 're:^NHKネットラジオ第2 福岡.+$',
|
||||
'id': 'bs-r2-400',
|
||||
'title': 're:^NHKラジオ第2.+$',
|
||||
'ext': 'm4a',
|
||||
'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
|
||||
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r2/r2-logo.svg',
|
||||
'live_status': 'is_live',
|
||||
'_old_archive_ids': ['nhkradirulive r2-fukuoka'],
|
||||
},
|
||||
}, {
|
||||
# fm, area specified
|
||||
'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
|
||||
'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
|
||||
'info_dict': {
|
||||
'id': 'fm-sapporo',
|
||||
'title': 're:^NHKネットラジオFM 札幌.+$',
|
||||
'id': 'bs-r3-010',
|
||||
'title': 're:^NHK FM・札幌.+$',
|
||||
'ext': 'm4a',
|
||||
'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
|
||||
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r3/r3-logo.svg',
|
||||
'live_status': 'is_live',
|
||||
'_old_archive_ids': ['nhkradirulive fm-sapporo'],
|
||||
},
|
||||
}]
|
||||
|
||||
_NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
|
||||
_NOA_STATION_IDS = {'r1': 'r1', 'r2': 'r2', 'fm': 'r3'}
|
||||
|
||||
def _real_extract(self, url):
|
||||
station = self._match_id(url)
|
||||
@ -911,12 +997,15 @@ def _real_extract(self, url):
|
||||
noa_info = self._download_json(
|
||||
f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
|
||||
station, note=f'Downloading {area} station metadata', fatal=False)
|
||||
present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
|
||||
broadcast_service = traverse_obj(noa_info, (self._NOA_STATION_IDS.get(station), 'publishedOn'))
|
||||
|
||||
return {
|
||||
'title': ' '.join(traverse_obj(present_info, (('service', 'area'), 'name', {str}))),
|
||||
'id': join_nonempty(station, area),
|
||||
'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
|
||||
**traverse_obj(broadcast_service, {
|
||||
'title': ('broadcastDisplayName', {str}),
|
||||
'id': ('id', {str}),
|
||||
}),
|
||||
'_old_archive_ids': [make_archive_id(self, join_nonempty(station, area))],
|
||||
'thumbnails': traverse_obj(broadcast_service, ('logo', ..., {
|
||||
'url': 'url',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
|
@ -4,16 +4,15 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_bitrate,
|
||||
@ -22,9 +21,8 @@
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
unescapeHTML,
|
||||
truncate_string,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
@ -32,7 +30,11 @@
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
require,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class NiconicoBaseIE(InfoExtractor):
|
||||
@ -283,35 +285,54 @@ def _yield_dms_formats(self, api_data, video_id):
|
||||
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
|
||||
yield video_fmt
|
||||
|
||||
def _extract_server_response(self, webpage, video_id, fatal=True):
    """Return the `data.response` dict from the page's 'server-response' meta tag.

    If an ExtractorError is raised while extracting, it is re-raised when
    `fatal` is true; otherwise an empty dict is returned.
    """
    try:
        raw_meta = self._html_search_meta('server-response', webpage) or ''
        server_data = self._parse_json(raw_meta, video_id)
        return traverse_obj(
            server_data, ('data', 'response', {dict}, {require('server response')}))
    except ExtractorError:
        if fatal:
            raise
        return {}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
webpage, handle = self._download_webpage_handle(
|
||||
'https://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||
f'https://www.nicovideo.jp/watch/{video_id}', video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
if video_id.startswith('so'):
|
||||
video_id = self._match_id(handle.url)
|
||||
|
||||
api_data = traverse_obj(
|
||||
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
|
||||
('data', 'response', {dict}))
|
||||
if not api_data:
|
||||
raise ExtractorError('Server response data not found')
|
||||
api_data = self._extract_server_response(webpage, video_id)
|
||||
except ExtractorError as e:
|
||||
try:
|
||||
api_data = self._download_json(
|
||||
f'https://www.nicovideo.jp/api/watch/v3/{video_id}?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_{round(time.time() * 1000)}', video_id,
|
||||
note='Downloading API JSON', errnote='Unable to fetch data')['data']
|
||||
f'https://www.nicovideo.jp/api/watch/v3/{video_id}', video_id,
|
||||
'Downloading API JSON', 'Unable to fetch data', query={
|
||||
'_frontendId': '6',
|
||||
'_frontendVersion': '0',
|
||||
'actionTrackId': f'AAAAAAAAAA_{round(time.time() * 1000)}',
|
||||
}, headers=self.geo_verification_headers())['data']
|
||||
except ExtractorError:
|
||||
if not isinstance(e.cause, HTTPError):
|
||||
# Raise if original exception was from _parse_json or utils.traversal.require
|
||||
raise
|
||||
# The webpage server response has more detailed error info than the API response
|
||||
webpage = e.cause.response.read().decode('utf-8', 'replace')
|
||||
error_msg = self._html_search_regex(
|
||||
r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
|
||||
webpage, 'error reason', default=None)
|
||||
if not error_msg:
|
||||
reason_code = self._extract_server_response(
|
||||
webpage, video_id, fatal=False).get('reasonCode')
|
||||
if not reason_code:
|
||||
raise
|
||||
raise ExtractorError(clean_html(error_msg), expected=True)
|
||||
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
elif reason_code == 'HIDDEN_VIDEO':
|
||||
raise ExtractorError(
|
||||
'The viewing period of this video has expired', expected=True)
|
||||
elif reason_code == 'DELETED_VIDEO':
|
||||
raise ExtractorError('This video has been deleted', expected=True)
|
||||
raise ExtractorError(f'Niconico says: {reason_code}')
|
||||
|
||||
availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
|
||||
'needs_premium': ('isPremium', {bool}),
|
||||
@ -787,41 +808,39 @@ class NiconicoLiveIE(NiconicoBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
|
||||
webpage = self._download_webpage(url, video_id, expected_status=404)
|
||||
if err_msg := traverse_obj(webpage, ({find_element(cls='message')}, {clean_html})):
|
||||
raise ExtractorError(err_msg, expected=True)
|
||||
|
||||
embedded_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
|
||||
|
||||
ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
|
||||
if not ws_url:
|
||||
raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
|
||||
ws_url = update_url_query(ws_url, {
|
||||
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
|
||||
})
|
||||
|
||||
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
|
||||
embedded_data = traverse_obj(webpage, (
|
||||
{find_element(tag='script', id='embedded-data', html=True)},
|
||||
{extract_attributes}, 'data-props', {json.loads}))
|
||||
frontend_id = traverse_obj(embedded_data, ('site', 'frontendId', {str_or_none}), default='9')
|
||||
|
||||
ws_url = traverse_obj(embedded_data, (
|
||||
'site', 'relive', 'webSocketUrl', {url_or_none}, {require('websocket URL')}))
|
||||
ws_url = update_url_query(ws_url, {'frontend_id': frontend_id})
|
||||
ws = self._request_webpage(
|
||||
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
|
||||
video_id=video_id, note='Connecting to WebSocket server')
|
||||
ws_url, video_id, 'Connecting to WebSocket server',
|
||||
headers={'Origin': 'https://live.nicovideo.jp'})
|
||||
|
||||
self.write_debug('Sending HLS server request')
|
||||
ws.send(json.dumps({
|
||||
'type': 'startWatching',
|
||||
'data': {
|
||||
'reconnect': False,
|
||||
'room': {
|
||||
'commentable': True,
|
||||
'protocol': 'webSocket',
|
||||
},
|
||||
'stream': {
|
||||
'quality': 'abr',
|
||||
'protocol': 'hls',
|
||||
'latency': 'high',
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
'latency': 'high',
|
||||
'protocol': 'hls',
|
||||
'quality': 'abr',
|
||||
},
|
||||
'room': {
|
||||
'protocol': 'webSocket',
|
||||
'commentable': True,
|
||||
},
|
||||
'reconnect': False,
|
||||
},
|
||||
'type': 'startWatching',
|
||||
}))
|
||||
|
||||
while True:
|
||||
@ -841,17 +860,15 @@ def _real_extract(self, url):
|
||||
raise ExtractorError('Disconnected at middle of extraction')
|
||||
elif data.get('type') == 'error':
|
||||
self.write_debug(recv)
|
||||
message = traverse_obj(data, ('body', 'code')) or recv
|
||||
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
|
||||
raise ExtractorError(message)
|
||||
elif self.get_param('verbose', False):
|
||||
if len(recv) > 100:
|
||||
recv = recv[:100] + '...'
|
||||
self.write_debug(f'Server said: {recv}')
|
||||
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
|
||||
|
||||
title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
|
||||
('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
|
||||
|
||||
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
|
||||
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail', {dict})) or {}
|
||||
thumbnails = []
|
||||
for name, value in raw_thumbs.items():
|
||||
if not isinstance(value, dict):
|
||||
@ -878,31 +895,30 @@ def _real_extract(self, url):
|
||||
cookie['domain'], cookie['name'], cookie['value'],
|
||||
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
|
||||
|
||||
fmt_common = {
|
||||
'live_latency': 'high',
|
||||
'origin': hostname,
|
||||
'protocol': 'niconico_live',
|
||||
'video_id': video_id,
|
||||
'ws': ws,
|
||||
}
|
||||
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
|
||||
a_map = {96: 'audio_low', 192: 'audio_high'}
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
||||
for fmt in formats:
|
||||
fmt['protocol'] = 'niconico_live'
|
||||
if fmt.get('acodec') == 'none':
|
||||
fmt['format_id'] = next(q_iter, fmt['format_id'])
|
||||
elif fmt.get('vcodec') == 'none':
|
||||
abr = parse_bitrate(fmt['url'].lower())
|
||||
fmt.update({
|
||||
'abr': abr,
|
||||
'acodec': 'mp4a.40.2',
|
||||
'format_id': a_map.get(abr, fmt['format_id']),
|
||||
})
|
||||
fmt.update(fmt_common)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'downloader_options': {
|
||||
'max_quality': traverse_obj(embedded_data, ('program', 'stream', 'maxQuality', {str})) or 'normal',
|
||||
'ws': ws,
|
||||
'ws_url': ws_url,
|
||||
},
|
||||
**traverse_obj(embedded_data, {
|
||||
'view_count': ('program', 'statistics', 'watchCount'),
|
||||
'comment_count': ('program', 'statistics', 'commentCount'),
|
||||
|
@ -1,6 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
@ -61,10 +60,10 @@ def _real_extract(self, url):
|
||||
post = self._download_json(
|
||||
'https://9gag.com/v1/post', post_id, query={
|
||||
'id': post_id,
|
||||
})['data']['post']
|
||||
}, impersonate=True)['data']['post']
|
||||
|
||||
if post.get('type') != 'Animated':
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'The given url does not contain a video',
|
||||
expected=True)
|
||||
|
||||
|
@ -1,6 +1,3 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@ -11,7 +8,12 @@
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj, value
|
||||
from ..utils.traversal import (
|
||||
get_first,
|
||||
require,
|
||||
traverse_obj,
|
||||
value,
|
||||
)
|
||||
|
||||
|
||||
class NineNowIE(InfoExtractor):
|
||||
@ -101,20 +103,11 @@ class NineNowIE(InfoExtractor):
|
||||
}]
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}'
|
||||
|
||||
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv and yt_dlp.extractor.goplay
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
common_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {json.loads}, ..., {self._find_json},
|
||||
lambda _, v: v['payload'][video_type]['slug'] == display_id,
|
||||
'payload', any, {require('video data')}))
|
||||
common_data = get_first(self._search_nextjs_v13_data(webpage, display_id), ('payload', {dict}))
|
||||
|
||||
if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})):
|
||||
self.report_drm(display_id)
|
||||
|
@ -1,59 +1,57 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
get_element_by_attribute,
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
update_url_query,
|
||||
parse_duration,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
update_url,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class NobelPrizeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
|
||||
'md5': '04c81e5714bb36cc4e2232fee1d8157f',
|
||||
_VALID_URL = r'https?://(?:(?:mediaplayer|www)\.)?nobelprize\.org/mediaplayer/'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nobelprize.org/mediaplayer/?id=2636',
|
||||
'info_dict': {
|
||||
'id': '2636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Announcement of the 2016 Nobel Prize in Physics',
|
||||
'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
|
||||
'description': 'md5:1a2d8a6ca80c88fb3b9a326e0b0e8e43',
|
||||
'duration': 1560.0,
|
||||
'thumbnail': r're:https?://www\.nobelprize\.org/images/.+\.jpg',
|
||||
'timestamp': 1504883793,
|
||||
'upload_date': '20170908',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://mediaplayer.nobelprize.org/mediaplayer/?qid=12693',
|
||||
'info_dict': {
|
||||
'id': '12693',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nobel Lecture by Peter Higgs',
|
||||
'description': 'md5:9b12e275dbe3a8138484e70e00673a05',
|
||||
'duration': 1800.0,
|
||||
'thumbnail': r're:https?://www\.nobelprize\.org/images/.+\.jpg',
|
||||
'timestamp': 1504883793,
|
||||
'upload_date': '20170908',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'(?s)var\s*config\s*=\s*({.+?});', webpage,
|
||||
'config'), video_id, js_to_json)['media']
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for source in media.get('source', []):
|
||||
source_src = source.get('src')
|
||||
if not source_src:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(source_src, {'hdcore': '3.7.0'}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_src,
|
||||
})
|
||||
video_id = traverse_obj(parse_qs(url), (
|
||||
('id', 'qid'), -1, {int_or_none}, {str_or_none}, any))
|
||||
if not video_id:
|
||||
raise UnsupportedError(url)
|
||||
webpage = self._download_webpage(
|
||||
update_url(url, netloc='mediaplayer.nobelprize.org'), video_id)
|
||||
|
||||
return {
|
||||
**self._search_json_ld(webpage, video_id),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('itemprop', 'description', webpage),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'formats': formats,
|
||||
'title': self._html_search_meta('caption', webpage),
|
||||
'description': traverse_obj(webpage, (
|
||||
{find_element(tag='span', attr='itemprop', value='description')}, {clean_html})),
|
||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
||||
}
|
||||
|
@ -1,100 +0,0 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class NoovoIE(InfoExtractor):
    # Resolves a noovo.ca video page to its Brightcove player URL and layers
    # the page's own metadata on top of what the Brightcove extractor returns.
    _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
    _TESTS = [{
        # clip
        'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
        'info_dict': {
            'id': '5386045029001',
            'ext': 'mp4',
            'title': 'Chrysler Imperial',
            'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
            'timestamp': 1491399228,
            'upload_date': '20170405',
            'uploader_id': '618566855001',
            'series': 'RPM+',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # episode
        'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
        'info_dict': {
            'id': '5395865725001',
            'title': 'Épisode 13 : Les retrouvailles',
            'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
            'ext': 'mp4',
            'timestamp': 1492019320,
            'upload_date': '20170412',
            'uploader_id': '618566855001',
            'series': "L'amour est dans le pré",
            'season_number': 5,
            'episode': 'Épisode 13',
            'episode_number': 13,
        },
        'params': {
            'skip_download': True,
        },
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        brightcove_id = self._search_regex(
            r'data-video-id=["\'](\d+)', webpage, 'brightcove id')

        # Metadata is read from the object literal passed to dataLayer.push(...);
        # a missing call degrades to an empty object instead of failing
        pushed_js = self._search_regex(
            r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
            default='{}')
        page_data = self._parse_json(
            pushed_js, page_id, transform_source=js_to_json, fatal=False)

        # Title is mandatory: the <meta> lookup is fatal when the dataLayer has none
        title = try_get(page_data, lambda x: x['video']['nom'], str)
        if not title:
            title = self._html_search_meta(
                'dcterms.Title', webpage, 'title', fatal=True)

        description = self._html_search_meta(
            ('dcterms.Description', 'description'), webpage, 'description')

        series = try_get(page_data, lambda x: x['emission']['nom'])
        if not series:
            series = self._search_regex(
                r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
                webpage, 'series', default=None)

        # Season info nests under the show, episode info under the season
        season_info = try_get(page_data, lambda x: x['emission']['saison'], dict)
        if not season_info:
            season_info = {}
        episode_info = try_get(season_info, lambda x: x['episode'], dict)
        if not episode_info:
            episode_info = {}

        brightcove_url = smuggle_url(
            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
            {'geo_countries': ['CA']})

        return {
            '_type': 'url_transparent',
            'ie_key': BrightcoveNewIE.ie_key(),
            'url': brightcove_url,
            'id': brightcove_id,
            'title': title,
            'description': description,
            'series': series,
            'season': try_get(season_info, lambda x: x['nom'], str),
            'season_number': int_or_none(try_get(season_info, lambda x: x['numero'])),
            'episode': try_get(episode_info, lambda x: x['nom'], str),
            'episode_number': int_or_none(try_get(episode_info, lambda x: x['numero'])),
        }
|
@ -1,55 +1,82 @@
|
||||
from .common import InfoExtractor
|
||||
from .streaks import StreaksBaseIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NTVCoJpCUIE(InfoExtractor):
|
||||
class NTVCoJpCUIE(StreaksBaseIE):
|
||||
IE_NAME = 'cu.ntv.co.jp'
|
||||
IE_DESC = 'Nippon Television Network'
|
||||
_VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://cu.ntv.co.jp/televiva-chill-gohan_181031/',
|
||||
IE_DESC = '日テレ無料TADA!'
|
||||
_VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program-list|search)(?P<id>[\w-]+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cu.ntv.co.jp/gaki_20250525/',
|
||||
'info_dict': {
|
||||
'id': '5978891207001',
|
||||
'id': 'gaki_20250525',
|
||||
'ext': 'mp4',
|
||||
'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
|
||||
'upload_date': '20181213',
|
||||
'description': 'md5:1985b51a9abc285df0104d982a325f2a',
|
||||
'uploader_id': '3855502814001',
|
||||
'timestamp': 1544669941,
|
||||
'title': '放送開始36年!方正ココリコが選ぶ神回&地獄回!',
|
||||
'cast': 'count:2',
|
||||
'description': 'md5:1e1db556224d627d4d2f74370c650927',
|
||||
'display_id': 'ref:gaki_20250525',
|
||||
'duration': 1450,
|
||||
'episode': '放送開始36年!方正ココリコが選ぶ神回&地獄回!',
|
||||
'episode_id': '000000010172808',
|
||||
'episode_number': 255,
|
||||
'genres': ['variety'],
|
||||
'live_status': 'not_live',
|
||||
'modified_date': '20250525',
|
||||
'modified_timestamp': 1748145537,
|
||||
'release_date': '20250525',
|
||||
'release_timestamp': 1748145539,
|
||||
'series': 'ダウンタウンのガキの使いやあらへんで!',
|
||||
'series_id': 'gaki',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1748145197,
|
||||
'upload_date': '20250525',
|
||||
'uploader': '日本テレビ放送網',
|
||||
'uploader_id': '0x7FE2',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_config = self._search_nuxt_data(webpage, display_id)
|
||||
video_id = traverse_obj(player_config, ('movie', 'video_id'))
|
||||
if not video_id:
|
||||
raise ExtractorError('Failed to extract video ID for Brightcove')
|
||||
account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
|
||||
title = traverse_obj(player_config, ('movie', 'name'))
|
||||
if not title:
|
||||
og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
|
||||
if og_title:
|
||||
title = og_title.split('(', 1)[0].strip()
|
||||
description = (traverse_obj(player_config, ('movie', 'description'))
|
||||
or self._html_search_meta(['description', 'og:description'], webpage))
|
||||
|
||||
info = self._search_json(
|
||||
r'window\.app\s*=', webpage, 'video info',
|
||||
display_id)['falcorCache']['catalog']['episode'][display_id]['value']
|
||||
media_id = traverse_obj(info, (
|
||||
'streaks_data', 'mediaid', {str_or_none}, {require('Streaks media ID')}))
|
||||
non_phonetic = (lambda _, v: v['is_phonetic'] is False, 'value', {str})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
**self._extract_from_streaks_api('ntv-tada', media_id, headers={
|
||||
'X-Streaks-Api-Key': 'df497719056b44059a0483b8faad1f4a',
|
||||
}),
|
||||
**traverse_obj(info, {
|
||||
'id': ('content_id', {str_or_none}),
|
||||
'title': ('title', *non_phonetic, any),
|
||||
'age_limit': ('is_adult_only_content', {lambda x: 18 if x else None}),
|
||||
'cast': ('credit', ..., 'name', *non_phonetic),
|
||||
'genres': ('genre', ..., {str}),
|
||||
'release_timestamp': ('pub_date', {parse_iso8601}),
|
||||
'tags': ('tags', ..., {str}),
|
||||
'thumbnail': ('artwork', ..., 'url', any, {url_or_none}),
|
||||
}),
|
||||
**traverse_obj(info, ('tv_episode_info', {
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'episode_number': ('episode_number', {int}),
|
||||
'series': ('parent_show_title', *non_phonetic, any),
|
||||
'series_id': ('show_content_id', {str}),
|
||||
})),
|
||||
**traverse_obj(info, ('custom_data', {
|
||||
'description': ('program_detail', {str}),
|
||||
'episode': ('episode_title', {str}),
|
||||
'episode_id': ('episode_id', {str_or_none}),
|
||||
'uploader': ('network_name', {str}),
|
||||
'uploader_id': ('network_id', {str}),
|
||||
})),
|
||||
}
|
||||
|
@ -273,6 +273,8 @@ def _extract_desktop(self, url):
|
||||
return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
|
||||
elif error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
elif '>Access to this video is restricted</div>' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
player = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
@ -429,7 +431,7 @@ def _extract_mobile(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
f'http://m.ok.ru/video/{video_id}', video_id,
|
||||
f'https://m.ok.ru/video/{video_id}', video_id,
|
||||
note='Downloading mobile webpage')
|
||||
|
||||
error = self._search_regex(
|
||||
|
@ -1,63 +1,63 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils import parse_duration, parse_iso8601, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class ParlviewIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
|
||||
_VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661',
|
||||
'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614',
|
||||
'info_dict': {
|
||||
'id': '542661',
|
||||
'id': '3406614',
|
||||
'ext': 'mp4',
|
||||
'title': "Australia's Family Law System [Part 2]",
|
||||
'duration': 5799,
|
||||
'description': 'md5:7099883b391619dbae435891ca871a62',
|
||||
'timestamp': 1621430700,
|
||||
'upload_date': '20210519',
|
||||
'uploader': 'Joint Committee',
|
||||
'title': 'Senate Chamber',
|
||||
'description': 'Official Recording of Senate Proceedings from the Australian Parliament',
|
||||
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg',
|
||||
'upload_date': '20250325',
|
||||
'duration': 17999,
|
||||
'timestamp': 1742939400,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936',
|
||||
'only_matching': True,
|
||||
'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv',
|
||||
'info_dict': {
|
||||
'id': 'SV1394.dv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]',
|
||||
'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament',
|
||||
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg',
|
||||
'upload_date': '19960822',
|
||||
'duration': 14765,
|
||||
'timestamp': 840754200,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
_API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
|
||||
_MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
media = self._download_json(self._API_URL % video_id, video_id).get('media')
|
||||
timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/'
|
||||
video_details = self._download_json(
|
||||
f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails']
|
||||
|
||||
stream = try_get(media, lambda x: x['renditions'][0], dict)
|
||||
if not stream:
|
||||
self.raise_no_formats('No streams were detected')
|
||||
elif stream.get('streamType') != 'VOD':
|
||||
self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType'))))
|
||||
formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
video_details['files']['file']['url'], video_id, 'mp4')
|
||||
|
||||
media_info = self._download_webpage(
|
||||
self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
|
||||
DURATION_RE = re.compile(r'(?P<duration>\d+:\d+:\d+):\d+')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': url,
|
||||
'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
|
||||
'formats': formats,
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')),
|
||||
'description': self._html_search_regex(
|
||||
r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)',
|
||||
webpage, 'description', fatal=False),
|
||||
'uploader': self._html_search_regex(
|
||||
r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False),
|
||||
'thumbnail': media.get('staticImage'),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(video_details, {
|
||||
'title': (('parlViewTitle', 'title'), {str}, any),
|
||||
'description': ('parlViewDescription', {str}),
|
||||
'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}),
|
||||
'timestamp': ('recordingFrom', {parse_iso8601}),
|
||||
'thumbnail': ('thumbUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
@ -19,7 +19,7 @@
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
from ..utils.traversal import require, traverse_obj, value
|
||||
|
||||
|
||||
class PatreonBaseIE(InfoExtractor):
|
||||
@ -340,8 +340,9 @@ def _real_extract(self, url):
|
||||
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
||||
}))
|
||||
|
||||
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
|
||||
headers = {'referer': 'https://patreon.com/'}
|
||||
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
|
||||
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
|
||||
headers = {'referer': 'https://www.patreon.com/'}
|
||||
|
||||
# handle Vimeo embeds
|
||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||
@ -352,7 +353,7 @@ def _real_extract(self, url):
|
||||
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
|
||||
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
|
||||
entries.append(self.url_result(
|
||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||
VimeoIE._smuggle_referrer(v_url, headers['referer']),
|
||||
VimeoIE, url_transparent=True))
|
||||
|
||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||
@ -379,11 +380,13 @@ def _real_extract(self, url):
|
||||
'url': post_file['url'],
|
||||
})
|
||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
post_file['url'], video_id, headers=headers)
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
})
|
||||
|
||||
can_view_post = traverse_obj(attributes, 'current_user_can_view')
|
||||
@ -459,7 +462,7 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?patreon\.com/(?:
|
||||
(?:m|api/campaigns)/(?P<campaign_id>\d+)|
|
||||
(?:c/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
|
||||
(?:cw?/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
|
||||
)(?:/posts)?/?(?:$|[?#])'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.patreon.com/dissonancepod/',
|
||||
@ -528,6 +531,28 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
'age_limit': 0,
|
||||
},
|
||||
'playlist_mincount': 331,
|
||||
'skip': 'Channel removed',
|
||||
}, {
|
||||
# next.js v13 data, see https://github.com/yt-dlp/yt-dlp/issues/13622
|
||||
'url': 'https://www.patreon.com/c/anythingelse/posts',
|
||||
'info_dict': {
|
||||
'id': '9631148',
|
||||
'title': 'Anything Else?',
|
||||
'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08',
|
||||
'uploader': 'dan ',
|
||||
'uploader_id': '13852412',
|
||||
'uploader_url': 'https://www.patreon.com/anythingelse',
|
||||
'channel': 'Anything Else?',
|
||||
'channel_id': '9631148',
|
||||
'channel_url': 'https://www.patreon.com/anythingelse',
|
||||
'channel_follower_count': int,
|
||||
'age_limit': 0,
|
||||
'thumbnail': r're:https?://.+/.+',
|
||||
},
|
||||
'playlist_mincount': 151,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/cw/anythingelse',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/c/OgSog/posts',
|
||||
'only_matching': True,
|
||||
@ -569,8 +594,11 @@ def _real_extract(self, url):
|
||||
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
|
||||
if campaign_id is None:
|
||||
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
|
||||
campaign_id = self._search_nextjs_data(
|
||||
webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
|
||||
campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), (
|
||||
'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
|
||||
if not campaign_id:
|
||||
campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
|
||||
lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')}))
|
||||
|
||||
params = {
|
||||
'json-api-use-default-includes': 'false',
|
||||
|
@ -10,7 +10,8 @@
|
||||
|
||||
|
||||
class PicartoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
IE_NAME = 'picarto'
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[^/#?]+)/?(?:$|[?#])'
|
||||
_TEST = {
|
||||
'url': 'https://picarto.tv/Setz',
|
||||
'info_dict': {
|
||||
@ -89,7 +90,8 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class PicartoVodIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P<id>[^/?#&]+)'
|
||||
IE_NAME = 'picarto:vod'
|
||||
_VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+(?:/profile)?/videos)/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
|
||||
'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
|
||||
@ -111,6 +113,18 @@ class PicartoVodIE(InfoExtractor):
|
||||
'channel': 'ArtofZod',
|
||||
'age_limit': 18,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://picarto.tv/DrechuArt/profile/videos/400347',
|
||||
'md5': 'f9ea54868b1d9dec40eb554b484cc7bf',
|
||||
'info_dict': {
|
||||
'id': '400347',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to the Show',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'channel': 'DrechuArt',
|
||||
'age_limit': 0,
|
||||
},
|
||||
|
||||
}, {
|
||||
'url': 'https://picarto.tv/videopopout/Plague',
|
||||
'only_matching': True,
|
||||
|
70
yt_dlp/extractor/playerfm.py
Normal file
70
yt_dlp/extractor/playerfm.py
Normal file
@ -0,0 +1,70 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, clean_podcast_url, int_or_none, str_or_none, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PlayerFmIE(InfoExtractor):
    # Episode pages on player.fm; metadata comes from the page URL with
    # '.json' appended.
    _VALID_URL = r'(?P<url>https?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P<id>[\w-]+))'
    _TESTS = [{
        'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix',
        'info_dict': {
            'ext': 'mp3',
            'id': '478606546',
            'display_id': 'movie-mindset-33-casino-feat-felix',
            'thumbnail': r're:^https://.*\.(jpg|png)',
            'title': 'Movie Mindset 33 - Casino feat. Felix',
            'creators': ['Chapo Trap House'],
            'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.',
            'duration': 6830,
            'timestamp': 1745406000,
            'upload_date': '20250423',
        },
    }, {
        'url': 'https://player.fm/series/nbc-nightly-news-with-tom-llamas/thursday-april-17-2025',
        'info_dict': {
            'ext': 'mp3',
            'id': '477635490',
            'display_id': 'thursday-april-17-2025',
            'title': 'Thursday, April 17, 2025',
            'thumbnail': r're:^https://.*\.(jpg|png)',
            'duration': 1143,
            'description': 'md5:4890b8cf9a55a787561cd5d59dfcda82',
            'creators': ['NBC News'],
            'timestamp': 1744941374,
            'upload_date': '20250418',
        },
    }, {
        'url': 'https://player.fm/series/soccer-101/ep-109-its-kicking-off-how-have-the-rules-for-kickoff-changed-what-are-the-best-approaches-to-getting-the-game-underway-and-how-could-we-improve-on-the-present-system-ack3NzL3yibvs4pf',
        'info_dict': {
            'ext': 'mp3',
            'id': '481418710',
            'thumbnail': r're:^https://.*\.(jpg|png)',
            'title': r're:#109 It\'s kicking off! How have the rules for kickoff changed, .+ the present system\?',
            'creators': ['TSS'],
            'duration': 1510,
            'display_id': 'md5:b52ecacaefab891b59db69721bfd9b13',
            'description': 'md5:52a39e36d08d8919527454f152ad3c25',
            'timestamp': 1659102055,
            'upload_date': '20220729',
        },
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        # The `url` group stops right after the episode slug, so any query
        # string or fragment is dropped before '.json' is appended
        episode_url = mobj.group('url')
        data = self._download_json(f'{episode_url}.json', display_id)

        metadata = traverse_obj(data, {
            'id': ('id', {int}, {str_or_none}),
            'url': ('url', {clean_podcast_url}),
            'title': ('title', {str}),
            'description': ('description', {clean_html}),
            'duration': ('duration', {int_or_none}),
            # Episode image first, then the series image
            'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any),
            'filesize': ('size', {int_or_none}),
            'timestamp': ('publishedAt', {int_or_none}),
            'creators': ('series', 'author', {str}, filter, all, filter),
        })

        return {
            'display_id': display_id,
            # Results are marked as having no video stream
            'vcodec': 'none',
            **metadata,
        }
|
@ -9,11 +9,10 @@
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import unpack
|
||||
from ..utils.traversal import traverse_obj, unpack
|
||||
|
||||
|
||||
class PlaySuisseIE(InfoExtractor):
|
||||
|
104
yt_dlp/extractor/plyr.py
Normal file
104
yt_dlp/extractor/plyr.py
Normal file
@ -0,0 +1,104 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
|
||||
|
||||
class PlyrEmbedIE(InfoExtractor):
    # Embed-only extractor (no URL pattern of its own): finds Plyr player <div>s
    # in a webpage and yields the Vimeo/YouTube URLs they point at.
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1"
        'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/',
        'info_dict': {
            'id': '522319456',
            'ext': 'mp4',
            'title': '200.000.000 Mouths (1950–51)',
            'uploader': 'Zeughauskino',
            'uploader_url': '',
            'comment_count': int,
            'like_count': int,
            'duration': 963,
            'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d',
            'timestamp': 1615467405,
            'upload_date': '20210311',
            'release_timestamp': 1615467405,
            'release_date': '20210311',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-provider="vimeo" data-plyr-embed-id="803435276"
        'url': 'https://www.inarcassa.it/',
        'info_dict': {
            'id': '803435276',
            'ext': 'mp4',
            'title': 'HOME_Moto_Perpetuo',
            'uploader': 'Inarcassa',
            'uploader_url': '',
            'duration': 38,
            'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI"
        'url': 'https://www.profile.nl',
        'info_dict': {
            'id': 'GF-BjYKoAqI',
            'ext': 'mp4',
            'title': 'PROFILE: Recruitment Profile',
            'description': '',
            'media_type': 'video',
            'uploader': 'Profile Nederland',
            'uploader_id': '@profilenederland',
            'uploader_url': 'https://www.youtube.com/@profilenederland',
            'channel': 'Profile Nederland',
            'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_follower_count': int,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 39,
            'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg',
            'categories': ['Autos & Vehicles'],
            'tags': [],
            'timestamp': 1675692990,
            'upload_date': '20230206',
            'playable_in_embed': True,
            'availability': 'public',
            'live_status': 'not_live',
        },
    }, {
        # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube"
        'url': 'https://www.vnis.edu.vn',
        'info_dict': {
            'id': 'vnis.edu',
            'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ',
            'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e',
            'age_limit': 0,
            'thumbnail': 'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png',
            'timestamp': 1753233356,
            'upload_date': '20250723',
        },
        'playlist_count': 3,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # Verbose-mode pattern (its internal whitespace is ignored): a <div> that
        # carries both Plyr data attributes, accepted in either order
        plyr_div_re = r'''(?x)
            <div[^>]+(?:
                data-plyr-embed-id="(?P<id1>[^"]+)"[^>]+data-plyr-provider="(?P<provider1>[^"]+)"|
                data-plyr-provider="(?P<provider2>[^"]+)"[^>]+data-plyr-embed-id="(?P<id2>[^"]+)"
            )[^>]*>'''
        for mobj in re.finditer(plyr_div_re, webpage):
            provider = mobj.group('provider1') or mobj.group('provider2')
            if provider not in ('vimeo', 'youtube'):
                # Embeds from any other provider are ignored
                continue

            embed_id = mobj.group('id1') or mobj.group('id2')
            # The attribute holds either a full URL or a bare video ID
            is_full_url = re.match(r'https?://', embed_id)
            if provider == 'vimeo':
                embed_url = embed_id if is_full_url else f'https://player.vimeo.com/video/{embed_id}'
                # The embedding page is passed along as the referrer for Vimeo
                yield VimeoIE._smuggle_referrer(embed_url, url)
            else:
                yield embed_id if is_full_url else f'https://youtube.com/watch?v={embed_id}'
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user