mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
Merge branch 'master' into ke/refactor-move-files-pp
This commit is contained in:
commit
96a9b5573f
7
.github/workflows/build.yml
vendored
7
.github/workflows/build.yml
vendored
@ -411,7 +411,7 @@ jobs:
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --include curl-cffi
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl"
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@ -460,7 +460,7 @@ jobs:
|
||||
run: |
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl"
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
run: |
|
||||
@ -504,7 +504,8 @@ jobs:
|
||||
- windows32
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifact
|
||||
pattern: build-bin-*
|
||||
|
17
.github/workflows/release-master.yml
vendored
17
.github/workflows/release-master.yml
vendored
@ -28,3 +28,20 @@ jobs:
|
||||
actions: write # For cleaning up cache
|
||||
id-token: write # mandatory for trusted publishing
|
||||
secrets: inherit
|
||||
|
||||
publish_pypi:
|
||||
needs: [release]
|
||||
if: vars.MASTER_PYPI_PROJECT != ''
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
id-token: write # mandatory for trusted publishing
|
||||
steps:
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: dist
|
||||
name: build-pypi
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
|
17
.github/workflows/release-nightly.yml
vendored
17
.github/workflows/release-nightly.yml
vendored
@ -41,3 +41,20 @@ jobs:
|
||||
actions: write # For cleaning up cache
|
||||
id-token: write # mandatory for trusted publishing
|
||||
secrets: inherit
|
||||
|
||||
publish_pypi:
|
||||
needs: [release]
|
||||
if: vars.NIGHTLY_PYPI_PROJECT != ''
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
id-token: write # mandatory for trusted publishing
|
||||
steps:
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: dist
|
||||
name: build-pypi
|
||||
- name: Publish to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
|
19
.github/workflows/release.yml
vendored
19
.github/workflows/release.yml
vendored
@ -2,10 +2,6 @@ name: Release
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
prerelease:
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
source:
|
||||
required: false
|
||||
default: ''
|
||||
@ -18,6 +14,10 @@ on:
|
||||
required: false
|
||||
default: ''
|
||||
type: string
|
||||
prerelease:
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
source:
|
||||
@ -278,11 +278,20 @@ jobs:
|
||||
make clean-cache
|
||||
python -m build --no-isolation .
|
||||
|
||||
- name: Upload artifacts
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-pypi
|
||||
path: |
|
||||
dist/*
|
||||
compression-level: 0
|
||||
|
||||
- name: Publish to PyPI
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly)
|
||||
|
||||
publish:
|
||||
needs: [prepare, build]
|
||||
|
20
CONTRIBUTORS
20
CONTRIBUTORS
@ -695,3 +695,23 @@ KBelmin
|
||||
kesor
|
||||
MellowKyler
|
||||
Wesley107772
|
||||
a13ssandr0
|
||||
ChocoLZS
|
||||
doe1080
|
||||
hugovdev
|
||||
jshumphrey
|
||||
julionc
|
||||
manavchaudhary1
|
||||
powergold1
|
||||
Sakura286
|
||||
SamDecrock
|
||||
stratus-ss
|
||||
subrat-lima
|
||||
gitninja1234
|
||||
jkruse
|
||||
xiaomac
|
||||
wesson09
|
||||
Crypto90
|
||||
MutantPiggieGolem1
|
||||
Sanceilaks
|
||||
Strkmn
|
||||
|
161
Changelog.md
161
Changelog.md
@ -4,6 +4,167 @@ # Changelog
|
||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2025.01.15
|
||||
|
||||
#### Extractor changes
|
||||
- **youtube**: [Do not use `web_creator` as a default client](https://github.com/yt-dlp/yt-dlp/commit/c8541f8b13e743fcfa06667530d13fee8686e22a) ([#12087](https://github.com/yt-dlp/yt-dlp/issues/12087)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2025.01.12
|
||||
|
||||
#### Core changes
|
||||
- [Fix filename sanitization with `--no-windows-filenames`](https://github.com/yt-dlp/yt-dlp/commit/8346b549150003df988538e54c9d8bc4de568979) ([#11988](https://github.com/yt-dlp/yt-dlp/issues/11988)) by [bashonly](https://github.com/bashonly)
|
||||
- [Validate retries values are non-negative](https://github.com/yt-dlp/yt-dlp/commit/1f4e1e85a27c5b43e34d7706cfd88ffce1b56a4a) ([#11927](https://github.com/yt-dlp/yt-dlp/issues/11927)) by [Strkmn](https://github.com/Strkmn)
|
||||
|
||||
#### Extractor changes
|
||||
- **drtalks**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1f489f4a45691cac3f9e787d22a3a8a086229ba6) ([#10831](https://github.com/yt-dlp/yt-dlp/issues/10831)) by [pzhlkj6612](https://github.com/pzhlkj6612), [seproDev](https://github.com/seproDev)
|
||||
- **plvideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3c14e9191f3035b9a729d1d87bc0381f42de57cf) ([#10657](https://github.com/yt-dlp/yt-dlp/issues/10657)) by [Sanceilaks](https://github.com/Sanceilaks), [seproDev](https://github.com/seproDev)
|
||||
- **vine**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/e2ef4fece6c9742d1733e3bae408c4787765f78c) ([#11700](https://github.com/yt-dlp/yt-dlp/issues/11700)) by [allendema](https://github.com/allendema)
|
||||
- **xiaohongshu**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/763ed06ee69f13949397897bd42ff2ec3dc3d384) ([#11806](https://github.com/yt-dlp/yt-dlp/issues/11806)) by [HobbyistDev](https://github.com/HobbyistDev)
|
||||
- **youtube**
|
||||
- [Fix DASH formats incorrectly skipped in some situations](https://github.com/yt-dlp/yt-dlp/commit/0b6b7742c2e7f2a1fcb0b54ef3dd484bab404b3f) ([#11910](https://github.com/yt-dlp/yt-dlp/issues/11910)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Refactor cookie auth](https://github.com/yt-dlp/yt-dlp/commit/75079f4e3f7dce49b61ef01da7adcd9876a0ca3b) ([#11989](https://github.com/yt-dlp/yt-dlp/issues/11989)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
- [Use `tv` instead of `mweb` client by default](https://github.com/yt-dlp/yt-dlp/commit/712d2abb32f59b2d246be2901255f84f1a4c30b3) ([#12059](https://github.com/yt-dlp/yt-dlp/issues/12059)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
#### Misc. changes
|
||||
- **cleanup**: Miscellaneous: [dade5e3](https://github.com/yt-dlp/yt-dlp/commit/dade5e35c89adaad04408bfef766820dbca06ebe) by [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
|
||||
|
||||
### 2024.12.23
|
||||
|
||||
#### Core changes
|
||||
- [Don't sanitize filename on Unix when `--no-windows-filenames`](https://github.com/yt-dlp/yt-dlp/commit/6fc85f617a5850307fd5b258477070e6ee177796) ([#9591](https://github.com/yt-dlp/yt-dlp/issues/9591)) by [pukkandan](https://github.com/pukkandan)
|
||||
- **update**
|
||||
- [Check 64-bitness when upgrading ARM builds](https://github.com/yt-dlp/yt-dlp/commit/b91c3925c2059970daa801cb131c0c2f4f302e72) ([#11819](https://github.com/yt-dlp/yt-dlp/issues/11819)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix endless update loop for `linux_exe` builds](https://github.com/yt-dlp/yt-dlp/commit/3d3ee458c1fe49dd5ebd7651a092119d23eb7000) ([#11827](https://github.com/yt-dlp/yt-dlp/issues/11827)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Extractor changes
|
||||
- **soundcloud**: [Various fixes](https://github.com/yt-dlp/yt-dlp/commit/d298693b1b266d198e8eeecb90ea17c4a031268f) ([#11820](https://github.com/yt-dlp/yt-dlp/issues/11820)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**
|
||||
- [Add age-gate workaround for some embeddable videos](https://github.com/yt-dlp/yt-dlp/commit/09a6c687126f04e243fcb105a828787efddd1030) ([#11821](https://github.com/yt-dlp/yt-dlp/issues/11821)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix `uploader_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/1a8851b689763e5173b96f70f8a71df0e4a44b66) ([#11818](https://github.com/yt-dlp/yt-dlp/issues/11818)) by [bashonly](https://github.com/bashonly)
|
||||
- [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/65cf46cddd873fd229dbb0fc0689bca4c201c6b6) ([#11893](https://github.com/yt-dlp/yt-dlp/issues/11893)) by [bashonly](https://github.com/bashonly)
|
||||
- [Skip iOS formats that require PO Token](https://github.com/yt-dlp/yt-dlp/commit/9f42e68a74f3f00b0253fe70763abd57cac4237b) ([#11890](https://github.com/yt-dlp/yt-dlp/issues/11890)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
### 2024.12.13
|
||||
|
||||
#### Extractor changes
|
||||
- **patreon**: campaign: [Support /c/ URLs](https://github.com/yt-dlp/yt-dlp/commit/bc262bcad4d3683ceadf61a7eb87e233e72adef3) ([#11756](https://github.com/yt-dlp/yt-dlp/issues/11756)) by [bashonly](https://github.com/bashonly)
|
||||
- **soundcloud**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/f4d3e9e6dc25077b79849a31a2f67f93fdc01e62) ([#11777](https://github.com/yt-dlp/yt-dlp/issues/11777)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**
|
||||
- [Fix `release_date` extraction](https://github.com/yt-dlp/yt-dlp/commit/d5e2a379f2adcb28bc48c7d9e90716d7278f89d2) ([#11759](https://github.com/yt-dlp/yt-dlp/issues/11759)) by [MutantPiggieGolem1](https://github.com/MutantPiggieGolem1)
|
||||
- [Fix signature function extraction for `2f1832d2`](https://github.com/yt-dlp/yt-dlp/commit/5460cd91891bf613a2065e2fc278d9903c37a127) ([#11801](https://github.com/yt-dlp/yt-dlp/issues/11801)) by [bashonly](https://github.com/bashonly)
|
||||
- [Prioritize original language over auto-dubbed audio](https://github.com/yt-dlp/yt-dlp/commit/dc3c4fddcc653989dae71fc563d82a308fc898cc) ([#11803](https://github.com/yt-dlp/yt-dlp/issues/11803)) by [bashonly](https://github.com/bashonly)
|
||||
- search_url: [Fix playlist searches](https://github.com/yt-dlp/yt-dlp/commit/f6c73aad5f1a67544bea137ebd9d1e22e0e56567) ([#11782](https://github.com/yt-dlp/yt-dlp/issues/11782)) by [Crypto90](https://github.com/Crypto90)
|
||||
|
||||
#### Misc. changes
|
||||
- **cleanup**: [Make more playlist entries lazy](https://github.com/yt-dlp/yt-dlp/commit/54216696261bc07cacd9a837c501d9e0b7fed09e) ([#11763](https://github.com/yt-dlp/yt-dlp/issues/11763)) by [seproDev](https://github.com/seproDev)
|
||||
|
||||
### 2024.12.06
|
||||
|
||||
#### Core changes
|
||||
- **cookies**: [Add `--cookies-from-browser` support for MS Store Firefox](https://github.com/yt-dlp/yt-dlp/commit/354cb4026cf2191e1a130ec2a627b95cabfbc60a) ([#11731](https://github.com/yt-dlp/yt-dlp/issues/11731)) by [wesson09](https://github.com/wesson09)
|
||||
|
||||
#### Extractor changes
|
||||
- **bilibili**: [Fix HD formats extraction](https://github.com/yt-dlp/yt-dlp/commit/fca3eb5f8be08d5fab2e18b45b7281a12e566725) ([#11734](https://github.com/yt-dlp/yt-dlp/issues/11734)) by [grqz](https://github.com/grqz)
|
||||
- **soundcloud**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2feb28028ee48f2185d2d95076e62accb09b9e2e) ([#11742](https://github.com/yt-dlp/yt-dlp/issues/11742)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**
|
||||
- [Fix `n` sig extraction for player `3bb1f723`](https://github.com/yt-dlp/yt-dlp/commit/a95ee6d8803fca9157adecf63732ab58bf87fd88) ([#11750](https://github.com/yt-dlp/yt-dlp/issues/11750)) by [bashonly](https://github.com/bashonly) (With fixes in [4bd2655](https://github.com/yt-dlp/yt-dlp/commit/4bd2655398aed450456197a6767639114a24eac2))
|
||||
- [Fix signature function extraction](https://github.com/yt-dlp/yt-dlp/commit/4c85ccd1366c88cf93982f8350f58eed17355981) ([#11751](https://github.com/yt-dlp/yt-dlp/issues/11751)) by [bashonly](https://github.com/bashonly)
|
||||
- [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/2e49c789d3eebc39af8910705d65a98bca0e4c4f) ([#11724](https://github.com/yt-dlp/yt-dlp/issues/11724)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2024.12.03
|
||||
|
||||
#### Core changes
|
||||
- [Add `playlist_webpage_url` field](https://github.com/yt-dlp/yt-dlp/commit/7d6c259a03bc4707a319e5e8c6eff0278707874b) ([#11613](https://github.com/yt-dlp/yt-dlp/issues/11613)) by [seproDev](https://github.com/seproDev)
|
||||
|
||||
#### Extractor changes
|
||||
- [Handle fragmented formats in `_remove_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/e0500cbf796323551bbabe5b8ed8c75a511ba47a) ([#11637](https://github.com/yt-dlp/yt-dlp/issues/11637)) by [Grub4K](https://github.com/Grub4K)
|
||||
- **bilibili**
|
||||
- [Always try to extract HD formats](https://github.com/yt-dlp/yt-dlp/commit/dc1687648077c5bf64863b307ecc5ab7e029bd8d) ([#10559](https://github.com/yt-dlp/yt-dlp/issues/10559)) by [grqz](https://github.com/grqz)
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/239f5f36fe04603bec59c8b975f6a792f10246db) ([#11667](https://github.com/yt-dlp/yt-dlp/issues/11667)) by [grqz](https://github.com/grqz) (With fixes in [f05a1cd](https://github.com/yt-dlp/yt-dlp/commit/f05a1cd1492fc98dc8d80d2081d632a1879913d2) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz))
|
||||
- [Fix subtitles and chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/a13a336aa6f906812701abec8101b73b73db8ff7) ([#11708](https://github.com/yt-dlp/yt-dlp/issues/11708)) by [xiaomac](https://github.com/xiaomac)
|
||||
- **chaturbate**: [Fix support for non-public streams](https://github.com/yt-dlp/yt-dlp/commit/4b5eec0aaa7c02627f27a386591b735b90e681a8) ([#11624](https://github.com/yt-dlp/yt-dlp/issues/11624)) by [jkruse](https://github.com/jkruse)
|
||||
- **dacast**: [Fix HLS AES formats extraction](https://github.com/yt-dlp/yt-dlp/commit/0a0d80800b9350d1a4c4b18d82cfb77ffbc3c507) ([#11644](https://github.com/yt-dlp/yt-dlp/issues/11644)) by [bashonly](https://github.com/bashonly)
|
||||
- **dropbox**: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/00dcde728635633eee969ad4d498b9f233c4a94e) ([#11636](https://github.com/yt-dlp/yt-dlp/issues/11636)) by [bashonly](https://github.com/bashonly)
|
||||
- **duoplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/62cba8a1bedbfc0ddde7267ae57b72bf5f7ea7b1) ([#11588](https://github.com/yt-dlp/yt-dlp/issues/11588)) by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc)
|
||||
- **facebook**: [Support more groups URLs](https://github.com/yt-dlp/yt-dlp/commit/e0f1ae813b36e783e2348ba2a1566e12f5cd8f6e) ([#11576](https://github.com/yt-dlp/yt-dlp/issues/11576)) by [grqz](https://github.com/grqz)
|
||||
- **instagram**: [Support `share` URLs](https://github.com/yt-dlp/yt-dlp/commit/360aed810ad85db950df586282d256516c98cd2d) ([#11677](https://github.com/yt-dlp/yt-dlp/issues/11677)) by [grqz](https://github.com/grqz)
|
||||
- **microsoftembed**: [Make format extraction non fatal](https://github.com/yt-dlp/yt-dlp/commit/2bea7936323ca4b6f3b9b1fdd892566223e30efa) ([#11654](https://github.com/yt-dlp/yt-dlp/issues/11654)) by [seproDev](https://github.com/seproDev)
|
||||
- **mitele**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cd0f934604587ed793e9177f6a127e5dcf99a7dd) ([#11683](https://github.com/yt-dlp/yt-dlp/issues/11683)) by [DarkZeros](https://github.com/DarkZeros)
|
||||
- **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/16336c51d0848a6868a4fa04e749fa03548b4913) ([#11596](https://github.com/yt-dlp/yt-dlp/issues/11596)) by [gitninja1234](https://github.com/gitninja1234)
|
||||
- **tiktok**: [Deprioritize animated thumbnails](https://github.com/yt-dlp/yt-dlp/commit/910ecc422930bca14e2abe4986f5f92359e3cea8) ([#11645](https://github.com/yt-dlp/yt-dlp/issues/11645)) by [bashonly](https://github.com/bashonly)
|
||||
- **vk**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c038a7b187ba24360f14134842a7a2cf897c33b1) ([#11715](https://github.com/yt-dlp/yt-dlp/issues/11715)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**
|
||||
- [Adjust player clients for site changes](https://github.com/yt-dlp/yt-dlp/commit/0d146c1e36f467af30e87b7af651bdee67b73500) ([#11663](https://github.com/yt-dlp/yt-dlp/issues/11663)) by [bashonly](https://github.com/bashonly)
|
||||
- tab: [Fix playlists tab extraction](https://github.com/yt-dlp/yt-dlp/commit/fe70f20aedf528fdee332131bc9b6710e54e6f10) ([#11615](https://github.com/yt-dlp/yt-dlp/issues/11615)) by [seproDev](https://github.com/seproDev)
|
||||
|
||||
#### Networking changes
|
||||
- **Request Handler**: websockets: [Support websockets 14.0+](https://github.com/yt-dlp/yt-dlp/commit/c7316373c0a886f65a07a51e50ee147bb3294c85) ([#11616](https://github.com/yt-dlp/yt-dlp/issues/11616)) by [coletdjnz](https://github.com/coletdjnz)
|
||||
|
||||
#### Misc. changes
|
||||
- **cleanup**
|
||||
- [Bump ruff to 0.8.x](https://github.com/yt-dlp/yt-dlp/commit/d8fb3490863653182864d2a53522f350d67a9ff8) ([#11608](https://github.com/yt-dlp/yt-dlp/issues/11608)) by [seproDev](https://github.com/seproDev)
|
||||
- Miscellaneous
|
||||
- [ccf0a6b](https://github.com/yt-dlp/yt-dlp/commit/ccf0a6b86b7f68a75463804fe485ec240b8635f0) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- [2b67ac3](https://github.com/yt-dlp/yt-dlp/commit/2b67ac300ac8b44368fb121637d1743cea8c5b6b) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
|
||||
### 2024.11.18
|
||||
|
||||
#### Important changes
|
||||
- **Login with OAuth is no longer supported for YouTube**
|
||||
Due to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)
|
||||
|
||||
#### Core changes
|
||||
- [Catch broken Cryptodome installations](https://github.com/yt-dlp/yt-dlp/commit/b83ca24eb72e1e558b0185bd73975586c0bc0546) ([#11486](https://github.com/yt-dlp/yt-dlp/issues/11486)) by [seproDev](https://github.com/seproDev)
|
||||
- **utils**
|
||||
- [Fix `join_nonempty`, add `**kwargs` to `unpack`](https://github.com/yt-dlp/yt-dlp/commit/39d79c9b9cf23411d935910685c40aa1a2fdb409) ([#11559](https://github.com/yt-dlp/yt-dlp/issues/11559)) by [Grub4K](https://github.com/Grub4K)
|
||||
- `subs_list_to_dict`: [Add `lang` default parameter](https://github.com/yt-dlp/yt-dlp/commit/c014fbcddcb4c8f79d914ac5bb526758b540ea33) ([#11508](https://github.com/yt-dlp/yt-dlp/issues/11508)) by [Grub4K](https://github.com/Grub4K)
|
||||
|
||||
#### Extractor changes
|
||||
- [Allow `ext` override for thumbnails](https://github.com/yt-dlp/yt-dlp/commit/eb64ae7d5def6df2aba74fb703e7f168fb299865) ([#11545](https://github.com/yt-dlp/yt-dlp/issues/11545)) by [bashonly](https://github.com/bashonly)
|
||||
- **adobepass**: [Fix provider requests](https://github.com/yt-dlp/yt-dlp/commit/85fdc66b6e01d19a94b4f39b58e3c0cf23600902) ([#11472](https://github.com/yt-dlp/yt-dlp/issues/11472)) by [bashonly](https://github.com/bashonly)
|
||||
- **archive.org**: [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/f2a4983df7a64c4e93b56f79dbd16a781bd90206) ([#11527](https://github.com/yt-dlp/yt-dlp/issues/11527)) by [jshumphrey](https://github.com/jshumphrey)
|
||||
- **bandlab**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6365e92589e4bc17b8fffb0125a716d144ad2137) ([#11535](https://github.com/yt-dlp/yt-dlp/issues/11535)) by [seproDev](https://github.com/seproDev)
|
||||
- **chaturbate**
|
||||
- [Extract from API and support impersonation](https://github.com/yt-dlp/yt-dlp/commit/720b3dc453c342bc2e8df7dbc0acaab4479de46c) ([#11555](https://github.com/yt-dlp/yt-dlp/issues/11555)) by [powergold1](https://github.com/powergold1) (With fixes in [7cecd29](https://github.com/yt-dlp/yt-dlp/commit/7cecd299e4a5ef1f0f044b2fedc26f17e41f15e3) by [seproDev](https://github.com/seproDev))
|
||||
- [Support alternate domains](https://github.com/yt-dlp/yt-dlp/commit/a9f85670d03ab993dc589f21a9ffffcad61392d5) ([#10595](https://github.com/yt-dlp/yt-dlp/issues/10595)) by [manavchaudhary1](https://github.com/manavchaudhary1)
|
||||
- **cloudflarestream**: [Avoid extraction via videodelivery.net](https://github.com/yt-dlp/yt-dlp/commit/2db8c2e7d57a1784b06057c48e3e91023720d195) ([#11478](https://github.com/yt-dlp/yt-dlp/issues/11478)) by [hugovdev](https://github.com/hugovdev)
|
||||
- **ctvnews**
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f351440f1dc5b3dfbfc5737b037a869d946056fe) ([#11534](https://github.com/yt-dlp/yt-dlp/issues/11534)) by [bashonly](https://github.com/bashonly), [jshumphrey](https://github.com/jshumphrey)
|
||||
- [Fix playlist ID extraction](https://github.com/yt-dlp/yt-dlp/commit/f9d98509a898737c12977b2e2117277bada2c196) ([#8892](https://github.com/yt-dlp/yt-dlp/issues/8892)) by [qbnu](https://github.com/qbnu)
|
||||
- **digitalconcerthall**: [Support login with access/refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/f7257588bdff5f0b0452635a66b253a783c97357) ([#11571](https://github.com/yt-dlp/yt-dlp/issues/11571)) by [bashonly](https://github.com/bashonly)
|
||||
- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/bacc31b05a04181b63100c481565256b14813a5e) ([#11513](https://github.com/yt-dlp/yt-dlp/issues/11513)) by [bashonly](https://github.com/bashonly)
|
||||
- **gamedevtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be3579aaf0c3b71a0a3195e1955415d5e4d6b3d8) ([#11368](https://github.com/yt-dlp/yt-dlp/issues/11368)) by [bashonly](https://github.com/bashonly), [stratus-ss](https://github.com/stratus-ss)
|
||||
- **goplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b43a8d84b881d769b480ba6e20ec691e9d1b92d) ([#11466](https://github.com/yt-dlp/yt-dlp/issues/11466)) by [bashonly](https://github.com/bashonly), [SamDecrock](https://github.com/SamDecrock)
|
||||
- **kenh14**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eb15fd5a32d8b35ef515f7a3d1158c03025648ff) ([#3996](https://github.com/yt-dlp/yt-dlp/issues/3996)) by [krichbanana](https://github.com/krichbanana), [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e079ffbda66de150c0a9ebef05e89f61bb4d5f76) ([#11071](https://github.com/yt-dlp/yt-dlp/issues/11071)) by [jiru](https://github.com/jiru)
|
||||
- **mixchmovie**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0ec9bfed4d4a52bfb4f8733da1acf0aeeae21e6b) ([#10897](https://github.com/yt-dlp/yt-dlp/issues/10897)) by [Sakura286](https://github.com/Sakura286)
|
||||
- **patreon**: [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/1d253b0a27110d174c40faf8fb1c999d099e0cde) ([#11530](https://github.com/yt-dlp/yt-dlp/issues/11530)) by [bashonly](https://github.com/bashonly), [jshumphrey](https://github.com/jshumphrey)
|
||||
- **pialive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d867f99622ef7fba690b08da56c39d739b822bb7) ([#10811](https://github.com/yt-dlp/yt-dlp/issues/10811)) by [ChocoLZS](https://github.com/ChocoLZS)
|
||||
- **radioradicale**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/70c55cb08f780eab687e881ef42bb5c6007d290b) ([#5607](https://github.com/yt-dlp/yt-dlp/issues/5607)) by [a13ssandr0](https://github.com/a13ssandr0), [pzhlkj6612](https://github.com/pzhlkj6612)
|
||||
- **reddit**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7ea2787920cccc6b8ea30791993d114fbd564434) ([#11573](https://github.com/yt-dlp/yt-dlp/issues/11573)) by [bashonly](https://github.com/bashonly)
|
||||
- **redgifsuser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/d215fba7edb69d4fa665f43663756fd260b1489f) ([#11531](https://github.com/yt-dlp/yt-dlp/issues/11531)) by [jshumphrey](https://github.com/jshumphrey)
|
||||
- **rutube**: [Rework extractors](https://github.com/yt-dlp/yt-dlp/commit/e398217aae19bb25f91797bfbe8a3243698d7f45) ([#11480](https://github.com/yt-dlp/yt-dlp/issues/11480)) by [seproDev](https://github.com/seproDev)
|
||||
- **sonylivseries**: [Add `sort_order` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/2009cb27e17014787bf63eaa2ada51293d54f22a) ([#11569](https://github.com/yt-dlp/yt-dlp/issues/11569)) by [bashonly](https://github.com/bashonly)
|
||||
- **soop**: [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/c699bafc5038b59c9afe8c2e69175fb66424c832) ([#11545](https://github.com/yt-dlp/yt-dlp/issues/11545)) by [bashonly](https://github.com/bashonly)
|
||||
- **spankbang**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/8388ec256f7753b02488788e3cfa771f6e1db247) ([#11542](https://github.com/yt-dlp/yt-dlp/issues/11542)) by [jshumphrey](https://github.com/jshumphrey)
|
||||
- **spreaker**
|
||||
- [Support episode pages and access keys](https://github.com/yt-dlp/yt-dlp/commit/c39016f66df76d14284c705736ca73db8055d8de) ([#11489](https://github.com/yt-dlp/yt-dlp/issues/11489)) by [julionc](https://github.com/julionc)
|
||||
- [Support podcast and feed pages](https://github.com/yt-dlp/yt-dlp/commit/c6737310619022248f5d0fd13872073cac168453) ([#10968](https://github.com/yt-dlp/yt-dlp/issues/10968)) by [subrat-lima](https://github.com/subrat-lima)
|
||||
- **youtube**
|
||||
- [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/637d62a3a9fc723d68632c1af25c30acdadeeb85) ([#11528](https://github.com/yt-dlp/yt-dlp/issues/11528)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
|
||||
- [Remove broken OAuth support](https://github.com/yt-dlp/yt-dlp/commit/52c0ffe40ad6e8404d93296f575007b05b04c686) ([#11558](https://github.com/yt-dlp/yt-dlp/issues/11558)) by [bashonly](https://github.com/bashonly)
|
||||
- tab: [Fix podcasts tab extraction](https://github.com/yt-dlp/yt-dlp/commit/37cd7660eaff397c551ee18d80507702342b0c2b) ([#11567](https://github.com/yt-dlp/yt-dlp/issues/11567)) by [seproDev](https://github.com/seproDev)
|
||||
|
||||
#### Misc. changes
|
||||
- **build**
|
||||
- [Bump PyInstaller version pin to `>=6.11.1`](https://github.com/yt-dlp/yt-dlp/commit/f9c8deb4e5887ff5150e911ac0452e645f988044) ([#11507](https://github.com/yt-dlp/yt-dlp/issues/11507)) by [bashonly](https://github.com/bashonly)
|
||||
- [Enable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/f13df591d4d7ca8e2f31b35c9c91e69ba9e9b013) ([#11420](https://github.com/yt-dlp/yt-dlp/issues/11420)) by [bashonly](https://github.com/bashonly)
|
||||
- [Pin `websockets` version to >=13.0,<14](https://github.com/yt-dlp/yt-dlp/commit/240a7d43c8a67ffb86d44dc276805aa43c358dcc) ([#11488](https://github.com/yt-dlp/yt-dlp/issues/11488)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**
|
||||
- [Deprecate more compat functions](https://github.com/yt-dlp/yt-dlp/commit/f95a92b3d0169a784ee15a138fbe09d82b2754a1) ([#11439](https://github.com/yt-dlp/yt-dlp/issues/11439)) by [seproDev](https://github.com/seproDev)
|
||||
- [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/10fc719bc7f1eef469389c5219102266ef411f29) ([#11566](https://github.com/yt-dlp/yt-dlp/issues/11566)) by [doe1080](https://github.com/doe1080)
|
||||
- Miscellaneous: [da252d9](https://github.com/yt-dlp/yt-dlp/commit/da252d9d322af3e2178ac5eae324809502a0a862) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
|
||||
|
||||
### 2024.11.04
|
||||
|
||||
#### Important changes
|
||||
|
21
README.md
21
README.md
@ -342,8 +342,9 @@ ## General Options:
|
||||
extractor plugins; postprocessor plugins can
|
||||
only be loaded from the default plugin
|
||||
directories
|
||||
--flat-playlist Do not extract the videos of a playlist,
|
||||
only list them
|
||||
--flat-playlist Do not extract a playlist's URL result
|
||||
entries; some entry metadata may be missing
|
||||
and downloading may be bypassed
|
||||
--no-flat-playlist Fully extract the videos of a playlist
|
||||
(default)
|
||||
--live-from-start Download livestreams from the start.
|
||||
@ -612,8 +613,7 @@ ## Filesystem Options:
|
||||
--no-restrict-filenames Allow Unicode characters, "&" and spaces in
|
||||
filenames (default)
|
||||
--windows-filenames Force filenames to be Windows-compatible
|
||||
--no-windows-filenames Make filenames Windows-compatible only if
|
||||
using Windows (default)
|
||||
--no-windows-filenames Sanitize filenames only minimally
|
||||
--trim-filenames LENGTH Limit the filename length (excluding
|
||||
extension) to the specified number of
|
||||
characters
|
||||
@ -1293,6 +1293,7 @@ # OUTPUT TEMPLATE
|
||||
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader
|
||||
- `playlist_channel` (string): Display name of the channel that uploaded the playlist
|
||||
- `playlist_channel_id` (string): Identifier of the channel that uploaded the playlist
|
||||
- `playlist_webpage_url` (string): URL of the playlist webpage
|
||||
- `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again
|
||||
- `webpage_url_basename` (string): The basename of the webpage URL
|
||||
- `webpage_url_domain` (string): The domain of the webpage URL
|
||||
@ -1759,7 +1760,7 @@ # Replace all spaces and "_" in title and uploader with a `-`
|
||||
|
||||
# EXTRACTOR ARGUMENTS
|
||||
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"`
|
||||
Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=tv,mweb;formats=incomplete" --extractor-args "funimation:version=uncut"`
|
||||
|
||||
Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`
|
||||
|
||||
@ -1768,13 +1769,13 @@ # EXTRACTOR ARGUMENTS
|
||||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator,mediaconnect` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
|
||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `_music` variants may be added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
* `max_comments`: Limit the number of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
||||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
|
||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
|
||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
|
||||
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
||||
@ -1858,7 +1859,7 @@ #### afreecatvlive
|
||||
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
|
||||
|
||||
#### soundcloud
|
||||
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
|
||||
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{codec}`, e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known codecs include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
|
||||
|
||||
#### orfon (orf:on)
|
||||
* `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"`
|
||||
@ -1866,8 +1867,8 @@ #### orfon (orf:on)
|
||||
#### bilibili
|
||||
* `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats
|
||||
|
||||
#### digitalconcerthall
|
||||
* `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats
|
||||
#### sonylivseries
|
||||
* `sort_order`: Episode sort order for series extraction - one of `asc` (ascending, oldest first) or `desc` (descending, newest first). Default is `asc`
|
||||
|
||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||
|
||||
|
@ -234,5 +234,10 @@
|
||||
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
|
||||
"short": "[ie/vimeo] Fix API retries (#11351)",
|
||||
"authors": ["bashonly"]
|
||||
},
|
||||
{
|
||||
"action": "add",
|
||||
"when": "52c0ffe40ad6e8404d93296f575007b05b04c686",
|
||||
"short": "[priority] **Login with OAuth is no longer supported for YouTube**\nDue to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)"
|
||||
}
|
||||
]
|
||||
|
@ -11,13 +11,12 @@
|
||||
import subprocess
|
||||
|
||||
from yt_dlp.aes import aes_encrypt, key_expansion
|
||||
from yt_dlp.utils import intlist_to_bytes
|
||||
|
||||
secret_msg = b'Secret message goes here'
|
||||
|
||||
|
||||
def hex_str(int_list):
|
||||
return codecs.encode(intlist_to_bytes(int_list), 'hex')
|
||||
return codecs.encode(bytes(int_list), 'hex')
|
||||
|
||||
|
||||
def openssl_encode(algo, key, iv):
|
||||
|
@ -76,14 +76,14 @@ dev = [
|
||||
]
|
||||
static-analysis = [
|
||||
"autopep8~=2.0",
|
||||
"ruff~=0.7.0",
|
||||
"ruff~=0.9.0",
|
||||
]
|
||||
test = [
|
||||
"pytest~=8.1",
|
||||
"pytest-rerunfailures~=14.0",
|
||||
]
|
||||
pyinstaller = [
|
||||
"pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0
|
||||
"pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
@ -186,6 +186,7 @@ ignore = [
|
||||
"E501", # line-too-long
|
||||
"E731", # lambda-assignment
|
||||
"E741", # ambiguous-variable-name
|
||||
"UP031", # printf-string-formatting
|
||||
"UP036", # outdated-version-block
|
||||
"B006", # mutable-argument-default
|
||||
"B008", # function-call-in-default-argument
|
||||
@ -194,6 +195,7 @@ ignore = [
|
||||
"B023", # function-uses-loop-variable (false positives)
|
||||
"B028", # no-explicit-stacklevel
|
||||
"B904", # raise-without-from-inside-except
|
||||
"A005", # stdlib-module-shadowing
|
||||
"C401", # unnecessary-generator-set
|
||||
"C402", # unnecessary-generator-dict
|
||||
"PIE790", # unnecessary-placeholder
|
||||
@ -258,9 +260,6 @@ select = [
|
||||
"A002", # builtin-argument-shadowing
|
||||
"C408", # unnecessary-collection-call
|
||||
]
|
||||
"yt_dlp/jsinterp.py" = [
|
||||
"UP031", # printf-string-formatting
|
||||
]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
known-first-party = [
|
||||
@ -313,6 +312,16 @@ banned-from = [
|
||||
"yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead."
|
||||
"yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead."
|
||||
"yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead."
|
||||
"yt_dlp.utils.bytes_to_intlist".msg = "Use `list` instead."
|
||||
"yt_dlp.utils.intlist_to_bytes".msg = "Use `bytes` instead."
|
||||
"yt_dlp.utils.decodeArgument".msg = "Do not use"
|
||||
"yt_dlp.utils.decodeFilename".msg = "Do not use"
|
||||
"yt_dlp.utils.encodeFilename".msg = "Do not use"
|
||||
"yt_dlp.compat.compat_os_name".msg = "Use `os.name` instead."
|
||||
"yt_dlp.compat.compat_realpath".msg = "Use `os.path.realpath` instead."
|
||||
"yt_dlp.compat.functools".msg = "Use `functools` instead."
|
||||
"yt_dlp.utils.decodeOption".msg = "Do not use"
|
||||
"yt_dlp.utils.compiled_regex_type".msg = "Use `re.Pattern` instead."
|
||||
|
||||
[tool.autopep8]
|
||||
max_line_length = 120
|
||||
|
@ -129,6 +129,8 @@ # Supported sites
|
||||
- **Bandcamp:album**
|
||||
- **Bandcamp:user**
|
||||
- **Bandcamp:weekly**
|
||||
- **Bandlab**
|
||||
- **BandlabPlaylist**
|
||||
- **BannedVideo**
|
||||
- **bbc**: [*bbc*](## "netrc machine") BBC
|
||||
- **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer
|
||||
@ -372,6 +374,7 @@ # Supported sites
|
||||
- **Dropbox**
|
||||
- **Dropout**: [*dropout*](## "netrc machine")
|
||||
- **DropoutSeason**
|
||||
- **DrTalks**
|
||||
- **DrTuber**
|
||||
- **drtv**
|
||||
- **drtv:live**
|
||||
@ -484,6 +487,7 @@ # Supported sites
|
||||
- **Gab**
|
||||
- **GabTV**
|
||||
- **Gaia**: [*gaia*](## "netrc machine")
|
||||
- **GameDevTVDashboard**: [*gamedevtv*](## "netrc machine")
|
||||
- **GameJolt**
|
||||
- **GameJoltCommunity**
|
||||
- **GameJoltGame**
|
||||
@ -651,6 +655,8 @@ # Supported sites
|
||||
- **Karaoketv**
|
||||
- **Katsomo**: (**Currently broken**)
|
||||
- **KelbyOne**: (**Currently broken**)
|
||||
- **Kenh14Playlist**
|
||||
- **Kenh14Video**
|
||||
- **Ketnet**
|
||||
- **khanacademy**
|
||||
- **khanacademy:unit**
|
||||
@ -784,10 +790,6 @@ # Supported sites
|
||||
- **MicrosoftLearnSession**
|
||||
- **MicrosoftMedius**
|
||||
- **microsoftstream**: Microsoft Stream
|
||||
- **mildom**: Record ongoing live by specific user in Mildom
|
||||
- **mildom:clip**: Clip in Mildom
|
||||
- **mildom:user:vod**: Download all VODs from specific user in Mildom
|
||||
- **mildom:vod**: VOD in Mildom
|
||||
- **minds**
|
||||
- **minds:channel**
|
||||
- **minds:group**
|
||||
@ -798,6 +800,7 @@ # Supported sites
|
||||
- **MiTele**: mitele.es
|
||||
- **mixch**
|
||||
- **mixch:archive**
|
||||
- **mixch:movie**
|
||||
- **mixcloud**
|
||||
- **mixcloud:playlist**
|
||||
- **mixcloud:user**
|
||||
@ -1060,8 +1063,8 @@ # Supported sites
|
||||
- **PhilharmonieDeParis**: Philharmonie de Paris
|
||||
- **phoenix.de**
|
||||
- **Photobucket**
|
||||
- **PiaLive**
|
||||
- **Piapro**: [*piapro*](## "netrc machine")
|
||||
- **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM
|
||||
- **Picarto**
|
||||
- **PicartoVod**
|
||||
- **Piksel**
|
||||
@ -1084,12 +1087,11 @@ # Supported sites
|
||||
- **pluralsight**: [*pluralsight*](## "netrc machine")
|
||||
- **pluralsight:course**
|
||||
- **PlutoTV**: (**Currently broken**)
|
||||
- **PlVideo**: Платформа
|
||||
- **PodbayFM**
|
||||
- **PodbayFMChannel**
|
||||
- **Podchaser**
|
||||
- **podomatic**: (**Currently broken**)
|
||||
- **Pokemon**
|
||||
- **PokemonWatch**
|
||||
- **PokerGo**: [*pokergo*](## "netrc machine")
|
||||
- **PokerGoCollection**: [*pokergo*](## "netrc machine")
|
||||
- **PolsatGo**
|
||||
@ -1160,6 +1162,7 @@ # Supported sites
|
||||
- **RadioJavan**: (**Currently broken**)
|
||||
- **radiokapital**
|
||||
- **radiokapital:show**
|
||||
- **RadioRadicale**
|
||||
- **RadioZetPodcast**
|
||||
- **radlive**
|
||||
- **radlive:channel**
|
||||
@ -1367,9 +1370,7 @@ # Supported sites
|
||||
- **spotify**: Spotify episodes (**Currently broken**)
|
||||
- **spotify:show**: Spotify shows (**Currently broken**)
|
||||
- **Spreaker**
|
||||
- **SpreakerPage**
|
||||
- **SpreakerShow**
|
||||
- **SpreakerShowPage**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
- **SproutVideo**
|
||||
@ -1570,6 +1571,8 @@ # Supported sites
|
||||
- **UFCTV**: [*ufctv*](## "netrc machine")
|
||||
- **ukcolumn**: (**Currently broken**)
|
||||
- **UKTVPlay**
|
||||
- **UlizaPlayer**
|
||||
- **UlizaPortal**: ulizaportal.jp
|
||||
- **umg:de**: Universal Music Deutschland (**Currently broken**)
|
||||
- **Unistra**
|
||||
- **Unity**: (**Currently broken**)
|
||||
@ -1587,8 +1590,6 @@ # Supported sites
|
||||
- **Varzesh3**: (**Currently broken**)
|
||||
- **Vbox7**
|
||||
- **Veo**
|
||||
- **Veoh**
|
||||
- **veoh:user**
|
||||
- **Vesti**: Вести.Ru (**Currently broken**)
|
||||
- **Vevo**
|
||||
- **VevoPlaylist**
|
||||
@ -1642,8 +1643,6 @@ # Supported sites
|
||||
- **Vimm:stream**
|
||||
- **ViMP**
|
||||
- **ViMP:Playlist**
|
||||
- **Vine**
|
||||
- **vine:user**
|
||||
- **Viously**
|
||||
- **Viqeo**: (**Currently broken**)
|
||||
- **Viu**
|
||||
|
@ -9,7 +9,6 @@
|
||||
|
||||
import yt_dlp.extractor
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.compat import compat_os_name
|
||||
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
|
||||
|
||||
if 'pytest' in sys.modules:
|
||||
@ -49,7 +48,7 @@ def report_warning(message, *args, **kwargs):
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
"""
|
||||
if sys.stderr.isatty() and compat_os_name != 'nt':
|
||||
if sys.stderr.isatty() and os.name != 'nt':
|
||||
_msg_header = '\033[0;33mWARNING:\033[0m'
|
||||
else:
|
||||
_msg_header = 'WARNING:'
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
from test.helper import FakeYDL, assertRegexpMatches, try_rm
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.compat import compat_os_name
|
||||
from yt_dlp.extractor import YoutubeIE
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
from yt_dlp.postprocessor.common import PostProcessor
|
||||
@ -762,6 +761,13 @@ def test(tmpl, expected, *, info=None, **params):
|
||||
test('%(width)06d.%%(ext)s', 'NA.%(ext)s')
|
||||
test('%%(width)06d.%(ext)s', '%(width)06d.mp4')
|
||||
|
||||
# Sanitization options
|
||||
test('%(title3)s', (None, 'foo⧸bar⧹test'))
|
||||
test('%(title5)s', (None, 'aei_A'), restrictfilenames=True)
|
||||
test('%(title3)s', (None, 'foo_bar_test'), windowsfilenames=False, restrictfilenames=True)
|
||||
if sys.platform != 'win32':
|
||||
test('%(title3)s', (None, 'foo⧸bar\\test'), windowsfilenames=False)
|
||||
|
||||
# ID sanitization
|
||||
test('%(id)s', '_abcd', info={'id': '_abcd'})
|
||||
test('%(some_id)s', '_abcd', info={'some_id': '_abcd'})
|
||||
@ -839,8 +845,8 @@ def expect_same_infodict(out):
|
||||
test('%(filesize)#D', '1Ki')
|
||||
test('%(height)5.2D', ' 1.08k')
|
||||
test('%(title4)#S', 'foo_bar_test')
|
||||
test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' ')))
|
||||
if compat_os_name == 'nt':
|
||||
test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if os.name == 'nt' else ' ')))
|
||||
if os.name == 'nt':
|
||||
test('%(title4)q', ('"foo ""bar"" test"', None))
|
||||
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
|
||||
test('%(formats.0.id)#q', ('"id 1"', None))
|
||||
@ -903,9 +909,9 @@ def gen():
|
||||
|
||||
# Environment variable expansion for prepare_filename
|
||||
os.environ['__yt_dlp_var'] = 'expanded'
|
||||
envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var'
|
||||
envvar = '%__yt_dlp_var%' if os.name == 'nt' else '$__yt_dlp_var'
|
||||
test(envvar, (envvar, 'expanded'))
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
test('%s%', ('%s%', '%s%'))
|
||||
os.environ['s'] = 'expanded'
|
||||
test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s
|
||||
|
@ -27,7 +27,6 @@
|
||||
pad_block,
|
||||
)
|
||||
from yt_dlp.dependencies import Cryptodome
|
||||
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
|
||||
|
||||
@ -40,33 +39,33 @@ def setUp(self):
|
||||
def test_encrypt(self):
|
||||
msg = b'message'
|
||||
key = list(range(16))
|
||||
encrypted = aes_encrypt(bytes_to_intlist(msg), key)
|
||||
decrypted = intlist_to_bytes(aes_decrypt(encrypted, key))
|
||||
encrypted = aes_encrypt(list(msg), key)
|
||||
decrypted = bytes(aes_decrypt(encrypted, key))
|
||||
self.assertEqual(decrypted, msg)
|
||||
|
||||
def test_cbc_decrypt(self):
|
||||
data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
|
||||
decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv))
|
||||
decrypted = bytes(aes_cbc_decrypt(list(data), self.key, self.iv))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
if Cryptodome.AES:
|
||||
decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv))
|
||||
decrypted = aes_cbc_decrypt_bytes(data, bytes(self.key), bytes(self.iv))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
|
||||
def test_cbc_encrypt(self):
|
||||
data = bytes_to_intlist(self.secret_msg)
|
||||
encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
|
||||
data = list(self.secret_msg)
|
||||
encrypted = bytes(aes_cbc_encrypt(data, self.key, self.iv))
|
||||
self.assertEqual(
|
||||
encrypted,
|
||||
b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd')
|
||||
|
||||
def test_ctr_decrypt(self):
|
||||
data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
|
||||
decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv))
|
||||
data = list(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
|
||||
decrypted = bytes(aes_ctr_decrypt(data, self.key, self.iv))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
|
||||
def test_ctr_encrypt(self):
|
||||
data = bytes_to_intlist(self.secret_msg)
|
||||
encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv))
|
||||
data = list(self.secret_msg)
|
||||
encrypted = bytes(aes_ctr_encrypt(data, self.key, self.iv))
|
||||
self.assertEqual(
|
||||
encrypted,
|
||||
b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
|
||||
@ -75,19 +74,19 @@ def test_gcm_decrypt(self):
|
||||
data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd'
|
||||
authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e'
|
||||
|
||||
decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify(
|
||||
bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12]))
|
||||
decrypted = bytes(aes_gcm_decrypt_and_verify(
|
||||
list(data), self.key, list(authentication_tag), self.iv[:12]))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
if Cryptodome.AES:
|
||||
decrypted = aes_gcm_decrypt_and_verify_bytes(
|
||||
data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12]))
|
||||
data, bytes(self.key), authentication_tag, bytes(self.iv[:12]))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
|
||||
def test_gcm_aligned_decrypt(self):
|
||||
data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f'
|
||||
authentication_tag = b'\x08\xb1\x9d!&\x98\xd0\xeaRq\x90\xe6;\xb5]\xd8'
|
||||
|
||||
decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify(
|
||||
decrypted = bytes(aes_gcm_decrypt_and_verify(
|
||||
list(data), self.key, list(authentication_tag), self.iv[:12]))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16])
|
||||
if Cryptodome.AES:
|
||||
@ -96,38 +95,38 @@ def test_gcm_aligned_decrypt(self):
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16])
|
||||
|
||||
def test_decrypt_text(self):
|
||||
password = intlist_to_bytes(self.key).decode()
|
||||
password = bytes(self.key).decode()
|
||||
encrypted = base64.b64encode(
|
||||
intlist_to_bytes(self.iv[:8])
|
||||
bytes(self.iv[:8])
|
||||
+ b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae',
|
||||
).decode()
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 16))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
password = intlist_to_bytes(self.key).decode()
|
||||
password = bytes(self.key).decode()
|
||||
encrypted = base64.b64encode(
|
||||
intlist_to_bytes(self.iv[:8])
|
||||
bytes(self.iv[:8])
|
||||
+ b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83',
|
||||
).decode()
|
||||
decrypted = (aes_decrypt_text(encrypted, password, 32))
|
||||
self.assertEqual(decrypted, self.secret_msg)
|
||||
|
||||
def test_ecb_encrypt(self):
|
||||
data = bytes_to_intlist(self.secret_msg)
|
||||
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
|
||||
data = list(self.secret_msg)
|
||||
encrypted = bytes(aes_ecb_encrypt(data, self.key))
|
||||
self.assertEqual(
|
||||
encrypted,
|
||||
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
|
||||
|
||||
def test_ecb_decrypt(self):
|
||||
data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
|
||||
decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv))
|
||||
data = list(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
|
||||
decrypted = bytes(aes_ecb_decrypt(data, self.key, self.iv))
|
||||
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
|
||||
|
||||
def test_key_expansion(self):
|
||||
key = '4f6bdaa39e2f8cb07f5e722d9edef314'
|
||||
|
||||
self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [
|
||||
self.assertEqual(key_expansion(list(bytearray.fromhex(key))), [
|
||||
0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14,
|
||||
0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21,
|
||||
0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5,
|
||||
|
@ -12,12 +12,7 @@
|
||||
|
||||
from yt_dlp import compat
|
||||
from yt_dlp.compat import urllib # isort: split
|
||||
from yt_dlp.compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_urllib_parse_unquote, # noqa: TID251
|
||||
compat_urllib_parse_urlencode, # noqa: TID251
|
||||
)
|
||||
from yt_dlp.compat import compat_etree_fromstring, compat_expanduser
|
||||
from yt_dlp.compat.urllib.request import getproxies
|
||||
|
||||
|
||||
@ -43,39 +38,6 @@ def test_compat_expanduser(self):
|
||||
finally:
|
||||
os.environ['HOME'] = old_home or ''
|
||||
|
||||
def test_compat_urllib_parse_unquote(self):
|
||||
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
|
||||
self.assertEqual(compat_urllib_parse_unquote(''), '')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%'), '%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
|
||||
self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
|
||||
self.assertEqual(
|
||||
compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
|
||||
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
|
||||
'''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
|
||||
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
|
||||
self.assertEqual(
|
||||
compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
|
||||
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
|
||||
|
||||
def test_compat_urllib_parse_unquote_plus(self):
|
||||
self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def')
|
||||
self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def')
|
||||
|
||||
def test_compat_urllib_parse_urlencode(self):
|
||||
self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
|
||||
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
|
||||
|
||||
def test_compat_etree_fromstring(self):
|
||||
xml = '''
|
||||
<root foo="bar" spam="中文">
|
||||
|
@ -15,7 +15,6 @@
|
||||
from test.helper import http_server_port, try_rm
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.downloader.http import HttpFD
|
||||
from yt_dlp.utils import encodeFilename
|
||||
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
@ -82,12 +81,12 @@ def download(self, params, ep):
|
||||
ydl = YoutubeDL(params)
|
||||
downloader = HttpFD(ydl, params)
|
||||
filename = 'testfile.mp4'
|
||||
try_rm(encodeFilename(filename))
|
||||
try_rm(filename)
|
||||
self.assertTrue(downloader.real_download(filename, {
|
||||
'url': f'http://127.0.0.1:{self.port}/{ep}',
|
||||
}), ep)
|
||||
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
|
||||
try_rm(encodeFilename(filename))
|
||||
self.assertEqual(os.path.getsize(filename), TEST_SIZE, ep)
|
||||
try_rm(filename)
|
||||
|
||||
def download_all(self, params):
|
||||
for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
|
||||
|
@ -216,7 +216,9 @@ def handle(self):
|
||||
protocol = websockets.ServerProtocol()
|
||||
connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0)
|
||||
connection.handshake()
|
||||
connection.send(json.dumps(self.socks_info))
|
||||
for message in connection:
|
||||
if message == 'socks_info':
|
||||
connection.send(json.dumps(self.socks_info))
|
||||
connection.close()
|
||||
|
||||
|
||||
|
@ -481,7 +481,7 @@ def test_subs_list_to_dict(self):
|
||||
'id': 'name',
|
||||
'data': 'content',
|
||||
'url': 'url',
|
||||
}, all, {subs_list_to_dict}]) == {
|
||||
}, all, {subs_list_to_dict(lang=None)}]) == {
|
||||
'de': [{'url': 'https://example.com/subs/de.ass'}],
|
||||
'en': [{'data': 'content'}],
|
||||
}, 'subs with mandatory items missing should be filtered'
|
||||
@ -507,6 +507,54 @@ def test_subs_list_to_dict(self):
|
||||
{'url': 'https://example.com/subs/en1', 'ext': 'ext'},
|
||||
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
|
||||
]}, '`quality` key should sort subtitle list accordingly'
|
||||
assert traverse_obj([
|
||||
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
|
||||
{'name': 'de'},
|
||||
{'name': 'en', 'content': 'content'},
|
||||
{'url': 'https://example.com/subs/en'},
|
||||
], [..., {
|
||||
'id': 'name',
|
||||
'url': 'url',
|
||||
'data': 'content',
|
||||
}, all, {subs_list_to_dict(lang='en')}]) == {
|
||||
'de': [{'url': 'https://example.com/subs/de.ass'}],
|
||||
'en': [
|
||||
{'data': 'content'},
|
||||
{'url': 'https://example.com/subs/en'},
|
||||
],
|
||||
}, 'optionally provided lang should be used if no id available'
|
||||
assert traverse_obj([
|
||||
{'name': 1, 'url': 'https://example.com/subs/de1'},
|
||||
{'name': {}, 'url': 'https://example.com/subs/de2'},
|
||||
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
|
||||
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
|
||||
], [..., {
|
||||
'id': 'name',
|
||||
'url': 'url',
|
||||
'ext': 'ext',
|
||||
}, all, {subs_list_to_dict(lang=None)}]) == {
|
||||
'de': [
|
||||
{'url': 'https://example.com/subs/de3'},
|
||||
{'url': 'https://example.com/subs/de4'},
|
||||
],
|
||||
}, 'non str types should be ignored for id and ext'
|
||||
assert traverse_obj([
|
||||
{'name': 1, 'url': 'https://example.com/subs/de1'},
|
||||
{'name': {}, 'url': 'https://example.com/subs/de2'},
|
||||
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
|
||||
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
|
||||
], [..., {
|
||||
'id': 'name',
|
||||
'url': 'url',
|
||||
'ext': 'ext',
|
||||
}, all, {subs_list_to_dict(lang='de')}]) == {
|
||||
'de': [
|
||||
{'url': 'https://example.com/subs/de1'},
|
||||
{'url': 'https://example.com/subs/de2'},
|
||||
{'url': 'https://example.com/subs/de3'},
|
||||
{'url': 'https://example.com/subs/de4'},
|
||||
],
|
||||
}, 'non str types should be replaced by default id'
|
||||
|
||||
def test_trim_str(self):
|
||||
with pytest.raises(TypeError):
|
||||
@ -525,7 +573,7 @@ def test_trim_str(self):
|
||||
def test_unpack(self):
|
||||
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
|
||||
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
|
||||
assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3'
|
||||
assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
|
||||
with pytest.raises(TypeError):
|
||||
unpack(join_nonempty)()
|
||||
with pytest.raises(TypeError):
|
||||
|
@ -21,7 +21,6 @@
|
||||
from yt_dlp.compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_HTMLParseError,
|
||||
compat_os_name,
|
||||
)
|
||||
from yt_dlp.utils import (
|
||||
Config,
|
||||
@ -49,7 +48,6 @@
|
||||
dfxp2srt,
|
||||
encode_base_n,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
expand_path,
|
||||
extract_attributes,
|
||||
extract_basic_auth,
|
||||
@ -69,10 +67,8 @@
|
||||
get_elements_html_by_class,
|
||||
get_elements_text_and_html_by_attribute,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
iri_to_uri,
|
||||
is_html,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
limit_length,
|
||||
locked_file,
|
||||
@ -567,10 +563,10 @@ def test_smuggle_url(self):
|
||||
self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
|
||||
|
||||
def test_shell_quote(self):
|
||||
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
|
||||
args = ['ffmpeg', '-i', 'ñ€ß\'.mp4']
|
||||
self.assertEqual(
|
||||
shell_quote(args),
|
||||
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
|
||||
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if os.name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
|
||||
|
||||
def test_float_or_none(self):
|
||||
self.assertEqual(float_or_none('42.42'), 42.42)
|
||||
@ -1310,15 +1306,10 @@ def test_clean_html(self):
|
||||
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
|
||||
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
|
||||
|
||||
def test_intlist_to_bytes(self):
|
||||
self.assertEqual(
|
||||
intlist_to_bytes([0, 1, 127, 128, 255]),
|
||||
b'\x00\x01\x7f\x80\xff')
|
||||
|
||||
def test_args_to_str(self):
|
||||
self.assertEqual(
|
||||
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
|
||||
'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""',
|
||||
'foo ba/r -baz \'2 be\' \'\'' if os.name != 'nt' else 'foo ba/r -baz "2 be" ""',
|
||||
)
|
||||
|
||||
def test_parse_filesize(self):
|
||||
@ -2118,7 +2109,7 @@ def test_extract_basic_auth(self):
|
||||
assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
|
||||
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
|
||||
|
||||
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
|
||||
@unittest.skipUnless(os.name == 'nt', 'Only relevant on Windows')
|
||||
def test_windows_escaping(self):
|
||||
tests = [
|
||||
'test"&',
|
||||
@ -2158,10 +2149,6 @@ def test_partial_application(self):
|
||||
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
|
||||
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
|
||||
|
||||
assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
|
||||
assert callable(join_nonempty()), 'varargs positional should apply partially'
|
||||
assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -68,6 +68,16 @@
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q',
|
||||
),
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
@ -183,6 +193,14 @@
|
||||
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
|
||||
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js',
|
||||
'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js',
|
||||
'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@ -254,8 +272,11 @@ def signature(jscode, sig_input):
|
||||
|
||||
|
||||
def n_sig(jscode, sig_input):
|
||||
funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
|
||||
return JSInterpreter(jscode).call_function(funcname, sig_input)
|
||||
ie = YoutubeIE(FakeYDL())
|
||||
funcname = ie._extract_n_function_name(jscode)
|
||||
jsi = JSInterpreter(jscode)
|
||||
func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname)))
|
||||
return func([sig_input])
|
||||
|
||||
|
||||
make_sig_test = t_factory(
|
||||
|
@ -26,7 +26,7 @@
|
||||
|
||||
from .cache import Cache
|
||||
from .compat import urllib # isort: split
|
||||
from .compat import compat_os_name, urllib_req_to_req
|
||||
from .compat import urllib_req_to_req
|
||||
from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
|
||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||
from .downloader.rtmp import rtmpdump_version
|
||||
@ -109,7 +109,6 @@
|
||||
determine_ext,
|
||||
determine_protocol,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
escapeHTML,
|
||||
expand_path,
|
||||
extract_basic_auth,
|
||||
@ -167,7 +166,7 @@
|
||||
)
|
||||
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
import ctypes
|
||||
|
||||
|
||||
@ -267,7 +266,9 @@ class YoutubeDL:
|
||||
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||
restrictfilenames: Do not allow "&" and spaces in file names
|
||||
trim_file_name: Limit length of filename (extension excluded)
|
||||
windowsfilenames: Force the filenames to be windows compatible
|
||||
windowsfilenames: True: Force filenames to be Windows compatible
|
||||
False: Sanitize filenames only minimally
|
||||
This option has no effect when running on Windows
|
||||
ignoreerrors: Do not stop on download/postprocessing errors.
|
||||
Can be 'only_download' to ignore only download errors.
|
||||
Default is 'only_download' for CLI, but False for API
|
||||
@ -282,7 +283,10 @@ class YoutubeDL:
|
||||
lazy_playlist: Process playlist entries as they are received.
|
||||
matchtitle: Download only matching titles.
|
||||
rejecttitle: Reject downloads for matching titles.
|
||||
logger: Log messages to a logging.Logger instance.
|
||||
logger: A class having a `debug`, `warning` and `error` function where
|
||||
each has a single string parameter, the message to be logged.
|
||||
For compatibility reasons, both debug and info messages are passed to `debug`.
|
||||
A debug message will have a prefix of `[debug] ` to discern it from info messages.
|
||||
logtostderr: Print everything to stderr instead of stdout.
|
||||
consoletitle: Display progress in the console window's titlebar.
|
||||
writedescription: Write the video description to a .description file
|
||||
@ -643,7 +647,7 @@ def __init__(self, params=None, auto_init=True):
|
||||
out=stdout,
|
||||
error=sys.stderr,
|
||||
screen=sys.stderr if self.params.get('quiet') else stdout,
|
||||
console=None if compat_os_name == 'nt' else next(
|
||||
console=None if os.name == 'nt' else next(
|
||||
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
|
||||
)
|
||||
|
||||
@ -952,7 +956,7 @@ def to_stderr(self, message, only_once=False):
|
||||
self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
|
||||
|
||||
def _send_console_code(self, code):
|
||||
if compat_os_name == 'nt' or not self._out_files.console:
|
||||
if os.name == 'nt' or not self._out_files.console:
|
||||
return
|
||||
self._write_string(code, self._out_files.console)
|
||||
|
||||
@ -960,7 +964,7 @@ def to_console_title(self, message):
|
||||
if not self.params.get('consoletitle', False):
|
||||
return
|
||||
message = remove_terminal_sequences(message)
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
if ctypes.windll.kernel32.GetConsoleWindow():
|
||||
# c_wchar_p() might not be necessary if `message` is
|
||||
# already of type unicode()
|
||||
@ -1117,7 +1121,7 @@ def report_file_delete(self, file_name):
|
||||
def raise_no_formats(self, info, forced=False, *, msg=None):
|
||||
has_drm = info.get('_has_drm')
|
||||
ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
|
||||
msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
|
||||
msg = msg or (has_drm and 'This video is DRM protected') or 'No video formats found!'
|
||||
if forced or not ignored:
|
||||
raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
|
||||
expected=has_drm or ignored or expected)
|
||||
@ -1193,8 +1197,7 @@ def _copy_infodict(info_dict):
|
||||
|
||||
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
|
||||
""" Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
|
||||
@param sanitize Whether to sanitize the output as a filename.
|
||||
For backward compatibility, a function can also be passed
|
||||
@param sanitize Whether to sanitize the output as a filename
|
||||
"""
|
||||
|
||||
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
|
||||
@ -1310,14 +1313,23 @@ def get_value(mdict):
|
||||
|
||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||
|
||||
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
|
||||
def filename_sanitizer(key, value, restricted):
|
||||
return sanitize_filename(str(value), restricted=restricted, is_id=(
|
||||
bool(re.search(r'(^|[_.])id(\.|$)', key))
|
||||
if 'filename-sanitization' in self.params['compat_opts']
|
||||
else NO_DEFAULT))
|
||||
|
||||
sanitizer = sanitize if callable(sanitize) else filename_sanitizer
|
||||
sanitize = bool(sanitize)
|
||||
if callable(sanitize):
|
||||
self.deprecation_warning('Passing a callable "sanitize" to YoutubeDL.prepare_outtmpl is deprecated')
|
||||
elif not sanitize:
|
||||
pass
|
||||
elif (sys.platform != 'win32' and not self.params.get('restrictfilenames')
|
||||
and self.params.get('windowsfilenames') is False):
|
||||
def sanitize(key, value):
|
||||
return str(value).replace('/', '\u29F8').replace('\0', '')
|
||||
else:
|
||||
def sanitize(key, value):
|
||||
return filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames'))
|
||||
|
||||
def _dumpjson_default(obj):
|
||||
if isinstance(obj, (set, LazyList)):
|
||||
@ -1400,13 +1412,13 @@ def create_key(outer_mobj):
|
||||
|
||||
if sanitize:
|
||||
# If value is an object, sanitize might convert it to a string
|
||||
# So we convert it to repr first
|
||||
# So we manually convert it before sanitizing
|
||||
if fmt[-1] == 'r':
|
||||
value, fmt = repr(value), str_fmt
|
||||
elif fmt[-1] == 'a':
|
||||
value, fmt = ascii(value), str_fmt
|
||||
if fmt[-1] in 'csra':
|
||||
value = sanitizer(last_field, value)
|
||||
value = sanitize(last_field, value)
|
||||
|
||||
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
|
||||
TMPL_DICT[key] = value
|
||||
@ -1948,6 +1960,7 @@ def _playlist_infodict(ie_result, strict=False, **kwargs):
|
||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||
'playlist_channel': ie_result.get('channel'),
|
||||
'playlist_channel_id': ie_result.get('channel_id'),
|
||||
'playlist_webpage_url': ie_result.get('webpage_url'),
|
||||
**kwargs,
|
||||
}
|
||||
if strict:
|
||||
@ -2196,7 +2209,7 @@ def _select_formats(self, formats, selector):
|
||||
def _default_format_spec(self, info_dict):
|
||||
prefer_best = (
|
||||
self.params['outtmpl']['default'] == '-'
|
||||
or info_dict.get('is_live') and not self.params.get('live_from_start'))
|
||||
or (info_dict.get('is_live') and not self.params.get('live_from_start')))
|
||||
|
||||
def can_merge():
|
||||
merger = FFmpegMergerPP(self)
|
||||
@ -2365,7 +2378,7 @@ def _merge(formats_pair):
|
||||
vexts=[f['ext'] for f in video_fmts],
|
||||
aexts=[f['ext'] for f in audio_fmts],
|
||||
preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
|
||||
or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
|
||||
or (self.params.get('prefer_free_formats') and ('webm', 'mkv'))))
|
||||
|
||||
filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
|
||||
|
||||
@ -3254,9 +3267,9 @@ def check_max_downloads():
|
||||
|
||||
if full_filename is None:
|
||||
return
|
||||
if not self._ensure_dir_exists(encodeFilename(full_filename)):
|
||||
if not self._ensure_dir_exists(full_filename):
|
||||
return
|
||||
if not self._ensure_dir_exists(encodeFilename(temp_filename)):
|
||||
if not self._ensure_dir_exists(temp_filename):
|
||||
return
|
||||
|
||||
if self._write_description('video', info_dict,
|
||||
@ -3286,16 +3299,16 @@ def check_max_downloads():
|
||||
if self.params.get('writeannotations', False):
|
||||
annofn = self.prepare_filename(info_dict, 'annotation')
|
||||
if annofn:
|
||||
if not self._ensure_dir_exists(encodeFilename(annofn)):
|
||||
if not self._ensure_dir_exists(annofn):
|
||||
return
|
||||
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
|
||||
if not self.params.get('overwrites', True) and os.path.exists(annofn):
|
||||
self.to_screen('[info] Video annotations are already present')
|
||||
elif not info_dict.get('annotations'):
|
||||
self.report_warning('There are no annotations to write.')
|
||||
else:
|
||||
try:
|
||||
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
||||
with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
|
||||
with open(annofn, 'w', encoding='utf-8') as annofile:
|
||||
annofile.write(info_dict['annotations'])
|
||||
except (KeyError, TypeError):
|
||||
self.report_warning('There are no annotations to write.')
|
||||
@ -3311,14 +3324,14 @@ def _write_link_file(link_type):
|
||||
f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
|
||||
return True
|
||||
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
|
||||
if not self._ensure_dir_exists(encodeFilename(linkfn)):
|
||||
if not self._ensure_dir_exists(linkfn):
|
||||
return False
|
||||
if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
|
||||
if self.params.get('overwrites', True) and os.path.exists(linkfn):
|
||||
self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
|
||||
return True
|
||||
try:
|
||||
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
|
||||
with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
|
||||
with open(to_high_limit_path(linkfn), 'w', encoding='utf-8',
|
||||
newline='\r\n' if link_type == 'url' else '\n') as linkfile:
|
||||
template_vars = {'url': url}
|
||||
if link_type == 'desktop':
|
||||
@ -3349,7 +3362,7 @@ def _write_link_file(link_type):
|
||||
|
||||
if self.params.get('skip_download'):
|
||||
info_dict['filepath'] = temp_filename
|
||||
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
|
||||
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
|
||||
replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
|
||||
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
|
||||
else:
|
||||
@ -3475,7 +3488,8 @@ def correct_ext(filename, ext=new_ext):
|
||||
self.report_file_already_downloaded(dl_filename)
|
||||
|
||||
dl_filename = dl_filename or temp_filename
|
||||
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
|
||||
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
|
||||
|
||||
except network_exceptions as err:
|
||||
self.report_error(f'unable to download video data: {err}')
|
||||
return
|
||||
@ -3533,8 +3547,8 @@ def ffmpeg_fixup(cndn, msg, cls):
|
||||
and info_dict.get('container') == 'm4a_dash',
|
||||
'writing DASH m4a. Only some players support this container',
|
||||
FFmpegFixupM4aPP)
|
||||
ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
|
||||
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
|
||||
ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts'))
|
||||
or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None),
|
||||
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
|
||||
FFmpegFixupM3u8PP)
|
||||
ffmpeg_fixup(downloader == 'dashsegments'
|
||||
@ -4287,7 +4301,7 @@ def _write_description(self, label, ie_result, descfn):
|
||||
else:
|
||||
try:
|
||||
self.to_screen(f'[info] Writing {label} description to: {descfn}')
|
||||
with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
|
||||
with open(descfn, 'w', encoding='utf-8') as descfile:
|
||||
descfile.write(ie_result['description'])
|
||||
except OSError:
|
||||
self.report_error(f'Cannot write {label} description file {descfn}')
|
||||
@ -4373,7 +4387,9 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
|
||||
return None
|
||||
|
||||
for idx, t in list(enumerate(thumbnails))[::-1]:
|
||||
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
|
||||
thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
|
||||
if multiple:
|
||||
thumb_ext = f'{t["id"]}.{thumb_ext}'
|
||||
thumb_display_id = f'{label} thumbnail {t["id"]}'
|
||||
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
|
||||
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
|
||||
@ -4389,7 +4405,7 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
|
||||
try:
|
||||
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
|
||||
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
|
||||
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
|
||||
with open(thumb_filename, 'wb') as thumbf:
|
||||
shutil.copyfileobj(uf, thumbf)
|
||||
ret.append(thumb_filename)
|
||||
t['filepath'] = thumb_filename
|
||||
|
@ -14,7 +14,6 @@
|
||||
import re
|
||||
import traceback
|
||||
|
||||
from .compat import compat_os_name
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
|
||||
from .downloader.external import get_external_downloader
|
||||
from .extractor import list_extractor_classes
|
||||
@ -44,7 +43,6 @@
|
||||
GeoUtils,
|
||||
PlaylistEntries,
|
||||
SameFileError,
|
||||
decodeOption,
|
||||
download_range_func,
|
||||
expand_path,
|
||||
float_or_none,
|
||||
@ -263,9 +261,11 @@ def parse_retries(name, value):
|
||||
elif value in ('inf', 'infinite'):
|
||||
return float('inf')
|
||||
try:
|
||||
return int(value)
|
||||
int_value = int(value)
|
||||
except (TypeError, ValueError):
|
||||
validate(False, f'{name} retry count', value)
|
||||
validate_positive(f'{name} retry count', int_value)
|
||||
return int_value
|
||||
|
||||
opts.retries = parse_retries('download', opts.retries)
|
||||
opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
|
||||
@ -883,8 +883,8 @@ def parse_options(argv=None):
|
||||
'listsubtitles': opts.listsubtitles,
|
||||
'subtitlesformat': opts.subtitlesformat,
|
||||
'subtitleslangs': opts.subtitleslangs,
|
||||
'matchtitle': decodeOption(opts.matchtitle),
|
||||
'rejecttitle': decodeOption(opts.rejecttitle),
|
||||
'matchtitle': opts.matchtitle,
|
||||
'rejecttitle': opts.rejecttitle,
|
||||
'max_downloads': opts.max_downloads,
|
||||
'prefer_free_formats': opts.prefer_free_formats,
|
||||
'trim_file_name': opts.trim_file_name,
|
||||
@ -1053,7 +1053,7 @@ def make_row(target, handler):
|
||||
ydl.warn_if_short_id(args)
|
||||
|
||||
# Show a useful error message and wait for keypress if not launched from shell on Windows
|
||||
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False):
|
||||
if not args and os.name == 'nt' and getattr(sys, 'frozen', False):
|
||||
import ctypes.wintypes
|
||||
import msvcrt
|
||||
|
||||
@ -1064,7 +1064,7 @@ def make_row(target, handler):
|
||||
# If we only have a single process attached, then the executable was double clicked
|
||||
# When using `pyinstaller` with `--onefile`, two processes get attached
|
||||
is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI')
|
||||
if attached_processes == 1 or is_onefile and attached_processes == 2:
|
||||
if attached_processes == 1 or (is_onefile and attached_processes == 2):
|
||||
print(parser._generate_error_message(
|
||||
'Do not double-click the executable, instead call it from a command line.\n'
|
||||
'Please read the README for further information on how to use yt-dlp: '
|
||||
@ -1111,9 +1111,9 @@ def main(argv=None):
|
||||
from .extractor import gen_extractors, list_extractors
|
||||
|
||||
__all__ = [
|
||||
'main',
|
||||
'YoutubeDL',
|
||||
'parse_options',
|
||||
'gen_extractors',
|
||||
'list_extractors',
|
||||
'main',
|
||||
'parse_options',
|
||||
]
|
||||
|
@ -3,7 +3,6 @@
|
||||
|
||||
from .compat import compat_ord
|
||||
from .dependencies import Cryptodome
|
||||
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||
|
||||
if Cryptodome.AES:
|
||||
def aes_cbc_decrypt_bytes(data, key, iv):
|
||||
@ -17,15 +16,15 @@ def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
|
||||
else:
|
||||
def aes_cbc_decrypt_bytes(data, key, iv):
|
||||
""" Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
|
||||
return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv))))
|
||||
return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
|
||||
|
||||
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
|
||||
""" Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
|
||||
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce))))
|
||||
return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
|
||||
|
||||
|
||||
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
|
||||
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
|
||||
return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
|
||||
|
||||
|
||||
BLOCK_SIZE_BYTES = 16
|
||||
@ -221,7 +220,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||
j0 = [*nonce, 0, 0, 0, 1]
|
||||
else:
|
||||
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
|
||||
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
|
||||
ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
|
||||
j0 = ghash(hash_subkey, ghash_in)
|
||||
|
||||
# TODO: add nonce support to aes_ctr_decrypt
|
||||
@ -234,9 +233,9 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||
s_tag = ghash(
|
||||
hash_subkey,
|
||||
data
|
||||
+ [0] * pad_len # pad
|
||||
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
|
||||
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
|
||||
+ [0] * pad_len # pad
|
||||
+ list((0 * 8).to_bytes(8, 'big') # length of associated data
|
||||
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
|
||||
)
|
||||
|
||||
if tag != aes_ctr_encrypt(s_tag, key, j0):
|
||||
@ -300,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
"""
|
||||
NONCE_LENGTH_BYTES = 8
|
||||
|
||||
data = bytes_to_intlist(base64.b64decode(data))
|
||||
password = bytes_to_intlist(password.encode())
|
||||
data = list(base64.b64decode(data))
|
||||
password = list(password.encode())
|
||||
|
||||
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
|
||||
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
|
||||
@ -310,7 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
cipher = data[NONCE_LENGTH_BYTES:]
|
||||
|
||||
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
|
||||
return intlist_to_bytes(decrypted_data)
|
||||
return bytes(decrypted_data)
|
||||
|
||||
|
||||
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
|
||||
@ -535,19 +534,17 @@ def ghash(subkey, data):
|
||||
__all__ = [
|
||||
'aes_cbc_decrypt',
|
||||
'aes_cbc_decrypt_bytes',
|
||||
'aes_ctr_decrypt',
|
||||
'aes_decrypt_text',
|
||||
'aes_decrypt',
|
||||
'aes_ecb_decrypt',
|
||||
'aes_gcm_decrypt_and_verify',
|
||||
'aes_gcm_decrypt_and_verify_bytes',
|
||||
|
||||
'aes_cbc_encrypt',
|
||||
'aes_cbc_encrypt_bytes',
|
||||
'aes_ctr_decrypt',
|
||||
'aes_ctr_encrypt',
|
||||
'aes_decrypt',
|
||||
'aes_decrypt_text',
|
||||
'aes_ecb_decrypt',
|
||||
'aes_ecb_encrypt',
|
||||
'aes_encrypt',
|
||||
|
||||
'aes_gcm_decrypt_and_verify',
|
||||
'aes_gcm_decrypt_and_verify_bytes',
|
||||
'key_expansion',
|
||||
'pad_block',
|
||||
'pkcs7_padding',
|
||||
|
@ -1,5 +1,4 @@
|
||||
import os
|
||||
import sys
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
@ -24,33 +23,14 @@ def compat_etree_fromstring(text):
|
||||
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
|
||||
|
||||
|
||||
compat_os_name = os._name if os.name == 'java' else os.name
|
||||
|
||||
|
||||
def compat_shlex_quote(s):
|
||||
from ..utils import shell_quote
|
||||
return shell_quote(s)
|
||||
|
||||
|
||||
def compat_ord(c):
|
||||
return c if isinstance(c, int) else ord(c)
|
||||
|
||||
|
||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||
# os.path.realpath on Windows does not follow symbolic links
|
||||
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
||||
def compat_realpath(path):
|
||||
while os.path.islink(path):
|
||||
path = os.path.abspath(os.readlink(path))
|
||||
return os.path.realpath(path)
|
||||
else:
|
||||
compat_realpath = os.path.realpath
|
||||
|
||||
|
||||
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/792
|
||||
# https://docs.python.org/3/library/os.path.html#os.path.expanduser
|
||||
if compat_os_name in ('nt', 'ce'):
|
||||
if os.name in ('nt', 'ce'):
|
||||
def compat_expanduser(path):
|
||||
HOME = os.environ.get('HOME')
|
||||
if not HOME:
|
||||
|
@ -8,16 +8,14 @@
|
||||
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
|
||||
del passthrough_module
|
||||
|
||||
import base64
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import functools # noqa: F401
|
||||
import os
|
||||
|
||||
compat_str = str
|
||||
|
||||
compat_b64decode = base64.b64decode
|
||||
compat_os_name = os.name
|
||||
compat_realpath = os.path.realpath
|
||||
|
||||
compat_urlparse = urllib.parse
|
||||
compat_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_unquote = urllib.parse.unquote
|
||||
compat_urllib_parse_urlencode = urllib.parse.urlencode
|
||||
compat_urllib_parse_urlparse = urllib.parse.urlparse
|
||||
|
||||
def compat_shlex_quote(s):
|
||||
from ..utils import shell_quote
|
||||
return shell_quote(s)
|
||||
|
@ -30,7 +30,7 @@
|
||||
from re import Pattern as compat_Pattern # noqa: F401
|
||||
from re import match as compat_Match # noqa: F401
|
||||
|
||||
from . import compat_expanduser, compat_HTMLParseError, compat_realpath
|
||||
from . import compat_expanduser, compat_HTMLParseError
|
||||
from .compat_utils import passthrough_module
|
||||
from ..dependencies import brotli as compat_brotli # noqa: F401
|
||||
from ..dependencies import websockets as compat_websockets # noqa: F401
|
||||
@ -78,7 +78,7 @@ def compat_setenv(key, value, env=os.environ):
|
||||
compat_map = map
|
||||
compat_numeric_types = (int, float, complex)
|
||||
compat_os_path_expanduser = compat_expanduser
|
||||
compat_os_path_realpath = compat_realpath
|
||||
compat_os_path_realpath = os.path.realpath
|
||||
compat_print = print
|
||||
compat_shlex_split = shlex.split
|
||||
compat_socket_create_connection = socket.create_connection
|
||||
@ -104,5 +104,12 @@ def compat_setenv(key, value, env=os.environ):
|
||||
compat_xpath = lambda xpath: xpath
|
||||
compat_zip = zip
|
||||
workaround_optparse_bug9161 = lambda: None
|
||||
compat_str = str
|
||||
compat_b64decode = base64.b64decode
|
||||
compat_urlparse = urllib.parse
|
||||
compat_parse_qs = urllib.parse.parse_qs
|
||||
compat_urllib_parse_unquote = urllib.parse.unquote
|
||||
compat_urllib_parse_urlencode = urllib.parse.urlencode
|
||||
compat_urllib_parse_urlparse = urllib.parse.urlparse
|
||||
|
||||
legacy = []
|
||||
|
@ -1,7 +0,0 @@
|
||||
# flake8: noqa: F405
|
||||
from functools import * # noqa: F403
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'functools')
|
||||
del passthrough_module
|
@ -7,9 +7,9 @@
|
||||
del passthrough_module
|
||||
|
||||
|
||||
from .. import compat_os_name
|
||||
import os
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
# On older Python versions, proxies are extracted from Windows registry erroneously. [1]
|
||||
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
|
||||
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
|
||||
@ -37,4 +37,4 @@ def getproxies_registry_patched():
|
||||
def getproxies():
|
||||
return getproxies_environment() or getproxies_registry_patched()
|
||||
|
||||
del compat_os_name
|
||||
del os
|
||||
|
@ -25,7 +25,6 @@
|
||||
aes_gcm_decrypt_and_verify_bytes,
|
||||
unpad_pkcs7,
|
||||
)
|
||||
from .compat import compat_os_name
|
||||
from .dependencies import (
|
||||
_SECRETSTORAGE_UNAVAILABLE_REASON,
|
||||
secretstorage,
|
||||
@ -196,7 +195,10 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
|
||||
def _firefox_browser_dirs():
|
||||
if sys.platform in ('cygwin', 'win32'):
|
||||
yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
|
||||
yield from map(os.path.expandvars, (
|
||||
R'%APPDATA%\Mozilla\Firefox\Profiles',
|
||||
R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles',
|
||||
))
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||
@ -343,7 +345,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
|
||||
logger.debug(f'cookie version breakdown: {counts}')
|
||||
return jar
|
||||
except PermissionError as error:
|
||||
if compat_os_name == 'nt' and error.errno == 13:
|
||||
if os.name == 'nt' and error.errno == 13:
|
||||
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
|
||||
logger.error(message)
|
||||
raise DownloadError(message) # force exit
|
||||
@ -1277,8 +1279,8 @@ def open(self, file, *, write=False):
|
||||
def _really_save(self, f, ignore_discard, ignore_expires):
|
||||
now = time.time()
|
||||
for cookie in self:
|
||||
if (not ignore_discard and cookie.discard
|
||||
or not ignore_expires and cookie.is_expired(now)):
|
||||
if ((not ignore_discard and cookie.discard)
|
||||
or (not ignore_expires and cookie.is_expired(now))):
|
||||
continue
|
||||
name, value = cookie.name, cookie.value
|
||||
if value is None:
|
||||
|
@ -24,7 +24,7 @@
|
||||
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
|
||||
from Crypto.Hash import CMAC, SHA1 # noqa: F401
|
||||
from Crypto.PublicKey import RSA # noqa: F401
|
||||
except ImportError:
|
||||
except (ImportError, OSError):
|
||||
__version__ = f'broken {__version__}'.strip()
|
||||
|
||||
|
||||
|
@ -20,9 +20,7 @@
|
||||
Namespace,
|
||||
RetryManager,
|
||||
classproperty,
|
||||
decodeArgument,
|
||||
deprecation_warning,
|
||||
encodeFilename,
|
||||
format_bytes,
|
||||
join_nonempty,
|
||||
parse_bytes,
|
||||
@ -219,7 +217,7 @@ def slow_down(self, start_time, now, byte_counter):
|
||||
def temp_name(self, filename):
|
||||
"""Returns a temporary filename for the given filename."""
|
||||
if self.params.get('nopart', False) or filename == '-' or \
|
||||
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
|
||||
(os.path.exists(filename) and not os.path.isfile(filename)):
|
||||
return filename
|
||||
return filename + '.part'
|
||||
|
||||
@ -273,7 +271,7 @@ def try_utime(self, filename, last_modified_hdr):
|
||||
"""Try to set the last-modified time of the given file."""
|
||||
if last_modified_hdr is None:
|
||||
return
|
||||
if not os.path.isfile(encodeFilename(filename)):
|
||||
if not os.path.isfile(filename):
|
||||
return
|
||||
timestr = last_modified_hdr
|
||||
if timestr is None:
|
||||
@ -432,13 +430,13 @@ def download(self, filename, info_dict, subtitle=False):
|
||||
"""
|
||||
nooverwrites_and_exists = (
|
||||
not self.params.get('overwrites', True)
|
||||
and os.path.exists(encodeFilename(filename))
|
||||
and os.path.exists(filename)
|
||||
)
|
||||
|
||||
if not hasattr(filename, 'write'):
|
||||
continuedl_and_exists = (
|
||||
self.params.get('continuedl', True)
|
||||
and os.path.isfile(encodeFilename(filename))
|
||||
and os.path.isfile(filename)
|
||||
and not self.params.get('nopart', False)
|
||||
)
|
||||
|
||||
@ -448,7 +446,7 @@ def download(self, filename, info_dict, subtitle=False):
|
||||
self._hook_progress({
|
||||
'filename': filename,
|
||||
'status': 'finished',
|
||||
'total_bytes': os.path.getsize(encodeFilename(filename)),
|
||||
'total_bytes': os.path.getsize(filename),
|
||||
}, info_dict)
|
||||
self._finish_multiline_status()
|
||||
return True, False
|
||||
@ -489,9 +487,7 @@ def _debug_cmd(self, args, exe=None):
|
||||
if not self.params.get('verbose', False):
|
||||
return
|
||||
|
||||
str_args = [decodeArgument(a) for a in args]
|
||||
|
||||
if exe is None:
|
||||
exe = os.path.basename(str_args[0])
|
||||
exe = os.path.basename(args[0])
|
||||
|
||||
self.write_debug(f'{exe} command line: {shell_quote(str_args)}')
|
||||
self.write_debug(f'{exe} command line: {shell_quote(args)}')
|
||||
|
@ -23,7 +23,6 @@
|
||||
cli_valueless_option,
|
||||
determine_ext,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
find_available_port,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
@ -67,7 +66,7 @@ def real_download(self, filename, info_dict):
|
||||
'elapsed': time.time() - started,
|
||||
}
|
||||
if filename != '-':
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
self.try_rename(tmpfilename, filename)
|
||||
status.update({
|
||||
'downloaded_bytes': fsize,
|
||||
@ -184,9 +183,9 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
dest.write(decrypt_fragment(fragment, src.read()))
|
||||
src.close()
|
||||
if not self.params.get('keep_fragments', False):
|
||||
self.try_remove(encodeFilename(fragment_filename))
|
||||
self.try_remove(fragment_filename)
|
||||
dest.close()
|
||||
self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls'))
|
||||
self.try_remove(f'{tmpfilename}.frag.urls')
|
||||
return 0
|
||||
|
||||
def _call_process(self, cmd, info_dict):
|
||||
@ -620,7 +619,7 @@ def _call_downloader(self, tmpfilename, info_dict):
|
||||
args += self._configuration_args(('_o1', '_o', ''))
|
||||
|
||||
args = [encodeArgument(opt) for opt in args]
|
||||
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
|
||||
args.append(ffpp._ffmpeg_filename_argument(tmpfilename))
|
||||
self._debug_cmd(args)
|
||||
|
||||
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)
|
||||
|
@ -9,10 +9,9 @@
|
||||
from .common import FileDownloader
|
||||
from .http import HttpFD
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_os_name
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError, IncompleteRead
|
||||
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
|
||||
from ..utils import DownloadError, RetryManager, traverse_obj
|
||||
from ..utils.networking import HTTPHeaderDict
|
||||
from ..utils.progress import ProgressCalculator
|
||||
|
||||
@ -152,7 +151,7 @@ def _append_fragment(self, ctx, frag_content):
|
||||
if self.__do_ytdl_file(ctx):
|
||||
self._write_ytdl_file(ctx)
|
||||
if not self.params.get('keep_fragments', False):
|
||||
self.try_remove(encodeFilename(ctx['fragment_filename_sanitized']))
|
||||
self.try_remove(ctx['fragment_filename_sanitized'])
|
||||
del ctx['fragment_filename_sanitized']
|
||||
|
||||
def _prepare_frag_download(self, ctx):
|
||||
@ -188,7 +187,7 @@ def _prepare_frag_download(self, ctx):
|
||||
})
|
||||
|
||||
if self.__do_ytdl_file(ctx):
|
||||
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
|
||||
ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename']))
|
||||
continuedl = self.params.get('continuedl', True)
|
||||
if continuedl and ytdl_file_exists:
|
||||
self._read_ytdl_file(ctx)
|
||||
@ -390,7 +389,7 @@ class FTPE(concurrent.futures.ThreadPoolExecutor):
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
pass
|
||||
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
def future_result(future):
|
||||
while True:
|
||||
try:
|
||||
|
@ -119,12 +119,12 @@ def real_download(self, filename, info_dict):
|
||||
self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
|
||||
|
||||
def is_ad_fragment_start(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
|
||||
return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s)
|
||||
or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')))
|
||||
|
||||
def is_ad_fragment_end(s):
|
||||
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
|
||||
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
|
||||
return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s)
|
||||
or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')))
|
||||
|
||||
fragments = []
|
||||
|
||||
|
@ -15,7 +15,6 @@
|
||||
ThrottledDownload,
|
||||
XAttrMetadataError,
|
||||
XAttrUnavailableError,
|
||||
encodeFilename,
|
||||
int_or_none,
|
||||
parse_http_range,
|
||||
try_call,
|
||||
@ -58,9 +57,8 @@ class DownloadContext(dict):
|
||||
|
||||
if self.params.get('continuedl', True):
|
||||
# Establish possible resume length
|
||||
if os.path.isfile(encodeFilename(ctx.tmpfilename)):
|
||||
ctx.resume_len = os.path.getsize(
|
||||
encodeFilename(ctx.tmpfilename))
|
||||
if os.path.isfile(ctx.tmpfilename):
|
||||
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
|
||||
|
||||
ctx.is_resume = ctx.resume_len > 0
|
||||
|
||||
@ -241,7 +239,7 @@ def retry(e):
|
||||
ctx.resume_len = byte_counter
|
||||
else:
|
||||
try:
|
||||
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
|
||||
ctx.resume_len = os.path.getsize(ctx.tmpfilename)
|
||||
except FileNotFoundError:
|
||||
ctx.resume_len = 0
|
||||
raise RetryDownload(e)
|
||||
|
@ -8,7 +8,6 @@
|
||||
Popen,
|
||||
check_executable,
|
||||
encodeArgument,
|
||||
encodeFilename,
|
||||
get_exe_version,
|
||||
)
|
||||
|
||||
@ -179,7 +178,7 @@ def run_rtmpdump(args):
|
||||
return False
|
||||
|
||||
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
prevsize = os.path.getsize(tmpfilename)
|
||||
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
args = [*basic_args, '--resume']
|
||||
@ -187,7 +186,7 @@ def run_rtmpdump(args):
|
||||
args += ['--skip', '1']
|
||||
args = [encodeArgument(a) for a in args]
|
||||
retval = run_rtmpdump(args)
|
||||
cursize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
cursize = os.path.getsize(tmpfilename)
|
||||
if prevsize == cursize and retval == RD_FAILED:
|
||||
break
|
||||
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
|
||||
@ -196,7 +195,7 @@ def run_rtmpdump(args):
|
||||
retval = RD_SUCCESS
|
||||
break
|
||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
|
@ -2,7 +2,7 @@
|
||||
import subprocess
|
||||
|
||||
from .common import FileDownloader
|
||||
from ..utils import check_executable, encodeFilename
|
||||
from ..utils import check_executable
|
||||
|
||||
|
||||
class RtspFD(FileDownloader):
|
||||
@ -26,7 +26,7 @@ def real_download(self, filename, info_dict):
|
||||
|
||||
retval = subprocess.call(args)
|
||||
if retval == 0:
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
fsize = os.path.getsize(tmpfilename)
|
||||
self.to_screen(f'\r[{args[0]}] {fsize} bytes')
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
|
@ -123,8 +123,8 @@ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None
|
||||
data,
|
||||
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
|
||||
|
||||
func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
|
||||
or frag_index == 1 and try_refresh_replay_beginning
|
||||
func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live)
|
||||
or (frag_index == 1 and try_refresh_replay_beginning)
|
||||
or parse_actions_replay)
|
||||
return (True, *func(live_chat_continuation))
|
||||
except HTTPError as err:
|
||||
|
@ -208,6 +208,10 @@
|
||||
BandcampUserIE,
|
||||
BandcampWeeklyIE,
|
||||
)
|
||||
from .bandlab import (
|
||||
BandlabIE,
|
||||
BandlabPlaylistIE,
|
||||
)
|
||||
from .bannedvideo import BannedVideoIE
|
||||
from .bbc import (
|
||||
BBCIE,
|
||||
@ -252,6 +256,7 @@
|
||||
BilibiliCheeseIE,
|
||||
BilibiliCheeseSeasonIE,
|
||||
BilibiliCollectionListIE,
|
||||
BiliBiliDynamicIE,
|
||||
BilibiliFavoritesListIE,
|
||||
BiliBiliIE,
|
||||
BiliBiliPlayerIE,
|
||||
@ -551,6 +556,7 @@
|
||||
DropoutIE,
|
||||
DropoutSeasonIE,
|
||||
)
|
||||
from .drtalks import DrTalksIE
|
||||
from .drtuber import DrTuberIE
|
||||
from .drtv import (
|
||||
DRTVIE,
|
||||
@ -580,6 +586,10 @@
|
||||
EggheadCourseIE,
|
||||
EggheadLessonIE,
|
||||
)
|
||||
from .eggs import (
|
||||
EggsArtistIE,
|
||||
EggsIE,
|
||||
)
|
||||
from .eighttracks import EightTracksIE
|
||||
from .eitb import EitbIE
|
||||
from .elementorembed import ElementorEmbedIE
|
||||
@ -942,6 +952,10 @@
|
||||
from .kankanews import KankaNewsIE
|
||||
from .karaoketv import KaraoketvIE
|
||||
from .kelbyone import KelbyOneIE
|
||||
from .kenh14 import (
|
||||
Kenh14PlaylistIE,
|
||||
Kenh14VideoIE,
|
||||
)
|
||||
from .khanacademy import (
|
||||
KhanAcademyIE,
|
||||
KhanAcademyUnitIE,
|
||||
@ -1131,12 +1145,6 @@
|
||||
MicrosoftMediusIE,
|
||||
)
|
||||
from .microsoftstream import MicrosoftStreamIE
|
||||
from .mildom import (
|
||||
MildomClipIE,
|
||||
MildomIE,
|
||||
MildomUserVodIE,
|
||||
MildomVodIE,
|
||||
)
|
||||
from .minds import (
|
||||
MindsChannelIE,
|
||||
MindsGroupIE,
|
||||
@ -1156,6 +1164,7 @@
|
||||
from .mixch import (
|
||||
MixchArchiveIE,
|
||||
MixchIE,
|
||||
MixchMovieIE,
|
||||
)
|
||||
from .mixcloud import (
|
||||
MixcloudIE,
|
||||
@ -1275,6 +1284,10 @@
|
||||
)
|
||||
from .nekohacker import NekoHackerIE
|
||||
from .nerdcubed import NerdCubedFeedIE
|
||||
from .nest import (
|
||||
NestClipIE,
|
||||
NestIE,
|
||||
)
|
||||
from .neteasemusic import (
|
||||
NetEaseMusicAlbumIE,
|
||||
NetEaseMusicDjRadioIE,
|
||||
@ -1517,8 +1530,8 @@
|
||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||
from .phoenix import PhoenixIE
|
||||
from .photobucket import PhotobucketIE
|
||||
from .pialive import PiaLiveIE
|
||||
from .piapro import PiaproIE
|
||||
from .piaulizaportal import PIAULIZAPortalIE
|
||||
from .picarto import (
|
||||
PicartoIE,
|
||||
PicartoVodIE,
|
||||
@ -1529,6 +1542,10 @@
|
||||
PinterestCollectionIE,
|
||||
PinterestIE,
|
||||
)
|
||||
from .piramidetv import (
|
||||
PiramideTVChannelIE,
|
||||
PiramideTVIE,
|
||||
)
|
||||
from .pixivsketch import (
|
||||
PixivSketchIE,
|
||||
PixivSketchUserIE,
|
||||
@ -1548,16 +1565,13 @@
|
||||
PluralsightIE,
|
||||
)
|
||||
from .plutotv import PlutoTVIE
|
||||
from .plvideo import PlVideoIE
|
||||
from .podbayfm import (
|
||||
PodbayFMChannelIE,
|
||||
PodbayFMIE,
|
||||
)
|
||||
from .podchaser import PodchaserIE
|
||||
from .podomatic import PodomaticIE
|
||||
from .pokemon import (
|
||||
PokemonIE,
|
||||
PokemonWatchIE,
|
||||
)
|
||||
from .pokergo import (
|
||||
PokerGoCollectionIE,
|
||||
PokerGoIE,
|
||||
@ -1648,6 +1662,7 @@
|
||||
RadioKapitalIE,
|
||||
RadioKapitalShowIE,
|
||||
)
|
||||
from .radioradicale import RadioRadicaleIE
|
||||
from .radiozet import RadioZetPodcastIE
|
||||
from .radlive import (
|
||||
RadLiveChannelIE,
|
||||
@ -1939,9 +1954,7 @@
|
||||
)
|
||||
from .spreaker import (
|
||||
SpreakerIE,
|
||||
SpreakerPageIE,
|
||||
SpreakerShowIE,
|
||||
SpreakerShowPageIE,
|
||||
)
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
@ -2252,6 +2265,10 @@
|
||||
)
|
||||
from .ukcolumn import UkColumnIE
|
||||
from .uktvplay import UKTVPlayIE
|
||||
from .uliza import (
|
||||
UlizaPlayerIE,
|
||||
UlizaPortalIE,
|
||||
)
|
||||
from .umg import UMGDeIE
|
||||
from .unistra import UnistraIE
|
||||
from .unity import UnityIE
|
||||
@ -2280,10 +2297,6 @@
|
||||
from .varzesh3 import Varzesh3IE
|
||||
from .vbox7 import Vbox7IE
|
||||
from .veo import VeoIE
|
||||
from .veoh import (
|
||||
VeohIE,
|
||||
VeohUserIE,
|
||||
)
|
||||
from .vesti import VestiIE
|
||||
from .vevo import (
|
||||
VevoIE,
|
||||
@ -2356,10 +2369,6 @@
|
||||
VimmIE,
|
||||
VimmRecordingIE,
|
||||
)
|
||||
from .vine import (
|
||||
VineIE,
|
||||
VineUserIE,
|
||||
)
|
||||
from .viously import ViouslyIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .viu import (
|
||||
|
@ -6,7 +6,6 @@
|
||||
import io
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
@ -18,10 +17,8 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
@ -72,15 +69,15 @@ def _get_videokey_from_ticket(self, ticket):
|
||||
})
|
||||
|
||||
res = decode_base_n(license_response['k'], table=self._STRTABLE)
|
||||
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
|
||||
encvideokey = list(res.to_bytes(16, 'big'))
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self._HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
enckey = list(h.digest())
|
||||
|
||||
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
return bytes(aes_ecb_decrypt(encvideokey, enckey))
|
||||
|
||||
|
||||
class AbemaTVBaseIE(InfoExtractor):
|
||||
|
@ -11,11 +11,9 @@
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
@ -198,16 +196,16 @@ def _real_extract(self, url):
|
||||
|
||||
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||
self._K = ''.join(random.choices('0123456789abcdef', k=16))
|
||||
message = bytes_to_intlist(json.dumps({
|
||||
message = list(json.dumps({
|
||||
'k': self._K,
|
||||
't': token,
|
||||
}))
|
||||
}).encode())
|
||||
|
||||
# Sometimes authentication fails for no good reason, retry with
|
||||
# a different random padding
|
||||
links_data = None
|
||||
for _ in range(3):
|
||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||
padded_message = bytes(pkcs1pad(message, 128))
|
||||
n, e = self._RSA_KEY
|
||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||
authorization = base64.b64encode(encrypted_message).decode()
|
||||
@ -234,7 +232,7 @@ def _real_extract(self, url):
|
||||
|
||||
error = self._parse_json(e.cause.response.read(), video_id)
|
||||
message = error.get('message')
|
||||
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||
self.raise_geo_restricted(msg=message)
|
||||
raise ExtractorError(message)
|
||||
else:
|
||||
|
@ -66,6 +66,14 @@ def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
|
||||
extensions={'legacy_ssl': True}), display_id,
|
||||
'Downloading API JSON', 'Unable to download API JSON')
|
||||
|
||||
@staticmethod
|
||||
def _fixup_thumb(thumb_url):
|
||||
if not url_or_none(thumb_url):
|
||||
return None
|
||||
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
|
||||
return [{'url': thumb_url, 'ext': 'jpg'}]
|
||||
|
||||
|
||||
class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
IE_NAME = 'soop'
|
||||
@ -155,7 +163,7 @@ def _real_extract(self, url):
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('bj_id', {str}),
|
||||
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
})
|
||||
|
||||
entries = []
|
||||
@ -226,8 +234,7 @@ def _real_extract(self, url):
|
||||
|
||||
return self.playlist_result(self._entries(data), video_id)
|
||||
|
||||
@staticmethod
|
||||
def _entries(data):
|
||||
def _entries(self, data):
|
||||
# 'files' is always a list with 1 element
|
||||
yield from traverse_obj(data, (
|
||||
'data', lambda _, v: v['story_type'] == 'catch',
|
||||
@ -238,7 +245,7 @@ def _entries(data):
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('writer_nick', {str}),
|
||||
'uploader_id': ('writer_id', {str}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
'thumbnails': ('thumb', {self._fixup_thumb}),
|
||||
'timestamp': ('write_timestamp', {int_or_none}),
|
||||
}))
|
||||
|
||||
|
@ -8,10 +8,8 @@
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_encrypt
|
||||
from ..utils import (
|
||||
bytes_to_intlist,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
strip_jsonp,
|
||||
@ -234,8 +232,8 @@ def _get_video_json(self, access_key, video_id, extracted_token):
|
||||
server_time = self._server_time(access_key, video_id)
|
||||
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
|
||||
|
||||
auth_secret = intlist_to_bytes(aes_encrypt(
|
||||
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
|
||||
auth_secret = bytes(aes_encrypt(
|
||||
list(input_data[:64].encode()), list(self._AUTH_KEY)))
|
||||
query = {
|
||||
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
|
||||
'rtyp': 'fp',
|
||||
|
@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
],
|
||||
}, {
|
||||
# The reviewbody is None for one of the reviews; just need to extract data without crashing
|
||||
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'info_dict': {
|
||||
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
|
||||
'ext': 'mp3',
|
||||
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'creators': ['Grateful Dead'],
|
||||
'duration': 338.31,
|
||||
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
|
||||
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
|
||||
'display_id': 'gd95-04-02d1t04.shn',
|
||||
'location': 'Pyramid Arena',
|
||||
'uploader': 'jon@archive.org',
|
||||
'album': '1995-04-02 - Pyramid Arena',
|
||||
'upload_date': '20040519',
|
||||
'track_number': 4,
|
||||
'release_date': '19950402',
|
||||
'timestamp': 1084927901,
|
||||
},
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -335,7 +355,7 @@ def _real_extract(self, url):
|
||||
info['comments'].append({
|
||||
'id': review.get('review_id'),
|
||||
'author': review.get('reviewer'),
|
||||
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'),
|
||||
'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
|
||||
'timestamp': unified_timestamp(review.get('createdate')),
|
||||
'parent': 'root'})
|
||||
|
||||
|
437
yt_dlp/extractor/bandlab.py
Normal file
437
yt_dlp/extractor/bandlab.py
Normal file
@ -0,0 +1,437 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
truncate_string,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class BandlabBaseIE(InfoExtractor):
    """Shared API access and metadata mapping for the BandLab extractors."""

    def _call_api(self, endpoint, asset_id, **kwargs):
        """Download JSON from the BandLab v1.3 web API.

        @param endpoint   API collection name placed in the URL path
        @param asset_id   ID appended to the URL; also passed as the download's video id
        @param kwargs     Forwarded to `_download_json`; a `headers` entry is
                          merged on top of the default request headers
        @returns          Whatever `_download_json` returns
        """
        extra_headers = kwargs.pop('headers', None) or {}
        api_url = f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}'
        request_headers = {
            'accept': 'application/json',
            'referer': 'https://www.bandlab.com/',
            'x-client-id': 'BandLab-Web',
            'x-client-version': '10.1.124',
            # Caller-supplied headers come last so they override the defaults
            **extra_headers,
        }
        return self._download_json(api_url, asset_id, headers=request_headers, **kwargs)

    def _parse_revision(self, revision_data, url=None):
        """Build an info dict for a revision (audio-only) from its API data.

        @param revision_data   Revision JSON object
        @param url             Optional page URL offered as the first `webpage_url` candidate
        """
        info = {
            'vcodec': 'none',
            'media_type': 'revision',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        info.update(traverse_obj(revision_data, {
            # Candidates: the given url, then a URL formatted from the revision id
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'title': ('song', 'name', {str}),
            'track': ('song', 'name', {str}),
            'url': ('mixdown', 'file', {url_or_none}),
            'thumbnail': ('song', 'picture', 'url', {url_or_none}),
            'description': ('description', {str}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
            'duration': ('mixdown', 'duration', {float_or_none}),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'genres': ('genres', ..., 'name', {str}),
        }))
        return info

    def _parse_track(self, track_data, url=None):
        """Build an info dict for a track post (audio-only) from its API data.

        @param track_data   Post JSON object carrying a `track` member
        @param url          Optional page URL offered as the first `webpage_url` candidate
        """
        info = {
            'vcodec': 'none',
            'media_type': 'track',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        info.update(traverse_obj(track_data, {
            # Candidates: the given url, then a URL formatted from the post id
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'url': ('track', 'sample', 'audioUrl', {url_or_none}),
            'title': ('track', 'name', {str}),
            'track': ('track', 'name', {str}),
            'description': ('caption', {str}),
            'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('track', 'sample', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
        }))
        return info

    def _parse_video(self, video_data, url=None):
        """Build an info dict for a video post from its API data.

        @param video_data   Post JSON object carrying a `video` member
        @param url          Optional page URL offered as the first `webpage_url` candidate
        """
        info = {
            'media_type': 'video',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        info.update(traverse_obj(video_data, {
            'id': ('id', {str}),
            # Candidates: the given url, then a URL formatted from the post id
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'url': ('video', 'url', {url_or_none}),
            # The title is derived from the caption: newlines flattened, then truncated
            'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
            'description': ('caption', {str}),
            'thumbnail': ('video', 'picture', 'url', {url_or_none}),
            'view_count': ('video', 'counters', 'plays', {int_or_none}),
            'like_count': ('video', 'counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('video', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
        }))
        return info
|
||||
|
||||
|
||||
class BandlabIE(BandlabBaseIE):
|
||||
_VALID_URL = [
|
||||
r'https?://(?:www\.)?bandlab.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
|
||||
r'https?://(?:www\.)?bandlab.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
|
||||
]
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
|
||||
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
|
||||
'info_dict': {
|
||||
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
|
||||
'ext': 'm4a',
|
||||
'uploader_id': 'ender_milze',
|
||||
'track': 'sweet black',
|
||||
'description': 'composed by juanjn3737',
|
||||
'timestamp': 1702171963,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 54.629999999999995,
|
||||
'title': 'sweet black',
|
||||
'upload_date': '20231210',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
|
||||
'genres': ['Lofi'],
|
||||
'uploader': 'ender milze',
|
||||
'comment_count': int,
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# Same track as above but post URL
|
||||
'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
|
||||
'md5': '46f7b43367dd268bbcf0bbe466753b2c',
|
||||
'info_dict': {
|
||||
'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
|
||||
'ext': 'm4a',
|
||||
'uploader_id': 'ender_milze',
|
||||
'track': 'sweet black',
|
||||
'description': 'composed by juanjn3737',
|
||||
'timestamp': 1702171973,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'duration': 54.629999999999995,
|
||||
'title': 'sweet black',
|
||||
'upload_date': '20231210',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
|
||||
'genres': ['Lofi'],
|
||||
'uploader': 'ender milze',
|
||||
'comment_count': int,
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# SharedKey Example
|
||||
'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
|
||||
'md5': '15174b57c44440e2a2008be9cae00250',
|
||||
'info_dict': {
|
||||
'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
|
||||
'ext': 'm4a',
|
||||
'comment_count': int,
|
||||
'genres': ['Other'],
|
||||
'uploader_id': 'user8353034818103753',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
|
||||
'timestamp': 1709625771,
|
||||
'track': 'PodcastMaerchen4b',
|
||||
'duration': 468.14,
|
||||
'view_count': int,
|
||||
'description': 'Podcast: Neues aus der Märchenwelt',
|
||||
'like_count': int,
|
||||
'upload_date': '20240305',
|
||||
'uploader': 'Erna Wageneder',
|
||||
'title': 'PodcastMaerchen4b',
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# Different Revision selected
|
||||
'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
|
||||
'md5': '74e055ef9325d63f37088772fbfe4454',
|
||||
'info_dict': {
|
||||
'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
|
||||
'ext': 'm4a',
|
||||
'timestamp': 1588273294,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
|
||||
'description': 'Final Revision.',
|
||||
'title': 'Replay ( Instrumental)',
|
||||
'uploader': 'David R Sparks',
|
||||
'uploader_id': 'davesnothome69',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'track': 'Replay ( Instrumental)',
|
||||
'genres': ['Rock'],
|
||||
'upload_date': '20200430',
|
||||
'like_count': int,
|
||||
'duration': 279.43,
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# Video
|
||||
'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
|
||||
'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
|
||||
'info_dict': {
|
||||
'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
|
||||
'ext': 'mp4',
|
||||
'duration': 44.705,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
|
||||
'comment_count': int,
|
||||
'title': 'backing vocals',
|
||||
'uploader_id': 'marliashya',
|
||||
'uploader': 'auraa',
|
||||
'like_count': int,
|
||||
'description': 'backing vocals',
|
||||
'media_type': 'video',
|
||||
},
|
||||
}, {
|
||||
# Embed Example
|
||||
'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
|
||||
'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
|
||||
'info_dict': {
|
||||
'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
|
||||
'ext': 'm4a',
|
||||
'comment_count': int,
|
||||
'genres': ['Electronic'],
|
||||
'uploader': 'Charlie Henson',
|
||||
'timestamp': 1587328674,
|
||||
'upload_date': '20200419',
|
||||
'view_count': int,
|
||||
'track': 'Positronic Meltdown',
|
||||
'duration': 318.55,
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
|
||||
'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
|
||||
'uploader_id': 'microfreaks',
|
||||
'title': 'Positronic Meltdown',
|
||||
'like_count': int,
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}, {
|
||||
# Track without revisions available
|
||||
'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
|
||||
'md5': 'f05d68a3769952c2d9257c473e14c15f',
|
||||
'info_dict': {
|
||||
'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
|
||||
'ext': 'm4a',
|
||||
'track': 'insame',
|
||||
'like_count': int,
|
||||
'duration': 84.03,
|
||||
'title': 'insame',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'uploader': 'Sorakime',
|
||||
'uploader_id': 'sorakime',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
|
||||
'timestamp': 1691162128,
|
||||
'upload_date': '20230804',
|
||||
'media_type': 'track',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://phantomluigi.github.io/',
|
||||
'info_dict': {
|
||||
'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
|
||||
'ext': 'm4a',
|
||||
'view_count': int,
|
||||
'upload_date': '20240913',
|
||||
'uploader_id': 'phantommusicofficial',
|
||||
'timestamp': 1726194897,
|
||||
'uploader': 'Phantom',
|
||||
'comment_count': int,
|
||||
'genres': ['Progresive Rock'],
|
||||
'description': 'md5:a38cd668f7a2843295ef284114f18429',
|
||||
'duration': 225.23,
|
||||
'like_count': int,
|
||||
'title': 'Vermilion Pt. 2 (Cover)',
|
||||
'track': 'Vermilion Pt. 2 (Cover)',
|
||||
'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
|
||||
'media_type': 'revision',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a Bandlab URL to revision, track or video data and parse it.

    The revision id is taken from the URL itself (``revision`` URLs) or from
    the ``revId``/``id`` query parameters. When neither is present the post
    is fetched from the API and inspected for an embedded revision.

    Raises:
        ExtractorError: if the post has no revision and its type is neither
            ``Video`` nor ``Track``.
    """
    display_id, url_type = self._match_valid_url(url).group('id', 'url_type')
    query_params = parse_qs(url)

    if url_type == 'revision':
        # The path component already is the revision id
        revision_id = display_id
    else:
        revision_id = traverse_obj(query_params, (('revId', 'id'), 0, any))

    revision_data = None
    if not revision_id:
        post_data = self._call_api(
            'posts', display_id, note='Downloading post data',
            query=traverse_obj(query_params, {'sharedKey': ('sharedKey', 0)}))

        revision_id = traverse_obj(
            post_data, (('revisionId', ('revision', 'id')), {str}, any))
        revision_data = traverse_obj(post_data, ('revision', {dict}))

        if not (revision_data or revision_id):
            # No revision attached at all: the post itself carries the media
            post_type = post_data.get('type')
            if post_type == 'Video':
                return self._parse_video(post_data, url=url)
            if post_type == 'Track':
                return self._parse_track(post_data, url=url)
            raise ExtractorError(f'Could not extract data for post type {post_type!r}')

    if not revision_data:
        # Only an id is known so far; fetch the full revision object
        revision_data = self._call_api(
            'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})

    return self._parse_revision(revision_data, url=url)
|
||||
|
||||
|
||||
class BandlabPlaylistIE(BandlabBaseIE):
    # Extractor for Bandlab albums and collections, including the shared
    # "embed/collection" player URL that is used for both kinds.
    _VALID_URL = [
        # The dot in the host name is escaped so that it only matches a literal "."
        r'https?://(?:www\.)?bandlab\.com/(?:\w+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'info_dict': {
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
            'release_date': '20221003',
            'title': 'Remnants',
            'album': 'Remnants',
            'like_count': int,
            'album_type': 'LP',
            'description': 'A collection of some feel good, rock hits.',
            'comment_count': int,
            'view_count': int,
            'id': '89b79ea6-de42-ed11-b495-00224845aac7',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
        'info_dict': {
            'id': '955102d4-1040-ef11-86c3-000d3a42581b',
            'timestamp': 1720762659,
            'view_count': int,
            'title': 'My Shit 🖤',
            'uploader_id': 'slytheband',
            'uploader': '𝓢𝓛𝓨',
            'upload_date': '20240712',
            'like_count': int,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
        },
        'playlist_count': 15,
    }, {
        # Embeds can contain both albums and collections with the same URL pattern. This is an album
        'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
        'info_dict': {
            'id': '12cc6f7f-951b-ee11-907c-00224844f303',
            'release_date': '20230706',
            'description': 'This is a collection of songs I created when I had an Amiga computer.',
            'view_count': int,
            'title': 'Mark Salud The Amiga Collection',
            'uploader_id': 'mssirmooth1962',
            'comment_count': int,
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
            'like_count': int,
            'uploader': 'Mark Salud',
            'album': 'Mark Salud The Amiga Collection',
            'album_type': 'LP',
        },
        'playlist_count': 24,
    }, {
        # Tracks without revision id
        'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
        'info_dict': {
            'like_count': int,
            'uploader_id': 'sorakime',
            'comment_count': int,
            'uploader': 'Sorakime',
            'view_count': int,
            'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
            'release_date': '20230812',
            'title': 'Art',
            'album': 'Art',
            'album_type': 'Album',
            'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'only_matching': True,
    }]

    def _entries(self, album_data):
        """Yield one parsed entry per post of the playlist.

        Posts without a truthy ``type`` are filtered out up front; posts of
        an unrecognised type are skipped with a warning.
        """
        for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
            post_type = post['type']
            if post_type == 'Revision':
                yield self._parse_revision(post.get('revision'))
            elif post_type == 'Track':
                yield self._parse_track(post)
            elif post_type == 'Video':
                yield self._parse_video(post)
            else:
                self.report_warning(f'Skipping unknown post type: "{post_type}"')

    def _real_extract(self, url):
        """Build the playlist result for an album, collection or embed URL.

        Raises:
            ExtractorError: if no endpoint returned usable data, or if the
                last response carries an ``errorCode``.
        """
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')

        # An embed URL does not reveal whether it is an album or a collection,
        # so both endpoints are tried in order.
        endpoints = {
            'albums': ['albums'],
            'collections': ['collections'],
            'embed': ['collections', 'albums'],
        }.get(playlist_type)

        playlist_data = None
        for endpoint in endpoints:
            playlist_data = self._call_api(
                endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
                fatal=False, expected_status=404)
            # NOTE(review): the request is non-fatal, so the helper presumably
            # can hand back a non-dict value on failure - confirm against
            # _call_api. Guard so that case does not crash on `.get`.
            if isinstance(playlist_data, dict) and not playlist_data.get('errorCode'):
                playlist_type = endpoint
                break

        if not isinstance(playlist_data, dict):
            raise ExtractorError('Could not find playlist data')
        if error_code := playlist_data.get('errorCode'):
            raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')

        return self.playlist_result(
            self._entries(playlist_data), playlist_id,
            **traverse_obj(playlist_data, {
                'title': ('name', {str}),
                'description': ('description', {str}),
                'uploader': ('creator', 'name', {str}),
                'uploader_id': ('creator', 'username', {str}),
                'timestamp': ('createdOn', {parse_iso8601}),
                'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
                'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
                'like_count': ('counters', 'likes', {int_or_none}),
                'comment_count': ('counters', 'comments', {int_or_none}),
                'view_count': ('counters', 'plays', {int_or_none}),
            }),
            # Album-specific fields are only added when the albums endpoint answered
            **(traverse_obj(playlist_data, {
                'album': ('name', {str}),
                'album_type': ('type', {str}),
            }) if playlist_type == 'albums' else {}))
|
@ -18,7 +18,6 @@
|
||||
InAdvancePagedList,
|
||||
OnDemandPagedList,
|
||||
bool_or_none,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
@ -33,6 +32,7 @@
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
sanitize_url,
|
||||
smuggle_url,
|
||||
srt_subtitles_timecode,
|
||||
str_or_none,
|
||||
@ -63,7 +63,7 @@ def _check_missing_formats(self, play_info, formats):
|
||||
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
|
||||
if missing_formats:
|
||||
self.to_screen(
|
||||
f'Format(s) {missing_formats} are missing; you have to login or '
|
||||
f'Format(s) {missing_formats} are missing; you have to '
|
||||
f'become a premium member to download them. {self._login_hint()}')
|
||||
|
||||
def extract_formats(self, play_info):
|
||||
@ -165,14 +165,18 @@ def _sign_wbi(self, params, video_id):
|
||||
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
|
||||
return params
|
||||
|
||||
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
|
||||
if qn:
|
||||
params['qn'] = qn
|
||||
def _download_playinfo(self, bvid, cid, headers=None, query=None):
|
||||
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})}
|
||||
if self.is_logged_in:
|
||||
params.pop('try_look', None)
|
||||
if qn := params.get('qn'):
|
||||
note = f'Downloading video format {qn} for cid {cid}'
|
||||
else:
|
||||
note = f'Downloading video formats for cid {cid}'
|
||||
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
|
||||
query=self._sign_wbi(params, bvid), headers=headers,
|
||||
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
|
||||
query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@ -191,7 +195,7 @@ def _get_subtitles(self, video_id, cid, aid=None):
|
||||
}
|
||||
|
||||
video_info = self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', video_id,
|
||||
'https://api.bilibili.com/x/player/wbi/v2', video_id,
|
||||
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
|
||||
note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
|
||||
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
|
||||
@ -207,7 +211,7 @@ def _get_subtitles(self, video_id, cid, aid=None):
|
||||
|
||||
def _get_chapters(self, aid, cid):
|
||||
chapters = aid and cid and self._download_json(
|
||||
'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid},
|
||||
note='Extracting chapters', fatal=False, headers=self._HEADERS)
|
||||
return traverse_obj(chapters, ('data', 'view_points', ..., {
|
||||
'title': 'content',
|
||||
@ -286,7 +290,7 @@ def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
|
||||
('data', 'interaction', 'graph_version', {int_or_none}))
|
||||
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
|
||||
for cid, edges in cid_edges.items():
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
@ -639,40 +643,29 @@ def _real_extract(self, url):
|
||||
headers['Referer'] = url
|
||||
|
||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
|
||||
is_festival = 'videoData' not in initial_state
|
||||
if is_festival:
|
||||
video_data = initial_state['videoInfo']
|
||||
else:
|
||||
play_info_obj = self._search_json(
|
||||
r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
|
||||
if not play_info_obj:
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
|
||||
self.raise_login_required()
|
||||
if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
|
||||
raise ExtractorError(
|
||||
'This video may be deleted or geo-restricted. '
|
||||
'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
|
||||
play_info = traverse_obj(play_info_obj, ('data', {dict}))
|
||||
if not play_info:
|
||||
if traverse_obj(play_info_obj, 'code') == 87007:
|
||||
toast = get_element_by_class('tips-toast', webpage) or ''
|
||||
msg = clean_html(
|
||||
f'{get_element_by_class("belongs-to", toast) or ""},'
|
||||
+ (get_element_by_class('level', toast) or ''))
|
||||
raise ExtractorError(
|
||||
f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
|
||||
raise ExtractorError('Failed to extract play info')
|
||||
video_data = initial_state['videoData']
|
||||
|
||||
video_id, title = video_data['bvid'], video_data.get('title')
|
||||
|
||||
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||
page_list_json = not is_festival and traverse_obj(
|
||||
page_list_json = (not is_festival and traverse_obj(
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
note='Extracting videos in anthology', headers=headers),
|
||||
'data', expected_type=list) or []
|
||||
'data', expected_type=list)) or []
|
||||
is_anthology = len(page_list_json) > 1
|
||||
|
||||
part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
|
||||
@ -691,8 +684,6 @@ def _real_extract(self, url):
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
|
||||
@ -727,62 +718,79 @@ def _real_extract(self, url):
|
||||
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if not traverse_obj(play_info, ('dash')):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
play_info = None
|
||||
if self.is_logged_in:
|
||||
play_info = traverse_obj(
|
||||
self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
|
||||
('data', {dict}))
|
||||
if not play_info:
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
|
||||
formats = self.extract_formats(play_info)
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
if video_data.get('is_upower_exclusive'):
|
||||
high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
|
||||
msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim=",")}. {self._login_hint()}'
|
||||
if not formats:
|
||||
raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True)
|
||||
if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})):
|
||||
self.report_warning(
|
||||
f'This is a supporter-only video, only the preview will be extracted: {msg}',
|
||||
video_id=video_id)
|
||||
|
||||
if not traverse_obj(play_info, 'dash'):
|
||||
# we only have legacy formats and need additional work
|
||||
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
|
||||
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
|
||||
formats.extend(traverse_obj(
|
||||
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})),
|
||||
lambda _, v: not has_qn(v['quality'])))
|
||||
self._check_missing_formats(play_info, formats)
|
||||
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
|
||||
if flv_formats and len(flv_formats) < len(formats):
|
||||
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
|
||||
if not self._configuration_arg('prefer_multi_flv'):
|
||||
dropped_fmts = ', '.join(
|
||||
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
|
||||
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
|
||||
if dropped_fmts:
|
||||
self.to_screen(
|
||||
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
|
||||
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
|
||||
else:
|
||||
formats = traverse_obj(
|
||||
# XXX: Filtering by extractor-arg is for testing purposes
|
||||
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
|
||||
) or [max(flv_formats, key=lambda x: x['quality'])]
|
||||
|
||||
if traverse_obj(formats, (0, 'fragments')):
|
||||
# We have flv formats, which are individual short videos with their own timestamps and metainfo
|
||||
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
|
||||
return {
|
||||
**metainfo,
|
||||
'_type': 'multi_video',
|
||||
'entries': [{
|
||||
'id': f'{metainfo["id"]}_{idx}',
|
||||
'title': metainfo['title'],
|
||||
'http_headers': metainfo['http_headers'],
|
||||
'formats': [{
|
||||
**fragment,
|
||||
'format_id': formats[0].get('format_id'),
|
||||
}],
|
||||
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
|
||||
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
|
||||
} for idx, fragment in enumerate(formats[0]['fragments'])],
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
}
|
||||
|
||||
return {
|
||||
**metainfo,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'chapters': self._get_chapters(aid, cid),
|
||||
'subtitles': self.extract_subtitles(video_id, cid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
}
|
||||
|
||||
|
||||
class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
@ -860,10 +868,16 @@ def _real_extract(self, url):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
headers['Referer'] = url
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
headers=headers)
|
||||
|
||||
play_info = (
|
||||
self._search_json(
|
||||
r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id,
|
||||
end_pattern='\n', default=None)
|
||||
or self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
|
||||
headers=headers))
|
||||
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
|
||||
@ -1848,6 +1862,47 @@ def _real_extract(self, url):
|
||||
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class BiliBiliDynamicIE(InfoExtractor):
    # Resolves a "dynamic" / opus post to the video URL it links to and
    # hands that URL on via url_result().
    _VALID_URL = r'https?://(?:t\.bilibili\.com|(?:www\.)?bilibili\.com/opus)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://t.bilibili.com/998134289197432852',
        'info_dict': {
            'id': 'BV1TAmBYVEJr',
            'ext': 'mp4',
            'uploader_id': '1192648858',
            'comment_count': int,
            '_old_archive_ids': ['bilibili 113457567568273_part1'],
            'thumbnail': 'http://i2.hdslb.com/bfs/archive/50091efd965d9f13ff6814f7ad374f90ab21e77d.jpg',
            'duration': 929.238,
            'upload_date': '20241110',
            'uploader': '何同学工作室',
            'like_count': int,
            'view_count': int,
            'title': '美国小朋友就玩这个?!何同学工作室11月开箱',
            'description': '本期产品信息:\n机器狗\n气味模拟器\nCloudboom Strike LS\n无弦吉他\n蓝牙磁带音箱\n神奇画板',
            'timestamp': 1731232800,
            'tags': list,
            'chapters': list,
        },
    }]

    def _real_extract(self, url):
        """Look up the post and delegate to the URL found in its ``jump_url``.

        Raises:
            ExtractorError: (expected) if no usable URL is found, or if the
                URL found matches this extractor with the same post id.
        """
        post_id = self._match_id(url)

        # Without the newer chrome UA, the API will return an error (-352)
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        }
        post_data = self._download_json(
            'https://api.bilibili.com/x/polymer/web-dynamic/v1/detail', post_id,
            query={'id': post_id}, headers=request_headers)

        # Looks at the item itself and at its 'orig' sub-item, then takes the
        # first valid jump_url among the listed module locations.
        jump_url_path = (
            'data', 'item', (None, 'orig'), 'modules', 'module_dynamic',
            (('major', ('archive', 'pgc')), ('additional', ('reserve', 'common'))),
            'jump_url', {url_or_none}, any, {sanitize_url})
        video_url = traverse_obj(post_data, jump_path := jump_url_path)

        if not video_url:
            raise ExtractorError('No valid video URL found', expected=True)
        # NOTE(review): presumably this rejects a jump_url that points back at
        # the very same post - confirm intent.
        if self.suitable(video_url) and post_id == self._match_id(video_url):
            raise ExtractorError('No valid video URL found', expected=True)

        return self.url_result(video_url)
|
||||
|
||||
|
||||
class BiliIntlBaseIE(InfoExtractor):
|
||||
_API_URL = 'https://api.bilibili.tv/intl/gateway'
|
||||
_NETRC_MACHINE = 'biliintl'
|
||||
|
@ -88,7 +88,7 @@ class BlueskyIE(InfoExtractor):
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
|
||||
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
|
||||
'md5': 'cc0110ed1f6b0247caac8234cc1e861d',
|
||||
'info_dict': {
|
||||
'id': '3l3w4tnezek2e',
|
||||
'ext': 'mp4',
|
||||
@ -133,6 +133,8 @@ class BlueskyIE(InfoExtractor):
|
||||
'channel_follower_count': int,
|
||||
'categories': ['Entertainment'],
|
||||
'tags': [],
|
||||
'chapters': list,
|
||||
'heatmap': 'count:100',
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
}, {
|
||||
@ -184,14 +186,14 @@ class BlueskyIE(InfoExtractor):
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
|
||||
'url': 'https://bsky.app/profile/cinny.bun.how/post/3l7rdfxhyds2f',
|
||||
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
|
||||
'info_dict': {
|
||||
'id': '3l7rdfxhyds2f',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'cinnamon',
|
||||
'uploader_id': 'alt.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
|
||||
'uploader_id': 'cinny.bun.how',
|
||||
'uploader_url': 'https://bsky.app/profile/cinny.bun.how',
|
||||
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
|
||||
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
|
||||
@ -341,6 +343,7 @@ def _extract_videos(self, root, video_id, embed_path='embed', record_path='recor
|
||||
|
||||
formats.append({
|
||||
'format_id': 'blob',
|
||||
'quality': 1,
|
||||
'url': update_url_query(
|
||||
self._BLOB_URL_TMPL.format(endpoint), {'did': did, 'cid': video_cid}),
|
||||
**traverse_obj(root, (*embed_path, 'aspectRatio', {
|
||||
|
@ -31,6 +31,7 @@
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BrightcoveLegacyIE(InfoExtractor):
|
||||
@ -935,8 +936,8 @@ def extract_policy_key():
|
||||
|
||||
if content_type == 'playlist':
|
||||
return self.playlist_result(
|
||||
[self._parse_brightcove_metadata(vid, vid.get('id'), headers)
|
||||
for vid in json_data.get('videos', []) if vid.get('id')],
|
||||
(self._parse_brightcove_metadata(vid, vid['id'], headers)
|
||||
for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))),
|
||||
json_data.get('id'), json_data.get('name'),
|
||||
json_data.get('description'))
|
||||
|
||||
|
@ -5,11 +5,12 @@
|
||||
ExtractorError,
|
||||
lowercase_escape,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class ChaturbateIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.chaturbate.com/siswet19/',
|
||||
'info_dict': {
|
||||
@ -29,16 +30,58 @@ class ChaturbateIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://en.chaturbate.com/siswet19/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.eu/siswet19/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://chaturbate.global/siswet19/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_ROOM_OFFLINE = 'Room is currently offline'
|
||||
_ERROR_MAP = {
|
||||
'offline': 'Room is currently offline',
|
||||
'private': 'Room is currently in a private show',
|
||||
'away': 'Performer is currently away',
|
||||
'password protected': 'Room is password protected',
|
||||
'hidden': 'Hidden session in progress',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
def _extract_from_api(self, video_id, tld):
    """Try to get the live HLS stream for a room from the AJAX endpoint.

    Returns:
        An info dict when the response contains a stream URL; otherwise
        None, after a warning, so the caller can try another method.

    Raises:
        ExtractorError: (expected) when the reported room status has an
            entry in ``_ERROR_MAP``.
    """
    post_body = urlencode_postdata({'room_slug': video_id})
    request_headers = {
        **self.geo_verification_headers(),
        'X-Requested-With': 'XMLHttpRequest',
        'Accept': 'application/json',
    }
    api_result = self._download_json(
        f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
        data=post_body, headers=request_headers, fatal=False, impersonate=True)
    # The request is non-fatal; a falsy result is handled like an empty response
    response = api_result or {}

    stream_url = response.get('url')
    if stream_url:
        return {
            'id': video_id,
            'title': video_id,
            'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
            'is_live': True,
            'age_limit': 18,
            'formats': self._extract_m3u8_formats(stream_url, video_id, ext='mp4', live=True),
        }

    # No stream URL: work out why from the reported room status
    room_status = response.get('room_status')
    known_error = self._ERROR_MAP.get(room_status)
    if known_error:
        raise ExtractorError(known_error, expected=True)
    if room_status == 'public':
        # A public room without a stream URL is reported as geo-restricted
        self.raise_geo_restricted()
    self.report_warning(f'Got status "{room_status}" from API; falling back to webpage extraction')
    return None
|
||||
|
||||
def _extract_from_html(self, video_id, tld):
|
||||
webpage = self._download_webpage(
|
||||
f'https://chaturbate.com/{video_id}/', video_id,
|
||||
headers=self.geo_verification_headers())
|
||||
f'https://chaturbate.{tld}/{video_id}/', video_id,
|
||||
headers=self.geo_verification_headers(), impersonate=True)
|
||||
|
||||
found_m3u8_urls = []
|
||||
|
||||
@ -76,8 +119,8 @@ def _real_extract(self, url):
|
||||
webpage, 'error', group='error', default=None)
|
||||
if not error:
|
||||
if any(p in webpage for p in (
|
||||
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
|
||||
error = self._ROOM_OFFLINE
|
||||
self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
|
||||
error = self._ERROR_MAP['offline']
|
||||
if error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
raise ExtractorError('Unable to find stream URL')
|
||||
@ -104,3 +147,7 @@ def _real_extract(self, url):
|
||||
'is_live': True,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Extract via the API first and use webpage extraction as the fallback."""
    match = self._match_valid_url(url)
    video_id, tld = match.group('id', 'tld')

    api_info = self._extract_from_api(video_id, tld)
    if api_info:
        return api_info
    # The API path returned a falsy value; fall back to the HTML page
    return self._extract_from_html(video_id, tld)
|
||||
|
@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
|
||||
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
|
||||
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
|
||||
_EMBED_REGEX = [
|
||||
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
|
||||
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
|
||||
@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': '31c9291ab41fac05471db4e73aa11717',
|
||||
'ext': 'mp4',
|
||||
'title': '31c9291ab41fac05471db4e73aa11717',
|
||||
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': '0e8e040aec776862e1d632a699edf59e',
|
||||
'ext': 'mp4',
|
||||
'title': '0e8e040aec776862e1d632a699edf59e',
|
||||
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
|
||||
@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
'id': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'ext': 'mp4',
|
||||
'title': 'eaef9dea5159cf968be84241b5cedfe7',
|
||||
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
|
||||
video_id, domain = self._match_valid_url(url).group('id', 'domain')
|
||||
if domain != 'bytehighway.net':
|
||||
domain = 'cloudflarestream.com'
|
||||
base_url = f'https://{domain}/{video_id}/'
|
||||
if '.' in video_id:
|
||||
video_id = self._parse_json(base64.urlsafe_b64decode(
|
||||
|
@ -25,7 +25,6 @@
|
||||
from ..compat import (
|
||||
compat_etree_fromstring,
|
||||
compat_expanduser,
|
||||
compat_os_name,
|
||||
urllib_req_to_req,
|
||||
)
|
||||
from ..cookies import LenientSimpleCookie
|
||||
@ -279,6 +278,7 @@ class InfoExtractor:
|
||||
thumbnails: A list of dictionaries, with the following entries:
|
||||
* "id" (optional, string) - Thumbnail format ID
|
||||
* "url"
|
||||
* "ext" (optional, string) - actual image extension if not given in URL
|
||||
* "preference" (optional, int) - quality of the image
|
||||
* "width" (optional, int)
|
||||
* "height" (optional, int)
|
||||
@ -1028,7 +1028,7 @@ def _request_dump_filename(self, url, video_id, data=None):
|
||||
filename = sanitize_filename(f'{basen}.dump', restricted=True)
|
||||
# Working around MAX_PATH limitation on Windows (see
|
||||
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
|
||||
if compat_os_name == 'nt':
|
||||
if os.name == 'nt':
|
||||
absfilepath = os.path.abspath(filename)
|
||||
if len(absfilepath) > 259:
|
||||
filename = fR'\\?\{absfilepath}'
|
||||
@ -1854,12 +1854,26 @@ def _check_formats(self, formats, video_id):
|
||||
|
||||
@staticmethod
|
||||
def _remove_duplicate_formats(formats):
|
||||
format_urls = set()
|
||||
seen_urls = set()
|
||||
seen_fragment_urls = set()
|
||||
unique_formats = []
|
||||
for f in formats:
|
||||
if f['url'] not in format_urls:
|
||||
format_urls.add(f['url'])
|
||||
fragments = f.get('fragments')
|
||||
if callable(fragments):
|
||||
unique_formats.append(f)
|
||||
|
||||
elif fragments:
|
||||
fragment_urls = frozenset(
|
||||
fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path'])
|
||||
for fragment in fragments)
|
||||
if fragment_urls not in seen_fragment_urls:
|
||||
seen_fragment_urls.add(fragment_urls)
|
||||
unique_formats.append(f)
|
||||
|
||||
elif f['url'] not in seen_urls:
|
||||
seen_urls.add(f['url'])
|
||||
unique_formats.append(f)
|
||||
|
||||
formats[:] = unique_formats
|
||||
|
||||
def _is_valid_url(self, url, video_id, item='video', headers={}):
|
||||
@ -3767,7 +3781,7 @@ def _merge_subtitles(cls, *dicts, target=None):
|
||||
""" Merge subtitle dictionaries, language by language. """
|
||||
if target is None:
|
||||
target = {}
|
||||
for d in dicts:
|
||||
for d in filter(None, dicts):
|
||||
for lang, subs in d.items():
|
||||
target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
|
||||
return target
|
||||
@ -3789,7 +3803,7 @@ def _cookies_passed(self):
|
||||
def mark_watched(self, *args, **kwargs):
|
||||
if not self.get_param('mark_watched', False):
|
||||
return
|
||||
if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed:
|
||||
if (self.supports_login() and self._get_login_info()[0] is not None) or self._cookies_passed:
|
||||
self._mark_watched(*args, **kwargs)
|
||||
|
||||
def _mark_watched(self, *args, **kwargs):
|
||||
|
@ -1,14 +1,27 @@
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import orderedSet
|
||||
from .ninecninemedia import NineCNineMediaIE
|
||||
from ..utils import extract_attributes, orderedSet
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class CTVNewsIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)'
|
||||
_BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
|
||||
_VIDEO_ID_RE = r'(?P<id>\d{5,})'
|
||||
_PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
|
||||
_VALID_URL = [
|
||||
rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
|
||||
rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
|
||||
rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
|
||||
rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
|
||||
rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://www.ctvnews.ca/video?clipId=901995',
|
||||
'md5': '9b8624ba66351a23e0b6e1391971f9af',
|
||||
'md5': 'b608f466c7fa24b9666c6439d766ab7e',
|
||||
'info_dict': {
|
||||
'id': '901995',
|
||||
'ext': 'flv',
|
||||
@ -16,6 +29,33 @@ class CTVNewsIE(InfoExtractor):
|
||||
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
|
||||
'timestamp': 1467286284,
|
||||
'upload_date': '20160630',
|
||||
'categories': [],
|
||||
'season_number': 0,
|
||||
'season': 'Season 0',
|
||||
'tags': [],
|
||||
'series': 'CTV News National | Archive | Stories 2',
|
||||
'season_id': '57981',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'duration': 764.631,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
|
||||
'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
|
||||
'info_dict': {
|
||||
'id': '3030933',
|
||||
'ext': 'flv',
|
||||
'title': 'Here’s what’s making news for Nov. 15',
|
||||
'description': 'Here are the top stories we’re working on for CTV News at 11 for Nov. 15',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
|
||||
'season_id': '58104',
|
||||
'season_number': 0,
|
||||
'tags': [],
|
||||
'season': 'Season 0',
|
||||
'categories': [],
|
||||
'series': 'CTV News Barrie',
|
||||
'upload_date': '20241116',
|
||||
'duration': 42.943,
|
||||
'timestamp': 1731722452,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
|
||||
@ -31,6 +71,72 @@ class CTVNewsIE(InfoExtractor):
|
||||
'id': '1.2876780',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
}, {
|
||||
'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
|
||||
'info_dict':
|
||||
{
|
||||
'id': '1.5736957',
|
||||
},
|
||||
'playlist_mincount': 6,
|
||||
}, {
|
||||
'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
|
||||
'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
|
||||
'info_dict': {
|
||||
'id': '2695026',
|
||||
'ext': 'flv',
|
||||
'season_id': '89852',
|
||||
'series': 'From CTV News Channel',
|
||||
'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
|
||||
'season': '2023',
|
||||
'title': 'Bank of Canada asks public about digital currency',
|
||||
'categories': [],
|
||||
'tags': [],
|
||||
'upload_date': '20230526',
|
||||
'season_number': 2023,
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'timestamp': 1685105157,
|
||||
'duration': 253.553,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
|
||||
'md5': '135cc592df607d29dddc931f1b756ae2',
|
||||
'info_dict': {
|
||||
'id': '582589',
|
||||
'ext': 'flv',
|
||||
'categories': [],
|
||||
'timestamp': 1427906183,
|
||||
'season_number': 0,
|
||||
'duration': 125.559,
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'series': 'CTV News Stox',
|
||||
'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
|
||||
'title': 'Berlin Wall',
|
||||
'season_id': '63817',
|
||||
'season': 'Season 0',
|
||||
'tags': [],
|
||||
'upload_date': '20150401',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
|
||||
'md5': 'a14c0603557decc6531260791c23cc5e',
|
||||
'info_dict': {
|
||||
'id': '3023759',
|
||||
'ext': 'flv',
|
||||
'season_number': 2024,
|
||||
'timestamp': 1731798000,
|
||||
'season': '2024',
|
||||
'episode': 'Episode 125',
|
||||
'description': 'CTV News Ottawa at Six',
|
||||
'duration': 2712.076,
|
||||
'episode_number': 125,
|
||||
'upload_date': '20241116',
|
||||
'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
|
||||
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
|
||||
'categories': [],
|
||||
'tags': [],
|
||||
'series': 'CTV News Ottawa at Six',
|
||||
'season_id': '92667',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.ctvnews.ca/1.810401',
|
||||
'only_matching': True,
|
||||
@ -42,29 +148,35 @@ class CTVNewsIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _ninecninemedia_url_result(self, clip_id):
|
||||
return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
page_id = self._match_id(url)
|
||||
|
||||
def ninecninemedia_url_result(clip_id):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': clip_id,
|
||||
'url': f'9c9media:ctvnews_web:{clip_id}',
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
|
||||
page_id = mobj.group('id')
|
||||
|
||||
if page_id.isdigit():
|
||||
return ninecninemedia_url_result(page_id)
|
||||
else:
|
||||
webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={
|
||||
'ot': 'example.AjaxPageLayout.ot',
|
||||
'maxItemsPerPage': 1000000,
|
||||
})
|
||||
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet(
|
||||
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||
if not entries:
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
if 'getAuthStates("' in webpage:
|
||||
entries = [ninecninemedia_url_result(clip_id) for clip_id in
|
||||
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
|
||||
return self.playlist_result(entries, page_id)
|
||||
if re.fullmatch(self._VIDEO_ID_RE, page_id):
|
||||
return self._ninecninemedia_url_result(page_id)
|
||||
|
||||
webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
|
||||
'ot': 'example.AjaxPageLayout.ot',
|
||||
'maxItemsPerPage': 1000000,
|
||||
})
|
||||
entries = [self._ninecninemedia_url_result(clip_id)
|
||||
for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
|
||||
if not entries:
|
||||
webpage = self._download_webpage(url, page_id)
|
||||
if 'getAuthStates("' in webpage:
|
||||
entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
|
||||
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
|
||||
else:
|
||||
entries = [
|
||||
self._ninecninemedia_url_result(clip_id) for clip_id in
|
||||
traverse_obj(webpage, (
|
||||
{find_element(tag='jasper-player-container', html=True)},
|
||||
{extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId', {str}))
|
||||
]
|
||||
|
||||
return self.playlist_result(entries, page_id)
|
||||
|
@ -1,7 +1,4 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
@ -31,9 +28,6 @@ def _real_extract(self, url):
|
||||
video_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id') or video_id
|
||||
|
||||
# request setClientTimezone.php to get PHPSESSID cookie which is need to get valid json data in the next request
|
||||
self._request_webpage(HEADRequest(
|
||||
'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id)
|
||||
movie_data = self._download_json(
|
||||
f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id)
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
import functools
|
||||
import hashlib
|
||||
import re
|
||||
import time
|
||||
@ -51,6 +52,15 @@ class DacastVODIE(DacastBaseIE):
|
||||
'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, { # /uspaes/ in hls_url
|
||||
'url': 'https://iframe.dacast.com/vod/f9823fc6-faba-b98f-0d00-4a7b50a58c5b/348c5c84-b6af-4859-bb9d-1d01009c795b',
|
||||
'info_dict': {
|
||||
'id': '348c5c84-b6af-4859-bb9d-1d01009c795b',
|
||||
'ext': 'mp4',
|
||||
'title': 'pl1-edyta-rubas-211124.mp4',
|
||||
'uploader_id': 'f9823fc6-faba-b98f-0d00-4a7b50a58c5b',
|
||||
'thumbnail': 'https://universe-files.dacast.com/4d0bd042-a536-752d-fc34-ad2fa44bbcbb.png',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/',
|
||||
@ -74,6 +84,15 @@ class DacastVODIE(DacastBaseIE):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
@functools.cached_property
|
||||
def _usp_signing_secret(self):
|
||||
player_js = self._download_webpage(
|
||||
'https://player.dacast.com/js/player.js', None, 'Downloading player JS')
|
||||
# Rotates every so often, but hardcode a fallback in case of JS change/breakage before rotation
|
||||
return self._search_regex(
|
||||
r'\bUSP_SIGNING_SECRET\s*=\s*(["\'])(?P<secret>(?:(?!\1).)+)', player_js,
|
||||
'usp signing secret', group='secret', fatal=False) or 'odnInCGqhvtyRTtIiddxtuRtawYYICZP'
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
|
||||
query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'}
|
||||
@ -94,10 +113,10 @@ def _real_extract(self, url):
|
||||
if 'DRM_EXT' in hls_url:
|
||||
self.report_drm(video_id)
|
||||
elif '/uspaes/' in hls_url:
|
||||
# From https://player.dacast.com/js/player.js
|
||||
# Ref: https://player.dacast.com/js/player.js
|
||||
ts = int(time.time())
|
||||
signature = hashlib.sha1(
|
||||
f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw').digest().hex()
|
||||
f'{10413792000 - ts}{ts}{self._usp_signing_secret}'.encode()).digest().hex()
|
||||
hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}'
|
||||
|
||||
for retry in self.RetryManager():
|
||||
|
@ -261,6 +261,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'tags': [],
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080',
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
|
||||
@ -288,6 +289,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
'description': 'À bord du « véloto », l’alternative à la voiture pour la campagne',
|
||||
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
|
||||
},
|
||||
}, {
|
||||
# https://geo.dailymotion.com/player/xry80.html?video=x8vu47w
|
||||
'url': 'https://www.metatube.com/en/videos/546765/This-frogs-decorates-Christmas-tree/',
|
||||
'info_dict': {
|
||||
'id': 'x8vu47w',
|
||||
'ext': 'mp4',
|
||||
'like_count': int,
|
||||
'uploader': 'Metatube',
|
||||
'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080',
|
||||
'upload_date': '20240326',
|
||||
'view_count': int,
|
||||
'timestamp': 1711496732,
|
||||
'age_limit': 0,
|
||||
'uploader_id': 'x2xpy74',
|
||||
'title': 'Está lindas ranitas ponen su arbolito',
|
||||
'duration': 28,
|
||||
'description': 'Que lindura',
|
||||
'tags': [],
|
||||
},
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
_COMMON_MEDIA_FIELDS = '''description
|
||||
@ -302,7 +322,7 @@ def _extract_embed_urls(cls, url, webpage):
|
||||
yield from super()._extract_embed_urls(url, webpage)
|
||||
for mobj in re.finditer(
|
||||
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
|
||||
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
yield 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
|
||||
for mobj in re.finditer(
|
||||
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
|
@ -1,7 +1,10 @@
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
jwt_decode_hs256,
|
||||
parse_codecs,
|
||||
try_get,
|
||||
url_or_none,
|
||||
@ -13,9 +16,6 @@
|
||||
class DigitalConcertHallIE(InfoExtractor):
|
||||
IE_DESC = 'DigitalConcertHall extractor'
|
||||
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||
_ACCESS_TOKEN = None
|
||||
_NETRC_MACHINE = 'digitalconcerthall'
|
||||
_TESTS = [{
|
||||
'note': 'Playlist with only one video',
|
||||
@ -69,59 +69,157 @@ class DigitalConcertHallIE(InfoExtractor):
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'playlist_count': 1,
|
||||
}]
|
||||
_LOGIN_HINT = ('Use --username token --password ACCESS_TOKEN where ACCESS_TOKEN '
|
||||
'is the "access_token_production" from your browser local storage')
|
||||
_REFRESH_HINT = 'or else use a "refresh_token" with --username refresh --password REFRESH_TOKEN'
|
||||
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
|
||||
_CLIENT_ID = 'dch.webapp'
|
||||
_CLIENT_SECRET = '2ySLN+2Fwb'
|
||||
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
|
||||
_OAUTH_HEADERS = {
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'Origin': 'https://www.digitalconcerthall.com',
|
||||
'Referer': 'https://www.digitalconcerthall.com/',
|
||||
'User-Agent': _USER_AGENT,
|
||||
}
|
||||
_access_token = None
|
||||
_access_token_expiry = 0
|
||||
_refresh_token = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_token = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
|
||||
@property
|
||||
def _access_token_is_expired(self):
|
||||
return self._access_token_expiry - 30 <= int(time.time())
|
||||
|
||||
def _set_access_token(self, value):
|
||||
self._access_token = value
|
||||
self._access_token_expiry = traverse_obj(value, ({jwt_decode_hs256}, 'exp', {int})) or 0
|
||||
|
||||
def _cache_tokens(self, /):
|
||||
self.cache.store(self._NETRC_MACHINE, 'tokens', {
|
||||
'access_token': self._access_token,
|
||||
'refresh_token': self._refresh_token,
|
||||
})
|
||||
|
||||
def _fetch_new_tokens(self, invalidate=False):
|
||||
if invalidate:
|
||||
self.report_warning('Access token has been invalidated')
|
||||
self._set_access_token(None)
|
||||
|
||||
if not self._access_token_is_expired:
|
||||
return
|
||||
|
||||
if not self._refresh_token:
|
||||
self._set_access_token(None)
|
||||
self._cache_tokens()
|
||||
raise ExtractorError(
|
||||
'Access token has expired or been invalidated. '
|
||||
'Get a new "access_token_production" value from your browser '
|
||||
f'and try again, {self._REFRESH_HINT}', expected=True)
|
||||
|
||||
# If we only have a refresh token, we need a temporary "initial token" for the refresh flow
|
||||
bearer_token = self._access_token or self._download_json(
|
||||
self._OAUTH_URL, None, 'Obtaining initial token', 'Unable to obtain initial token',
|
||||
data=urlencode_postdata({
|
||||
'affiliate': 'none',
|
||||
'grant_type': 'device',
|
||||
'device_vendor': 'unknown',
|
||||
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio
|
||||
'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari',
|
||||
'app_id': 'dch.webapp',
|
||||
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio,
|
||||
# but this is no longer effective since actual login is not possible anymore
|
||||
'device_model': 'unknown',
|
||||
'app_id': self._CLIENT_ID,
|
||||
'app_distributor': 'berlinphil',
|
||||
'app_version': '1.84.0',
|
||||
'client_secret': '2ySLN+2Fwb',
|
||||
}), headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})['access_token']
|
||||
'app_version': '1.95.0',
|
||||
'client_secret': self._CLIENT_SECRET,
|
||||
}), headers=self._OAUTH_HEADERS)['access_token']
|
||||
|
||||
try:
|
||||
login_response = self._download_json(
|
||||
self._OAUTH_URL,
|
||||
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
|
||||
'grant_type': 'password',
|
||||
'username': username,
|
||||
'password': password,
|
||||
response = self._download_json(
|
||||
self._OAUTH_URL, None, 'Refreshing token', 'Unable to refresh token',
|
||||
data=urlencode_postdata({
|
||||
'grant_type': 'refresh_token',
|
||||
'refresh_token': self._refresh_token,
|
||||
'client_id': self._CLIENT_ID,
|
||||
'client_secret': self._CLIENT_SECRET,
|
||||
}), headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
||||
'Referer': 'https://www.digitalconcerthall.com',
|
||||
'Authorization': f'Bearer {login_token}',
|
||||
'User-Agent': self._USER_AGENT,
|
||||
**self._OAUTH_HEADERS,
|
||||
'Authorization': f'Bearer {bearer_token}',
|
||||
})
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username or password', expected=True)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
self._set_access_token(None)
|
||||
self._refresh_token = None
|
||||
self._cache_tokens()
|
||||
raise ExtractorError('Your tokens have been invalidated', expected=True)
|
||||
raise
|
||||
self._ACCESS_TOKEN = login_response['access_token']
|
||||
|
||||
self._set_access_token(response['access_token'])
|
||||
if refresh_token := traverse_obj(response, ('refresh_token', {str})):
|
||||
self.write_debug('New refresh token granted')
|
||||
self._refresh_token = refresh_token
|
||||
self._cache_tokens()
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
self.report_login()
|
||||
|
||||
if username == 'refresh':
|
||||
self._refresh_token = password
|
||||
self._fetch_new_tokens()
|
||||
|
||||
if username == 'token':
|
||||
if not traverse_obj(password, {jwt_decode_hs256}):
|
||||
raise ExtractorError(
|
||||
f'The access token passed to yt-dlp is not valid. {self._LOGIN_HINT}', expected=True)
|
||||
self._set_access_token(password)
|
||||
self._cache_tokens()
|
||||
|
||||
if username in ('refresh', 'token'):
|
||||
if self.get_param('cachedir') is not False:
|
||||
token_type = 'access' if username == 'token' else 'refresh'
|
||||
self.to_screen(f'Your {token_type} token has been cached to disk. To use the cached '
|
||||
'token next time, pass --username cache along with any password')
|
||||
return
|
||||
|
||||
if username != 'cache':
|
||||
raise ExtractorError(
|
||||
'Login with username and password is no longer supported '
|
||||
f'for this site. {self._LOGIN_HINT}, {self._REFRESH_HINT}', expected=True)
|
||||
|
||||
# Try cached access_token
|
||||
cached_tokens = self.cache.load(self._NETRC_MACHINE, 'tokens', default={})
|
||||
self._set_access_token(cached_tokens.get('access_token'))
|
||||
self._refresh_token = cached_tokens.get('refresh_token')
|
||||
if not self._access_token_is_expired:
|
||||
return
|
||||
|
||||
# Try cached refresh_token
|
||||
self._fetch_new_tokens(invalidate=True)
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._ACCESS_TOKEN:
|
||||
self.raise_login_required(method='password')
|
||||
if not self._access_token:
|
||||
self.raise_login_required(
|
||||
'All content on this site is only available for registered users. '
|
||||
f'{self._LOGIN_HINT}, {self._REFRESH_HINT}', method=None)
|
||||
|
||||
def _entries(self, items, language, type_, **kwargs):
|
||||
for item in items:
|
||||
video_id = item['id']
|
||||
stream_info = self._download_json(
|
||||
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
|
||||
for should_retry in (True, False):
|
||||
self._fetch_new_tokens(invalidate=not should_retry)
|
||||
try:
|
||||
stream_info = self._download_json(
|
||||
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
|
||||
'Accept': 'application/json',
|
||||
'Authorization': f'Bearer {self._access_token}',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
})
|
||||
break
|
||||
except ExtractorError as error:
|
||||
if should_retry and isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
continue
|
||||
raise
|
||||
|
||||
formats = []
|
||||
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
|
||||
@ -157,7 +255,6 @@ def _real_extract(self, url):
|
||||
'Accept': 'application/json',
|
||||
'Accept-Language': language,
|
||||
'User-Agent': self._USER_AGENT,
|
||||
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
|
||||
})
|
||||
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
|
||||
|
||||
|
@ -48,32 +48,30 @@ def _real_extract(self, url):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
fn = urllib.parse.unquote(url_basename(url))
|
||||
title = os.path.splitext(fn)[0]
|
||||
password = self.get_param('videopassword')
|
||||
content_id = None
|
||||
|
||||
for part in self._yield_decoded_parts(webpage):
|
||||
if '/sm/password' in part:
|
||||
webpage = self._download_webpage(
|
||||
update_url('https://www.dropbox.com/sm/password', query=part.partition('?')[2]), video_id)
|
||||
content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID')
|
||||
break
|
||||
|
||||
if (self._og_search_title(webpage, default=None) == 'Dropbox - Password Required'
|
||||
or 'Enter the password for this link' in webpage):
|
||||
if password:
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'},
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': self._search_regex(r'content_id=([\w.+=/-]+)["\']', webpage, 'content id'),
|
||||
'password': password,
|
||||
'url': url,
|
||||
}))
|
||||
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
elif not self._get_cookies('https://dropbox.com').get('sm_auth'):
|
||||
if content_id:
|
||||
password = self.get_param('videopassword')
|
||||
if not password:
|
||||
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
|
||||
|
||||
response = self._download_json(
|
||||
'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
|
||||
data=urlencode_postdata({
|
||||
'is_xhr': 'true',
|
||||
't': self._get_cookies('https://www.dropbox.com')['t'].value,
|
||||
'content_id': content_id,
|
||||
'password': password,
|
||||
'url': update_url(url, scheme='', netloc=''),
|
||||
}))
|
||||
if response.get('status') != 'authed':
|
||||
raise ExtractorError('Invalid password', expected=True)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats, subtitles = [], {}
|
||||
|
@ -135,7 +135,7 @@ def _real_extract(self, url):
|
||||
self.raise_login_required(method='any')
|
||||
raise ExtractorError(login_err, expected=True)
|
||||
|
||||
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
|
||||
embed_url = self._html_search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
watch_info = get_element_by_id('watch-info', webpage) or ''
|
||||
|
||||
|
51
yt_dlp/extractor/drtalks.py
Normal file
51
yt_dlp/extractor/drtalks.py
Normal file
@ -0,0 +1,51 @@
|
||||
from .brightcove import BrightcoveNewIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class DrTalksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?drtalks\.com/videos/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://drtalks.com/videos/six-pillars-of-resilience-tools-for-managing-stress-and-flourishing/',
|
||||
'info_dict': {
|
||||
'id': '6366193757112',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': '6314452011001',
|
||||
'tags': ['resilience'],
|
||||
'description': 'md5:9c6805aee237ee6de8052461855b9dda',
|
||||
'timestamp': 1734546659,
|
||||
'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/12/Episode-82-Eva-Selhub-DrTalks-Thumbs.jpg',
|
||||
'title': 'Six Pillars of Resilience: Tools for Managing Stress and Flourishing',
|
||||
'duration': 2800.682,
|
||||
'upload_date': '20241218',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://drtalks.com/videos/the-pcos-puzzle-mastering-metabolic-health-with-marcelle-pick/',
|
||||
'info_dict': {
|
||||
'id': '6364699891112',
|
||||
'ext': 'mp4',
|
||||
'title': 'The PCOS Puzzle: Mastering Metabolic Health with Marcelle Pick',
|
||||
'description': 'md5:e87cbe00ca50135d5702787fc4043aaa',
|
||||
'thumbnail': 'https://drtalks.com/wp-content/uploads/2024/11/Episode-34-Marcelle-Pick-OBGYN-NP-DrTalks.jpg',
|
||||
'duration': 3515.2,
|
||||
'tags': ['pcos'],
|
||||
'upload_date': '20241114',
|
||||
'timestamp': 1731592119,
|
||||
'uploader_id': '6314452011001',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
next_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']['video']
|
||||
|
||||
return self.url_result(
|
||||
next_data['videos']['brightcoveVideoLink'], BrightcoveNewIE, video_id,
|
||||
url_transparent=True,
|
||||
**traverse_obj(next_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('videos', 'summury', {str}),
|
||||
'thumbnail': ('featuredImage', 'node', 'sourceUrl', {url_or_none}),
|
||||
}))
|
@ -5,15 +5,16 @@
|
||||
get_element_text_and_html_by_tag,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_call,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class DuoplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
|
||||
_VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)(?:[/?#]|$)'
|
||||
_TESTS = [{
|
||||
'note': 'Siberi võmm S02E12',
|
||||
'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
|
||||
@ -34,15 +35,16 @@ class DuoplayIE(InfoExtractor):
|
||||
'episode_number': 12,
|
||||
'episode_id': '24',
|
||||
},
|
||||
'skip': 'No video found',
|
||||
}, {
|
||||
'note': 'Empty title',
|
||||
'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
|
||||
'md5': '6aca68be71112314738dd17cced7f8bf',
|
||||
'md5': 'cba9f5dabf2582b224d80ac44fb80e47',
|
||||
'info_dict': {
|
||||
'id': '17_14',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ühikarotid',
|
||||
'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
|
||||
'title': 'Episode 14',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'description': 'md5:4719b418e058c209def41d48b601276e',
|
||||
'upload_date': '20100916',
|
||||
'timestamp': 1284661800,
|
||||
@ -52,6 +54,8 @@ class DuoplayIE(InfoExtractor):
|
||||
'season_number': 2,
|
||||
'episode_id': '14',
|
||||
'release_year': 2010,
|
||||
'episode': 'Episode 14',
|
||||
'episode_number': 14,
|
||||
},
|
||||
}, {
|
||||
'note': 'Movie without expiry',
|
||||
@ -68,10 +72,32 @@ class DuoplayIE(InfoExtractor):
|
||||
'timestamp': 1671054000,
|
||||
'release_year': 2018,
|
||||
},
|
||||
'skip': 'No video found',
|
||||
}, {
|
||||
'note': 'Episode url without show name',
|
||||
'url': 'https://duoplay.ee/9644?ep=185',
|
||||
'md5': '63f324b4fe2dbd8194dca16a6d52184a',
|
||||
'info_dict': {
|
||||
'id': '9644_185',
|
||||
'ext': 'mp4',
|
||||
'title': 'Episode 185',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'description': 'md5:ed25ba4e9e5d54bc291a4a0cdd241467',
|
||||
'upload_date': '20241120',
|
||||
'timestamp': 1732077000,
|
||||
'episode': 'Episode 63',
|
||||
'episode_id': '185',
|
||||
'episode_number': 63,
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'series': 'Telehommik',
|
||||
'series_id': '9644',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
telecast_id, episode = self._match_valid_url(url).group('id', 'ep')
|
||||
telecast_id = self._match_id(url)
|
||||
episode = traverse_obj(parse_qs(url), ('ep', 0, {int_or_none}, {str_or_none}))
|
||||
video_id = join_nonempty(telecast_id, episode, delim='_')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_player = try_call(lambda: extract_attributes(
|
||||
@ -79,25 +105,33 @@ def _real_extract(self, url):
|
||||
if not video_player or not video_player.get('manifest-url'):
|
||||
raise ExtractorError('No video found', expected=True)
|
||||
|
||||
manifest_url = video_player['manifest-url']
|
||||
session_token = self._download_json(
|
||||
'https://sts.postimees.ee/session/register', video_id, 'Registering session',
|
||||
'Unable to register session', headers={
|
||||
'Accept': 'application/json',
|
||||
'X-Original-URI': manifest_url,
|
||||
})['session']
|
||||
|
||||
episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'),
|
||||
'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4', query={'s': session_token}),
|
||||
**traverse_obj(episode_attr, {
|
||||
'title': 'title',
|
||||
'description': 'synopsis',
|
||||
'title': ('title', {str}),
|
||||
'description': ('synopsis', {str}),
|
||||
'thumbnail': ('images', 'original'),
|
||||
'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
|
||||
'cast': ('cast', {lambda x: x.split(', ')}),
|
||||
'cast': ('cast', filter, {lambda x: x.split(', ')}),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
}),
|
||||
**(traverse_obj(episode_attr, {
|
||||
'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))),
|
||||
'series': 'title',
|
||||
'title': (None, (('subtitle', {str}, filter), {value(f'Episode {episode}' if episode else None)})),
|
||||
'series': ('title', {str}),
|
||||
'series_id': ('telecast_id', {str_or_none}),
|
||||
'season_number': ('season_id', {int_or_none}),
|
||||
'episode': 'subtitle',
|
||||
'episode': ('subtitle', {str}, filter),
|
||||
'episode_number': ('episode_nr', {int_or_none}),
|
||||
'episode_id': ('episode_id', {str_or_none}),
|
||||
}, get_all=False) if episode_attr.get('category') != 'movies' else {}),
|
||||
|
@ -162,7 +162,7 @@ def _real_extract(self, url):
|
||||
items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
|
||||
if items:
|
||||
return self.playlist_result(
|
||||
[self._parse_video_metadata(i, video_id, timestamp) for i in items],
|
||||
(self._parse_video_metadata(i, video_id, timestamp) for i in items),
|
||||
video_id, self._html_search_meta('twitter:title', webpage))
|
||||
|
||||
item = self._search_regex(
|
||||
|
155
yt_dlp/extractor/eggs.py
Normal file
155
yt_dlp/extractor/eggs.py
Normal file
@ -0,0 +1,155 @@
|
||||
import secrets
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class EggsBaseIE(InfoExtractor):
    """Shared API access and track parsing for the eggs.mu extractors."""

    # Static headers sent with every request to app-front-api.eggs.mu.
    # A random 'deviceId' is added per extractor instance in _real_initialize.
    _API_HEADERS = {
        'Accept': '*/*',
        'apVersion': '8.2.00',
        'deviceName': 'Android',
    }

    def _real_initialize(self):
        """Attach a freshly generated device ID to this instance's API headers."""
        # Rebind a per-instance copy instead of mutating the class-level dict:
        # that dict is shared by every subclass and every instance, so writing
        # into it would leak one instance's device ID into all the others
        self._API_HEADERS = {
            **self._API_HEADERS,
            'deviceId': secrets.token_hex(8),
        }

    def _call_api(self, endpoint, video_id):
        """Download and return the JSON response of a v1 API endpoint.

        @param endpoint  Path below https://app-front-api.eggs.mu/v1/
        @param video_id  ID passed on to _download_json
        """
        return self._download_json(
            f'https://app-front-api.eggs.mu/v1/{endpoint}', video_id,
            headers=self._API_HEADERS)

    def _extract_music_info(self, data):
        """Convert one track object from the API into an extraction result.

        Returns a url_result pointing at YouTube when the track has a valid
        'youtubeUrl'; otherwise returns an info dict built from the track data.
        """
        # Tracks that only reference a YouTube video are delegated to YoutubeIE
        if yt_url := traverse_obj(data, ('youtubeUrl', {url_or_none})):
            return self.url_result(yt_url, ie=YoutubeIE)

        artist_name = traverse_obj(data, ('artist', 'artistName', {str_or_none}))
        music_id = traverse_obj(data, ('musicId', {str_or_none}))
        # The song page URL can only be rebuilt when both parts are known
        webpage_url = None
        if artist_name and music_id:
            webpage_url = f'https://eggs.mu/artist/{artist_name}/song/{music_id}'

        return {
            'id': music_id,
            'vcodec': 'none',
            'webpage_url': webpage_url,
            # This helper is also used for playlist entries of EggsArtistIE,
            # so the result is explicitly attributed to the single-track extractor
            'extractor_key': EggsIE.ie_key(),
            'extractor': EggsIE.IE_NAME,
            **traverse_obj(data, {
                'title': ('musicTitle', {str}),
                'url': ('musicDataPath', {url_or_none}),
                'uploader': ('artist', 'displayName', {str}),
                'uploader_id': ('artist', 'artistId', {str_or_none}),
                'thumbnail': ('imageDataPath', {url_or_none}),
                'view_count': ('numberOfMusicPlays', {int_or_none}),
                'like_count': ('numberOfLikes', {int_or_none}),
                'comment_count': ('numberOfComments', {int_or_none}),
                'composers': ('composer', {str}, all),
                'tags': ('tags', ..., {str}),
                'timestamp': ('releaseDate', {parse_iso8601}),
                'artist': ('artist', 'displayName', {str}),
            })}
|
||||
|
||||
|
||||
class EggsIE(EggsBaseIE):
    """Extractor for a single eggs.mu song page."""

    IE_NAME = 'eggs:single'
    _VALID_URL = r'https?://eggs\.mu/artist/[^/?#]+/song/(?P<id>[\da-f-]+)'

    _TESTS = [{
        # Track hosted by eggs.mu itself
        'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
        'info_dict': {
            'id': '0e95fd1d-4d61-4d5b-8b18-6092c551da90',
            'ext': 'm4a',
            'title': 'シネマと信号',
            'uploader': 'Sunny Girl',
            'thumbnail': r're:https?://.*\.jpg(?:\?.*)?$',
            'uploader_id': '1607',
            'like_count': int,
            'timestamp': 1731327327,
            'composers': ['橘高連太郎'],
            'view_count': int,
            'comment_count': int,
            'artists': ['Sunny Girl'],
            'upload_date': '20241111',
            'tags': ['SunnyGirl', 'シネマと信号'],
        },
    }, {
        # Track that is handed over to the YouTube extractor
        'url': 'https://eggs.mu/artist/KAMO_3pband/song/1d4bc45f-1af6-47a9-8b30-a70cae350b4f',
        'info_dict': {
            'id': '80cLKA2wnoA',
            'ext': 'mp4',
            'title': 'KAMO「いい女だから」Audio',
            'uploader': 'KAMO',
            'live_status': 'not_live',
            'channel_id': 'UCsHLBw2__5Q9y55skXPotOg',
            'channel_follower_count': int,
            'description': 'md5:d260da711ecbec3e720293dc11401b87',
            'availability': 'public',
            'uploader_id': '@KAMO_band',
            'upload_date': '20240925',
            'thumbnail': 'https://i.ytimg.com/vi/80cLKA2wnoA/maxresdefault.jpg',
            'comment_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCsHLBw2__5Q9y55skXPotOg',
            'view_count': int,
            'duration': 151,
            'like_count': int,
            'channel': 'KAMO',
            'playable_in_embed': True,
            'uploader_url': 'https://www.youtube.com/@KAMO_band',
            'tags': [],
            'timestamp': 1727271121,
            'age_limit': 0,
            'categories': ['People & Blogs'],
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Fetch the track object for the song in `url` and convert it."""
        track_id = self._match_id(url)
        return self._extract_music_info(
            self._call_api(f'musics/{track_id}', track_id))
|
||||
|
||||
|
||||
class EggsArtistIE(EggsBaseIE):
    """Extractor that turns an eggs.mu artist page into a playlist of tracks."""

    IE_NAME = 'eggs:artist'
    _VALID_URL = r'https?://eggs\.mu/artist/(?P<id>\w+)/?(?:[?#&]|$)'

    _TESTS = [{
        'url': 'https://eggs.mu/artist/32_sunny_girl',
        'info_dict': {
            'id': '32_sunny_girl',
            'thumbnail': 'https://image-pro.eggs.mu/profile/1607.jpeg?updated_at=2024-04-03T20%3A06%3A00%2B09%3A00',
            'description': 'Muddy Mine / 東京高田馬場CLUB PHASE / Gt.Vo 橘高 連太郎 / Ba.Cho 小野 ゆうき / Dr 大森 りゅうひこ',
            'title': 'Sunny Girl',
        },
        'playlist_mincount': 18,
    }, {
        'url': 'https://eggs.mu/artist/KAMO_3pband',
        'info_dict': {
            'id': 'KAMO_3pband',
            'description': '川崎発3ピースバンド',
            'thumbnail': 'https://image-pro.eggs.mu/profile/35217.jpeg?updated_at=2024-11-27T16%3A31%3A50%2B09%3A00',
            'title': 'KAMO',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Build a playlist from the artist's profile and track list endpoints."""
        artist_id = self._match_id(url)

        # Two requests: the profile supplies the playlist metadata,
        # the track list supplies the entries
        profile = self._call_api(f'artists/{artist_id}', artist_id)
        tracks = self._call_api(f'artists/{artist_id}/musics', artist_id)

        entries = traverse_obj(
            tracks, ('data', ..., {dict}, {self._extract_music_info}))
        playlist_meta = traverse_obj(profile, {
            'title': ('displayName', {str}),
            'description': ('profile', {str}),
            'thumbnail': ('imageDataPath', {url_or_none}),
        })

        return self.playlist_result(entries, playlist_id=artist_id, **playlist_meta)
|
@ -50,7 +50,7 @@ class FacebookIE(InfoExtractor):
|
||||
[^/]+/videos/(?:[^/]+/)?|
|
||||
[^/]+/posts/|
|
||||
events/(?:[^/]+/)?|
|
||||
groups/[^/]+/(?:permalink|posts)/|
|
||||
groups/[^/]+/(?:permalink|posts)/(?:[\da-f]+/)?|
|
||||
watchparty/
|
||||
)|
|
||||
facebook:
|
||||
@ -410,6 +410,9 @@ class FacebookIE(InfoExtractor):
|
||||
'uploader': 'Comitato Liberi Pensatori',
|
||||
'uploader_id': '100065709540881',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||
_api_config = {
|
||||
@ -563,13 +566,13 @@ def extract_from_jsmods_instances(js_data):
|
||||
return extract_video_data(try_get(
|
||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
def extract_dash_manifest(vid_data, formats, mpd_url=None):
|
||||
dash_manifest = traverse_obj(
|
||||
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str)
|
||||
vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
mpd_url=url_or_none(video.get('dash_manifest_url'))))
|
||||
mpd_url=url_or_none(vid_data.get('dash_manifest_url')) or mpd_url))
|
||||
|
||||
def process_formats(info):
|
||||
# Downloads with browser's User-Agent are rate limited. Working around
|
||||
@ -619,9 +622,12 @@ def parse_graphql_video(video):
|
||||
video = video['creation_story']
|
||||
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
|
||||
video.update(reel_info)
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||
|
||||
formats = []
|
||||
q = qualities(['sd', 'hd'])
|
||||
|
||||
# Legacy formats extraction
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
|
||||
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
|
||||
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
|
||||
('browser_native_sd_url', 'sd')):
|
||||
@ -629,7 +635,7 @@ def parse_graphql_video(video):
|
||||
if not playable_url:
|
||||
continue
|
||||
if determine_ext(playable_url) == 'mpd':
|
||||
formats.extend(self._extract_mpd_formats(playable_url, video_id))
|
||||
formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@ -638,6 +644,28 @@ def parse_graphql_video(video):
|
||||
'url': playable_url,
|
||||
})
|
||||
extract_dash_manifest(fmt_data, formats)
|
||||
|
||||
# New videoDeliveryResponse formats extraction
|
||||
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
|
||||
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
|
||||
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
|
||||
for idx, dash_manifest in enumerate(dash_manifests):
|
||||
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
|
||||
if not dash_manifests:
|
||||
# Only extract from MPD URLs if the manifests are not already provided
|
||||
for mpd_url in mpd_urls:
|
||||
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
|
||||
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
|
||||
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
# sd, hd formats w/o resolution info should be deprioritized below DASH
|
||||
'quality': q(format_id) - 3,
|
||||
'url': prog_fmt['progressive_url'],
|
||||
})
|
||||
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
|
||||
|
||||
if not formats:
|
||||
# Do not append false positive entry w/o any formats
|
||||
return
|
||||
|
@ -193,9 +193,9 @@ def _real_extract(self, url):
|
||||
|
||||
for lang, version, fmt in self._get_experiences(episode):
|
||||
experience_id = str(fmt['experienceId'])
|
||||
if (only_initial_experience and experience_id != initial_experience_id
|
||||
or requested_languages and lang.lower() not in requested_languages
|
||||
or requested_versions and version.lower() not in requested_versions):
|
||||
if ((only_initial_experience and experience_id != initial_experience_id)
|
||||
or (requested_languages and lang.lower() not in requested_languages)
|
||||
or (requested_versions and version.lower() not in requested_versions)):
|
||||
continue
|
||||
thumbnails.append({'url': fmt.get('poster')})
|
||||
duration = max(duration, fmt.get('duration', 0))
|
||||
|
@ -5,56 +5,63 @@
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
)
|
||||
|
||||
|
||||
class GoPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)'
|
||||
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
|
||||
|
||||
_NETRC_MACHINE = 'goplay'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay',
|
||||
'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
|
||||
'info_dict': {
|
||||
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811',
|
||||
'id': '2baa4560-87a0-421b-bffc-359914e3c387',
|
||||
'ext': 'mp4',
|
||||
'title': 'S3 - Aflevering 2',
|
||||
'series': 'De Container Cup',
|
||||
'season': 'Season 3',
|
||||
'season_number': 3,
|
||||
'episode': 'Episode 2',
|
||||
'episode_number': 2,
|
||||
'title': 'S22 - Aflevering 1',
|
||||
'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
|
||||
'series': 'De Slimste Mens ter Wereld',
|
||||
'episode': 'Episode 1',
|
||||
'season_number': 22,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 22',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay',
|
||||
'url': 'https://www.goplay.be/video/1917',
|
||||
'info_dict': {
|
||||
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf',
|
||||
'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
|
||||
'ext': 'mp4',
|
||||
'title': 'A Family for the Holidays',
|
||||
'title': '1917',
|
||||
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}, {
|
||||
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
|
||||
'info_dict': {
|
||||
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656',
|
||||
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
|
||||
'ext': 'mp4',
|
||||
'title': 'S11 - Aflevering 1',
|
||||
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
|
||||
'episode': 'Episode 1',
|
||||
'series': 'De Mol',
|
||||
'season_number': 11,
|
||||
'episode_number': 1,
|
||||
'season': 'Season 11',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'params': {'skip_download': True},
|
||||
'skip': 'This video is only available for registered users',
|
||||
}]
|
||||
|
||||
@ -69,27 +76,42 @@ def _real_initialize(self):
|
||||
if not self._id_token:
|
||||
raise self.raise_login_required(method='password')
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, display_id = self._match_valid_url(url).group(0, 'display_id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
|
||||
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
|
||||
def _find_json(self, s):
|
||||
return self._search_json(
|
||||
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
|
||||
|
||||
movie = video_data.get('movie')
|
||||
if movie:
|
||||
video_id = movie['videoUuid']
|
||||
info_dict = {
|
||||
'title': movie.get('title'),
|
||||
}
|
||||
else:
|
||||
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False)
|
||||
video_id = episode['videoUuid']
|
||||
info_dict = {
|
||||
'title': episode.get('episodeTitle'),
|
||||
'series': traverse_obj(episode, ('program', 'title')),
|
||||
'season_number': episode.get('seasonNumber'),
|
||||
'episode_number': episode.get('episodeNumber'),
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
nextjs_data = traverse_obj(
|
||||
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
|
||||
(..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
|
||||
meta = traverse_obj(nextjs_data, (
|
||||
..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
|
||||
|
||||
video_id = meta['uuid']
|
||||
info_dict = traverse_obj(meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str.strip}),
|
||||
})
|
||||
|
||||
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
|
||||
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
|
||||
episode_data = traverse_obj(
|
||||
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
|
||||
if not episode_data:
|
||||
continue
|
||||
|
||||
episode_title = traverse_obj(
|
||||
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
|
||||
info_dict.update({
|
||||
'title': episode_title or info_dict.get('title'),
|
||||
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
|
||||
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
|
||||
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
|
||||
})
|
||||
break
|
||||
|
||||
api = self._download_json(
|
||||
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',
|
||||
|
@ -254,7 +254,7 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class InstagramIE(InstagramBaseIE):
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com(?:/(?!share/)[^/?#]+)?/(?:p|tv|reels?(?!/audio/))/(?P<id>[^/?#&]+))'
|
||||
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1']
|
||||
_TESTS = [{
|
||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||
|
160
yt_dlp/extractor/kenh14.py
Normal file
160
yt_dlp/extractor/kenh14.py
Normal file
@ -0,0 +1,160 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_attribute,
|
||||
get_elements_html_by_class,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
unescapeHTML,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class Kenh14VideoIE(InfoExtractor):
    """Extractor for single videos on video.kenh14.vn."""

    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]

    def _real_extract(self, url):
        """Collect the direct file, optional HLS/DASH manifests and metadata."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player element (type="VideoStream") carries the scheme-less
        # file URL in its data-vid attribute
        player_html = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(player_html or '')
        direct_url = attrs['data-vid']

        # Metadata lookup is keyed by the file path without the CDN host prefix
        file_name = remove_start(direct_url, 'kenh14cdn.com/')
        metadata = self._download_json(
            f'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={file_name}',
            video_id, fatal=False)

        formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
        subtitles = {}

        # A JSON document next to the file may list adaptive manifests
        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
        manifest_handlers = (
            ('hls', lambda manifest_url: self._extract_m3u8_formats_and_subtitles(
                manifest_url, video_id, m3u8_id='hls', fatal=False)),
            ('mpd', lambda manifest_url: self._extract_mpd_formats_and_subtitles(
                manifest_url, video_id, mpd_id='dash', fatal=False)),
        )
        for key, extract_manifest in manifest_handlers:
            manifest_url = traverse_obj(video_data, (key, {url_or_none}))
            if not manifest_url:
                continue
            fmts, subs = extract_manifest(manifest_url)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        api_fields = traverse_obj(metadata, {
            'duration': ('duration', {parse_duration}),
            'uploader': ('author', {strip_or_none}),
            'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
            'view_count': ('views', {int_or_none}),
        })
        # Prefer the API title; fall back to the page's OpenGraph data, then its markup
        title = (
            traverse_obj(metadata, ('title', {strip_or_none}))
            or clean_html(self._og_search_title(webpage))
            or clean_html(get_element_by_class('vdbw-title', webpage)))
        description = (
            clean_html(self._og_search_description(webpage))
            or clean_html(get_element_by_class('vdbw-sapo', webpage)))
        thumbnail = self._og_search_thumbnail(webpage) or attrs.get('data-thumb')
        # The keywords meta tag is a ';'-separated list; empty items are dropped
        tags = traverse_obj(self._html_search_meta('keywords', webpage), (
            {lambda x: x.split(';')}, ..., filter))

        return {
            **api_fields,
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'tags': tags,
        }
|
||||
|
||||
|
||||
class Kenh14PlaylistIE(InfoExtractor):
    """Extractor for playlist pages on video.kenh14.vn."""

    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Return a playlist with one Kenh14VideoIE entry per 'video-item' element."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        category_detail = get_element_by_class('category-detail', webpage) or ''
        # First JSON-LD object that has both a name and an alternate name;
        # used as fallback when the page markup lacks title/description
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        def entry_url(item_html):
            # Each item exposes the video ID through its data-id attribute
            item_id = extract_attributes(item_html)['data-id']
            return f'https://video.kenh14.vn/video/video-{item_id}.chn'

        items = get_elements_html_by_class('video-item', webpage)
        playlist_title = (
            clean_html(get_element_by_class('name', category_detail))
            or unescapeHTML(embed_info.get('name')))
        playlist_description = (
            clean_html(get_element_by_class('description', category_detail))
            or unescapeHTML(embed_info.get('alternateName')))
        # The query string is removed from the OpenGraph thumbnail URL
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        return self.playlist_from_matches(
            items, playlist_id, playlist_title,
            getter=entry_url, ie=Kenh14VideoIE,
            playlist_description=playlist_description, thumbnail=thumbnail)
|
@ -310,7 +310,13 @@ def _real_extract(self, url):
|
||||
if stream_type in self._SUPPORTED_STREAM_TYPES:
|
||||
claim_id, is_live = result['claim_id'], False
|
||||
streaming_url = self._call_api_proxy(
|
||||
'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
|
||||
'get', claim_id, {
|
||||
'uri': uri,
|
||||
**traverse_obj(parse_qs(url), {
|
||||
'signature': ('signature', 0),
|
||||
'signature_ts': ('signature_ts', 0),
|
||||
}),
|
||||
}, 'streaming url')['streaming_url']
|
||||
|
||||
# GET request to v3 API returns original video/audio file if available
|
||||
direct_url = re.sub(r'/api/v\d+/', '/api/v3/', streaming_url)
|
||||
|
@ -1,30 +1,32 @@
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
unsmuggle_url,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class LiTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)'
|
||||
|
||||
_URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P<id>[\w-]+)'
|
||||
_URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s'
|
||||
_GEO_COUNTRIES = ['TW']
|
||||
_TESTS = [{
|
||||
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00041610',
|
||||
'info_dict': {
|
||||
'id': 'VOD00041606',
|
||||
'title': '花千骨',
|
||||
},
|
||||
'playlist_count': 51, # 50 episodes + 1 trailer
|
||||
}, {
|
||||
'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00041610',
|
||||
'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a',
|
||||
'info_dict': {
|
||||
'id': 'VOD00041610',
|
||||
@ -32,16 +34,15 @@ class LiTVIE(InfoExtractor):
|
||||
'title': '花千骨第1集',
|
||||
'thumbnail': r're:https?://.*\.jpg$',
|
||||
'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。',
|
||||
'categories': ['奇幻', '愛情', '中國', '仙俠'],
|
||||
'categories': ['奇幻', '愛情', '仙俠', '古裝'],
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
'skip': 'Georestricted to Taiwan',
|
||||
}, {
|
||||
'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&',
|
||||
'url': 'https://www.litv.tv/drama/watch/VOD00044841',
|
||||
'md5': '88322ea132f848d6e3e18b32a832b918',
|
||||
'info_dict': {
|
||||
'id': 'VOD00044841',
|
||||
@ -55,94 +56,62 @@ class LiTVIE(InfoExtractor):
|
||||
def _extract_playlist(self, playlist_data, content_type):
|
||||
all_episodes = [
|
||||
self.url_result(smuggle_url(
|
||||
self._URL_TEMPLATE % (content_type, episode['contentId']),
|
||||
self._URL_TEMPLATE % (content_type, episode['content_id']),
|
||||
{'force_noplaylist': True})) # To prevent infinite recursion
|
||||
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))]
|
||||
for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))]
|
||||
|
||||
return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title'))
|
||||
return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title'))
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
|
||||
|
||||
if self._search_regex(
|
||||
r'(?i)<meta\s[^>]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"',
|
||||
webpage, 'meta refresh redirect', default=False, group=0):
|
||||
raise ExtractorError('No such content found', expected=True)
|
||||
program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {}
|
||||
playlist_data = traverse_obj(vod_data, ('seriesTree'))
|
||||
if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data):
|
||||
return self._extract_playlist(playlist_data, program_info.get('content_type'))
|
||||
|
||||
program_info = self._parse_json(self._search_regex(
|
||||
r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'),
|
||||
video_id)
|
||||
asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str}))
|
||||
if asset_id: # This is a VOD
|
||||
media_type = 'vod'
|
||||
else: # This is a live stream
|
||||
asset_id = program_info['content_id']
|
||||
media_type = program_info['content_type']
|
||||
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||
if puid:
|
||||
endpoint = 'get-urls'
|
||||
else:
|
||||
puid = str(uuid.uuid4())
|
||||
endpoint = 'get-urls-no-auth'
|
||||
video_data = self._download_json(
|
||||
f'https://www.litv.tv/api/{endpoint}', video_id,
|
||||
data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(),
|
||||
headers={'Content-Type': 'application/json'})
|
||||
|
||||
# In browsers `getProgramInfo` request is always issued. Usually this
|
||||
# endpoint gives the same result as the data embedded in the webpage.
|
||||
# If, for some reason, there are no embedded data, we do an extra request.
|
||||
if 'assetId' not in program_info:
|
||||
program_info = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getProgramInfo', video_id,
|
||||
query={'contentId': video_id},
|
||||
headers={'Accept': 'application/json'})
|
||||
|
||||
series_id = program_info['seriesId']
|
||||
if self._yes_playlist(series_id, video_id, smuggled_data):
|
||||
playlist_data = self._download_json(
|
||||
'https://www.litv.tv/vod/ajax/getSeriesTree', video_id,
|
||||
query={'seriesId': series_id}, headers={'Accept': 'application/json'})
|
||||
return self._extract_playlist(playlist_data, program_info['contentType'])
|
||||
|
||||
video_data = self._parse_json(self._search_regex(
|
||||
r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);',
|
||||
webpage, 'video data', default='{}'), video_id)
|
||||
if not video_data:
|
||||
payload = {'assetId': program_info['assetId']}
|
||||
puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value)
|
||||
if puid:
|
||||
payload.update({
|
||||
'type': 'auth',
|
||||
'puid': puid,
|
||||
})
|
||||
endpoint = 'getUrl'
|
||||
else:
|
||||
payload.update({
|
||||
'watchDevices': program_info['watchDevices'],
|
||||
'contentType': program_info['contentType'],
|
||||
})
|
||||
endpoint = 'getMainUrlNoAuth'
|
||||
video_data = self._download_json(
|
||||
f'https://www.litv.tv/vod/ajax/{endpoint}', video_id,
|
||||
data=json.dumps(payload).encode(),
|
||||
headers={'Content-Type': 'application/json'})
|
||||
|
||||
if not video_data.get('fullpath'):
|
||||
error_msg = video_data.get('errorMessage')
|
||||
if error_msg == 'vod.error.outsideregionerror':
|
||||
if error := traverse_obj(video_data, ('error', {dict})):
|
||||
error_msg = traverse_obj(error, ('message', {str}))
|
||||
if error_msg and 'OutsideRegionError' in error_msg:
|
||||
self.raise_geo_restricted('This video is available in Taiwan only')
|
||||
if error_msg:
|
||||
elif error_msg:
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True)
|
||||
raise ExtractorError(f'Unexpected result from {self.IE_NAME}')
|
||||
raise ExtractorError(f'Unexpected error from {self.IE_NAME}')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_data['fullpath'], video_id, ext='mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls')
|
||||
video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls')
|
||||
for a_format in formats:
|
||||
# LiTV HLS segments doesn't like compressions
|
||||
a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity'
|
||||
|
||||
title = program_info['title'] + program_info.get('secondaryMark', '')
|
||||
description = program_info.get('description')
|
||||
thumbnail = program_info.get('imageFile')
|
||||
categories = [item['name'] for item in program_info.get('category', [])]
|
||||
episode = int_or_none(program_info.get('episode'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'categories': categories,
|
||||
'episode_number': episode,
|
||||
'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info),
|
||||
**traverse_obj(program_info, {
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}),
|
||||
'categories': ('genres', ..., 'name', {str}),
|
||||
'episode_number': ('episode', {int_or_none}),
|
||||
}),
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ class MicrosoftEmbedIE(InfoExtractor):
|
||||
'timestamp': 1631658316,
|
||||
'upload_date': '20210914',
|
||||
},
|
||||
'expected_warnings': ['Failed to parse XML: syntax error: line 1, column 0'],
|
||||
}]
|
||||
_API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/'
|
||||
|
||||
@ -36,11 +37,11 @@ def _real_extract(self, url):
|
||||
formats = []
|
||||
for source_type, source in metadata['streams'].items():
|
||||
if source_type == 'smooth_Streaming':
|
||||
formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss'))
|
||||
formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss', fatal=False))
|
||||
elif source_type == 'apple_HTTP_Live_Streaming':
|
||||
formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4'))
|
||||
formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4', fatal=False))
|
||||
elif source_type == 'mPEG_DASH':
|
||||
formats.extend(self._extract_mpd_formats(source['url'], video_id))
|
||||
formats.extend(self._extract_mpd_formats(source['url'], video_id, fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': source_type,
|
||||
|
@ -1,291 +0,0 @@
|
||||
import functools
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
determine_ext,
|
||||
dict_get,
|
||||
float_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
# Common base for the Mildom extractors; provides the JSON API helper.
class MildomBaseIE(InfoExtractor):
    # Guest identifier sent with every API request; created on first use.
    _GUEST_ID = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
        # Lazily generate a guest id of the form "pc-gp-<uuid4>".
        if not self._GUEST_ID:
            self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'

        # A truthy body is sent as a JSON payload; otherwise no data and no
        # extra headers are passed.
        if body:
            payload = json.dumps(body).encode()
            request_headers = {'Content-Type': 'application/json'}
        else:
            payload = None
            request_headers = {}

        # Caller-supplied query parameters override the defaults on key clash.
        full_query = {
            '__guest_id': self._GUEST_ID,
            '__platform': 'web',
            **(query or {}),
        }

        response = self._download_json(
            url, video_id, note=note, data=payload,
            headers=request_headers, query=full_query)

        # A zero code is the only success case; anything else is reported
        # as an expected error carrying the API's message.
        if response['code'] == 0:
            return response['body']
        raise ExtractorError(
            f'Mildom says: {response["message"]} (code {response["code"]})',
            expected=True)
|
||||
|
||||
|
||||
# Extractor for an ongoing Mildom live stream, addressed by numeric user id.
class MildomIE(MildomBaseIE):
    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        user_id = self._match_id(url)
        page = self._download_webpage(f'https://www.mildom.com/{user_id}', user_id)

        studio = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', user_id,
            note='Downloading live metadata', query={'user_id': user_id})
        # Use the API's log_id as the result id, falling back to the user id.
        live_id = studio.get('log_id', user_id)

        server_info = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', live_id,
            note='Downloading live server list', query={
                'user_id': user_id,
                'live_server_type': 'hls',
            })

        token_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/token', live_id,
            note='Obtaining live playback token', body={'host_id': user_id, 'type': 'hls'})
        # Take the first token found among the entries under "data".
        token = traverse_obj(token_reply, ('data', ..., 'token'), get_all=False)
        if not token:
            raise ExtractorError('Failed to obtain live playback token')

        master_url = f'{server_info["stream_server"]}/{user_id}_master.m3u8?{token}'
        formats = self._extract_m3u8_formats(
            master_url, live_id, 'mp4', headers={
                'Referer': 'https://www.mildom.com/',
                'Origin': 'https://www.mildom.com',
            })

        # Every format also carries the Referer header for the download itself.
        for stream in formats:
            stream.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'

        # Page meta tags are preferred; API fields are the fallback.
        title = (
            self._html_search_meta('twitter:description', page, default=None)
            or traverse_obj(studio, 'anchor_intro'))
        uploader = (
            self._html_search_meta('twitter:title', page, default=None)
            or traverse_obj(studio, 'loginname'))

        return {
            'id': live_id,
            'title': title,
            'description': traverse_obj(studio, 'intro', 'live_intro', expected_type=str),
            'timestamp': float_or_none(studio.get('live_start_ms'), scale=1000),
            'uploader': uploader,
            'uploader_id': user_id,
            'formats': formats,
            'is_live': True,
        }
|
||||
|
||||
|
||||
# Extractor for a single Mildom VOD (playback) page.
class MildomVodIE(MildomBaseIE):
    IE_NAME = 'mildom:vod'
    IE_DESC = 'VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
    _TESTS = [{
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
        'info_dict': {
            'id': '10882672-1597662269',
            'ext': 'mp4',
            'title': '始めてのミルダム配信じゃぃ!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'upload_date': '20200817',
            'duration': 4138.37,
            'description': 'ゲームをしたくて!',
            'timestamp': 1597662269.0,
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
        'info_dict': {
            'id': '10882672-1597758589870-477',
            'ext': 'mp4',
            'title': '【kson】感染メイズ!麻酔銃で無双する',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'timestamp': 1597759093.0,
            'uploader': 'kson組長(けいそん)',
            'duration': 4302.58,
            'uploader_id': '10882672',
            'description': 'このステージ絶対乗り越えたい',
            'upload_date': '20200818',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
        'info_dict': {
            'id': '10882672-buha9td2lrn97fk2jme0',
            'ext': 'mp4',
            'title': '【kson組長】CART RACER!!!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
            'upload_date': '20201104',
            'timestamp': 1604494797.0,
            'duration': 4657.25,
            'description': 'WTF',
        },
    }]

    def _real_extract(self, url):
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        page = self._download_webpage(
            f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)

        playback = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })['playback']

        # The audio-only rendition comes first, followed by one entry per
        # video link reported by the API.
        audio_only = {
            'url': playback['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a',
        }
        video_renditions = [{
            'format_id': 'video-{}'.format(link['name']),
            'url': link['url'],
            'protocol': 'm3u8_native',
            # Width is derived from the rendition's level (used as the height)
            # scaled by the source width/height ratio.
            'width': link['level'] * playback['video_width'] // playback['video_height'],
            'height': link['level'],
            'vcodec': 'h264',
            'acodec': 'aac',
            'ext': 'mp4',
        } for link in playback['video_link']]
        formats = [audio_only, *video_renditions]

        # The page's meta description is preferred over the API title.
        title = (
            self._html_search_meta(('og:description', 'description'), page, default=None)
            or playback.get('title'))

        return {
            'id': video_id,
            'title': title,
            'description': traverse_obj(playback, 'video_intro'),
            'timestamp': float_or_none(playback.get('publish_time'), scale=1000),
            'duration': float_or_none(playback.get('video_length'), scale=1000),
            'thumbnail': dict_get(playback, ('upload_pic', 'video_pic')),
            'uploader': traverse_obj(playback, ('author_info', 'login_name')),
            'uploader_id': user_id,
            'formats': formats,
        }
|
||||
|
||||
|
||||
# Extractor for a single Mildom clip.
class MildomClipIE(MildomBaseIE):
    IE_NAME = 'mildom:clip'
    IE_DESC = 'Clip in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
        'info_dict': {
            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
            'title': '全然違ったよ',
            'timestamp': 1619181890,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': 'ざきんぽ',
            'uploader_id': '10042245',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
        'info_dict': {
            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
            'title': 'かっこいい',
            'timestamp': 1621094003,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': '(ルーキー',
            'uploader_id': '10111524',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
        'info_dict': {
            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
            'title': 'あ',
            'timestamp': 1614769431,
            'duration': 31,
            'thumbnail': r're:https?://.+',
            'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
            'uploader_id': '10660174',
        },
    }]

    def _real_extract(self, url):
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        page = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)

        detail = self._call_api(
            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
            note='Downloading playback metadata', query={
                'clip_id': video_id,
            })

        # The page's meta description is preferred over the API title.
        title = (
            self._html_search_meta(('og:description', 'description'), page, default=None)
            or detail.get('title'))

        return {
            'id': video_id,
            'title': title,
            'timestamp': float_or_none(detail.get('create_time')),
            'duration': float_or_none(detail.get('length')),
            'thumbnail': detail.get('cover'),
            'uploader': traverse_obj(detail, ('user_info', 'loginname')),
            'uploader_id': user_id,
            # The direct media URL is mandatory; the extension is guessed from
            # it, with mp4 as the fallback.
            'url': detail['url'],
            'ext': determine_ext(detail.get('url'), 'mp4'),
        }
|
||||
|
||||
|
||||
# Playlist extractor that lists every VOD on a Mildom user's profile.
class MildomUserVodIE(MildomBaseIE):
    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 732,
    }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
        'playlist_mincount': 201,
    }]

    def _fetch_page(self, user_id, page):
        # The incoming page index is incremented by one before it is sent
        # to the API and shown in the progress note.
        page_number = page + 1
        entries = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
            user_id, note=f'Downloading page {page_number}', query={
                'user_id': user_id,
                'page': page_number,
                'limit': '30',
            })
        # An empty reply yields nothing; entries without a v_id are skipped.
        for entry in entries or ():
            v_id = entry.get('v_id')
            if v_id:
                yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')

    def _real_extract(self, url):
        user_id = self._match_id(url)
        self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')

        user_info = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')['user_info']

        # Pages are fetched on demand, 30 entries at a time.
        entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30)
        return self.playlist_result(
            entries, user_id, f'Uploads from {user_info["loginname"]}')
|
@ -80,9 +80,9 @@ class MiTeleIE(TelecincoBaseIE):
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
pre_player = self._parse_json(self._search_regex(
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
||||
webpage, 'Pre Player'), display_id)['prePlayer']
|
||||
pre_player = self._search_json(
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
||||
webpage, 'Pre Player', display_id)['prePlayer']
|
||||
title = pre_player['title']
|
||||
video_info = self._parse_content(pre_player['video'], url)
|
||||
content = pre_player.get('content') or {}
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
class MixchIE(InfoExtractor):
|
||||
IE_NAME = 'mixch'
|
||||
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://mixch\.tv/u/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mixch.tv/u/16943797/live',
|
||||
@ -74,7 +74,7 @@ def _get_comments(self, video_id):
|
||||
|
||||
class MixchArchiveIE(InfoExtractor):
|
||||
IE_NAME = 'mixch:archive'
|
||||
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://mixch\.tv/archive/(?P<id>\d+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://mixch.tv/archive/421',
|
||||
@ -116,3 +116,56 @@ def _real_extract(self, url):
|
||||
'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
|
||||
'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
|
||||
}
|
||||
|
||||
|
||||
# Extractor for Mixch "movie" pages (https://mixch.tv/m/<id>).
class MixchMovieIE(InfoExtractor):
    IE_NAME = 'mixch:movie'
    _VALID_URL = r'https?://mixch\.tv/m/(?P<id>\w+)'

    _TESTS = [{
        'url': 'https://mixch.tv/m/Ve8KNkJ5',
        'info_dict': {
            'id': 'Ve8KNkJ5',
            'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ',
            'ext': 'mp4',
            'uploader': 'ミス東大No.5 松藤百香🍑💫',
            'uploader_id': '12299174',
            'channel_follower_count': int,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'timestamp': 1724070828,
            'uploader_url': 'https://mixch.tv/u/12299174',
            'live_status': 'not_live',
            'upload_date': '20240819',
        },
    }, {
        'url': 'https://mixch.tv/m/61DzpIKE',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        movie_data = self._download_json(
            f'https://mixch.tv/api-web/movies/{video_id}', video_id)

        # The media file URL is mandatory; it is exposed as a single mp4 format.
        formats = [{
            'format_id': 'mp4',
            'url': movie_data['movie']['file'],
            'ext': 'mp4',
        }]

        # Optional metadata from the "movie" and "ownerInfo" sections.
        metadata = traverse_obj(movie_data, {
            'title': ('movie', 'title', {str}),
            'thumbnail': ('movie', 'thumbnailURL', {url_or_none}),
            'uploader': ('ownerInfo', 'name', {str}),
            'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}),
            'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}),
            'view_count': ('ownerInfo', 'view', {int_or_none}),
            'like_count': ('movie', 'favCount', {int_or_none}),
            'comment_count': ('movie', 'commentCount', {int_or_none}),
            'timestamp': ('movie', 'published', {int_or_none}),
            'uploader_url': ('ownerInfo', 'id', {lambda owner_id: owner_id and f'https://mixch.tv/u/{owner_id}'}, filter),
        })

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
            'live_status': 'not_live',
        }
|
||||
|
117
yt_dlp/extractor/nest.py
Normal file
117
yt_dlp/extractor/nest.py
Normal file
@ -0,0 +1,117 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, float_or_none, update_url_query, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
# Extractor for publicly shared Nest camera live streams (direct or embedded).
class NestIE(InfoExtractor):
    _VALID_URL = r'https?://video\.nest\.com/(?:embedded/)?live/(?P<id>\w+)'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://video.nest.com/embedded/live/4fvYdSo8AX?autoplay=0',
        'info_dict': {
            'id': '4fvYdSo8AX',
            'ext': 'mp4',
            'title': 'startswith:Outside ',
            'alt_title': 'Outside',
            'description': '<null>',
            'location': 'Los Angeles',
            'availability': 'public',
            'thumbnail': r're:https?://',
            'live_status': 'is_live',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://video.nest.com/live/4fvYdSo8AX',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.pacificblue.biz/noyo-harbor-webcam/',
        'info_dict': {
            'id': '4fvYdSo8AX',
            'ext': 'mp4',
            'title': 'startswith:Outside ',
            'alt_title': 'Outside',
            'description': '<null>',
            'location': 'Los Angeles',
            'availability': 'public',
            'thumbnail': r're:https?://',
            'live_status': 'is_live',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # The public token from the URL identifies the camera; only the first
        # returned item is used.
        camera = self._download_json(
            'https://video.nest.com/api/dropcam/cameras.get_by_public_token',
            video_id, query={'token': video_id})['items'][0]

        camera_uuid = camera.get('uuid')
        stream_host = camera.get('live_stream_host')
        # Both values are needed to build the playlist URL.
        if not (stream_host and camera_uuid):
            raise ExtractorError('Unable to construct playlist URL')

        metadata = traverse_obj(camera, {
            'description': ('description', {str}),
            'title': (('title', 'name', 'where'), {str}, filter, any),
            'alt_title': ('name', {str}),
            # Prefer the second component of the timezone (underscores turned
            # into spaces); fall back to the "where" field.
            'location': ((('timezone', {lambda tz: tz.split('/')[1].replace('_', ' ')}), 'where'), {str}, filter, any),
        })

        # A thumbnail is only available when the image host is known.
        thumb_host = camera.get('nexus_api_nest_domain_host')
        if thumb_host:
            thumbnail = update_url_query(
                f'https://{thumb_host}/get_image',
                {'uuid': camera_uuid, 'public': video_id})
        else:
            thumbnail = None

        return {
            'id': video_id,
            **metadata,
            'thumbnail': thumbnail,
            'availability': self._availability(is_private=camera.get('is_public') is False),
            'formats': self._extract_m3u8_formats(
                f'https://{stream_host}/nexus_aac/{camera_uuid}/playlist.m3u8',
                video_id, 'mp4', live=True, query={'public': video_id}),
            'is_live': True,
        }
|
||||
|
||||
|
||||
# Extractor for shared Nest camera clips (direct or embedded).
class NestClipIE(InfoExtractor):
    _VALID_URL = r'https?://video\.nest\.com/(?:embedded/)?clip/(?P<id>\w+)'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://video.nest.com/clip/f34c9dd237a44eca9a0001af685e3dff',
        'info_dict': {
            'id': 'f34c9dd237a44eca9a0001af685e3dff',
            'ext': 'mp4',
            'title': 'NestClip video #f34c9dd237a44eca9a0001af685e3dff',
            'thumbnail': 'https://clips.dropcam.com/f34c9dd237a44eca9a0001af685e3dff.jpg',
            'timestamp': 1735413474.468,
            'upload_date': '20241228',
        },
    }, {
        'url': 'https://video.nest.com/embedded/clip/34e0432adc3c46a98529443d8ad5aa76',
        'info_dict': {
            'id': '34e0432adc3c46a98529443d8ad5aa76',
            'ext': 'mp4',
            'title': 'Shootout at Veterans Boulevard at Fleur De Lis Drive',
            'thumbnail': 'https://clips.dropcam.com/34e0432adc3c46a98529443d8ad5aa76.jpg',
            'upload_date': '20230817',
            'timestamp': 1692262897.191,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # The clip is looked up by its file name, "<id>.mp4".
        response = self._download_json(
            'https://video.nest.com/api/dropcam/videos.get_by_filename', video_id,
            query={'filename': f'{video_id}.mp4'})

        # All fields are read from the first entry of "items".
        clip_fields = traverse_obj(response, ('items', 0, {
            'title': ('title', {str}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
            'url': ('download_url', {url_or_none}),
            'timestamp': ('start_time', {float_or_none}),
        }))

        return {
            'id': video_id,
            **clip_fields,
        }
|
@ -12,6 +12,7 @@
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
try_get,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
@ -171,6 +172,8 @@ def call_playback_api(item, query=None):
|
||||
format_url = url_or_none(asset.get('url'))
|
||||
if not format_url:
|
||||
continue
|
||||
# Remove the 'adap' query parameter
|
||||
format_url = update_url_query(format_url, {'adap': []})
|
||||
asset_format = (asset.get('format') or '').lower()
|
||||
if asset_format == 'hls' or determine_ext(format_url) == 'm3u8':
|
||||
formats.extend(self._extract_nrk_formats(format_url, video_id))
|
||||
|
@ -343,7 +343,7 @@ def _real_extract(self, url):
|
||||
if media_ids:
|
||||
media_ids.append(lead_video_id)
|
||||
return self.playlist_result(
|
||||
[self._extract_video(media_id) for media_id in media_ids], page_id, title, description)
|
||||
map(self._extract_video, media_ids), page_id, title, description)
|
||||
|
||||
return {
|
||||
**self._extract_video(lead_video_id),
|
||||
|
@ -16,10 +16,10 @@
|
||||
parse_iso8601,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj, value
|
||||
|
||||
|
||||
class PatreonBaseIE(InfoExtractor):
|
||||
@ -252,6 +252,27 @@ class PatreonIE(PatreonBaseIE):
|
||||
'thumbnail': r're:^https?://.+',
|
||||
},
|
||||
'skip': 'Patron-only content',
|
||||
}, {
|
||||
# Contains a comment reply in the 'included' section
|
||||
'url': 'https://www.patreon.com/posts/114721679',
|
||||
'info_dict': {
|
||||
'id': '114721679',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20241025',
|
||||
'uploader': 'Japanalysis',
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.+',
|
||||
'comment_count': int,
|
||||
'title': 'Karasawa Part 2',
|
||||
'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
|
||||
'uploader_url': 'https://www.patreon.com/japanalysis',
|
||||
'uploader_id': '80504268',
|
||||
'channel_url': 'https://www.patreon.com/japanalysis',
|
||||
'channel_follower_count': int,
|
||||
'timestamp': 1729897015,
|
||||
'channel_id': '9346307',
|
||||
},
|
||||
'params': {'getcomments': True},
|
||||
}]
|
||||
_RETURN_TYPE = 'video'
|
||||
|
||||
@ -404,26 +425,24 @@ def _get_comments(self, post_id):
|
||||
f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
|
||||
|
||||
cursor = None
|
||||
for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)):
|
||||
for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])):
|
||||
count += 1
|
||||
comment_id = comment.get('id')
|
||||
attributes = comment.get('attributes') or {}
|
||||
if comment_id is None:
|
||||
continue
|
||||
author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
|
||||
author_info = traverse_obj(
|
||||
response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
|
||||
get_all=False, expected_type=dict, default={})
|
||||
|
||||
yield {
|
||||
'id': comment_id,
|
||||
'text': attributes.get('body'),
|
||||
'timestamp': parse_iso8601(attributes.get('created')),
|
||||
'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'),
|
||||
'author_is_uploader': attributes.get('is_by_creator'),
|
||||
**traverse_obj(comment, {
|
||||
'id': ('id', {str_or_none}),
|
||||
'text': ('attributes', 'body', {str}),
|
||||
'timestamp': ('attributes', 'created', {parse_iso8601}),
|
||||
'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any),
|
||||
'author_is_uploader': ('attributes', 'is_by_creator', {bool}),
|
||||
}),
|
||||
**traverse_obj(response, (
|
||||
'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', {
|
||||
'author': ('full_name', {str}),
|
||||
'author_thumbnail': ('image_url', {url_or_none}),
|
||||
}), get_all=False),
|
||||
'author_id': author_id,
|
||||
'author': author_info.get('full_name'),
|
||||
'author_thumbnail': author_info.get('image_url'),
|
||||
}
|
||||
|
||||
if count < traverse_obj(response, ('meta', 'count')):
|
||||
@ -438,7 +457,7 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?patreon\.com/(?:
|
||||
(?:m|api/campaigns)/(?P<campaign_id>\d+)|
|
||||
(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
|
||||
(?:c/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
|
||||
)(?:/posts)?/?(?:$|[?#])'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.patreon.com/dissonancepod/',
|
||||
@ -490,6 +509,26 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||
'thumbnail': r're:^https?://.*$',
|
||||
},
|
||||
'playlist_mincount': 201,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/c/OgSog',
|
||||
'info_dict': {
|
||||
'id': '8504388',
|
||||
'title': 'OGSoG',
|
||||
'description': r're:(?s)Hello and welcome to our Patreon page. We are Mari, Lasercorn, .+',
|
||||
'channel': 'OGSoG',
|
||||
'channel_id': '8504388',
|
||||
'channel_url': 'https://www.patreon.com/OgSog',
|
||||
'uploader_url': 'https://www.patreon.com/OgSog',
|
||||
'uploader_id': '72323575',
|
||||
'uploader': 'David Moss',
|
||||
'thumbnail': r're:https?://.+/.+',
|
||||
'channel_follower_count': int,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'playlist_mincount': 331,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/c/OgSog/posts',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/dissonancepod/posts',
|
||||
'only_matching': True,
|
||||
|
122
yt_dlp/extractor/pialive.py
Normal file
122
yt_dlp/extractor/pialive.py
Normal file
@ -0,0 +1,122 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_class,
|
||||
multipart_encode,
|
||||
str_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PiaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)'
|
||||
_PLAYER_ROOT_URL = 'https://player.pia-live.jp/'
|
||||
_PIA_LIVE_API_URL = 'https://api.pia-live.jp'
|
||||
_API_KEY = 'kfds)FKFps-dms9e'
|
||||
_TESTS = [{
|
||||
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76',
|
||||
'info_dict': {
|
||||
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
|
||||
'display_id': '2431867_001',
|
||||
'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
|
||||
'live_status': 'was_live',
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True,
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'skip': 'The video is no longer available',
|
||||
}, {
|
||||
'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ',
|
||||
'info_dict': {
|
||||
'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93',
|
||||
'display_id': '2431867_002',
|
||||
'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)',
|
||||
'live_status': 'was_live',
|
||||
'comment_count': int,
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True,
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
'skip': 'The video is no longer available',
|
||||
}]
|
||||
|
||||
def _extract_var(self, variable, html):
    """Return the quoted string assigned to the JS variable `variable` in `html`.

    Looks for a `var`/`const`/`let` declaration of `variable` whose value is a
    non-empty single- or double-quoted string and returns the text between
    the quotes.
    """
    # Group 1 captures the opening quote so the value ends at the matching closing quote
    declaration_re = (
        rf'(?:var|const|let)\s+{variable}\s*='
        r'\s*(["\'])(?P<value>(?:(?!\1).)+)\1')
    description = f'variable {variable}'
    return self._search_regex(declaration_re, html, description, group='value')
|
||||
|
||||
def _real_extract(self, url):
    """Extract a PIA LIVE STREAM player page.

    Three outcomes are possible:
    - the page carries a `play-end` element: an expected ExtractorError is raised;
    - the page shows a parseable scheduled start: an `is_upcoming` entry with
      a `release_timestamp` (and no formats) is returned;
    - otherwise the embedded player URL is fetched from the API and returned
      as a url_transparent result with comment extraction attached.
    """
    video_key = self._match_id(url)
    webpage = self._download_webpage(url, video_key)

    program_code = self._extract_var('programCode', webpage)
    article_code = self._extract_var('articleCode', webpage)
    title = self._html_extract_title(webpage)

    if get_element_html_by_class('play-end', webpage):
        raise ExtractorError('The video is no longer available', expected=True, video_id=program_code)

    if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)):
        # With fatal=False a failed match yields None instead of a tuple;
        # fall back to a pair so the unpacking below cannot raise TypeError
        date, time = self._search_regex(
            r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P<time>\d{2}:\d{2})',
            start_info, 'start_info', fatal=False, group=('date', 'time')) or (None, None)
        if date and time:
            # The page shows no UTC offset; a fixed +09:00 is appended before parsing
            release_timestamp_str = f'{date} {time} +09:00'
            release_timestamp = unified_timestamp(release_timestamp_str)
            self.raise_no_formats(f'The video will be available after {release_timestamp_str}', expected=True)
            return {
                'id': program_code,
                'title': title,
                'live_status': 'is_upcoming',
                'release_timestamp': release_timestamp,
            }

    # The same multipart body and headers are reused for the chat API request
    payload, content_type = multipart_encode({
        'play_url': video_key,
        'api_key': self._API_KEY,
    })
    api_data_and_headers = {
        'data': payload,
        'headers': {'Content-Type': content_type, 'Referer': self._PLAYER_ROOT_URL},
    }

    player_tag_list = self._download_json(
        f'{self._PIA_LIVE_API_URL}/perf/player-tag-list/{program_code}', program_code,
        'Fetching player tag list', 'Unable to fetch player tag list', **api_data_and_headers)

    # `movie_one_tag` is an HTML tag string; its `src` attribute is the player URL
    return self.url_result(
        extract_attributes(player_tag_list['data']['movie_one_tag'])['src'],
        url_transparent=True, title=title, display_id=program_code,
        __post_extractor=self.extract_comments(program_code, article_code, api_data_and_headers))
|
||||
|
||||
def _get_comments(self, program_code, article_code, api_data_and_headers):
    """Yield comment dicts parsed from the program's chat room page.

    Every request is non-fatal: if the chat info, the chat room URL or the
    comment page cannot be obtained, nothing is yielded.
    """
    chat_info = self._download_json(
        f'{self._PIA_LIVE_API_URL}/perf/chat-tag-list/{program_code}/{article_code}', program_code,
        'Fetching chat info', 'Unable to fetch chat info', fatal=False, **api_data_and_headers)
    # `chat_one_tag` is an HTML tag string; its `src` attribute is the chat room URL
    chat_room_url = traverse_obj(
        chat_info, ('data', 'chat_one_tag', {extract_attributes}, 'src', {url_or_none}))

    comment_page = chat_room_url and self._download_webpage(
        chat_room_url, program_code, 'Fetching comment page', 'Unable to fetch comment page',
        fatal=False, headers={'Referer': self._PLAYER_ROOT_URL})
    if not comment_page:
        return

    # The page embeds the chat log as a JS array assigned to `_history`;
    # each record is read positionally
    history = self._search_json(
        r'var\s+_history\s*=', comment_page, 'comment list',
        program_code, contains_pattern=r'\[(?s:.+)\]', fatal=False)
    comment_fields = {
        'timestamp': (0, {int}),
        'author_is_uploader': (1, {lambda x: x == 2}),
        'author': (2, {str}),
        'text': (3, {str}),
        'id': (4, {str_or_none}),
    }
    yield from traverse_obj(history, (..., comment_fields))
|
@ -1,70 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class PIAULIZAPortalIE(InfoExtractor):
|
||||
IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
|
||||
_VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
|
||||
'info_dict': {
|
||||
'id': '005f18b7-e810-5618-cb82-0987c5755d44',
|
||||
'title': 'プレゼンテーションプレイヤーのサンプル',
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
|
||||
'info_dict': {
|
||||
'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
|
||||
'title': '【確認用】視聴サンプルページ(ULIZA)',
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract an ulizaportal.jp page: title, HLS formats and live status."""
    video_id = self._match_id(url)

    # Reject links whose `expires` query parameter is already in the past
    expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
    if expires and expires <= time_seconds():
        raise ExtractorError('The link is expired.', video_id=video_id, expected=True)

    webpage = self._download_webpage(url, video_id)

    player_data_url = self._search_regex(
        r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
        webpage, 'player data url')
    player_data = self._download_webpage(
        player_data_url, video_id, headers={'Referer': 'https://ulizaportal.jp/'},
        note='Fetching player data', errnote='Unable to fetch player data')

    m3u8_url = self._search_regex(
        r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data,
        'm3u8 url', default=None)
    formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False)

    # The path segment of the first format URL tells live, DVR and other streams apart
    first_format_url = traverse_obj(formats, (0, 'url'))
    m3u8_type = self._search_regex(
        r'/hls/(dvr|video)/', first_format_url, 'm3u8 type', default=None)

    if m3u8_type == 'video':
        live_status = 'is_live'
    elif m3u8_type == 'dvr':
        live_status = 'was_live'  # short-term archives
    else:
        live_status = 'not_live'  # VOD or long-term archives

    return {
        'id': video_id,
        'title': self._html_extract_title(webpage),
        'formats': formats,
        'live_status': live_status,
    }
|
99
yt_dlp/extractor/piramidetv.py
Normal file
99
yt_dlp/extractor/piramidetv.py
Normal file
@ -0,0 +1,99 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_iso8601, smuggle_url, unsmuggle_url, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PiramideTVIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://piramide\.tv/video/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://piramide.tv/video/wWtBAORdJUTh',
|
||||
'info_dict': {
|
||||
'id': 'wWtBAORdJUTh',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:79f9c8183ea6a35c836923142cf0abcc',
|
||||
'description': '',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/W86PgQDn/thumbnails/B9gpIxkH.jpg',
|
||||
'channel': 'León Picarón',
|
||||
'channel_id': 'leonpicaron',
|
||||
'timestamp': 1696460362,
|
||||
'upload_date': '20231004',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://piramide.tv/video/wcYn6li79NgN',
|
||||
'info_dict': {
|
||||
'id': 'wcYn6li79NgN',
|
||||
'ext': 'mp4',
|
||||
'title': 'ACEPTO TENER UN BEBE CON MI NOVIA\u2026? | Parte 1',
|
||||
'description': '',
|
||||
'channel': 'ARTA GAME',
|
||||
'channel_id': 'arta_game',
|
||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/cnEdGp5X/thumbnails/rHAaWfP7.jpg',
|
||||
'timestamp': 1703434976,
|
||||
'upload_date': '20231224',
|
||||
},
|
||||
}]
|
||||
|
||||
def _extract_video(self, video_id):
    """Return `(next_video_id, info_dict)` for a single video.

    Both the metadata request and the manifest request are non-fatal. The
    first element is the `id` of the API's `next_video` entry (or None).
    """
    metadata_url = f'https://hermes.piramide.tv/video/data/{video_id}'
    manifest_url = f'https://cdn.piramide.tv/video/{video_id}/manifest.m3u8'

    video_data = self._download_json(metadata_url, video_id, fatal=False)
    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        manifest_url, video_id, fatal=False)

    next_video = traverse_obj(video_data, ('video', 'next_video', 'id', {str}))
    api_fields = traverse_obj(video_data, ('video', {
        'id': ('id', {str}),
        'title': ('title', {str}),
        'description': ('description', {str}),
        'thumbnail': ('media', 'thumbnail', {url_or_none}),
        'channel': ('channel', 'name', {str}),
        'channel_id': ('channel', 'id', {str}),
        'timestamp': ('date', {parse_iso8601}),
    }))

    # Fields reported by the API override the defaults listed first
    info = {
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        **api_fields,
    }
    return next_video, info
|
||||
|
||||
def _entries(self, video_id):
    """Yield info dicts by following each video's `next_video` link.

    Iteration ends when there is no next video or when the next video has
    already been yielded, which prevents looping forever on a cycle.
    """
    seen = set()
    current_id = video_id
    while True:
        seen.add(current_id)
        current_id, entry = self._extract_video(current_id)
        yield entry
        if not current_id or current_id in seen:
            return
|
||||
|
||||
def _real_extract(self, url):
    """Return either the chained playlist starting at this video or the single video."""
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    if not self._yes_playlist(video_id, video_id, smuggled_data):
        # Single-video mode: drop the next-video pointer, keep only the info dict
        _, info = self._extract_video(video_id)
        return info

    return self.playlist_result(self._entries(video_id), video_id)
|
||||
|
||||
|
||||
class PiramideTVChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://piramide\.tv/channel/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://piramide.tv/channel/thekalo',
|
||||
'playlist_mincount': 10,
|
||||
'info_dict': {
|
||||
'id': 'thekalo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _entries(self, channel_name):
    """Yield a url result for every video with an `id` in the channel listing."""
    listing = self._download_json(
        f'https://hermes.piramide.tv/channel/list/{channel_name}/date/100000', channel_name)

    for video in traverse_obj(listing, ('videos', lambda _, v: v['id'])):
        # Smuggle `force_noplaylist` into the URL handed to the video extractor
        video_url = smuggle_url(
            f'https://piramide.tv/video/{video["id"]}', {'force_noplaylist': True})
        metadata = traverse_obj(video, {
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {str}),
        })
        yield self.url_result(video_url, **metadata)
|
||||
|
||||
def _real_extract(self, url):
    """Return a playlist of the channel's videos, using the channel name as playlist id."""
    channel_name = self._match_id(url)
    entries = self._entries(channel_name)
    return self.playlist_result(entries, channel_name)
|
@ -1,4 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
@ -110,8 +111,8 @@ def _real_extract(self, url):
|
||||
if not traverse_obj(data, 'is_broadcasting'):
|
||||
try:
|
||||
self._call_api(user_id, 'users/current.json', url, 'Investigating reason for request failure')
|
||||
except ExtractorError as ex:
|
||||
if ex.cause and ex.cause.code == 401:
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
self.raise_login_required(f'Please log in, or use direct link like https://sketch.pixiv.net/@{user_id}/1234567890', method='cookies')
|
||||
raise ExtractorError('This user is offline', expected=True)
|
||||
|
||||
|
130
yt_dlp/extractor/plvideo.py
Normal file
130
yt_dlp/extractor/plvideo.py
Normal file
@ -0,0 +1,130 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
parse_resolution,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class PlVideoIE(InfoExtractor):
|
||||
IE_DESC = 'Платформа'
|
||||
_VALID_URL = r'https?://(?:www\.)?plvideo\.ru/(?:watch\?(?:[^#]+&)?v=|shorts/)(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://plvideo.ru/watch?v=Y5JzUzkcQTMK',
|
||||
'md5': 'fe8e18aca892b3b31f3bf492169f8a26',
|
||||
'info_dict': {
|
||||
'id': 'Y5JzUzkcQTMK',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://img.plvideo.ru/images/fp-2024-images/v/cover/37/dd/37dd00a4c96c77436ab737e85947abd7/original663a4a3bb713e5.33151959.jpg',
|
||||
'title': 'Presidente de Cuba llega a Moscú en una visita de trabajo',
|
||||
'channel': 'RT en Español',
|
||||
'channel_id': 'ZH4EKqunVDvo',
|
||||
'media_type': 'video',
|
||||
'comment_count': int,
|
||||
'tags': ['rusia', 'cuba', 'russia', 'miguel díaz-canel'],
|
||||
'description': 'md5:a1a395d900d77a86542a91ee0826c115',
|
||||
'released_timestamp': 1715096124,
|
||||
'channel_is_verified': True,
|
||||
'like_count': int,
|
||||
'timestamp': 1715095911,
|
||||
'duration': 44320,
|
||||
'view_count': int,
|
||||
'dislike_count': int,
|
||||
'upload_date': '20240507',
|
||||
'modified_date': '20240701',
|
||||
'channel_follower_count': int,
|
||||
'modified_timestamp': 1719824073,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://plvideo.ru/shorts/S3Uo9c-VLwFX',
|
||||
'md5': '7d8fa2279406c69d2fd2a6fc548a9805',
|
||||
'info_dict': {
|
||||
'id': 'S3Uo9c-VLwFX',
|
||||
'ext': 'mp4',
|
||||
'channel': 'Romaatom',
|
||||
'tags': 'count:22',
|
||||
'dislike_count': int,
|
||||
'upload_date': '20241130',
|
||||
'description': 'md5:452e6de219bf2f32bb95806c51c3b364',
|
||||
'duration': 58433,
|
||||
'modified_date': '20241130',
|
||||
'thumbnail': 'https://img.plvideo.ru/images/fp-2024-11-cover/S3Uo9c-VLwFX/f9318999-a941-482b-b700-2102a7049366.jpg',
|
||||
'media_type': 'shorts',
|
||||
'like_count': int,
|
||||
'modified_timestamp': 1732961458,
|
||||
'channel_is_verified': True,
|
||||
'channel_id': 'erJyyTIbmUd1',
|
||||
'timestamp': 1732961355,
|
||||
'comment_count': int,
|
||||
'title': 'Белоусов отменил приказы о кадровом резерве на гражданской службе',
|
||||
'channel_follower_count': int,
|
||||
'view_count': int,
|
||||
'released_timestamp': 1732961458,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Extract a plvideo.ru video or short from the site's JSON API.

    Formats come from the per-quality `profiles` entries that carry an HLS
    URL, plus the livestream manifest when one is present. Subtitle tracks
    whose language key ends in `-auto` are reported as automatic captions.
    """
    video_id = self._match_id(url)

    video_data = self._download_json(
        f'https://api.g1.plvideo.ru/v1/videos/{video_id}?Aud=18', video_id)

    # Each profile is a (quality, data) pair; keep only those with a valid HLS URL
    profiles = traverse_obj(
        video_data, ('item', 'profiles', {dict.items}, lambda _, v: url_or_none(v[1]['hls'])))
    formats = [{
        'format_id': quality,
        'ext': 'mp4',
        'protocol': 'm3u8_native',
        **traverse_obj(data, {
            'url': 'hls',
            'fps': ('fps', {float_or_none}),
            'aspect_ratio': ('aspectRatio', {float_or_none}),
        }),
        **parse_resolution(quality),
    } for quality, data in profiles]

    livestream_url = traverse_obj(video_data, ('item', 'livestream', 'url', {url_or_none}))
    is_live = bool(livestream_url)
    if is_live:
        formats.extend(self._extract_m3u8_formats(livestream_url, video_id, 'mp4', live=True))

    subtitles = {}
    automatic_captions = {}
    sub_tracks = traverse_obj(
        video_data, ('item', 'subtitles', {dict.items}, lambda _, v: url_or_none(v[1])))
    for lang, sub_url in sub_tracks:
        if lang.endswith('-auto'):
            # Strip the 5-character '-auto' suffix to recover the language code
            target, lang = automatic_captions, lang[:-5]
        else:
            target = subtitles
        target.setdefault(lang, []).append({
            'url': sub_url,
        })

    metadata = traverse_obj(video_data, ('item', {
        'id': ('id', {str}),
        'title': ('title', {str}),
        'description': ('description', {str}),
        'thumbnail': ('cover', 'paths', 'original', 'src', {url_or_none}),
        'duration': ('uploadFile', 'videoDuration', {int_or_none}),
        'channel': ('channel', 'name', {str}),
        'channel_id': ('channel', 'id', {str}),
        'channel_follower_count': ('channel', 'stats', 'subscribers', {int_or_none}),
        'channel_is_verified': ('channel', 'verified', {bool}),
        'tags': ('tags', ..., {str}),
        'timestamp': ('createdAt', {parse_iso8601}),
        # NOTE(review): the usual field name appears to be `release_timestamp`;
        # this key is kept as-is because the test fixtures expect it - confirm
        'released_timestamp': ('publishedAt', {parse_iso8601}),
        'modified_timestamp': ('updatedAt', {parse_iso8601}),
        'view_count': ('stats', 'viewTotalCount', {int_or_none}),
        'like_count': ('stats', 'likeCount', {int_or_none}),
        'dislike_count': ('stats', 'dislikeCount', {int_or_none}),
        'comment_count': ('stats', 'commentCount', {int_or_none}),
        'media_type': ('type', {str}),
    }))

    # API metadata (including `id`) overrides the defaults listed first
    return {
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        'automatic_captions': automatic_captions,
        'is_live': is_live,
        **metadata,
    }
|
@ -1,136 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
)
|
||||
|
||||
|
||||
class PokemonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
|
||||
'md5': '2fe8eaec69768b25ef898cda9c43062e',
|
||||
'info_dict': {
|
||||
'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Ol’ Raise and Switch!',
|
||||
'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
|
||||
},
|
||||
'add_id': ['LimelightMedia'],
|
||||
}, {
|
||||
# no data-video-title
|
||||
'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
|
||||
'info_dict': {
|
||||
'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
|
||||
'ext': 'mp4',
|
||||
'title': "Pokémon : L'ascension de Darkrai",
|
||||
'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
|
||||
},
|
||||
'add_id': ['LimelightMedia'],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a pokemon.com page to its Limelight media id plus page metadata.

    Returns a url_transparent result pointing at `limelight:media:<id>`,
    enriched with the `data-video-*` attributes of the matched element.
    """
    video_id, display_id = self._match_valid_url(url).groups()
    webpage = self._download_webpage(url, video_id or display_id)

    # Look for the element of the requested video, or any 32-character id if none was given
    id_pattern = video_id or '[a-z0-9]{32}'
    element = self._search_regex(
        r'(<[^>]+data-video-id="' + id_pattern + r'"[^>]*>)',
        webpage, 'video data element')
    video_data = extract_attributes(element)
    video_id = video_data['data-video-id']

    # Title fallbacks, tried in order: data attribute, meta tag, page heading
    title = video_data.get('data-video-title')
    if not title:
        title = self._html_search_meta('pkm-title', webpage, ' title', default=None)
    if not title:
        title = self._search_regex(
            r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')

    season_number = int_or_none(video_data.get('data-video-season'))
    episode_number = int_or_none(video_data.get('data-video-episode'))
    return {
        '_type': 'url_transparent',
        'id': video_id,
        'url': f'limelight:media:{video_id}',
        'title': title,
        'description': video_data.get('data-video-summary'),
        'thumbnail': video_data.get('data-video-poster'),
        'series': 'Pokémon',
        'season_number': season_number,
        'episode': title,
        'episode_number': episode_number,
        'ie_key': 'LimelightMedia',
    }
|
||||
|
||||
|
||||
class PokemonWatchIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})'
|
||||
_API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
|
||||
_TESTS = [{
|
||||
'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
|
||||
'md5': '62833938a31e61ab49ada92f524c42ff',
|
||||
'info_dict': {
|
||||
'id': '8309a40969894a8e8d5bc1311e9c5667',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lillier and the Staff!',
|
||||
'description': 'md5:338841b8c21b283d24bdc9b568849f04',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_media(self, channel_array, video_id):
|
||||
for channel in channel_array:
|
||||
for media in channel.get('media'):
|
||||
if media.get('id') == video_id:
|
||||
return media
|
||||
return None
|
||||
|
||||
def _real_extract(self, url):
    """Resolve a watch.pokemon.com player URL to its Limelight media.

    When only listing formats, a plain `url` result is returned without any
    extra request. Otherwise the region's channel listing is fetched and the
    matching media entry supplies title, description, thumbnail and
    season/episode numbers for a url_transparent result.

    Raises an expected ExtractorError if the id is not in the listing.
    """
    video_id = self._match_id(url)

    info = {
        '_type': 'url',
        'id': video_id,
        'url': f'limelight:media:{video_id}',
        'ie_key': 'LimelightMedia',
    }

    # API call can be avoided entirely if we are listing formats
    if self.get_param('listformats', False):
        return info

    webpage = self._download_webpage(url, video_id)
    # `buildVars` is a JS object literal in the page; it names the region used by the API
    build_vars = self._parse_json(self._search_regex(
        r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
        video_id, transform_source=js_to_json)
    region = build_vars.get('region')
    channel_array = self._download_json(self._API_URL.format(region), video_id)
    video_data = self._extract_media(channel_array, video_id)

    if video_data is None:
        raise ExtractorError(
            f'Video {video_id} does not exist', expected=True)

    info['_type'] = 'url_transparent'
    # `images` is optional metadata; a missing or null value must not abort extraction
    images = video_data.get('images') or {}

    return merge_dicts(info, {
        'title': video_data.get('title'),
        'description': video_data.get('description'),
        'thumbnail': images.get('medium') or images.get('small'),
        'series': 'Pokémon',
        'season_number': int_or_none(video_data.get('season')),
        'episode': video_data.get('title'),
        'episode_number': int_or_none(video_data.get('episode')),
    })
|
105
yt_dlp/extractor/radioradicale.py
Normal file
105
yt_dlp/extractor/radioradicale.py
Normal file
@ -0,0 +1,105 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class RadioRadicaleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.radioradicale.it/scheda/471591',
|
||||
'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a',
|
||||
'info_dict': {
|
||||
'id': '471591',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:e8fbb8de57011a3255db0beca69af73d',
|
||||
'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef',
|
||||
'location': 'Napoli',
|
||||
'duration': 2852.0,
|
||||
'timestamp': 1459987200,
|
||||
'upload_date': '20160407',
|
||||
'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura',
|
||||
'info_dict': {
|
||||
'id': '742783',
|
||||
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
|
||||
'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)',
|
||||
'location': 'CAMERA',
|
||||
'duration': 5868.0,
|
||||
'timestamp': 1730246400,
|
||||
'upload_date': '20241030',
|
||||
},
|
||||
'playlist': [{
|
||||
'md5': 'aa48de55dcc45478e4cd200f299aab7d',
|
||||
'info_dict': {
|
||||
'id': '742783-0',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
|
||||
},
|
||||
}, {
|
||||
'md5': 'be915c189c70ad2920e5810f32260ff5',
|
||||
'info_dict': {
|
||||
'id': '742783-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
|
||||
},
|
||||
}, {
|
||||
'md5': 'f0ee4047342baf8ed3128a8417ac5e0a',
|
||||
'info_dict': {
|
||||
'id': '742783-2',
|
||||
'ext': 'mp4',
|
||||
'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
|
||||
def _entries(self, videos_info, page_id):
    """Yield one info dict per playlist item of `videos_info` that has sources.

    Entry ids are `<page_id>-<index>`. Formats are gathered from every valid
    source URL; subtitles combine those found in the manifests with the
    item's own subtitle tracks.
    """
    playlist_items = traverse_obj(
        videos_info, ('playlist', lambda _, v: v['sources']))

    for idx, video in enumerate(playlist_items):
        video_id = f'{page_id}-{idx}'
        formats, subtitles = [], {}

        for m3u8_url in traverse_obj(video, ('sources', ..., 'src', {url_or_none})):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        for sub in traverse_obj(video, ('subtitles', ..., lambda _, v: url_or_none(v['src']))):
            # Tracks without a language code are filed under 'und'
            lang = sub.get('srclang') or 'und'
            track = {
                'url': sub['src'],
                'name': sub.get('label'),
            }
            self._merge_subtitles({lang: [track]}, target=subtitles)

        yield {
            'id': video_id,
            'title': video.get('title'),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
def _real_extract(self, url):
    """Extract a radioradicale.it "scheda" page.

    A page with exactly one playable item is returned as a single video
    carrying the page-level metadata; any other count is returned as a
    multi_video playlist with that metadata.
    """
    page_id = self._match_id(url)
    webpage = self._download_webpage(url, page_id)

    # The player configuration sits under the 'RRscheda' key of the Drupal settings object
    drupal_settings = self._search_json(
        r'jQuery\.extend\(Drupal\.settings\s*,',
        webpage, 'videos_info', page_id)
    videos_info = drupal_settings['RRscheda']

    entries = [*self._entries(videos_info, page_id)]

    # JSON-LD values, when present, override the fields listed before them
    common_info = {
        'id': page_id,
        'title': self._og_search_title(webpage),
        'description': self._og_search_description(webpage),
        'location': videos_info.get('luogo'),
        **self._search_json_ld(webpage, page_id),
    }

    if len(entries) != 1:
        return self.playlist_result(entries, multi_video=True, **common_info)

    # Page-level metadata overrides the lone entry's own id and title
    (only_entry,) = entries
    return {
        **only_entry,
        **common_info,
    }
|
@ -259,6 +259,8 @@ def _real_extract(self, url):
|
||||
f'https://www.reddit.com/{slug}/.json', video_id, expected_status=403)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, json.JSONDecodeError):
|
||||
if self._get_cookies('https://www.reddit.com/').get('reddit_session'):
|
||||
raise ExtractorError('Your IP address is unable to access the Reddit API', expected=True)
|
||||
self.raise_login_required('Account authentication is required')
|
||||
raise
|
||||
|
||||
|
@ -213,7 +213,7 @@ def _real_extract(self, url):
|
||||
class RedGifsUserIE(RedGifsBaseInfoExtractor):
|
||||
IE_DESC = 'Redgifs user'
|
||||
_VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
|
||||
_PAGE_SIZE = 30
|
||||
_PAGE_SIZE = 80
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.redgifs.com/users/lamsinka89',
|
||||
@ -222,7 +222,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
|
||||
'title': 'lamsinka89',
|
||||
'description': 'RedGifs user lamsinka89, ordered by recent',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
'playlist_mincount': 391,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
|
||||
@ -231,7 +231,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
|
||||
'title': 'lamsinka89',
|
||||
'description': 'RedGifs user lamsinka89, ordered by recent',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
'playlist_count': 80,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
|
||||
@ -240,7 +240,17 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor):
|
||||
'title': 'lamsinka89',
|
||||
'description': 'RedGifs user lamsinka89, ordered by best',
|
||||
},
|
||||
'playlist_mincount': 100,
|
||||
'playlist_mincount': 391,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.redgifs.com/users/ignored52',
|
||||
'note': 'https://github.com/yt-dlp/yt-dlp/issues/7382',
|
||||
'info_dict': {
|
||||
'id': 'ignored52',
|
||||
'title': 'ignored52',
|
||||
'description': 'RedGifs user ignored52, ordered by recent',
|
||||
},
|
||||
'playlist_mincount': 121,
|
||||
},
|
||||
]
|
||||
|
||||
|
@ -176,6 +176,8 @@ class RTVSLOShowIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '173250997',
|
||||
'title': 'Ekipa Bled',
|
||||
'description': 'md5:c88471e27a1268c448747a5325319ab7',
|
||||
'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/173250997/logo_wide1.jpg',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}]
|
||||
@ -187,4 +189,7 @@ def _real_extract(self, url):
|
||||
return self.playlist_from_matches(
|
||||
re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
|
||||
playlist_id, self._html_extract_title(webpage),
|
||||
getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE)
|
||||
getter=urljoin('https://365.rtvslo.si'), ie=RTVSLOIE,
|
||||
description=self._og_search_description(webpage),
|
||||
thumbnail=self._og_search_thumbnail(webpage),
|
||||
)
|
||||
|
@ -2,15 +2,21 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
UnsupportedError,
|
||||
bool_or_none,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import (
|
||||
subs_list_to_dict,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class RutubeBaseIE(InfoExtractor):
|
||||
@ -19,7 +25,7 @@ def _download_api_info(self, video_id, query=None):
|
||||
query = {}
|
||||
query['format'] = 'json'
|
||||
return self._download_json(
|
||||
f'http://rutube.ru/api/video/{video_id}/',
|
||||
f'https://rutube.ru/api/video/{video_id}/',
|
||||
video_id, 'Downloading video JSON',
|
||||
'Unable to download video JSON', query=query)
|
||||
|
||||
@ -61,18 +67,21 @@ def _download_api_options(self, video_id, query=None):
|
||||
query = {}
|
||||
query['format'] = 'json'
|
||||
return self._download_json(
|
||||
f'http://rutube.ru/api/play/options/{video_id}/',
|
||||
f'https://rutube.ru/api/play/options/{video_id}/',
|
||||
video_id, 'Downloading options JSON',
|
||||
'Unable to download options JSON',
|
||||
headers=self.geo_verification_headers(), query=query)
|
||||
|
||||
def _extract_formats(self, options, video_id):
|
||||
def _extract_formats_and_subtitles(self, options, video_id):
|
||||
formats = []
|
||||
subtitles = {}
|
||||
for format_id, format_url in options['video_balancer'].items():
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
||||
@ -82,11 +91,19 @@ def _extract_formats(self, options, video_id):
|
||||
'format_id': format_id,
|
||||
})
|
||||
for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})):
|
||||
formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False))
|
||||
return formats
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls')
|
||||
formats.extend(fmts)
|
||||
self._merge_subtitles(subs, target=subtitles)
|
||||
self._merge_subtitles(traverse_obj(options, ('captions', ..., {
|
||||
'id': 'code',
|
||||
'url': 'file',
|
||||
'name': ('langTitle', {str}),
|
||||
}, all, {subs_list_to_dict(lang='ru')})), target=subtitles)
|
||||
return formats, subtitles
|
||||
|
||||
def _download_and_extract_formats(self, video_id, query=None):
|
||||
return self._extract_formats(
|
||||
def _download_and_extract_formats_and_subtitles(self, video_id, query=None):
|
||||
return self._extract_formats_and_subtitles(
|
||||
self._download_api_options(video_id, query=query), video_id)
|
||||
|
||||
|
||||
@ -97,8 +114,8 @@ class RutubeIE(RutubeBaseIE):
|
||||
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||
'md5': 'e33ac625efca66aba86cbec9851f2692',
|
||||
'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
|
||||
'md5': '3d73fdfe5bb81b9aef139e22ef3de26a',
|
||||
'info_dict': {
|
||||
'id': '3eac3b4561676c17df9132a9a1e62e3e',
|
||||
'ext': 'mp4',
|
||||
@ -111,26 +128,25 @@ class RutubeIE(RutubeBaseIE):
|
||||
'upload_date': '20131016',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
|
||||
'thumbnail': 'https://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg',
|
||||
'categories': ['Новости и СМИ'],
|
||||
'chapters': [],
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m'],
|
||||
}, {
|
||||
'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
|
||||
'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
|
||||
'url': 'https://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
|
||||
'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
|
||||
'md5': 'd106225f15d625538fe22971158e896f',
|
||||
'md5': '4fce7b4fcc7b1bcaa3f45eb1e1ad0dd7',
|
||||
'info_dict': {
|
||||
'id': '884fb55f07a97ab673c7d654553e0f48',
|
||||
'ext': 'mp4',
|
||||
@ -143,11 +159,10 @@ class RutubeIE(RutubeBaseIE):
|
||||
'upload_date': '20221210',
|
||||
'age_limit': 0,
|
||||
'view_count': int,
|
||||
'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
|
||||
'thumbnail': 'https://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg',
|
||||
'categories': ['Видеоигры'],
|
||||
'chapters': [],
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m'],
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/',
|
||||
'info_dict': {
|
||||
@ -156,17 +171,16 @@ class RutubeIE(RutubeBaseIE):
|
||||
'chapters': 'count:4',
|
||||
'categories': ['Бизнес и предпринимательство'],
|
||||
'description': 'md5:252feac1305257d8c1bab215cedde75d',
|
||||
'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
|
||||
'thumbnail': 'https://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png',
|
||||
'duration': 782,
|
||||
'age_limit': 0,
|
||||
'uploader_id': '23491359',
|
||||
'timestamp': 1677153329,
|
||||
'view_count': int,
|
||||
'upload_date': '20230223',
|
||||
'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании',
|
||||
'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1',
|
||||
'uploader': 'Стас Быков',
|
||||
},
|
||||
'expected_warnings': ['Unable to download f4m'],
|
||||
}, {
|
||||
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
|
||||
'info_dict': {
|
||||
@ -174,7 +188,7 @@ class RutubeIE(RutubeBaseIE):
|
||||
'ext': 'mp4',
|
||||
'categories': ['Телепередачи'],
|
||||
'description': '',
|
||||
'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
|
||||
'thumbnail': 'https://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg',
|
||||
'live_status': 'is_live',
|
||||
'age_limit': 0,
|
||||
'uploader_id': '23460655',
|
||||
@ -184,6 +198,24 @@ class RutubeIE(RutubeBaseIE):
|
||||
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||
'uploader': 'Первый канал',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/',
|
||||
'info_dict': {
|
||||
'id': '03a9cb54bac3376af4c5cb0f18444e01',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'description': '',
|
||||
'title': 'Церемония начала торгов акциями ПАО «ЕвроТранс»',
|
||||
'chapters': [],
|
||||
'upload_date': '20240829',
|
||||
'duration': 293,
|
||||
'uploader': 'MOEX - Московская биржа',
|
||||
'timestamp': 1724946628,
|
||||
'thumbnail': 'https://pic.rutubelist.ru/video/2e/24/2e241fddb459baf0fa54acfca44874f4.jpg',
|
||||
'view_count': int,
|
||||
'uploader_id': '38420507',
|
||||
'categories': ['Интервью'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
|
||||
'only_matching': True,
|
||||
@ -192,40 +224,46 @@ class RutubeIE(RutubeBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if RutubePlaylistIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
query = parse_qs(url)
|
||||
info = self._download_and_extract_info(video_id, query)
|
||||
info['formats'] = self._download_and_extract_formats(video_id, query)
|
||||
return info
|
||||
formats, subtitles = self._download_and_extract_formats_and_subtitles(video_id, query)
|
||||
return {
|
||||
**info,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class RutubeEmbedIE(RutubeBaseIE):
|
||||
IE_NAME = 'rutube:embed'
|
||||
IE_DESC = 'Rutube embedded videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)(?:[?#/]|$)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||
'url': 'https://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
|
||||
'info_dict': {
|
||||
'id': 'a10e53b86e8f349080f718582ce4c661',
|
||||
'ext': 'mp4',
|
||||
'timestamp': 1387830582,
|
||||
'upload_date': '20131223',
|
||||
'uploader_id': '297833',
|
||||
'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89',
|
||||
'uploader': 'subziro89 ILya',
|
||||
'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89',
|
||||
'age_limit': 0,
|
||||
'duration': 1395,
|
||||
'chapters': [],
|
||||
'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b',
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://pic.rutubelist.ru/video/d3/03/d3031f4670a6e6170d88fb3607948418.jpg',
|
||||
'categories': ['Сериалы'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'http://rutube.ru/play/embed/8083783',
|
||||
'url': 'https://rutube.ru/play/embed/8083783',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# private video
|
||||
@ -240,11 +278,12 @@ def _real_extract(self, url):
|
||||
query = parse_qs(url)
|
||||
options = self._download_api_options(embed_id, query)
|
||||
video_id = options['effective_video']
|
||||
formats = self._extract_formats(options, video_id)
|
||||
formats, subtitles = self._extract_formats_and_subtitles(options, video_id)
|
||||
info = self._download_and_extract_info(video_id, query)
|
||||
info.update({
|
||||
'extractor_key': 'Rutube',
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
})
|
||||
return info
|
||||
|
||||
@ -295,14 +334,14 @@ class RutubeTagsIE(RutubePlaylistBaseIE):
|
||||
IE_DESC = 'Rutube tags'
|
||||
_VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://rutube.ru/tags/video/1800/',
|
||||
'url': 'https://rutube.ru/tags/video/1800/',
|
||||
'info_dict': {
|
||||
'id': '1800',
|
||||
},
|
||||
'playlist_mincount': 68,
|
||||
}]
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
||||
_PAGE_TEMPLATE = 'https://rutube.ru/api/tags/video/%s/?page=%s&format=json'
|
||||
|
||||
|
||||
class RutubeMovieIE(RutubePlaylistBaseIE):
|
||||
@ -310,8 +349,8 @@ class RutubeMovieIE(RutubePlaylistBaseIE):
|
||||
IE_DESC = 'Rutube movies'
|
||||
_VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
|
||||
|
||||
_MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
|
||||
_MOVIE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/?format=json'
|
||||
_PAGE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
|
||||
|
||||
def _real_extract(self, url):
|
||||
movie_id = self._match_id(url)
|
||||
@ -327,62 +366,82 @@ class RutubePersonIE(RutubePlaylistBaseIE):
|
||||
IE_DESC = 'Rutube person videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://rutube.ru/video/person/313878/',
|
||||
'url': 'https://rutube.ru/video/person/313878/',
|
||||
'info_dict': {
|
||||
'id': '313878',
|
||||
},
|
||||
'playlist_mincount': 37,
|
||||
'playlist_mincount': 36,
|
||||
}]
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||
_PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||
|
||||
|
||||
class RutubePlaylistIE(RutubePlaylistBaseIE):
|
||||
IE_NAME = 'rutube:playlist'
|
||||
IE_DESC = 'Rutube playlists'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://rutube\.ru/plst/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag',
|
||||
'url': 'https://rutube.ru/plst/308547/',
|
||||
'info_dict': {
|
||||
'id': '3097',
|
||||
'id': '308547',
|
||||
},
|
||||
'playlist_count': 27,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
|
||||
'only_matching': True,
|
||||
'playlist_mincount': 22,
|
||||
}]
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
from ..utils import int_or_none, parse_qs
|
||||
|
||||
if not super().suitable(url):
|
||||
return False
|
||||
params = parse_qs(url)
|
||||
return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0])
|
||||
|
||||
def _next_page_url(self, page_num, playlist_id, item_kind):
|
||||
return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
qs = parse_qs(url)
|
||||
playlist_kind = qs['pl_type'][0]
|
||||
playlist_id = qs['pl_id'][0]
|
||||
return self._extract_playlist(playlist_id, item_kind=playlist_kind)
|
||||
_PAGE_TEMPLATE = 'https://rutube.ru/api/playlist/custom/%s/videos?page=%s&format=json'
|
||||
|
||||
|
||||
class RutubeChannelIE(RutubePlaylistBaseIE):
|
||||
IE_NAME = 'rutube:channel'
|
||||
IE_DESC = 'Rutube channel'
|
||||
_VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos'
|
||||
_VALID_URL = r'https?://rutube\.ru/(?:channel/(?P<id>\d+)|u/(?P<slug>\w+))(?:/(?P<section>videos|shorts|playlists))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://rutube.ru/channel/639184/videos/',
|
||||
'info_dict': {
|
||||
'id': '639184',
|
||||
'id': '639184_videos',
|
||||
},
|
||||
'playlist_mincount': 133,
|
||||
'playlist_mincount': 129,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/channel/25902603/shorts/',
|
||||
'info_dict': {
|
||||
'id': '25902603_shorts',
|
||||
},
|
||||
'playlist_mincount': 277,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/channel/25902603/',
|
||||
'info_dict': {
|
||||
'id': '25902603',
|
||||
},
|
||||
'playlist_mincount': 406,
|
||||
}, {
|
||||
'url': 'https://rutube.ru/u/rutube/videos/',
|
||||
'info_dict': {
|
||||
'id': '23704195_videos',
|
||||
},
|
||||
'playlist_mincount': 113,
|
||||
}]
|
||||
|
||||
_PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
|
||||
_PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json&origin__type=%s'
|
||||
|
||||
def _next_page_url(self, page_num, playlist_id, section):
|
||||
origin_type = {
|
||||
'videos': 'rtb,rst,ifrm,rspa',
|
||||
'shorts': 'rshorts',
|
||||
None: '',
|
||||
}.get(section)
|
||||
return self._PAGE_TEMPLATE % (playlist_id, page_num, origin_type)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id, slug, section = self._match_valid_url(url).group('id', 'slug', 'section')
|
||||
if section == 'playlists':
|
||||
raise UnsupportedError(url)
|
||||
if slug:
|
||||
webpage = self._download_webpage(url, slug)
|
||||
redux_state = self._search_json(
|
||||
r'window\.reduxState\s*=', webpage, 'redux state', slug, transform_source=js_to_json)
|
||||
playlist_id = traverse_obj(redux_state, (
|
||||
'api', 'queries', lambda k, _: k.startswith('channelIdBySlug'),
|
||||
'data', 'channel_id', {int}, {str_or_none}, any))
|
||||
playlist = self._extract_playlist(playlist_id, section=section)
|
||||
if section:
|
||||
playlist['id'] = f'{playlist_id}_{section}'
|
||||
return playlist
|
||||
|
@ -4,43 +4,12 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
parse_qs,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
make_archive_id,
|
||||
remove_end,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
_COMMITTEES = {
|
||||
'ag': ('76440', 'http://ag-f.akamaihd.net'),
|
||||
'aging': ('76442', 'http://aging-f.akamaihd.net'),
|
||||
'approps': ('76441', 'http://approps-f.akamaihd.net'),
|
||||
'arch': ('', 'http://ussenate-f.akamaihd.net'),
|
||||
'armed': ('76445', 'http://armed-f.akamaihd.net'),
|
||||
'banking': ('76446', 'http://banking-f.akamaihd.net'),
|
||||
'budget': ('76447', 'http://budget-f.akamaihd.net'),
|
||||
'cecc': ('76486', 'http://srs-f.akamaihd.net'),
|
||||
'commerce': ('80177', 'http://commerce1-f.akamaihd.net'),
|
||||
'csce': ('75229', 'http://srs-f.akamaihd.net'),
|
||||
'dpc': ('76590', 'http://dpc-f.akamaihd.net'),
|
||||
'energy': ('76448', 'http://energy-f.akamaihd.net'),
|
||||
'epw': ('76478', 'http://epw-f.akamaihd.net'),
|
||||
'ethics': ('76449', 'http://ethics-f.akamaihd.net'),
|
||||
'finance': ('76450', 'http://finance-f.akamaihd.net'),
|
||||
'foreign': ('76451', 'http://foreign-f.akamaihd.net'),
|
||||
'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'),
|
||||
'help': ('76452', 'http://help-f.akamaihd.net'),
|
||||
'indian': ('76455', 'http://indian-f.akamaihd.net'),
|
||||
'intel': ('76456', 'http://intel-f.akamaihd.net'),
|
||||
'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'),
|
||||
'jccic': ('85180', 'http://jccic-f.akamaihd.net'),
|
||||
'jec': ('76458', 'http://jec-f.akamaihd.net'),
|
||||
'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'),
|
||||
'rpc': ('76591', 'http://rpc-f.akamaihd.net'),
|
||||
'rules': ('76460', 'http://rules-f.akamaihd.net'),
|
||||
'saa': ('76489', 'http://srs-f.akamaihd.net'),
|
||||
'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'),
|
||||
'srs': ('75229', 'http://srs-f.akamaihd.net'),
|
||||
'uscc': ('76487', 'http://srs-f.akamaihd.net'),
|
||||
'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'),
|
||||
}
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SenateISVPIE(InfoExtractor):
|
||||
@ -53,31 +22,46 @@ class SenateISVPIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'judiciary031715',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'title': 'ISVP',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||
'_old_archive_ids': ['senategov judiciary031715'],
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
|
||||
'info_dict': {
|
||||
'id': 'commerce011514',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'_old_archive_ids': ['senategov commerce011514'],
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This video is not available.',
|
||||
}, {
|
||||
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
|
||||
# checksum differs each time
|
||||
'info_dict': {
|
||||
'id': 'intel090613',
|
||||
'ext': 'mp4',
|
||||
'title': 'Integrated Senate Video Player',
|
||||
'title': 'ISVP',
|
||||
'_old_archive_ids': ['senategov intel090613'],
|
||||
},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'https://www.senate.gov/isvp/?auto_play=false&comm=help&filename=help090920&poster=https://www.help.senate.gov/assets/images/video-poster.png&stt=950',
|
||||
'info_dict': {
|
||||
'id': 'help090920',
|
||||
'ext': 'mp4',
|
||||
'title': 'ISVP',
|
||||
'thumbnail': 'https://www.help.senate.gov/assets/images/video-poster.png',
|
||||
'_old_archive_ids': ['senategov help090920'],
|
||||
},
|
||||
}, {
|
||||
# From http://www.c-span.org/video/?96791-1
|
||||
@ -85,60 +69,81 @@ class SenateISVPIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_COMMITTEES = {
|
||||
'ag': ('76440', 'https://ag-f.akamaihd.net', '2036803', 'agriculture'),
|
||||
'aging': ('76442', 'https://aging-f.akamaihd.net', '2036801', 'aging'),
|
||||
'approps': ('76441', 'https://approps-f.akamaihd.net', '2036802', 'appropriations'),
|
||||
'arch': ('', 'https://ussenate-f.akamaihd.net', '', 'arch'),
|
||||
'armed': ('76445', 'https://armed-f.akamaihd.net', '2036800', 'armedservices'),
|
||||
'banking': ('76446', 'https://banking-f.akamaihd.net', '2036799', 'banking'),
|
||||
'budget': ('76447', 'https://budget-f.akamaihd.net', '2036798', 'budget'),
|
||||
'cecc': ('76486', 'https://srs-f.akamaihd.net', '2036782', 'srs_cecc'),
|
||||
'commerce': ('80177', 'https://commerce1-f.akamaihd.net', '2036779', 'commerce'),
|
||||
'csce': ('75229', 'https://srs-f.akamaihd.net', '2036777', 'srs_srs'),
|
||||
'dpc': ('76590', 'https://dpc-f.akamaihd.net', '', 'dpc'),
|
||||
'energy': ('76448', 'https://energy-f.akamaihd.net', '2036797', 'energy'),
|
||||
'epw': ('76478', 'https://epw-f.akamaihd.net', '2036783', 'environment'),
|
||||
'ethics': ('76449', 'https://ethics-f.akamaihd.net', '2036796', 'ethics'),
|
||||
'finance': ('76450', 'https://finance-f.akamaihd.net', '2036795', 'finance_finance'),
|
||||
'foreign': ('76451', 'https://foreign-f.akamaihd.net', '2036794', 'foreignrelations'),
|
||||
'govtaff': ('76453', 'https://govtaff-f.akamaihd.net', '2036792', 'hsgac'),
|
||||
'help': ('76452', 'https://help-f.akamaihd.net', '2036793', 'help'),
|
||||
'indian': ('76455', 'https://indian-f.akamaihd.net', '2036791', 'indianaffairs'),
|
||||
'intel': ('76456', 'https://intel-f.akamaihd.net', '2036790', 'intelligence'),
|
||||
'intlnarc': ('76457', 'https://intlnarc-f.akamaihd.net', '', 'internationalnarcoticscaucus'),
|
||||
'jccic': ('85180', 'https://jccic-f.akamaihd.net', '2036778', 'jccic'),
|
||||
'jec': ('76458', 'https://jec-f.akamaihd.net', '2036789', 'jointeconomic'),
|
||||
'judiciary': ('76459', 'https://judiciary-f.akamaihd.net', '2036788', 'judiciary'),
|
||||
'rpc': ('76591', 'https://rpc-f.akamaihd.net', '', 'rpc'),
|
||||
'rules': ('76460', 'https://rules-f.akamaihd.net', '2036787', 'rules'),
|
||||
'saa': ('76489', 'https://srs-f.akamaihd.net', '2036780', 'srs_saa'),
|
||||
'smbiz': ('76461', 'https://smbiz-f.akamaihd.net', '2036786', 'smallbusiness'),
|
||||
'srs': ('75229', 'https://srs-f.akamaihd.net', '2031966', 'srs_srs'),
|
||||
'uscc': ('76487', 'https://srs-f.akamaihd.net', '2036781', 'srs_uscc'),
|
||||
'vetaff': ('76462', 'https://vetaff-f.akamaihd.net', '2036785', 'veteransaffairs'),
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
|
||||
qs = urllib.parse.parse_qs(self._match_valid_url(url).group('qs'))
|
||||
if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
|
||||
if not qs.get('filename') or not qs.get('comm'):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
video_id = re.sub(r'.mp4$', '', qs['filename'][0])
|
||||
filename = qs['filename'][0]
|
||||
video_id = remove_end(filename, '.mp4')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
committee = qs['comm'][0]
|
||||
|
||||
if smuggled_data.get('force_title'):
|
||||
title = smuggled_data['force_title']
|
||||
else:
|
||||
title = self._html_extract_title(webpage)
|
||||
poster = qs.get('poster')
|
||||
thumbnail = poster[0] if poster else None
|
||||
|
||||
video_type = qs['type'][0]
|
||||
committee = video_type if video_type == 'arch' else qs['comm'][0]
|
||||
|
||||
stream_num, domain = _COMMITTEES[committee]
|
||||
stream_num, stream_domain, stream_id, msl3 = self._COMMITTEES[committee]
|
||||
|
||||
urls_alternatives = [f'https://www-senate-gov-media-srs.akamaized.net/hls/live/{stream_id}/{committee}/{filename}/master.m3u8',
|
||||
f'https://www-senate-gov-msl3archive.akamaized.net/{msl3}/{filename}_1/master.m3u8',
|
||||
f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
|
||||
f'{stream_domain}/i/{filename}.mp4/master.m3u8']
|
||||
formats = []
|
||||
if video_type == 'arch':
|
||||
filename = video_id if '.' in video_id else video_id + '.mp4'
|
||||
m3u8_url = urllib.parse.urljoin(domain, 'i/' + filename + '/master.m3u8')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8')
|
||||
else:
|
||||
hdcore_sign = 'hdcore=3.1.0'
|
||||
url_params = (domain, video_id, stream_num)
|
||||
f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params
|
||||
m3u8_url = '{}/i/{}_1@{}/master.m3u8'.format(*url_params)
|
||||
for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
|
||||
# URLs without the extra param induce an 404 error
|
||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||
formats.append(entry)
|
||||
for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
|
||||
mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
|
||||
if mobj:
|
||||
entry['format_id'] += mobj.group('tag')
|
||||
formats.append(entry)
|
||||
subtitles = {}
|
||||
for video_url in urls_alternatives:
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', fatal=False)
|
||||
if formats:
|
||||
break
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'title': self._html_extract_title(webpage),
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': traverse_obj(qs, ('poster', 0, {url_or_none})),
|
||||
'_old_archive_ids': [make_archive_id(SenateGovIE, video_id)],
|
||||
}
|
||||
|
||||
|
||||
class SenateGovIE(InfoExtractor):
|
||||
_IE_NAME = 'senate.gov'
|
||||
_VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov'
|
||||
_SUBDOMAIN_RE = '|'.join(map(re.escape, (
|
||||
'agriculture', 'aging', 'appropriations', 'armed-services', 'banking',
|
||||
'budget', 'commerce', 'energy', 'epw', 'finance', 'foreign', 'help',
|
||||
'intelligence', 'inaugural', 'judiciary', 'rules', 'sbc', 'veterans',
|
||||
)))
|
||||
_VALID_URL = rf'https?://(?:www\.)?(?:{_SUBDOMAIN_RE})\.senate\.gov'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
|
||||
'info_dict': {
|
||||
@ -147,6 +152,9 @@ class SenateGovIE(InfoExtractor):
|
||||
'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
|
||||
'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://www.help.senate.gov/assets/images/sharelogo.jpg',
|
||||
'_old_archive_ids': ['senategov help090920'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
@ -156,8 +164,12 @@ class SenateGovIE(InfoExtractor):
|
||||
'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
|
||||
'title': 'Review of the FY2019 Budget Request for the U.S. Army',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 0,
|
||||
'thumbnail': 'https://www.appropriations.senate.gov/themes/appropriations/images/video-poster-flash-fit.png',
|
||||
'_old_archive_ids': ['senategov appropsA051518'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to download m3u8 information'],
|
||||
}, {
|
||||
'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
|
||||
'info_dict': {
|
||||
@ -166,32 +178,65 @@ class SenateGovIE(InfoExtractor):
|
||||
'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
|
||||
'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'https://www.banking.senate.gov/themes/banking/images/sharelogo.jpg',
|
||||
'age_limit': 0,
|
||||
'_old_archive_ids': ['senategov banking041521'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.agriculture.senate.gov/hearings/hemp-production-and-the-2018-farm-bill',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aging.senate.gov/hearings/the-older-americans-act-the-local-impact-of-the-law-and-the-upcoming-reauthorization',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.budget.senate.gov/hearings/improving-care-lowering-costs-achieving-health-care-efficiency',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.commerce.senate.gov/2024/12/communications-networks-safety-and-security',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.energy.senate.gov/hearings/2024/2/full-committee-hearing-to-examine',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.epw.senate.gov/public/index.cfm/hearings?ID=F63083EA-2C13-498C-B548-341BED68C209',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.foreign.senate.gov/hearings/american-diplomacy-and-global-leadership-review-of-the-fy25-state-department-budget-request',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.intelligence.senate.gov/hearings/foreign-threats-elections-2024-%E2%80%93-roles-and-responsibilities-us-tech-providers',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.inaugural.senate.gov/52nd-inaugural-ceremonies/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.rules.senate.gov/hearings/02/07/2023/business-meeting',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.sbc.senate.gov/public/index.cfm/hearings?ID=5B13AA6B-8279-45AF-B54B-94156DC7A2AB',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.veterans.senate.gov/2024/5/frontier-health-care-ensuring-veterans-access-no-matter-where-they-live',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._generic_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
parse_info = parse_qs(self._search_regex(
|
||||
r'<iframe class="[^>"]*streaminghearing[^>"]*"\s[^>]*\bsrc="([^">]*)', webpage, 'hearing URL'))
|
||||
|
||||
stream_num, stream_domain = _COMMITTEES[parse_info['comm'][-1]]
|
||||
filename = parse_info['filename'][-1]
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
|
||||
display_id, ext='mp4')
|
||||
url_info = next(SenateISVPIE.extract_from_webpage(self._downloader, url, webpage), None)
|
||||
if not url_info:
|
||||
raise UnsupportedError(url)
|
||||
|
||||
title = self._html_search_regex(
|
||||
(*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title')
|
||||
(*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title', fatal=False)
|
||||
|
||||
return {
|
||||
'id': re.sub(r'.mp4$', '', filename),
|
||||
**url_info,
|
||||
'_type': 'url_transparent',
|
||||
'display_id': display_id,
|
||||
'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||
'age_limit': self._rta_search(webpage),
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -1,11 +1,9 @@
|
||||
import base64
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt, unpad_pkcs7
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
intlist_to_bytes,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
@ -68,10 +66,10 @@ def _real_extract(self, url):
|
||||
data_json = self._download_json('https://www.shemaroome.com/users/user_all_lists', video_id, data=data.encode())
|
||||
if not data_json.get('status'):
|
||||
raise ExtractorError('Premium videos cannot be downloaded yet.', expected=True)
|
||||
url_data = bytes_to_intlist(base64.b64decode(data_json['new_play_url']))
|
||||
key = bytes_to_intlist(base64.b64decode(data_json['key']))
|
||||
iv = [0] * 16
|
||||
m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
|
||||
url_data = base64.b64decode(data_json['new_play_url'])
|
||||
key = base64.b64decode(data_json['key'])
|
||||
iv = bytes(16)
|
||||
m3u8_url = unpad_pkcs7(aes_cbc_decrypt_bytes(url_data, key, iv)).decode('ascii')
|
||||
headers = {'stream_key': data_json['stream_key']}
|
||||
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers=headers)
|
||||
for fmt in formats:
|
||||
|
@ -199,8 +199,9 @@ class SonyLIVSeriesIE(InfoExtractor):
|
||||
},
|
||||
}]
|
||||
_API_BASE = 'https://apiv2.sonyliv.com/AGL'
|
||||
_SORT_ORDERS = ('asc', 'desc')
|
||||
|
||||
def _entries(self, show_id):
|
||||
def _entries(self, show_id, sort_order):
|
||||
headers = {
|
||||
'Accept': 'application/json, text/plain, */*',
|
||||
'Referer': 'https://www.sonyliv.com',
|
||||
@ -215,6 +216,9 @@ def _entries(self, show_id):
|
||||
'from': '0',
|
||||
'to': '49',
|
||||
}), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
|
||||
|
||||
if sort_order == 'desc':
|
||||
seasons = reversed(seasons)
|
||||
for season in seasons:
|
||||
season_id = str(season['id'])
|
||||
note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
|
||||
@ -226,7 +230,7 @@ def _entries(self, show_id):
|
||||
'from': str(cursor),
|
||||
'to': str(cursor + 99),
|
||||
'orderBy': 'episodeNumber',
|
||||
'sortOrder': 'asc',
|
||||
'sortOrder': sort_order,
|
||||
}), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
|
||||
if not episodes:
|
||||
break
|
||||
@ -237,4 +241,10 @@ def _entries(self, show_id):
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(show_id), playlist_id=show_id)
|
||||
|
||||
sort_order = self._configuration_arg('sort_order', [self._SORT_ORDERS[0]])[0]
|
||||
if sort_order not in self._SORT_ORDERS:
|
||||
raise ValueError(
|
||||
f'Invalid sort order "{sort_order}". Allowed values are: {", ".join(self._SORT_ORDERS)}')
|
||||
|
||||
return self.playlist_result(self._entries(show_id, sort_order), playlist_id=show_id)
|
||||
|
@ -7,7 +7,6 @@
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
@ -211,6 +210,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f
|
||||
|
||||
format_urls = set()
|
||||
formats = []
|
||||
has_drm = False
|
||||
query = {'client_id': self._CLIENT_ID}
|
||||
if secret_token:
|
||||
query['secret_token'] = secret_token
|
||||
@ -241,60 +241,29 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f
|
||||
format_urls.add(format_url)
|
||||
formats.append({
|
||||
'format_id': 'download',
|
||||
'ext': urlhandle_detect_ext(urlh) or 'mp3',
|
||||
'ext': urlhandle_detect_ext(urlh, default='mp3'),
|
||||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||
'url': format_url,
|
||||
'quality': 10,
|
||||
'format_note': 'Original',
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
def invalid_url(url):
|
||||
return not url or url in format_urls
|
||||
|
||||
def add_format(f, protocol, is_preview=False):
|
||||
mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
|
||||
if mobj:
|
||||
for k, v in mobj.groupdict().items():
|
||||
if not f.get(k):
|
||||
f[k] = v
|
||||
format_id_list = []
|
||||
if protocol:
|
||||
format_id_list.append(protocol)
|
||||
ext = f.get('ext')
|
||||
if ext == 'aac':
|
||||
f.update({
|
||||
'abr': 256,
|
||||
'quality': 5,
|
||||
'format_note': 'Premium',
|
||||
})
|
||||
for k in ('ext', 'abr'):
|
||||
v = str_or_none(f.get(k))
|
||||
if v:
|
||||
format_id_list.append(v)
|
||||
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
|
||||
if preview:
|
||||
format_id_list.append('preview')
|
||||
abr = f.get('abr')
|
||||
if abr:
|
||||
f['abr'] = int(abr)
|
||||
if protocol in ('hls', 'hls-aes'):
|
||||
protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
|
||||
else:
|
||||
protocol = 'http'
|
||||
f.update({
|
||||
'format_id': '_'.join(format_id_list),
|
||||
'protocol': protocol,
|
||||
'preference': -10 if preview else None,
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
# New API
|
||||
for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']))):
|
||||
for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']) and v['preset'])):
|
||||
if extract_flat:
|
||||
break
|
||||
format_url = t['url']
|
||||
preset = t['preset']
|
||||
preset_base = preset.partition('_')[0]
|
||||
|
||||
protocol = traverse_obj(t, ('format', 'protocol', {str}))
|
||||
protocol = traverse_obj(t, ('format', 'protocol', {str})) or 'http'
|
||||
if protocol.startswith(('ctr-', 'cbc-')):
|
||||
has_drm = True
|
||||
continue
|
||||
if protocol == 'progressive':
|
||||
protocol = 'http'
|
||||
if protocol != 'hls' and '/hls' in format_url:
|
||||
@ -302,35 +271,60 @@ def add_format(f, protocol, is_preview=False):
|
||||
if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url:
|
||||
protocol = 'hls-aes'
|
||||
|
||||
ext = None
|
||||
if preset := traverse_obj(t, ('preset', {str_or_none})):
|
||||
ext = preset.split('_')[0]
|
||||
if ext not in KNOWN_EXTENSIONS:
|
||||
ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str})))
|
||||
|
||||
identifier = join_nonempty(protocol, ext, delim='_')
|
||||
if not self._is_requested(identifier):
|
||||
self.write_debug(f'"{identifier}" is not a requested format, skipping')
|
||||
short_identifier = f'{protocol}_{preset_base}'
|
||||
if preset_base == 'abr':
|
||||
self.write_debug(f'Skipping broken "{short_identifier}" format')
|
||||
continue
|
||||
if not self._is_requested(short_identifier):
|
||||
self.write_debug(f'"{short_identifier}" is not a requested format, skipping')
|
||||
continue
|
||||
|
||||
# XXX: if not extract_flat, 429 error must be caught where _extract_info_dict is called
|
||||
stream_url = traverse_obj(self._call_api(
|
||||
format_url, track_id, f'Downloading {identifier} format info JSON',
|
||||
format_url, track_id, f'Downloading {short_identifier} format info JSON',
|
||||
query=query, headers=self._HEADERS), ('url', {url_or_none}))
|
||||
|
||||
if invalid_url(stream_url):
|
||||
continue
|
||||
format_urls.add(stream_url)
|
||||
add_format({
|
||||
|
||||
mime_type = traverse_obj(t, ('format', 'mime_type', {str}))
|
||||
codec = self._search_regex(r'codecs="([^"]+)"', mime_type, 'codec', default=None)
|
||||
ext = {
|
||||
'mp4a': 'm4a',
|
||||
'opus': 'opus',
|
||||
}.get(codec[:4] if codec else None) or mimetype2ext(mime_type, default=None)
|
||||
if not ext or ext == 'm3u8':
|
||||
ext = preset_base
|
||||
|
||||
is_premium = t.get('quality') == 'hq'
|
||||
abr = int_or_none(
|
||||
self._search_regex(r'(\d+)k$', preset, 'abr', default=None)
|
||||
or self._search_regex(r'\.(\d+)\.(?:opus|mp3)[/?]', stream_url, 'abr', default=None)
|
||||
or (256 if (is_premium and 'aac' in preset) else None))
|
||||
|
||||
is_preview = (t.get('snipped')
|
||||
or '/preview/' in format_url
|
||||
or re.search(r'/(?:preview|playlist)/0/30/', stream_url))
|
||||
|
||||
formats.append({
|
||||
'format_id': join_nonempty(protocol, preset, is_preview and 'preview', delim='_'),
|
||||
'url': stream_url,
|
||||
'ext': ext,
|
||||
}, protocol, t.get('snipped') or '/preview/' in format_url)
|
||||
'acodec': codec,
|
||||
'vcodec': 'none',
|
||||
'abr': abr,
|
||||
'protocol': 'm3u8_native' if protocol in ('hls', 'hls-aes') else 'http',
|
||||
'container': 'm4a_dash' if ext == 'm4a' else None,
|
||||
'quality': 5 if is_premium else 0 if (abr and abr >= 160) else -1,
|
||||
'format_note': 'Premium' if is_premium else None,
|
||||
'preference': -10 if is_preview else None,
|
||||
})
|
||||
|
||||
for f in formats:
|
||||
f['vcodec'] = 'none'
|
||||
|
||||
if not formats and info.get('policy') == 'BLOCK':
|
||||
self.raise_geo_restricted(metadata_available=True)
|
||||
if not formats:
|
||||
if has_drm:
|
||||
self.report_drm(track_id)
|
||||
if info.get('policy') == 'BLOCK':
|
||||
self.raise_geo_restricted(metadata_available=True)
|
||||
|
||||
user = info.get('user') or {}
|
||||
|
||||
|
@ -71,9 +71,11 @@ class SpankBangIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id') or mobj.group('id_2')
|
||||
country = self.get_param('geo_bypass_country') or 'US'
|
||||
self._set_cookie('.spankbang.com', 'country', country.upper())
|
||||
webpage = self._download_webpage(
|
||||
url.replace(f'/{video_id}/embed', f'/{video_id}/video'),
|
||||
video_id, headers={'Cookie': 'country=US'})
|
||||
video_id, impersonate=True)
|
||||
|
||||
if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
|
||||
raise ExtractorError(
|
||||
|
@ -2,13 +2,16 @@
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def _extract_episode(data, episode_id=None):
|
||||
@ -58,15 +61,10 @@ def duration(key):
|
||||
|
||||
|
||||
class SpreakerIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
api\.spreaker\.com/
|
||||
(?:
|
||||
(?:download/)?episode|
|
||||
v2/episodes
|
||||
)/
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_VALID_URL = [
|
||||
r'https?://api\.spreaker\.com/(?:(?:download/)?episode|v2/episodes)/(?P<id>\d+)',
|
||||
r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P<id>\d+)/?(?:[?#]|$)',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://api.spreaker.com/episode/12534508',
|
||||
'info_dict': {
|
||||
@ -83,7 +81,9 @@ class SpreakerIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'series': 'Success With Music (SWM)',
|
||||
'series': 'Success With Music | SWM',
|
||||
'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg',
|
||||
'creators': ['SWM'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
|
||||
@ -91,52 +91,75 @@ class SpreakerIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'note': 'episode',
|
||||
'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615',
|
||||
'info_dict': {
|
||||
'id': '60269615',
|
||||
'display_id': 'grunge-music-origins-the-raw-sound-that-',
|
||||
'ext': 'mp3',
|
||||
'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation',
|
||||
'description': str,
|
||||
'timestamp': 1717468905,
|
||||
'upload_date': '20240604',
|
||||
'uploader': 'Katie Brown 2',
|
||||
'uploader_id': '17733249',
|
||||
'duration': 818.83,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'series': '90s Grunge',
|
||||
'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg',
|
||||
'creators': ['Katie Brown 2'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.spreaker.com/episode/60269615',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
data = self._download_json(
|
||||
f'https://api.spreaker.com/v2/episodes/{episode_id}',
|
||||
episode_id)['response']['episode']
|
||||
f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id,
|
||||
query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode']
|
||||
return _extract_episode(data, episode_id)
|
||||
|
||||
|
||||
class SpreakerPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
episode_id = self._search_regex(
|
||||
(r'data-episode_id=["\'](?P<id>\d+)',
|
||||
r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
|
||||
return self.url_result(
|
||||
f'https://api.spreaker.com/episode/{episode_id}',
|
||||
ie=SpreakerIE.ie_key(), video_id=episode_id)
|
||||
|
||||
|
||||
class SpreakerShowIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
|
||||
_VALID_URL = [
|
||||
r'https?://api\.spreaker\.com/show/(?P<id>\d+)',
|
||||
r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P<id>[\d]+)',
|
||||
r'https?://(?:www\.)?spreaker\.com/show/(?P<id>\d+)/episodes/feed',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'https://api.spreaker.com/show/4652058',
|
||||
'info_dict': {
|
||||
'id': '4652058',
|
||||
},
|
||||
'playlist_mincount': 118,
|
||||
}, {
|
||||
'url': 'https://www.spreaker.com/podcast/health-wealth--5918323',
|
||||
'info_dict': {
|
||||
'id': '5918323',
|
||||
},
|
||||
'playlist_mincount': 60,
|
||||
}, {
|
||||
'url': 'https://www.spreaker.com/show/5887186/episodes/feed',
|
||||
'info_dict': {
|
||||
'id': '5887186',
|
||||
},
|
||||
'playlist_mincount': 290,
|
||||
}]
|
||||
|
||||
def _entries(self, show_id):
|
||||
def _entries(self, show_id, key=None):
|
||||
for page_num in itertools.count(1):
|
||||
episodes = self._download_json(
|
||||
f'https://api.spreaker.com/show/{show_id}/episodes',
|
||||
show_id, note=f'Downloading JSON page {page_num}', query={
|
||||
show_id, note=f'Downloading JSON page {page_num}', query=filter_dict({
|
||||
'page': page_num,
|
||||
'max_per_page': 100,
|
||||
})
|
||||
'key': key,
|
||||
}))
|
||||
pager = try_get(episodes, lambda x: x['response']['pager'], dict)
|
||||
if not pager:
|
||||
break
|
||||
@ -152,21 +175,5 @@ def _entries(self, show_id):
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
return self.playlist_result(self._entries(show_id), playlist_id=show_id)
|
||||
|
||||
|
||||
class SpreakerShowPageIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.spreaker.com/show/success-with-music',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
show_id = self._search_regex(
|
||||
r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
|
||||
return self.url_result(
|
||||
f'https://api.spreaker.com/show/{show_id}',
|
||||
ie=SpreakerShowIE.ie_key(), video_id=show_id)
|
||||
key = traverse_obj(parse_qs(url), ('key', 0))
|
||||
return self.playlist_result(self._entries(show_id, key), playlist_id=show_id)
|
||||
|
@ -28,24 +28,21 @@ class StripchatIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id, headers=self.geo_verification_headers())
|
||||
data = self._search_json(
|
||||
r'<script\b[^>]*>\s*window\.__PRELOADED_STATE__\s*=',
|
||||
webpage, 'data', video_id, transform_source=lowercase_escape)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'<script\b[^>]*>\s*window\.__PRELOADED_STATE__\s*=(?P<value>.*?)<\/script>',
|
||||
webpage, 'data', default='{}', group='value'),
|
||||
video_id, transform_source=lowercase_escape, fatal=False)
|
||||
if not data:
|
||||
raise ExtractorError('Unable to find configuration for stream.')
|
||||
|
||||
if traverse_obj(data, ('viewCam', 'show'), expected_type=dict):
|
||||
raise ExtractorError('Model is in private show', expected=True)
|
||||
elif not traverse_obj(data, ('viewCam', 'model', 'isLive'), expected_type=bool):
|
||||
if traverse_obj(data, ('viewCam', 'show', {dict})):
|
||||
raise ExtractorError('Model is in a private show', expected=True)
|
||||
if not traverse_obj(data, ('viewCam', 'model', 'isLive', {bool})):
|
||||
raise UserNotLive(video_id=video_id)
|
||||
|
||||
model_id = traverse_obj(data, ('viewCam', 'model', 'id'), expected_type=int)
|
||||
model_id = data['viewCam']['model']['id']
|
||||
|
||||
formats = []
|
||||
for host in traverse_obj(data, ('config', 'data', (
|
||||
# HLS hosts are currently found in .configV3.static.features.hlsFallback.fallbackDomains[]
|
||||
# The rest of the path is for backwards compatibility and to guard against A/B testing
|
||||
for host in traverse_obj(data, ((('config', 'data'), ('configV3', 'static')), (
|
||||
(('features', 'featuresV2'), 'hlsFallback', 'fallbackDomains', ...), 'hlsStreamHost'))):
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'https://edge-hls.{host}/hls/{model_id}/master/{model_id}_auto.m3u8',
|
||||
@ -53,7 +50,7 @@ def _real_extract(self, url):
|
||||
if formats:
|
||||
break
|
||||
if not formats:
|
||||
self.raise_no_formats('No active streams found', expected=True)
|
||||
self.raise_no_formats('Unable to extract stream host', video_id=video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
@ -413,15 +413,6 @@ def extract_addr(addr, add_meta={}):
|
||||
for f in formats:
|
||||
self._set_cookie(urllib.parse.urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value)
|
||||
|
||||
thumbnails = []
|
||||
for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
|
||||
'origin_cover', 'dynamic_cover'):
|
||||
for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...)):
|
||||
thumbnails.append({
|
||||
'id': cover_id,
|
||||
'url': cover_url,
|
||||
})
|
||||
|
||||
stats_info = aweme_detail.get('statistics') or {}
|
||||
music_info = aweme_detail.get('music') or {}
|
||||
labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str)
|
||||
@ -467,7 +458,17 @@ def extract_addr(addr, add_meta={}):
|
||||
'formats': formats,
|
||||
'subtitles': self.extract_subtitles(
|
||||
aweme_detail, aweme_id, traverse_obj(author_info, 'uploader', 'uploader_id', 'channel_id')),
|
||||
'thumbnails': thumbnails,
|
||||
'thumbnails': [
|
||||
{
|
||||
'id': cover_id,
|
||||
'url': cover_url,
|
||||
'preference': -1 if cover_id in ('cover', 'origin_cover') else -2,
|
||||
}
|
||||
for cover_id in (
|
||||
'cover', 'ai_dynamic_cover', 'animated_cover',
|
||||
'ai_dynamic_cover_bak', 'origin_cover', 'dynamic_cover')
|
||||
for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...))
|
||||
],
|
||||
'duration': (traverse_obj(video_info, (
|
||||
(None, 'download_addr'), 'duration', {int_or_none(scale=1000)}, any))
|
||||
or traverse_obj(music_info, ('duration', {int_or_none}))),
|
||||
@ -600,11 +601,15 @@ def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_fl
|
||||
'repost_count': 'shareCount',
|
||||
'comment_count': 'commentCount',
|
||||
}), expected_type=int_or_none),
|
||||
'thumbnails': traverse_obj(aweme_detail, (
|
||||
(None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {
|
||||
'url': ({url_or_none}, {self._proto_relative_url}),
|
||||
},
|
||||
)),
|
||||
'thumbnails': [
|
||||
{
|
||||
'id': cover_id,
|
||||
'url': self._proto_relative_url(cover_url),
|
||||
'preference': -2 if cover_id == 'dynamicCover' else -1,
|
||||
}
|
||||
for cover_id in ('thumbnail', 'cover', 'dynamicCover', 'originCover')
|
||||
for cover_url in traverse_obj(aweme_detail, ((None, 'video'), cover_id, {url_or_none}))
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
|
@ -189,26 +189,6 @@ class TumblrIE(InfoExtractor):
|
||||
'release_date': '20140227',
|
||||
},
|
||||
'add_ie': ['Vimeo'],
|
||||
}, {
|
||||
'url': 'http://sutiblr.tumblr.com/post/139638707273',
|
||||
'md5': '2dd184b3669e049ba40563a7d423f95c',
|
||||
'info_dict': {
|
||||
'id': 'ir7qBEIKqvq',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vine by sutiblr',
|
||||
'alt_title': 'Vine by sutiblr',
|
||||
'uploader': 'sutiblr',
|
||||
'uploader_id': '1198993975374495744',
|
||||
'upload_date': '20160220',
|
||||
'like_count': int,
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'timestamp': 1455940159,
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
'skip': 'Vine is unavailable',
|
||||
}, {
|
||||
'url': 'https://silami.tumblr.com/post/84250043974/my-bad-river-flows-in-you-impression-on-maschine',
|
||||
'md5': '3c92d7c3d867f14ccbeefa2119022277',
|
||||
@ -366,7 +346,6 @@ class TumblrIE(InfoExtractor):
|
||||
_providers = {
|
||||
'instagram': 'Instagram',
|
||||
'vimeo': 'Vimeo',
|
||||
'vine': 'Vine',
|
||||
'youtube': 'Youtube',
|
||||
'dailymotion': 'Dailymotion',
|
||||
'tiktok': 'TikTok',
|
||||
|
@ -409,26 +409,6 @@ class TwitterCardIE(InfoExtractor):
|
||||
},
|
||||
'add_ie': ['Youtube'],
|
||||
},
|
||||
{
|
||||
'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
|
||||
'info_dict': {
|
||||
'id': 'iBb2x00UVlv',
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20151113',
|
||||
'uploader_id': '1189339351084113920',
|
||||
'uploader': 'ArsenalTerje',
|
||||
'title': 'Vine by ArsenalTerje',
|
||||
'timestamp': 1447451307,
|
||||
'alt_title': 'Vine by ArsenalTerje',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg',
|
||||
'view_count': int,
|
||||
'repost_count': int,
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
},
|
||||
{
|
||||
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
||||
'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
|
||||
@ -567,25 +547,6 @@ class TwitterIE(TwitterBaseIE):
|
||||
'age_limit': 0,
|
||||
'_old_archive_ids': ['twitter 700207533655363584'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
|
||||
'md5': '89a15ed345d13b86e9a5a5e051fa308a',
|
||||
'info_dict': {
|
||||
'id': 'MIOxnrUteUd',
|
||||
'ext': 'mp4',
|
||||
'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
|
||||
'uploader': 'TAKUMA',
|
||||
'uploader_id': '1004126642786242560',
|
||||
'timestamp': 1402826626,
|
||||
'upload_date': '20140615',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'alt_title': 'Vine by TAKUMA',
|
||||
'comment_count': int,
|
||||
'repost_count': int,
|
||||
'like_count': int,
|
||||
'view_count': int,
|
||||
},
|
||||
'add_ie': ['Vine'],
|
||||
}, {
|
||||
'url': 'https://twitter.com/captainamerica/status/719944021058060289',
|
||||
'info_dict': {
|
||||
|
113
yt_dlp/extractor/uliza.py
Normal file
113
yt_dlp/extractor/uliza.py
Normal file
@ -0,0 +1,113 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
make_archive_id,
|
||||
parse_qs,
|
||||
time_seconds,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class UlizaPlayerIE(InfoExtractor):
|
||||
_VALID_URL = r'https://player-api\.p\.uliza\.jp/v1/players/[^?#]+\?(?:[^#]*&)?name=(?P<id>[^#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://player-api.p.uliza.jp/v1/players/timeshift-disabled/pia/admin?type=normal&playerobjectname=ulizaPlayer&name=livestream01_dvr&repeatable=true',
|
||||
'info_dict': {
|
||||
'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
|
||||
'ext': 'mp4',
|
||||
'title': '88f3109a-f503-4d0f-a9f7-9f39ac745d84',
|
||||
'live_status': 'was_live',
|
||||
'_old_archive_ids': ['piaulizaportal 88f3109a-f503-4d0f-a9f7-9f39ac745d84'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://player-api.p.uliza.jp/v1/players/uliza_jp_gallery_normal/promotion/admin?type=presentation&name=cookings&targetid=player1',
|
||||
'info_dict': {
|
||||
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
|
||||
'ext': 'mp4',
|
||||
'title': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
|
||||
'live_status': 'not_live',
|
||||
'_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://player-api.p.uliza.jp/v1/players/default-player/pia/admin?type=normal&name=pia_movie_uliza_fix&targetid=ulizahtml5&repeatable=true',
|
||||
'info_dict': {
|
||||
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
|
||||
'ext': 'mp4',
|
||||
'title': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
|
||||
'live_status': 'not_live',
|
||||
'_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
player_data = self._download_webpage(
|
||||
url, display_id, headers={'Referer': 'https://player-api.p.uliza.jp/'},
|
||||
note='Fetching player data', errnote='Unable to fetch player data')
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)', player_data, 'm3u8 url')
|
||||
video_id = parse_qs(m3u8_url).get('ss', [display_id])[0]
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id)
|
||||
m3u8_type = self._search_regex(
|
||||
r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'formats': formats,
|
||||
'live_status': {
|
||||
'video': 'is_live',
|
||||
'dvr': 'was_live', # short-term archives
|
||||
}.get(m3u8_type, 'not_live'), # VOD or long-term archives
|
||||
'_old_archive_ids': [make_archive_id('PIAULIZAPortal', video_id)],
|
||||
}
|
||||
|
||||
|
||||
class UlizaPortalIE(InfoExtractor):
|
||||
IE_DESC = 'ulizaportal.jp'
|
||||
_VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_TESTS = [{
|
||||
'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
|
||||
'info_dict': {
|
||||
'id': 'ae350126-5e22-4a7f-a8ac-8d0fd448b800',
|
||||
'display_id': '005f18b7-e810-5618-cb82-0987c5755d44',
|
||||
'title': 'プレゼンテーションプレイヤーのサンプル',
|
||||
'live_status': 'not_live',
|
||||
'_old_archive_ids': ['piaulizaportal ae350126-5e22-4a7f-a8ac-8d0fd448b800'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
|
||||
'info_dict': {
|
||||
'id': '0644ecc8-e354-41b4-b957-3b08a2d63df1',
|
||||
'display_id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
|
||||
'title': '【確認用】視聴サンプルページ(ULIZA)',
|
||||
'live_status': 'not_live',
|
||||
'_old_archive_ids': ['piaulizaportal 0644ecc8-e354-41b4-b957-3b08a2d63df1'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'ignore_no_formats_error': True,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
|
||||
if expires and expires <= time_seconds():
|
||||
raise ExtractorError('The link is expired', video_id=video_id, expected=True)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player_data_url = self._search_regex(
|
||||
r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
|
||||
webpage, 'player data url')
|
||||
return self.url_result(
|
||||
player_data_url, UlizaPlayerIE, url_transparent=True,
|
||||
display_id=video_id, video_title=self._html_extract_title(webpage))
|
@ -1,189 +0,0 @@
|
||||
import functools
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
qualities,
|
||||
remove_start,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VeohIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
|
||||
'md5': '620e68e6a3cff80086df3348426c9ca3',
|
||||
'info_dict': {
|
||||
'id': 'v56314296nk7Zdmz3',
|
||||
'ext': 'mp4',
|
||||
'title': 'Straight Backs Are Stronger',
|
||||
'description': 'md5:203f976279939a6dc664d4001e13f5f4',
|
||||
'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
|
||||
'uploader': 'LUMOback',
|
||||
'duration': 46,
|
||||
'view_count': int,
|
||||
'average_rating': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 0,
|
||||
'categories': ['technology_and_gaming'],
|
||||
'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
|
||||
'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
|
||||
'info_dict': {
|
||||
'id': '27701988',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chile workers cover up to avoid skin damage',
|
||||
'description': 'md5:2bd151625a60a32822873efc246ba20d',
|
||||
'uploader': 'afp-news',
|
||||
'duration': 123,
|
||||
},
|
||||
'skip': 'This video has been deleted.',
|
||||
}, {
|
||||
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
|
||||
'md5': '4fde7b9e33577bab2f2f8f260e30e979',
|
||||
'note': 'Embedded ooyala video',
|
||||
'info_dict': {
|
||||
'id': '69525809',
|
||||
'ext': 'mp4',
|
||||
'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
|
||||
'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
|
||||
'uploader': 'newsy-videos',
|
||||
},
|
||||
'skip': 'This video has been deleted.',
|
||||
}, {
|
||||
'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
|
||||
'md5': 'cceb73f3909063d64f4b93d4defca1b3',
|
||||
'info_dict': {
|
||||
'id': 'v16374379WA437rMH',
|
||||
'ext': 'mp4',
|
||||
'title': 'Phantasmagoria 2, pt. 1-3',
|
||||
'description': 'Phantasmagoria: a Puzzle of Flesh',
|
||||
'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
|
||||
'uploader': 'davidspackage',
|
||||
'duration': 968,
|
||||
'view_count': int,
|
||||
'average_rating': int,
|
||||
'comment_count': int,
|
||||
'age_limit': 18,
|
||||
'categories': ['technology_and_gaming', 'gaming'],
|
||||
'tags': ['puzzle', 'of', 'flesh'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
metadata = self._download_json(
|
||||
'https://www.veoh.com/watch/getVideo/' + video_id,
|
||||
video_id)
|
||||
video = metadata['video']
|
||||
title = video['title']
|
||||
|
||||
thumbnail_url = None
|
||||
q = qualities(['Regular', 'HQ'])
|
||||
formats = []
|
||||
for f_id, f_url in video.get('src', {}).items():
|
||||
if not f_url:
|
||||
continue
|
||||
if f_id == 'poster':
|
||||
thumbnail_url = f_url
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': f_id,
|
||||
'quality': q(f_id),
|
||||
'url': f_url,
|
||||
})
|
||||
|
||||
categories = metadata.get('categoryPath')
|
||||
if not categories:
|
||||
category = remove_start(strip_or_none(video.get('category')), 'category_')
|
||||
categories = [category] if category else None
|
||||
tags = video.get('tags')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': video.get('description'),
|
||||
'thumbnail': thumbnail_url,
|
||||
'uploader': video.get('author', {}).get('nickname'),
|
||||
'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'formats': formats,
|
||||
'average_rating': int_or_none(video.get('rating')),
|
||||
'comment_count': int_or_none(video.get('numOfComments')),
|
||||
'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
|
||||
'categories': categories,
|
||||
'tags': tags.split(', ') if tags else None,
|
||||
}
|
||||
|
||||
|
||||
class VeohUserIE(VeohIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
|
||||
IE_NAME = 'veoh:user'
|
||||
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://www.veoh.com/users/valentinazoe',
|
||||
'info_dict': {
|
||||
'id': 'valentinazoe',
|
||||
'title': 'valentinazoe (Uploads)',
|
||||
},
|
||||
'playlist_mincount': 75,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.veoh.com/users/PiensaLibre',
|
||||
'info_dict': {
|
||||
'id': 'PiensaLibre',
|
||||
'title': 'PiensaLibre (Uploads)',
|
||||
},
|
||||
'playlist_mincount': 2,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 16
|
||||
|
||||
def _fetch_page(self, uploader, page):
|
||||
response = self._download_json(
|
||||
'https://www.veoh.com/users/published/videos', uploader,
|
||||
note=f'Downloading videos page {page + 1}',
|
||||
headers={
|
||||
'x-csrf-token': self._TOKEN,
|
||||
'content-type': 'application/json;charset=UTF-8',
|
||||
},
|
||||
data=json.dumps({
|
||||
'username': uploader,
|
||||
'maxResults': self._PAGE_SIZE,
|
||||
'page': page + 1,
|
||||
'requestName': 'userPage',
|
||||
}).encode())
|
||||
if not response.get('success'):
|
||||
raise ExtractorError(response['message'])
|
||||
|
||||
for video in response['videos']:
|
||||
yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
|
||||
video['permalinkId'], video.get('title'))
|
||||
|
||||
def _real_initialize(self):
|
||||
webpage = self._download_webpage(
|
||||
'https://www.veoh.com', None, note='Downloading authorization token')
|
||||
self._TOKEN = self._search_regex(
|
||||
r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
|
||||
'request token', group='token')
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader = self._match_id(url)
|
||||
return self.playlist_result(OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, uploader),
|
||||
self._PAGE_SIZE), uploader, f'{uploader} (Uploads)')
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user