1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-15 00:48:28 +00:00

Merge remote-tracking branch 'upstream/master' into feat/youtube/sabr

This commit is contained in:
coletdjnz 2025-08-03 10:07:59 +12:00
commit 04493bb8b7
No known key found for this signature in database
GPG Key ID: 91984263BB39894A
148 changed files with 7452 additions and 7626 deletions

View File

@ -242,7 +242,7 @@ jobs:
permissions: permissions:
contents: read contents: read
actions: write # For cleaning up cache actions: write # For cleaning up cache
runs-on: macos-13 runs-on: macos-14
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -261,6 +261,8 @@ jobs:
- name: Install Requirements - name: Install Requirements
run: | run: |
brew install coreutils brew install coreutils
# We need to use system Python in order to roll our own universal2 curl_cffi wheel
brew uninstall --ignore-dependencies python3
python3 -m venv ~/yt-dlp-build-venv python3 -m venv ~/yt-dlp-build-venv
source ~/yt-dlp-build-venv/bin/activate source ~/yt-dlp-build-venv/bin/activate
python3 devscripts/install_deps.py -o --include build python3 devscripts/install_deps.py -o --include build

View File

@ -37,7 +37,7 @@ jobs:
matrix: matrix:
os: [ubuntu-latest] os: [ubuntu-latest]
# CPython 3.9 is in quick-test # CPython 3.9 is in quick-test
python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11]
include: include:
# at least one of each CPython/PyPy tests must be in windows # at least one of each CPython/PyPy tests must be in windows
- os: windows-latest - os: windows-latest
@ -49,7 +49,7 @@ jobs:
- os: windows-latest - os: windows-latest
python-version: '3.13' python-version: '3.13'
- os: windows-latest - os: windows-latest
python-version: pypy-3.10 python-version: pypy-3.11
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}

View File

@ -28,13 +28,13 @@ jobs:
fail-fast: true fail-fast: true
matrix: matrix:
os: [ubuntu-latest] os: [ubuntu-latest]
python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11]
include: include:
# at least one of each CPython/PyPy tests must be in windows # at least one of each CPython/PyPy tests must be in windows
- os: windows-latest - os: windows-latest
python-version: '3.9' python-version: '3.9'
- os: windows-latest - os: windows-latest
python-version: pypy-3.10 python-version: pypy-3.11
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}

View File

@ -25,7 +25,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
os: [ubuntu-latest, windows-latest] os: [ubuntu-latest, windows-latest]
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.11]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}

View File

@ -126,7 +126,7 @@ ### Are you willing to share account details if needed?
While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow. While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow.
- Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages. - Look for people with `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages.
- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator). - Change the password before sharing the account to something random.
- Change the password after receiving the account back. - Change the password after receiving the account back.
### Is the website primarily used for piracy? ### Is the website primarily used for piracy?
@ -272,7 +272,7 @@ ## Adding support for a new site
You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`).
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. 1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.11. Backward compatibility is not required for even older versions of Python.
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
```shell ```shell

View File

@ -784,3 +784,12 @@ eason1478
ceandreasen ceandreasen
chauhantirth chauhantirth
helpimnotdrowning helpimnotdrowning
adamralph
averageFOSSenjoyer
bubo
flanter21
Georift
moonshinerd
R0hanW
ShockedPlot7560
swayll

View File

@ -4,6 +4,97 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
--> -->
### 2025.07.21
#### Important changes
- **Default behaviour changed from `--mtime` to `--no-mtime`**
yt-dlp no longer applies the server modified time to downloaded files by default. [Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)
- Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)
- When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped
#### Core changes
- [Allow extractors to designate formats/subtitles for impersonation](https://github.com/yt-dlp/yt-dlp/commit/32809eb2da92c649e540a5b714f6235036026161) ([#13778](https://github.com/yt-dlp/yt-dlp/issues/13778)) by [bashonly](https://github.com/bashonly) (With fixes in [3e49bc8](https://github.com/yt-dlp/yt-dlp/commit/3e49bc8a1bdb4109b857f2c361c358e86fa63405), [2ac3eb9](https://github.com/yt-dlp/yt-dlp/commit/2ac3eb98373d1c31341c5e918c83872c7ff409c6))
- [Don't let format testing alter the return code](https://github.com/yt-dlp/yt-dlp/commit/4919051e447c7f8ae9df8ba5c4208b6b5c04915a) ([#13767](https://github.com/yt-dlp/yt-dlp/issues/13767)) by [bashonly](https://github.com/bashonly)
- [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/commit/959ac99e98c3215437e573c22d64be42d361e863) by [Grub4K](https://github.com/Grub4K)
- [No longer enable `--mtime` by default](https://github.com/yt-dlp/yt-dlp/commit/f3008bc5f89d2691f2f8dfc51b406ef4e25281c3) ([#12781](https://github.com/yt-dlp/yt-dlp/issues/12781)) by [seproDev](https://github.com/seproDev)
- [Warn when skipping formats](https://github.com/yt-dlp/yt-dlp/commit/1f27a9f8baccb9105f2476154557540efe09a937) ([#13090](https://github.com/yt-dlp/yt-dlp/issues/13090)) by [bashonly](https://github.com/bashonly)
- **jsinterp**
- [Cache undefined variable names](https://github.com/yt-dlp/yt-dlp/commit/b342d27f3f82d913976509ddf5bff539ad8567ec) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly) (With fixes in [805519b](https://github.com/yt-dlp/yt-dlp/commit/805519bfaa7cb5443912dfe45ac774834ba65a16))
- [Fix variable scoping](https://github.com/yt-dlp/yt-dlp/commit/b6328ca05030d815222b25d208cc59a964623bf9) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- **utils**
- `mimetype2ext`: [Always parse `flac` from `audio/flac`](https://github.com/yt-dlp/yt-dlp/commit/b8abd255e454acbe0023cdb946f9eb461ced7eeb) ([#13748](https://github.com/yt-dlp/yt-dlp/issues/13748)) by [bashonly](https://github.com/bashonly)
- `unified_timestamp`: [Return `int` values](https://github.com/yt-dlp/yt-dlp/commit/6be26626f7cfa71d28e0fac2861eb04758810c5d) ([#13796](https://github.com/yt-dlp/yt-dlp/issues/13796)) by [doe1080](https://github.com/doe1080)
- `urlhandle_detect_ext`: [Use `x-amz-meta-file-type` headers](https://github.com/yt-dlp/yt-dlp/commit/28bf46b7dafe2e241137763bf570a2f91ba8a53a) ([#13749](https://github.com/yt-dlp/yt-dlp/issues/13749)) by [bashonly](https://github.com/bashonly)
#### Extractor changes
- [Add `_search_nextjs_v13_data` helper](https://github.com/yt-dlp/yt-dlp/commit/5245231e4a39ecd5595d4337d46d85e150e2430a) ([#13398](https://github.com/yt-dlp/yt-dlp/issues/13398)) by [bashonly](https://github.com/bashonly) (With fixes in [b5fea53](https://github.com/yt-dlp/yt-dlp/commit/b5fea53f2099bed41ba1b17ab0ac87c8dba5a5ec))
- [Detect invalid m3u8 playlist data](https://github.com/yt-dlp/yt-dlp/commit/e99c0b838a9c5feb40c0dcd291bd7b8620b8d36d) ([#13601](https://github.com/yt-dlp/yt-dlp/issues/13601)) by [Grub4K](https://github.com/Grub4K)
- **10play**: [Support new site domain](https://github.com/yt-dlp/yt-dlp/commit/790c286ce3e0b534ca2d8f6648ced220d888f139) ([#13611](https://github.com/yt-dlp/yt-dlp/issues/13611)) by [Georift](https://github.com/Georift)
- **9gag**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/0b359b184dee0c7052be482857bf562de67e4928) ([#13678](https://github.com/yt-dlp/yt-dlp/issues/13678)) by [bashonly](https://github.com/bashonly)
- **aenetworks**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/5f951ce929b56a822514f1a02cc06af030855ec7) ([#13747](https://github.com/yt-dlp/yt-dlp/issues/13747)) by [bashonly](https://github.com/bashonly)
- **archive.org**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d42a6ff0c4ca8893d722ff4e0c109aecbf4cc7cf) ([#13706](https://github.com/yt-dlp/yt-dlp/issues/13706)) by [rdamas](https://github.com/rdamas)
- **bandaichannel**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/23e9389f936ec5236a87815b8576e5ce567b2f77) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080)
- **bandcamp**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/f9dff95cb1c138913011417b3bba020c0a691bba) ([#13480](https://github.com/yt-dlp/yt-dlp/issues/13480)) by [WouterGordts](https://github.com/WouterGordts)
- **bellmedia**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6fb3947c0dc6d0e3eab5077c5bada8402f47a277) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080)
- **bilibili**: [Pass newer user-agent with API requests](https://github.com/yt-dlp/yt-dlp/commit/d3edc5d52a7159eda2331dbc7e14bf40a6585c81) ([#13736](https://github.com/yt-dlp/yt-dlp/issues/13736)) by [c-basalt](https://github.com/c-basalt)
- **bilibilibangumi**
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b15aa8d77257b86fa44c9a42a615dfe47ac5b3b7) ([#13800](https://github.com/yt-dlp/yt-dlp/issues/13800)) by [bashonly](https://github.com/bashonly)
- [Fix geo-block detection](https://github.com/yt-dlp/yt-dlp/commit/884f35d54a64f1e6e7be49459842f573fc3a2701) ([#13667](https://github.com/yt-dlp/yt-dlp/issues/13667)) by [bashonly](https://github.com/bashonly)
- **blackboardcollaborate**: [Support subtitles and authwalled videos](https://github.com/yt-dlp/yt-dlp/commit/dcc4cba39e2a79d3efce16afa28dbe245468489f) ([#12473](https://github.com/yt-dlp/yt-dlp/issues/12473)) by [flanter21](https://github.com/flanter21)
- **btvplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae61e0f313dd03a09060abc7a212775c3717818) ([#13541](https://github.com/yt-dlp/yt-dlp/issues/13541)) by [bubo](https://github.com/bubo)
- **ctv**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9f54ea38984788811773ca2ceaca73864acf0e8a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080)
- **dangalplay**: [Support other login regions](https://github.com/yt-dlp/yt-dlp/commit/09982bc33e2f1f9a1ff66e6738df44f15b36f6a6) ([#13768](https://github.com/yt-dlp/yt-dlp/issues/13768)) by [bashonly](https://github.com/bashonly)
- **francetv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/ade876efb31d55d3394185ffc56942fdc8d325cc) ([#13726](https://github.com/yt-dlp/yt-dlp/issues/13726)) by [bashonly](https://github.com/bashonly)
- **hotstar**
- [Fix support for free accounts](https://github.com/yt-dlp/yt-dlp/commit/07d1d85f6387e4bdb107096f0131c7054f078bb9) ([#13700](https://github.com/yt-dlp/yt-dlp/issues/13700)) by [chauhantirth](https://github.com/chauhantirth)
- [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7e0af2b1f0c3edb688603b022f3a9ca0bfdf75e9) ([#13727](https://github.com/yt-dlp/yt-dlp/issues/13727)) by [bashonly](https://github.com/bashonly) (With fixes in [ef103b2](https://github.com/yt-dlp/yt-dlp/commit/ef103b2d115bd0e880f9cfd2f7dd705f48e4b40d))
- **joqrag**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/6d39c420f7774562a106d90253e2ed5b75036321) ([#13152](https://github.com/yt-dlp/yt-dlp/issues/13152)) by [doe1080](https://github.com/doe1080)
- **limelight**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/5d693446e882931618c40c99bb593f0b87b30eb9) ([#13267](https://github.com/yt-dlp/yt-dlp/issues/13267)) by [doe1080](https://github.com/doe1080)
- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b4b4486effdcb96bb6b8148171a49ff579b69a4a) ([#13717](https://github.com/yt-dlp/yt-dlp/issues/13717)) by [Pawka](https://github.com/Pawka)
- **mir24.tv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7b4c96e0898db048259ef5fdf12ed14e3605dce3) ([#13651](https://github.com/yt-dlp/yt-dlp/issues/13651)) by [swayll](https://github.com/swayll)
- **mixlr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0f33950c778331bf4803c76e8b0ba1862df93431) ([#13561](https://github.com/yt-dlp/yt-dlp/issues/13561)) by [seproDev](https://github.com/seproDev), [ShockedPlot7560](https://github.com/ShockedPlot7560)
- **mlbtv**: [Make formats downloadable with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/87e3dc8c7f78929d2ef4f4a44e6a567e04cd8226) ([#13761](https://github.com/yt-dlp/yt-dlp/issues/13761)) by [bashonly](https://github.com/bashonly)
- **newspicks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2aaf1aa71d174700859c9ec1a81109b78e34961c) ([#13612](https://github.com/yt-dlp/yt-dlp/issues/13612)) by [doe1080](https://github.com/doe1080)
- **nhkradiru**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7c49a937887756efcfa162abdcf17e48c244cb0c) ([#12708](https://github.com/yt-dlp/yt-dlp/issues/12708)) by [garret1317](https://github.com/garret1317)
- **noovo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/d57a0b5aa78d59324b037d37492fe86aa4fbf58a) ([#13429](https://github.com/yt-dlp/yt-dlp/issues/13429)) by [doe1080](https://github.com/doe1080)
- **patreon**: campaign: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d88b304d44c599d81acfa4231502270c8b9fe2f8) ([#13712](https://github.com/yt-dlp/yt-dlp/issues/13712)) by [bashonly](https://github.com/bashonly)
- **playerfm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a8474c3ca6dbe51bb153b2b8eef7b9a61fa7dc3) ([#13016](https://github.com/yt-dlp/yt-dlp/issues/13016)) by [R0hanW](https://github.com/R0hanW)
- **rai**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c8329fc572903eeed7edad1642773b2268b71a62) ([#13572](https://github.com/yt-dlp/yt-dlp/issues/13572)) by [moonshinerd](https://github.com/moonshinerd), [seproDev](https://github.com/seproDev)
- **raisudtirol**: [Support alternative domain](https://github.com/yt-dlp/yt-dlp/commit/85c3fa1925a9057ef4ae8af682686d5b3eb8e568) ([#13718](https://github.com/yt-dlp/yt-dlp/issues/13718)) by [barsnick](https://github.com/barsnick)
- **skeb**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/060c6a4501a0b8a92f1b9c12788f556d902c83c6) ([#13593](https://github.com/yt-dlp/yt-dlp/issues/13593)) by [doe1080](https://github.com/doe1080)
- **soundcloud**: [Always extract original format extension](https://github.com/yt-dlp/yt-dlp/commit/c1ac543c8166ff031d62e340b3244ca8556e3fb9) ([#13746](https://github.com/yt-dlp/yt-dlp/issues/13746)) by [bashonly](https://github.com/bashonly)
- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0b41746964e1d0470ac286ce09408940a3a51147) ([#13610](https://github.com/yt-dlp/yt-dlp/issues/13610)) by [bashonly](https://github.com/bashonly)
- **thehighwire**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3a84be9d1660ef798ea28f929a20391bef6afda4) ([#13505](https://github.com/yt-dlp/yt-dlp/issues/13505)) by [swayll](https://github.com/swayll)
- **twitch**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/422cc8cb2ff2bd3b4c2bc64e23507b7e6f522c35) ([#13618](https://github.com/yt-dlp/yt-dlp/issues/13618)) by [bashonly](https://github.com/bashonly)
- **unitednationswebtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/630f3389c33f0f7f6ec97e8917d20aeb4e4078da) ([#13538](https://github.com/yt-dlp/yt-dlp/issues/13538)) by [averageFOSSenjoyer](https://github.com/averageFOSSenjoyer)
- **vimeo**
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a5d697f62d8be78ffd472acb2f52c8bc32833003) ([#13692](https://github.com/yt-dlp/yt-dlp/issues/13692)) by [bashonly](https://github.com/bashonly)
- [Handle age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/a6db1d297ab40cc346de24aacbeab93112b2f4e1) ([#13719](https://github.com/yt-dlp/yt-dlp/issues/13719)) by [bashonly](https://github.com/bashonly)
- **youtube**
- [Do not require PO Token for premium accounts](https://github.com/yt-dlp/yt-dlp/commit/5b57b72c1a7c6bd249ffcebdf5630761ec664c10) ([#13640](https://github.com/yt-dlp/yt-dlp/issues/13640)) by [coletdjnz](https://github.com/coletdjnz)
- [Ensure context params are consistent for web clients](https://github.com/yt-dlp/yt-dlp/commit/6e5bee418bc108565108153fd745c8e7a59f16dd) ([#13701](https://github.com/yt-dlp/yt-dlp/issues/13701)) by [coletdjnz](https://github.com/coletdjnz)
- [Extract global nsig helper functions](https://github.com/yt-dlp/yt-dlp/commit/fca94ac5d63ed6578b5cd9c8129d97a8a713c39a) ([#13639](https://github.com/yt-dlp/yt-dlp/issues/13639)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/0e68332bcb9fba87c42805b7a051eeb2bed36206) ([#13659](https://github.com/yt-dlp/yt-dlp/issues/13659)) by [bashonly](https://github.com/bashonly)
- [Log bad playability statuses of player responses](https://github.com/yt-dlp/yt-dlp/commit/aa9f1f4d577e99897ac16cd19d4e217d688ea75d) ([#13647](https://github.com/yt-dlp/yt-dlp/issues/13647)) by [coletdjnz](https://github.com/coletdjnz)
- [Use impersonation for downloading subtitles](https://github.com/yt-dlp/yt-dlp/commit/8820101aa3152e5f4811541c645f8b5de231ba8c) ([#13786](https://github.com/yt-dlp/yt-dlp/issues/13786)) by [bashonly](https://github.com/bashonly)
- tab: [Fix subscriptions feed extraction](https://github.com/yt-dlp/yt-dlp/commit/c23d837b6524d1e7a4595948871ba1708cba4dfa) ([#13665](https://github.com/yt-dlp/yt-dlp/issues/13665)) by [bashonly](https://github.com/bashonly)
#### Downloader changes
- **hls**: [Do not fall back to ffmpeg when native is required](https://github.com/yt-dlp/yt-dlp/commit/a7113722ec33f30fc898caee9242af2b82188a53) ([#13655](https://github.com/yt-dlp/yt-dlp/issues/13655)) by [bashonly](https://github.com/bashonly)
#### Networking changes
- **Request Handler**
- requests
- [Refactor default headers](https://github.com/yt-dlp/yt-dlp/commit/a4561c7a66c39d88efe7ae51e7fa1986faf093fb) ([#13785](https://github.com/yt-dlp/yt-dlp/issues/13785)) by [bashonly](https://github.com/bashonly)
- [Work around partial read dropping data](https://github.com/yt-dlp/yt-dlp/commit/c2ff2dbaec7929015373fe002e9bd4849931a4ce) ([#13599](https://github.com/yt-dlp/yt-dlp/issues/13599)) by [Grub4K](https://github.com/Grub4K) (With fixes in [c316416](https://github.com/yt-dlp/yt-dlp/commit/c316416b972d1b05e58fbcc21e80428b900ce102))
#### Misc. changes
- **cleanup**
- [Bump ruff to 0.12.x](https://github.com/yt-dlp/yt-dlp/commit/ca5cce5b07d51efe7310b449cdefeca8d873e9df) ([#13596](https://github.com/yt-dlp/yt-dlp/issues/13596)) by [seproDev](https://github.com/seproDev)
- Miscellaneous: [9951fdd](https://github.com/yt-dlp/yt-dlp/commit/9951fdd0d08b655cb1af8cd7f32a3fb7e2b1324e) by [adamralph](https://github.com/adamralph), [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080), [hseg](https://github.com/hseg), [InvalidUsernameException](https://github.com/InvalidUsernameException), [seproDev](https://github.com/seproDev)
- **devscripts**: [Fix filename/directory Bash completions](https://github.com/yt-dlp/yt-dlp/commit/99093e96fd6a26dea9d6e4bd1e4b16283b6ad1ee) ([#13620](https://github.com/yt-dlp/yt-dlp/issues/13620)) by [barsnick](https://github.com/barsnick)
- **test**: download: [Support `playlist_maxcount`](https://github.com/yt-dlp/yt-dlp/commit/fd36b8f31bafbd8096bdb92a446a0c9c6081209c) ([#13433](https://github.com/yt-dlp/yt-dlp/issues/13433)) by [InvalidUsernameException](https://github.com/InvalidUsernameException)
### 2025.06.30 ### 2025.06.30
#### Core changes #### Core changes

View File

@ -172,7 +172,7 @@ # To install nightly with pip:
``` ```
## DEPENDENCIES ## DEPENDENCIES
Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly.
<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created <!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created
<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x> <!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x>
@ -278,7 +278,7 @@ # USAGE AND OPTIONS
<!-- MANPAGE: BEGIN EXCLUDED SECTION --> <!-- MANPAGE: BEGIN EXCLUDED SECTION -->
yt-dlp [OPTIONS] [--] URL [URL...] yt-dlp [OPTIONS] [--] URL [URL...]
`Ctrl+F` is your friend :D Tip: Use `CTRL`+`F` (or `Command`+`F`) to search by keywords
<!-- MANPAGE: END EXCLUDED SECTION --> <!-- MANPAGE: END EXCLUDED SECTION -->
<!-- Auto generated --> <!-- Auto generated -->
@ -640,9 +640,9 @@ ## Filesystem Options:
--no-part Do not use .part files - write directly into --no-part Do not use .part files - write directly into
output file output file
--mtime Use the Last-modified header to set the file --mtime Use the Last-modified header to set the file
modification time (default) modification time
--no-mtime Do not use the Last-modified header to set --no-mtime Do not use the Last-modified header to set
the file modification time the file modification time (default)
--write-description Write video description to a .description file --write-description Write video description to a .description file
--no-write-description Do not write video description (default) --no-write-description Do not write video description (default)
--write-info-json Write video metadata to a .info.json file --write-info-json Write video metadata to a .info.json file
@ -1903,6 +1903,10 @@ #### sonylivseries
#### tver #### tver
* `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated)
#### vimeo
* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens.
* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability
**Note**: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility
<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE --> <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

View File

@ -62,16 +62,22 @@ def parse_options():
def exe(onedir): def exe(onedir):
"""@returns (name, path)""" """@returns (name, path)"""
platform_name, machine, extension = {
'win32': (None, MACHINE, '.exe'),
'darwin': ('macos', None, None),
}.get(OS_NAME, (OS_NAME, MACHINE, None))
name = '_'.join(filter(None, ( name = '_'.join(filter(None, (
'yt-dlp', 'yt-dlp',
{'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), platform_name,
MACHINE, machine,
))) )))
return name, ''.join(filter(None, ( return name, ''.join(filter(None, (
'dist/', 'dist/',
onedir and f'{name}/', onedir and f'{name}/',
name, name,
OS_NAME == 'win32' and '.exe', extension,
))) )))

View File

@ -262,5 +262,15 @@
{ {
"action": "remove", "action": "remove",
"when": "500761e41acb96953a5064e951d41d190c287e46" "when": "500761e41acb96953a5064e951d41d190c287e46"
},
{
"action": "add",
"when": "f3008bc5f89d2691f2f8dfc51b406ef4e25281c3",
"short": "[priority] **Default behaviour changed from `--mtime` to `--no-mtime`**\nyt-dlp no longer applies the server modified time to downloaded files by default. [Read more](https://github.com/yt-dlp/yt-dlp/issues/12780)"
},
{
"action": "add",
"when": "959ac99e98c3215437e573c22d64be42d361e863",
"short": "[priority] Security: [[CVE-2025-54072](https://nvd.nist.gov/vuln/detail/CVE-2025-54072)] [Fix `--exec` placeholder expansion on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-45hg-7f49-5h56)\n - When `--exec` is used on Windows, the filepath expanded from `{}` (or the default placeholder) is now properly escaped"
} }
] ]

View File

@ -133,7 +133,6 @@ # Supported sites
- **BaiduVideo**: 百度视频 - **BaiduVideo**: 百度视频
- **BanBye** - **BanBye**
- **BanByeChannel** - **BanByeChannel**
- **bandaichannel**
- **Bandcamp** - **Bandcamp**
- **Bandcamp:album** - **Bandcamp:album**
- **Bandcamp:user** - **Bandcamp:user**
@ -157,7 +156,6 @@ # Supported sites
- **Beeg** - **Beeg**
- **BehindKink**: (**Currently broken**) - **BehindKink**: (**Currently broken**)
- **Bellator** - **Bellator**
- **BellMedia**
- **BerufeTV** - **BerufeTV**
- **Bet**: (**Currently broken**) - **Bet**: (**Currently broken**)
- **bfi:player**: (**Currently broken**) - **bfi:player**: (**Currently broken**)
@ -197,6 +195,7 @@ # Supported sites
- **BitChute** - **BitChute**
- **BitChuteChannel** - **BitChuteChannel**
- **BlackboardCollaborate** - **BlackboardCollaborate**
- **BlackboardCollaborateLaunch**
- **BleacherReport**: (**Currently broken**) - **BleacherReport**: (**Currently broken**)
- **BleacherReportCMS**: (**Currently broken**) - **BleacherReportCMS**: (**Currently broken**)
- **blerp** - **blerp**
@ -225,6 +224,7 @@ # Supported sites
- **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org
- **bt:article**: Bergens Tidende Articles - **bt:article**: Bergens Tidende Articles
- **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
- **BTVPlus**
- **Bundesliga** - **Bundesliga**
- **Bundestag** - **Bundestag**
- **BunnyCdn** - **BunnyCdn**
@ -317,7 +317,6 @@ # Supported sites
- **CSpan**: C-SPAN - **CSpan**: C-SPAN
- **CSpanCongress** - **CSpanCongress**
- **CtsNews**: 華視新聞 - **CtsNews**: 華視新聞
- **CTV**
- **CTVNews** - **CTVNews**
- **cu.ntv.co.jp**: 日テレ無料TADA! - **cu.ntv.co.jp**: 日テレ無料TADA!
- **CultureUnplugged** - **CultureUnplugged**
@ -652,7 +651,6 @@ # Supported sites
- **jiosaavn:show:playlist** - **jiosaavn:show:playlist**
- **jiosaavn:song** - **jiosaavn:song**
- **Joj** - **Joj**
- **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
- **Jove** - **Jove**
- **JStream** - **JStream**
- **JTBC**: jtbc.co.kr - **JTBC**: jtbc.co.kr
@ -723,9 +721,6 @@ # Supported sites
- **life:embed** - **life:embed**
- **likee** - **likee**
- **likee:user** - **likee:user**
- **limelight**
- **limelight:channel**
- **limelight:channel_list**
- **LinkedIn**: [*linkedin*](## "netrc machine") - **LinkedIn**: [*linkedin*](## "netrc machine")
- **linkedin:events**: [*linkedin*](## "netrc machine") - **linkedin:events**: [*linkedin*](## "netrc machine")
- **linkedin:learning**: [*linkedin*](## "netrc machine") - **linkedin:learning**: [*linkedin*](## "netrc machine")
@ -807,6 +802,7 @@ # Supported sites
- **minds:channel** - **minds:channel**
- **minds:group** - **minds:group**
- **Minoto** - **Minoto**
- **mir24.tv**
- **mirrativ** - **mirrativ**
- **mirrativ:user** - **mirrativ:user**
- **MirrorCoUK** - **MirrorCoUK**
@ -817,6 +813,8 @@ # Supported sites
- **mixcloud** - **mixcloud**
- **mixcloud:playlist** - **mixcloud:playlist**
- **mixcloud:user** - **mixcloud:user**
- **Mixlr**
- **MixlrRecoring**
- **MLB** - **MLB**
- **MLBArticle** - **MLBArticle**
- **MLBTV**: [*mlb*](## "netrc machine") - **MLBTV**: [*mlb*](## "netrc machine")
@ -973,7 +971,6 @@ # Supported sites
- **NoicePodcast** - **NoicePodcast**
- **NonkTube** - **NonkTube**
- **NoodleMagazine** - **NoodleMagazine**
- **Noovo**
- **NOSNLArticle** - **NOSNLArticle**
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
- **NovaEmbed** - **NovaEmbed**
@ -1097,6 +1094,7 @@ # Supported sites
- **Platzi**: [*platzi*](## "netrc machine") - **Platzi**: [*platzi*](## "netrc machine")
- **PlatziCourse**: [*platzi*](## "netrc machine") - **PlatziCourse**: [*platzi*](## "netrc machine")
- **player.sky.it** - **player.sky.it**
- **PlayerFm**
- **playeur** - **playeur**
- **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlayPlusTV**: [*playplustv*](## "netrc machine")
- **PlaySuisse**: [*playsuisse*](## "netrc machine") - **PlaySuisse**: [*playsuisse*](## "netrc machine")
@ -1472,11 +1470,12 @@ # Supported sites
- **Tempo** - **Tempo**
- **TennisTV**: [*tennistv*](## "netrc machine") - **TennisTV**: [*tennistv*](## "netrc machine")
- **TF1** - **TF1**
- **TFO** - **TFO**: (**Currently broken**)
- **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine")
- **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine")
- **TheGuardianPodcast** - **TheGuardianPodcast**
- **TheGuardianPodcastPlaylist** - **TheGuardianPodcastPlaylist**
- **TheHighWire**
- **TheHoleTv** - **TheHoleTv**
- **TheIntercept** - **TheIntercept**
- **ThePlatform** - **ThePlatform**
@ -1544,8 +1543,8 @@ # Supported sites
- **tv2playseries.hu** - **tv2playseries.hu**
- **TV4**: tv4.se and tv4play.se - **TV4**: tv4.se and tv4play.se
- **TV5MONDE** - **TV5MONDE**
- **tv5unis** - **tv5unis**: (**Currently broken**)
- **tv5unis:video** - **tv5unis:video**: (**Currently broken**)
- **tv8.it** - **tv8.it**
- **tv8.it:live**: TV8 Live - **tv8.it:live**: TV8 Live
- **tv8.it:playlist**: TV8 Playlist - **tv8.it:playlist**: TV8 Playlist
@ -1600,6 +1599,7 @@ # Supported sites
- **UlizaPortal**: ulizaportal.jp - **UlizaPortal**: ulizaportal.jp
- **umg:de**: Universal Music Deutschland - **umg:de**: Universal Music Deutschland
- **Unistra** - **Unistra**
- **UnitedNationsWebTv**
- **Unity**: (**Currently broken**) - **Unity**: (**Currently broken**)
- **uol.com.br** - **uol.com.br**
- **uplynk** - **uplynk**

View File

@ -1959,6 +1959,37 @@ def test_search_nextjs_data(self):
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
def test_search_nextjs_v13_data(self):
# Exercises self.ie._search_nextjs_v13_data() on a page whose data arrives as a
# series of `self.__next_f.push([...])` <script> chunks (one of them carrying a
# `nonce` attribute on the <script> tag).
HTML = R'''
<script>(self.__next_f=self.__next_f||[]).push([0])</script>
<script>self.__next_f.push([2,"0:[\"$\",\"$L0\",null,{\"do_not_add_this\":\"fail\"}]\n"])</script>
<script>self.__next_f.push([1,"1:I[46975,[],\"HTTPAccessFallbackBoundary\"]\n2:I[32630,[\"8183\",\"static/chunks/8183-768193f6a9e33cdd.js\"]]\n"])</script>
<script nonce="abc123">self.__next_f.push([1,"e:[false,[\"$\",\"div\",null,{\"children\":[\"$\",\"$L18\",null,{\"foo\":\"bar\"}]}],false]\n "])</script>
<script>self.__next_f.push([1,"2a:[[\"$\",\"div\",null,{\"className\":\"flex flex-col\",\"children\":[]}],[\"$\",\"$L16\",null,{\"meta\":{\"dateCreated\":1730489700,\"uuid\":\"40cac41d-8d29-4ef5-aa11-75047b9f0907\"}}]]\n"])</script>
<script>self.__next_f.push([1,"df:[\"$undefined\",[\"$\",\"div\",null,{\"children\":[\"$\",\"$L17\",null,{}],\"do_not_include_this_field\":\"fail\"}],[\"$\",\"div\",null,{\"children\":[[\"$\",\"$L19\",null,{\"duplicated_field_name\":{\"x\":1}}],[\"$\",\"$L20\",null,{\"duplicated_field_name\":{\"y\":2}}]]}],\"$undefined\"]\n"])</script>
<script>self.__next_f.push([3,"MzM6WyIkIiwiJEwzMiIsbnVsbCx7ImRlY29kZWQiOiJzdWNjZXNzIn1d"])</script>
'''
# Each expected key is the number that follows `$L` in a `["$", "$L<n>", null, {...}]`
# tuple of the fixture, and its value is that tuple's trailing object.
# Fixture content that is deliberately absent from the expected result:
#   - the `$L0` tuple, which is pushed with a leading 2 instead of 1
#   - the base64-looking payload, which is pushed with a leading 3
#   - the `$L17` tuple, whose trailing object is empty
#   - `do_not_include_this_field`, which sits on a plain "div" tuple rather than a `$L` one
# `$L19` and `$L20` share the inner key `duplicated_field_name` and must stay separate entries.
EXPECTED = {
'18': {
'foo': 'bar',
},
'16': {
'meta': {
'dateCreated': 1730489700,
'uuid': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
},
},
'19': {
'duplicated_field_name': {'x': 1},
},
'20': {
'duplicated_field_name': {'y': 2},
},
}
self.assertEqual(self.ie._search_nextjs_v13_data(HTML, None), EXPECTED)
# With fatal=False, an empty or missing (None) webpage must give an empty dict.
self.assertEqual(self.ie._search_nextjs_v13_data('', None, fatal=False), {})
self.assertEqual(self.ie._search_nextjs_v13_data(None, None, fatal=False), {})
def test_search_nuxt_json(self): def test_search_nuxt_json(self):
HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>' HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>'
VALID_DATA = ''' VALID_DATA = '''

View File

@ -21,9 +21,6 @@ def test_compat_passthrough(self):
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):
_ = compat.compat_basestring _ = compat.compat_basestring
with self.assertWarns(DeprecationWarning):
_ = compat.WINDOWS_VT_MODE
self.assertEqual(urllib.request.getproxies, getproxies) self.assertEqual(urllib.request.getproxies, getproxies)
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):

View File

@ -66,10 +66,6 @@ def _file_md5(fn):
@is_download_test @is_download_test
class TestDownload(unittest.TestCase): class TestDownload(unittest.TestCase):
# Parallel testing in nosetests. See
# http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
_multiprocess_shared_ = True
maxDiff = None maxDiff = None
COMPLETED_TESTS = {} COMPLETED_TESTS = {}

View File

@ -1373,6 +1373,7 @@ def test_parse_resolution(self):
self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('ep1x2'), {})
self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920})
def test_parse_bitrate(self): def test_parse_bitrate(self):
self.assertEqual(parse_bitrate(None), None) self.assertEqual(parse_bitrate(None), None)

View File

@ -36,6 +36,7 @@
from .globals import ( from .globals import (
IN_CLI, IN_CLI,
LAZY_EXTRACTORS, LAZY_EXTRACTORS,
WINDOWS_VT_MODE,
plugin_ies, plugin_ies,
plugin_ies_overrides, plugin_ies_overrides,
plugin_pps, plugin_pps,
@ -52,7 +53,7 @@
SSLError, SSLError,
network_exceptions, network_exceptions,
) )
from .networking.impersonate import ImpersonateRequestHandler from .networking.impersonate import ImpersonateRequestHandler, ImpersonateTarget
from .plugins import directories as plugin_directories, load_all_plugins from .plugins import directories as plugin_directories, load_all_plugins
from .postprocessor import ( from .postprocessor import (
EmbedThumbnailPP, EmbedThumbnailPP,
@ -529,6 +530,7 @@ class YoutubeDL:
discontinuities such as ad breaks (default: False) discontinuities such as ad breaks (default: False)
extractor_args: A dictionary of arguments to be passed to the extractors. extractor_args: A dictionary of arguments to be passed to the extractors.
See "EXTRACTOR ARGUMENTS" for details. See "EXTRACTOR ARGUMENTS" for details.
Argument values must always be a list of string(s).
E.g. {'youtube': {'skip': ['dash', 'hls']}} E.g. {'youtube': {'skip': ['dash', 'hls']}}
mark_watched: Mark videos watched (even with --simulate). Only for YouTube mark_watched: Mark videos watched (even with --simulate). Only for YouTube
@ -2195,7 +2197,7 @@ def _filter(f):
return op(actual_value, comparison_value) return op(actual_value, comparison_value)
return _filter return _filter
def _check_formats(self, formats): def _check_formats(self, formats, warning=True):
for f in formats: for f in formats:
working = f.get('__working') working = f.get('__working')
if working is not None: if working is not None:
@ -2208,6 +2210,9 @@ def _check_formats(self, formats):
continue continue
temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None) temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
temp_file.close() temp_file.close()
# If FragmentFD fails when testing a fragment, it will wrongly set a non-zero return code.
# Save the actual return code for later. See https://github.com/yt-dlp/yt-dlp/issues/13750
original_retcode = self._download_retcode
try: try:
success, _ = self.dl(temp_file.name, f, test=True) success, _ = self.dl(temp_file.name, f, test=True)
except (DownloadError, OSError, ValueError, *network_exceptions): except (DownloadError, OSError, ValueError, *network_exceptions):
@ -2218,12 +2223,18 @@ def _check_formats(self, formats):
os.remove(temp_file.name) os.remove(temp_file.name)
except OSError: except OSError:
self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
# Restore the actual return code
self._download_retcode = original_retcode
f['__working'] = success f['__working'] = success
if success: if success:
f.pop('__needs_testing', None) f.pop('__needs_testing', None)
yield f yield f
else: else:
self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) msg = f'Unable to download format {f["format_id"]}. Skipping...'
if warning:
self.report_warning(msg)
else:
self.to_screen(f'[info] {msg}')
def _select_formats(self, formats, selector): def _select_formats(self, formats, selector):
return list(selector({ return list(selector({
@ -2949,7 +2960,7 @@ def is_wellformed(f):
) )
if self.params.get('check_formats') is True: if self.params.get('check_formats') is True:
formats = LazyList(self._check_formats(formats[::-1]), reverse=True) formats = LazyList(self._check_formats(formats[::-1], warning=False), reverse=True)
if not formats or formats[0] is not info_dict: if not formats or formats[0] is not info_dict:
# only set the 'formats' fields if the original info_dict list them # only set the 'formats' fields if the original info_dict list them
@ -3222,6 +3233,7 @@ def dl(self, name, info, subtitle=False, test=False):
} }
else: else:
params = self.params params = self.params
fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
if not test: if not test:
for ph in self._progress_hooks: for ph in self._progress_hooks:
@ -3697,6 +3709,8 @@ def filter_fn(obj):
return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
elif isinstance(obj, (list, tuple, set, LazyList)): elif isinstance(obj, (list, tuple, set, LazyList)):
return list(map(filter_fn, obj)) return list(map(filter_fn, obj))
elif isinstance(obj, ImpersonateTarget):
return str(obj)
elif obj is None or isinstance(obj, (str, int, float, bool)): elif obj is None or isinstance(obj, (str, int, float, bool)):
return obj return obj
elif callable(obj): elif callable(obj):
@ -4029,8 +4043,7 @@ def get_encoding(stream):
if os.environ.get('TERM', '').lower() == 'dumb': if os.environ.get('TERM', '').lower() == 'dumb':
additional_info.append('dumb') additional_info.append('dumb')
if not supports_terminal_sequences(stream): if not supports_terminal_sequences(stream):
from .utils import WINDOWS_VT_MODE # Must be imported locally additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI')
additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
if additional_info: if additional_info:
ret = f'{ret} ({",".join(additional_info)})' ret = f'{ret} ({",".join(additional_info)})'
return ret return ret
@ -4176,6 +4189,31 @@ def _impersonate_target_available(self, target):
for rh in self._request_director.handlers.values() for rh in self._request_director.handlers.values()
if isinstance(rh, ImpersonateRequestHandler)) if isinstance(rh, ImpersonateRequestHandler))
def _parse_impersonate_targets(self, impersonate):
if impersonate in (True, ''):
impersonate = ImpersonateTarget()
requested_targets = [
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
for t in variadic(impersonate)
] if impersonate else []
available_target = next(filter(self._impersonate_target_available, requested_targets), None)
return available_target, requested_targets
@staticmethod
def _unavailable_targets_message(requested_targets, note=None, is_error=False):
note = note or 'The extractor specified to use impersonation for this download'
specific_targets = ', '.join(filter(None, map(str, requested_targets)))
message = (
'no impersonate target is available' if not specific_targets
else f'none of these impersonate targets are available: {specific_targets}')
return (
f'{note}, but {message}. {"See" if is_error else "If you encounter errors, then see"}'
f' https://github.com/yt-dlp/yt-dlp#impersonation '
f'for information on installing the required dependencies')
def urlopen(self, req): def urlopen(self, req):
""" Start an HTTP download """ """ Start an HTTP download """
if isinstance(req, str): if isinstance(req, str):

View File

@ -37,7 +37,7 @@
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError from ..networking.exceptions import HTTPError as compat_HTTPError
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',))
# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE

View File

@ -1335,7 +1335,7 @@ def prepare_line(line):
if len(cookie_list) != self._ENTRY_LEN: if len(cookie_list) != self._ENTRY_LEN:
raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
cookie = self._CookieFileEntry(*cookie_list) cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit(): if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at):
raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
return line return line

View File

@ -105,7 +105,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
if external_downloader is None: if external_downloader is None:
if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
return FFmpegFD return FFmpegFD
elif external_downloader.lower() != 'native': elif external_downloader.lower() != 'native' and info_dict.get('impersonate') is None:
ed = get_external_downloader(external_downloader) ed = get_external_downloader(external_downloader)
if ed.can_download(info_dict, external_downloader): if ed.can_download(info_dict, external_downloader):
return ed return ed

View File

@ -495,3 +495,14 @@ def _debug_cmd(self, args, exe=None):
exe = os.path.basename(args[0]) exe = os.path.basename(args[0])
self.write_debug(f'{exe} command line: {shell_quote(args)}') self.write_debug(f'{exe} command line: {shell_quote(args)}')
def _get_impersonate_target(self, info_dict):
impersonate = info_dict.get('impersonate')
if impersonate is None:
return None
available_target, requested_targets = self.ydl._parse_impersonate_targets(impersonate)
if available_target:
return available_target
elif requested_targets:
self.report_warning(self.ydl._unavailable_targets_message(requested_targets))
return None

View File

@ -572,7 +572,21 @@ def _call_downloader(self, tmpfilename, info_dict):
if end_time: if end_time:
args += ['-t', str(end_time - start_time)] args += ['-t', str(end_time - start_time)]
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] url = fmt['url']
if self.params.get('enable_file_urls') and url.startswith('file:'):
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
# so only local segments can be read unless we also include 'http,https,tcp,tls'
args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls']
# ffmpeg incorrectly handles 'file:' URLs by only removing the
# 'file:' prefix and treating the rest as if it's a normal filepath.
# FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs:
# - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:'
# - On *nix, replace 'file://localhost/' with 'file:/'
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13781
# https://trac.ffmpeg.org/ticket/2702
url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy'] args += ['-c', 'copy']

View File

@ -205,7 +205,7 @@ def is_ad_fragment_end(s):
line = line.strip() line = line.strip()
if line: if line:
if not line.startswith('#'): if not line.startswith('#'):
if format_index and discontinuity_count != format_index: if format_index is not None and discontinuity_count != format_index:
continue continue
if ad_frag_next: if ad_frag_next:
continue continue
@ -231,7 +231,7 @@ def is_ad_fragment_end(s):
byte_range = {} byte_range = {}
elif line.startswith('#EXT-X-MAP'): elif line.startswith('#EXT-X-MAP'):
if format_index and discontinuity_count != format_index: if format_index is not None and discontinuity_count != format_index:
continue continue
if frag_index > 0: if frag_index > 0:
self.report_error( self.report_error(

View File

@ -27,6 +27,10 @@ class HttpFD(FileDownloader):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
url = info_dict['url'] url = info_dict['url']
request_data = info_dict.get('request_data', None) request_data = info_dict.get('request_data', None)
request_extensions = {}
impersonate_target = self._get_impersonate_target(info_dict)
if impersonate_target is not None:
request_extensions['impersonate'] = impersonate_target
class DownloadContext(dict): class DownloadContext(dict):
__getattr__ = dict.get __getattr__ = dict.get
@ -109,7 +113,7 @@ def establish_connection():
if try_call(lambda: range_end >= ctx.content_len): if try_call(lambda: range_end >= ctx.content_len):
range_end = ctx.content_len - 1 range_end = ctx.content_len - 1
request = Request(url, request_data, headers) request = Request(url, request_data, headers, extensions=request_extensions)
has_range = range_start is not None has_range = range_start is not None
if has_range: if has_range:
request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'

View File

@ -201,7 +201,6 @@
BanByeChannelIE, BanByeChannelIE,
BanByeIE, BanByeIE,
) )
from .bandaichannel import BandaiChannelIE
from .bandcamp import ( from .bandcamp import (
BandcampAlbumIE, BandcampAlbumIE,
BandcampIE, BandcampIE,
@ -229,7 +228,6 @@
from .beatport import BeatportIE from .beatport import BeatportIE
from .beeg import BeegIE from .beeg import BeegIE
from .behindkink import BehindKinkIE from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE
from .berufetv import BerufeTVIE from .berufetv import BerufeTVIE
from .bet import BetIE from .bet import BetIE
from .bfi import BFIPlayerIE from .bfi import BFIPlayerIE
@ -275,7 +273,10 @@
BitChuteChannelIE, BitChuteChannelIE,
BitChuteIE, BitChuteIE,
) )
from .blackboardcollaborate import BlackboardCollaborateIE from .blackboardcollaborate import (
BlackboardCollaborateIE,
BlackboardCollaborateLaunchIE,
)
from .bleacherreport import ( from .bleacherreport import (
BleacherReportCMSIE, BleacherReportCMSIE,
BleacherReportIE, BleacherReportIE,
@ -309,6 +310,7 @@
BrilliantpalaClassesIE, BrilliantpalaClassesIE,
BrilliantpalaElearnIE, BrilliantpalaElearnIE,
) )
from .btvplus import BTVPlusIE
from .bundesliga import BundesligaIE from .bundesliga import BundesligaIE
from .bundestag import BundestagIE from .bundestag import BundestagIE
from .bunnycdn import BunnyCdnIE from .bunnycdn import BunnyCdnIE
@ -446,7 +448,6 @@
CSpanIE, CSpanIE,
) )
from .ctsnews import CtsNewsIE from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE from .ctvnews import CTVNewsIE
from .cultureunplugged import CultureUnpluggedIE from .cultureunplugged import CultureUnpluggedIE
from .curiositystream import ( from .curiositystream import (
@ -570,10 +571,6 @@
DWIE, DWIE,
DWArticleIE, DWArticleIE,
) )
from .eagleplatform import (
ClipYouEmbedIE,
EaglePlatformIE,
)
from .ebaumsworld import EbaumsWorldIE from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE from .ebay import EbayIE
from .egghead import ( from .egghead import (
@ -639,6 +636,7 @@
FancodeVodIE, FancodeVodIE,
) )
from .fathom import FathomIE from .fathom import FathomIE
from .faulio import FaulioLiveIE
from .faz import FazIE from .faz import FazIE
from .fc2 import ( from .fc2 import (
FC2IE, FC2IE,
@ -928,7 +926,6 @@
JioSaavnSongIE, JioSaavnSongIE,
) )
from .joj import JojIE from .joj import JojIE
from .joqrag import JoqrAgIE
from .jove import JoveIE from .jove import JoveIE
from .jstream import JStreamIE from .jstream import JStreamIE
from .jtbc import ( from .jtbc import (
@ -1031,11 +1028,6 @@
LikeeIE, LikeeIE,
LikeeUserIE, LikeeUserIE,
) )
from .limelight import (
LimelightChannelIE,
LimelightChannelListIE,
LimelightMediaIE,
)
from .linkedin import ( from .linkedin import (
LinkedInEventsIE, LinkedInEventsIE,
LinkedInIE, LinkedInIE,
@ -1168,6 +1160,10 @@
MixcloudPlaylistIE, MixcloudPlaylistIE,
MixcloudUserIE, MixcloudUserIE,
) )
from .mixlr import (
MixlrIE,
MixlrRecoringIE,
)
from .mlb import ( from .mlb import (
MLBIE, MLBIE,
MLBTVIE, MLBTVIE,
@ -1378,7 +1374,6 @@
from .noice import NoicePodcastIE from .noice import NoicePodcastIE
from .nonktube import NonkTubeIE from .nonktube import NonkTubeIE
from .noodlemagazine import NoodleMagazineIE from .noodlemagazine import NoodleMagazineIE
from .noovo import NoovoIE
from .nosnl import NOSNLArticleIE from .nosnl import NOSNLArticleIE
from .nova import ( from .nova import (
NovaEmbedIE, NovaEmbedIE,
@ -1559,6 +1554,7 @@
PlatziCourseIE, PlatziCourseIE,
PlatziIE, PlatziIE,
) )
from .playerfm import PlayerFmIE
from .playplustv import PlayPlusTVIE from .playplustv import PlayPlusTVIE
from .playsuisse import PlaySuisseIE from .playsuisse import PlaySuisseIE
from .playtvak import PlaytvakIE from .playtvak import PlaytvakIE
@ -1569,6 +1565,7 @@
) )
from .plutotv import PlutoTVIE from .plutotv import PlutoTVIE
from .plvideo import PlVideoIE from .plvideo import PlVideoIE
from .plyr import PlyrEmbedIE
from .podbayfm import ( from .podbayfm import (
PodbayFMChannelIE, PodbayFMChannelIE,
PodbayFMIE, PodbayFMIE,
@ -1784,6 +1781,7 @@
RTVEALaCartaIE, RTVEALaCartaIE,
RTVEAudioIE, RTVEAudioIE,
RTVELiveIE, RTVELiveIE,
RTVEProgramIE,
RTVETelevisionIE, RTVETelevisionIE,
) )
from .rtvs import RTVSIE from .rtvs import RTVSIE
@ -2097,6 +2095,7 @@
TheGuardianPodcastIE, TheGuardianPodcastIE,
TheGuardianPodcastPlaylistIE, TheGuardianPodcastPlaylistIE,
) )
from .thehighwire import TheHighWireIE
from .theholetv import TheHoleTvIE from .theholetv import TheHoleTvIE
from .theintercept import TheInterceptIE from .theintercept import TheInterceptIE
from .theplatform import ( from .theplatform import (
@ -2166,7 +2165,6 @@
from .trueid import TrueIDIE from .trueid import TrueIDIE
from .trunews import TruNewsIE from .trunews import TruNewsIE
from .truth import TruthIE from .truth import TruthIE
from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubetugraz import ( from .tubetugraz import (
TubeTuGrazIE, TubeTuGrazIE,
@ -2237,6 +2235,7 @@
from .tvplayer import TVPlayerIE from .tvplayer import TVPlayerIE
from .tvw import ( from .tvw import (
TvwIE, TvwIE,
TvwNewsIE,
TvwTvChannelsIE, TvwTvChannelsIE,
) )
from .tweakers import TweakersIE from .tweakers import TweakersIE
@ -2285,6 +2284,7 @@
) )
from .umg import UMGDeIE from .umg import UMGDeIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .unitednations import UnitedNationsWebTvIE
from .unity import UnityIE from .unity import UnityIE
from .unsupported import ( from .unsupported import (
KnownDRMIE, KnownDRMIE,

View File

@ -48,7 +48,6 @@
'username_field': 'user', 'username_field': 'user',
'password_field': 'passwd', 'password_field': 'passwd',
'login_hostname': 'login.xfinity.com', 'login_hostname': 'login.xfinity.com',
'needs_newer_ua': True,
}, },
'TWC': { 'TWC': {
'name': 'Time Warner Cable | Spectrum', 'name': 'Time Warner Cable | Spectrum',
@ -1379,11 +1378,8 @@ def _download_webpage_handle(self, *args, **kwargs):
@staticmethod @staticmethod
def _get_mso_headers(mso_info): def _get_mso_headers(mso_info):
# yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO # Not needed currently
# See: https://github.com/yt-dlp/yt-dlp/issues/10848 return {}
return {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0',
} if mso_info.get('needs_newer_ua') else {}
@staticmethod @staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating): def _get_mvpd_resource(provider_id, title, guid, rating):

View File

@ -84,9 +84,10 @@ def _parse_video_data(self, video_data):
class AdobeTVEmbedIE(AdobeTVBaseIE): class AdobeTVEmbedIE(AdobeTVBaseIE):
_WORKING = False
IE_NAME = 'adobetv:embed' IE_NAME = 'adobetv:embed'
_VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)' _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'https://tv.adobe.com/embed/22/4153', 'url': 'https://tv.adobe.com/embed/22/4153',
'md5': 'c8c0461bf04d54574fc2b4d07ac6783a', 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
'info_dict': { 'info_dict': {
@ -94,12 +95,12 @@ class AdobeTVEmbedIE(AdobeTVBaseIE):
'ext': 'flv', 'ext': 'flv',
'title': 'Creating Graphics Optimized for BlackBerry', 'title': 'Creating Graphics Optimized for BlackBerry',
'description': 'md5:eac6e8dced38bdaae51cd94447927459', 'description': 'md5:eac6e8dced38bdaae51cd94447927459',
'thumbnail': r're:https?://.*\.jpg$', 'thumbnail': r're:https?://.+\.jpg',
'upload_date': '20091109', 'upload_date': '20091109',
'duration': 377, 'duration': 377,
'view_count': int, 'view_count': int,
}, },
} }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -110,10 +111,10 @@ def _real_extract(self, url):
class AdobeTVIE(AdobeTVBaseIE): class AdobeTVIE(AdobeTVBaseIE):
_WORKING = False
IE_NAME = 'adobetv' IE_NAME = 'adobetv'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
_TESTS = [{
_TEST = {
'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/', 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
'md5': '9bc5727bcdd55251f35ad311ca74fa1e', 'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
'info_dict': { 'info_dict': {
@ -121,12 +122,12 @@ class AdobeTVIE(AdobeTVBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
'thumbnail': r're:https?://.*\.jpg$', 'thumbnail': r're:https?://.+\.jpg',
'upload_date': '20110914', 'upload_date': '20110914',
'duration': 60, 'duration': 60,
'view_count': int, 'view_count': int,
}, },
} }]
def _real_extract(self, url): def _real_extract(self, url):
language, show_urlname, urlname = self._match_valid_url(url).groups() language, show_urlname, urlname = self._match_valid_url(url).groups()
@ -159,10 +160,10 @@ def _extract_playlist_entries(self, display_id, query):
class AdobeTVShowIE(AdobeTVPlaylistBaseIE): class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
_WORKING = False
IE_NAME = 'adobetv:show' IE_NAME = 'adobetv:show'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
_TESTS = [{
_TEST = {
'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost', 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
'info_dict': { 'info_dict': {
'id': '36', 'id': '36',
@ -170,7 +171,7 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27', 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
}, },
'playlist_mincount': 136, 'playlist_mincount': 136,
} }]
_RESOURCE = 'episode' _RESOURCE = 'episode'
_process_data = AdobeTVBaseIE._parse_video_data _process_data = AdobeTVBaseIE._parse_video_data
@ -195,16 +196,16 @@ def _real_extract(self, url):
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
_WORKING = False
IE_NAME = 'adobetv:channel' IE_NAME = 'adobetv:channel'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?' _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
_TESTS = [{
_TEST = {
'url': 'http://tv.adobe.com/channel/development', 'url': 'http://tv.adobe.com/channel/development',
'info_dict': { 'info_dict': {
'id': 'development', 'id': 'development',
}, },
'playlist_mincount': 96, 'playlist_mincount': 96,
} }]
_RESOURCE = 'show' _RESOURCE = 'show'
def _process_data(self, show_data): def _process_data(self, show_data):
@ -231,8 +232,7 @@ class AdobeTVVideoIE(AdobeTVBaseIE):
IE_NAME = 'adobetv:video' IE_NAME = 'adobetv:video'
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)' _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]'] _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']
_TESTS = [{
_TEST = {
# From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
'url': 'https://video.tv.adobe.com/v/2456/', 'url': 'https://video.tv.adobe.com/v/2456/',
'md5': '43662b577c018ad707a63766462b1e87', 'md5': '43662b577c018ad707a63766462b1e87',
@ -242,8 +242,20 @@ class AdobeTVVideoIE(AdobeTVBaseIE):
'title': 'New experience with Acrobat DC', 'title': 'New experience with Acrobat DC',
'description': 'New experience with Acrobat DC', 'description': 'New experience with Acrobat DC',
'duration': 248.667, 'duration': 248.667,
'thumbnail': r're:https?://images-tv\.adobe\.com/.+\.jpg',
}, },
} }]
_WEBPAGE_TESTS = [{
# FIXME: Invalid extension
'url': 'https://www.adobe.com/learn/acrobat/web/customize-toolbar',
'info_dict': {
'id': '3463980',
'ext': 'm3u8',
'title': 'Adobe Acrobat: How to Customize the Toolbar for Faster PDF Editing',
'description': 'md5:94368ab95ae24f9c1bee0cb346e03dc3',
'duration': 97.557,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -111,11 +111,9 @@ class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks' IE_NAME = 'aenetworks'
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id> _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
shows/[^/]+/season-\d+/episode-\d+| shows/[^/?#]+/season-\d+/episode-\d+|
(?: (?P<type>movie|special)s/[^/?#]+(?P<extra>/[^/?#]+)?|
(?:movie|special)s/[^/]+| (?:shows/[^/?#]+/)?videos/[^/?#]+
(?:shows/[^/]+/)?videos
)/[^/?#&]+
)''' )'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
@ -128,7 +126,7 @@ class AENetworksIE(AENetworksBaseIE):
'upload_date': '20120529', 'upload_date': '20120529',
'uploader': 'AENE-NEW', 'uploader': 'AENE-NEW',
'duration': 2592.0, 'duration': 2592.0,
'thumbnail': r're:^https?://.*\.jpe?g$', 'thumbnail': r're:https?://.+/.+\.jpg',
'chapters': 'count:5', 'chapters': 'count:5',
'tags': 'count:14', 'tags': 'count:14',
'categories': ['Mountain Men'], 'categories': ['Mountain Men'],
@ -139,10 +137,7 @@ class AENetworksIE(AENetworksBaseIE):
'series': 'Mountain Men', 'series': 'Mountain Men',
'age_limit': 0, 'age_limit': 0,
}, },
'params': { 'params': {'skip_download': 'm3u8'},
# m3u8 download
'skip_download': True,
},
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
'skip': 'Geo-restricted - This content is not available in your location.', 'skip': 'Geo-restricted - This content is not available in your location.',
}, { }, {
@ -156,7 +151,7 @@ class AENetworksIE(AENetworksBaseIE):
'upload_date': '20160112', 'upload_date': '20160112',
'uploader': 'AENE-NEW', 'uploader': 'AENE-NEW',
'duration': 1277.695, 'duration': 1277.695,
'thumbnail': r're:^https?://.*\.jpe?g$', 'thumbnail': r're:https?://.+/.+\.jpg',
'chapters': 'count:4', 'chapters': 'count:4',
'tags': 'count:23', 'tags': 'count:23',
'episode': 'Inlawful Entry', 'episode': 'Inlawful Entry',
@ -166,10 +161,53 @@ class AENetworksIE(AENetworksBaseIE):
'series': 'Duck Dynasty', 'series': 'Duck Dynasty',
'age_limit': 0, 'age_limit': 0,
}, },
'params': { 'params': {'skip_download': 'm3u8'},
# m3u8 download 'add_ie': ['ThePlatform'],
'skip_download': True, }, {
'url': 'https://play.mylifetime.com/movies/v-c-andrews-web-of-dreams',
'info_dict': {
'id': '1590627395981',
'ext': 'mp4',
'title': 'VC Andrews\' Web of Dreams',
'description': 'md5:2a8ba13ae64271c79eb65c0577d312ce',
'uploader': 'AENE-NEW',
'age_limit': 14,
'duration': 5253.665,
'thumbnail': r're:https?://.+/.+\.jpg',
'chapters': 'count:8',
'tags': ['lifetime', 'mylifetime', 'lifetime channel', "VC Andrews' Web of Dreams"],
'series': '',
'season': 'Season 0',
'season_number': 0,
'episode': 'VC Andrews\' Web of Dreams',
'episode_number': 0,
'timestamp': 1566489703.0,
'upload_date': '20190822',
}, },
'params': {'skip_download': 'm3u8'},
'add_ie': ['ThePlatform'],
}, {
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story',
'info_dict': {
'id': '1488235587551',
'ext': 'mp4',
'title': 'Hunting JonBenet\'s Killer: The Untold Story',
'description': 'md5:209869425ee392d74fe29201821e48b4',
'uploader': 'AENE-NEW',
'age_limit': 14,
'duration': 5003.903,
'thumbnail': r're:https?://.+/.+\.jpg',
'chapters': 'count:10',
'tags': 'count:11',
'series': '',
'season': 'Season 0',
'season_number': 0,
'episode': 'Hunting JonBenet\'s Killer: The Untold Story',
'episode_number': 0,
'timestamp': 1554987697.0,
'upload_date': '20190411',
},
'params': {'skip_download': 'm3u8'},
'add_ie': ['ThePlatform'], 'add_ie': ['ThePlatform'],
}, { }, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
@ -198,7 +236,9 @@ class AENetworksIE(AENetworksBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
domain, canonical = self._match_valid_url(url).groups() domain, canonical, url_type, extra = self._match_valid_url(url).group('domain', 'id', 'type', 'extra')
if url_type in ('movie', 'special') and not extra:
canonical += f'/full-{url_type}'
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)

View File

@ -11,12 +11,11 @@ class APAIE(InfoExtractor):
_EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1'] _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1']
_TESTS = [{ _TESTS = [{
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029', 'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
'info_dict': { 'info_dict': {
'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029', 'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
'ext': 'mp4', 'ext': 'mp4',
'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029', 'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://kf-vn\.sf\.apa\.at/vn/.+\.jpg',
}, },
}, { }, {
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78', 'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
@ -28,6 +27,15 @@ class APAIE(InfoExtractor):
'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81', 'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://www.vol.at/blue-man-group/5593454',
'info_dict': {
'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
'ext': 'mp4',
'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
'thumbnail': r're:https?://kf-vn\.sf\.apa\.at/vn/.+\.jpg',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)

View File

@ -16,6 +16,7 @@
dict_get, dict_get,
extract_attributes, extract_attributes,
get_element_by_id, get_element_by_id,
get_element_text_and_html_by_tag,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
js_to_json, js_to_json,
@ -32,7 +33,6 @@
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urlhandle_detect_ext, urlhandle_detect_ext,
variadic,
) )
@ -72,6 +72,7 @@ class ArchiveOrgIE(InfoExtractor):
'display_id': 'Cops-v2.mp4', 'display_id': 'Cops-v2.mp4',
'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
'duration': 1091.96, 'duration': 1091.96,
'track': 'Cops-v2',
}, },
}, { }, {
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
@ -86,6 +87,7 @@ class ArchiveOrgIE(InfoExtractor):
'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
'duration': 59.77, 'duration': 59.77,
'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg', 'display_id': 'Commercial-JFK1960ElectionAdCampaignJingle.mpg',
'track': 'Commercial-JFK1960ElectionAdCampaignJingle',
}, },
}, { }, {
'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg', 'url': 'https://archive.org/details/Election_Ads/Commercial-Nixon1960ElectionAdToughonDefense.mpg',
@ -102,6 +104,7 @@ class ArchiveOrgIE(InfoExtractor):
'duration': 59.51, 'duration': 59.51,
'license': 'http://creativecommons.org/licenses/publicdomain/', 'license': 'http://creativecommons.org/licenses/publicdomain/',
'thumbnail': r're:https://archive\.org/download/.*\.jpg', 'thumbnail': r're:https://archive\.org/download/.*\.jpg',
'track': 'Commercial-Nixon1960ElectionAdToughonDefense',
}, },
}, { }, {
'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16', 'url': 'https://archive.org/details/gd1977-05-08.shure57.stevenson.29303.flac16',
@ -182,6 +185,7 @@ class ArchiveOrgIE(InfoExtractor):
'duration': 130.46, 'duration': 130.46,
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_01_000117.jpg',
'display_id': 'irelandthemakingofarepublicreel1_01.mov', 'display_id': 'irelandthemakingofarepublicreel1_01.mov',
'track': 'irelandthemakingofarepublicreel1 01',
}, },
}, { }, {
'md5': '67335ee3b23a0da930841981c1e79b02', 'md5': '67335ee3b23a0da930841981c1e79b02',
@ -192,6 +196,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'irelandthemakingofarepublicreel1_02.mov', 'title': 'irelandthemakingofarepublicreel1_02.mov',
'display_id': 'irelandthemakingofarepublicreel1_02.mov', 'display_id': 'irelandthemakingofarepublicreel1_02.mov',
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel1_02_001374.jpg',
'track': 'irelandthemakingofarepublicreel1 02',
}, },
}, { }, {
'md5': 'e470e86787893603f4a341a16c281eb5', 'md5': 'e470e86787893603f4a341a16c281eb5',
@ -202,6 +207,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'irelandthemakingofarepublicreel2.mov', 'title': 'irelandthemakingofarepublicreel2.mov',
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
'display_id': 'irelandthemakingofarepublicreel2.mov', 'display_id': 'irelandthemakingofarepublicreel2.mov',
'track': 'irelandthemakingofarepublicreel2',
}, },
}, },
], ],
@ -225,19 +231,29 @@ class ArchiveOrgIE(InfoExtractor):
'release_date': '19950402', 'release_date': '19950402',
'timestamp': 1084927901, 'timestamp': 1084927901,
}, },
}, {
# metadata['metadata']['description'] is a list of strings instead of str
'url': 'https://archive.org/details/pra-KZ1908.02',
'info_dict': {
'id': 'pra-KZ1908.02',
'ext': 'mp3',
'display_id': 'KZ1908.02_01.wav',
'title': 'Crips and Bloods speak about gang life',
'description': 'md5:2b56b35ff021311e3554b47a285e70b3',
'uploader': 'jake@archive.org',
'duration': 1733.74,
'track': 'KZ1908.02 01',
'track_number': 1,
'timestamp': 1336026026,
'upload_date': '20120503',
'release_year': 1992,
},
}] }]
@staticmethod @staticmethod
def _playlist_data(webpage): def _playlist_data(webpage):
element = re.findall(r'''(?xs) element = get_element_text_and_html_by_tag('play-av', webpage)[1]
<input return json.loads(extract_attributes(element)['playlist'])
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s+class=['"]?js-play8-playlist['"]?
(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
\s*/>
''', webpage)[0]
return json.loads(extract_attributes(element)['value'])
def _real_extract(self, url): def _real_extract(self, url):
video_id = urllib.parse.unquote_plus(self._match_id(url)) video_id = urllib.parse.unquote_plus(self._match_id(url))
@ -274,34 +290,40 @@ def _real_extract(self, url):
m = metadata['metadata'] m = metadata['metadata']
identifier = m['identifier'] identifier = m['identifier']
info = { info = traverse_obj(m, {
'title': ('title', {str}),
'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any),
'uploader': (('uploader', 'adder'), {str}, any),
'creators': ('creator', (None, ...), {str}, filter, all, filter),
'license': ('licenseurl', {url_or_none}),
'release_date': ('date', {unified_strdate}),
'timestamp': (('publicdate', 'addeddate'), {unified_timestamp}, any),
'location': ('venue', {str}),
'release_year': ('year', {int_or_none}),
})
info.update({
'id': identifier, 'id': identifier,
'title': m['title'],
'description': clean_html(m.get('description')),
'uploader': dict_get(m, ['uploader', 'adder']),
'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'license': m.get('licenseurl'),
'release_date': unified_strdate(m.get('date')),
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
'webpage_url': f'https://archive.org/details/{identifier}', 'webpage_url': f'https://archive.org/details/{identifier}',
'location': m.get('venue'), })
'release_year': int_or_none(m.get('year'))}
for f in metadata['files']: for f in metadata['files']:
if f['name'] in entries: if f['name'] in entries:
entries[f['name']] = merge_dicts(entries[f['name']], { entries[f['name']] = merge_dicts(entries[f['name']], {
'id': identifier + '/' + f['name'], 'id': identifier + '/' + f['name'],
'title': f.get('title') or f['name'], **traverse_obj(f, {
'display_id': f['name'], 'title': (('title', 'name'), {str}, any),
'description': clean_html(f.get('description')), 'display_id': ('name', {str}),
'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})), 'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any),
'duration': parse_duration(f.get('length')), 'creators': ('creator', (None, ...), {str}, filter, all, filter),
'track_number': int_or_none(f.get('track')), 'duration': ('length', {parse_duration}),
'album': f.get('album'), 'track_number': ('track', {int_or_none}),
'discnumber': int_or_none(f.get('disc')), 'album': ('album', {str}),
'release_year': int_or_none(f.get('year'))}) 'discnumber': ('disc', {int_or_none}),
'release_year': ('year', {int_or_none}),
}),
})
entry = entries[f['name']] entry = entries[f['name']]
elif traverse_obj(f, 'original', expected_type=str) in entries: elif traverse_obj(f, ('original', {str})) in entries:
entry = entries[f['original']] entry = entries[f['original']]
else: else:
continue continue

View File

@ -62,6 +62,20 @@ class ArcPublishingIE(InfoExtractor):
'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685', 'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://www.uppermichiganssource.com/2025/07/18/scattered-showers-storms-bring-heavy-rain-potential/',
'info_dict': {
'id': '508116f7-e999-48db-b7c2-60a04842679b',
'ext': 'mp4',
'title': 'Scattered showers & storms bring heavy rain potential',
'description': 'Scattered showers & storms bring heavy rain potential',
'duration': 2016,
'thumbnail': r're:https?://.+\.jpg',
'timestamp': 1752881287,
'upload_date': '20250718',
},
'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
}]
_POWA_DEFAULTS = [ _POWA_DEFAULTS = [
(['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'), (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
([ ([

View File

@ -51,8 +51,8 @@ class ArteTVIE(ArteTVBaseIE):
'id': '109067-000-A', 'id': '109067-000-A',
'ext': 'mp4', 'ext': 'mp4',
'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739', 'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+',
'timestamp': 1713927600, 'timestamp': 1713927600,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
'duration': 7599, 'duration': 7599,
'title': 'La loi de Téhéran', 'title': 'La loi de Téhéran',
'upload_date': '20240424', 'upload_date': '20240424',
@ -62,6 +62,7 @@ class ArteTVIE(ArteTVBaseIE):
'fr-forced': 'mincount:1', 'fr-forced': 'mincount:1',
}, },
}, },
'skip': 'Invalid URL',
}, { }, {
'note': 'age-restricted', 'note': 'age-restricted',
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/', 'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
@ -69,9 +70,9 @@ class ArteTVIE(ArteTVBaseIE):
'id': '006785-000-A', 'id': '006785-000-A',
'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba', 'description': 'md5:c2f94fdfefc8a280e4dab68ab96ab0ba',
'title': 'The Element of Crime', 'title': 'The Element of Crime',
'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+',
'timestamp': 1696111200, 'timestamp': 1696111200,
'duration': 5849, 'duration': 5849,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/q82dTTfyuCXupPsGxXsd7B/940x530',
'upload_date': '20230930', 'upload_date': '20230930',
'ext': 'mp4', 'ext': 'mp4',
}, },
@ -252,6 +253,30 @@ class ArteTVEmbedIE(InfoExtractor):
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
# FIXME: Embed detection
'url': 'https://timesofmalta.com/article/watch-sunken-warships-north-sea-arte.1108358',
'info_dict': {
'id': '110288-000-A',
'ext': 'mp4',
'title': 'Danger on the Seabed',
'alt_title': 'Sunken Warships in the North Sea',
'description': 'md5:a2c84cbad37d280bddb6484087120add',
'duration': 3148,
'thumbnail': r're:https?://api-cdn\.arte\.tv/img/v2/image/.+',
'timestamp': 1741686820,
'upload_date': '20250311',
},
'params': {'skip_download': 'm3u8'},
}, {
# FIXME: Embed detection
'url': 'https://www.eurockeennes.fr/en-live/',
'info_dict': {
'id': 'en-live',
'title': 'Les Eurocks en live | Les Eurockéennes de Belfort 3-4-5-6 juillet 2025 sur la Presqu&#039;Île du Malsaucy',
},
'playlist_count': 4,
}]
def _real_extract(self, url): def _real_extract(self, url):
qs = parse_qs(url) qs = parse_qs(url)
@ -304,9 +329,9 @@ class ArteTVCategoryIE(ArteTVBaseIE):
'info_dict': { 'info_dict': {
'id': 'politics-and-society', 'id': 'politics-and-society',
'title': 'Politics and society', 'title': 'Politics and society',
'description': 'Investigative documentary series, geopolitical analysis, and international commentary', 'description': 'Watch documentaries and reportage about politics, society and current affairs.',
}, },
'playlist_mincount': 13, 'playlist_mincount': 3,
}] }]
@classmethod @classmethod

View File

@ -1,33 +0,0 @@
from .brightcove import BrightcoveNewBaseIE
from ..utils import extract_attributes
class BandaiChannelIE(BrightcoveNewBaseIE):
IE_NAME = 'bandaichannel'
_VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
_TESTS = [{
'url': 'https://www.b-ch.com/titles/514/001',
'md5': 'a0f2d787baa5729bed71108257f613a4',
'info_dict': {
'id': '6128044564001',
'ext': 'mp4',
'title': 'メタルファイターMIKU 第1話',
'timestamp': 1580354056,
'uploader_id': '5797077852001',
'upload_date': '20200130',
'duration': 1387.733,
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
attrs = extract_attributes(self._search_regex(
r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
bc = self._download_json(
'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
return self._parse_brightcove_metadata(bc, bc['id'])

View File

@ -7,6 +7,7 @@
from ..utils import ( from ..utils import (
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
ExtractorError, ExtractorError,
clean_html,
extract_attributes, extract_attributes,
float_or_none, float_or_none,
int_or_none, int_or_none,
@ -19,7 +20,7 @@
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import find_element, traverse_obj from ..utils.traversal import find_element, find_elements, traverse_obj
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
@ -35,14 +36,12 @@ class BandcampIE(InfoExtractor):
'duration': 9.8485, 'duration': 9.8485,
'uploader': 'youtube-dl "\'/\\ä↭', 'uploader': 'youtube-dl "\'/\\ä↭',
'upload_date': '20121129', 'upload_date': '20121129',
'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg',
'timestamp': 1354224127, 'timestamp': 1354224127,
'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
'album_artist': 'youtube-dl "\'/\\ä↭',
'track_id': '1812978515', 'track_id': '1812978515',
'artist': 'youtube-dl "\'/\\ä↭',
'uploader_url': 'https://youtube-dl.bandcamp.com', 'uploader_url': 'https://youtube-dl.bandcamp.com',
'uploader_id': 'youtube-dl', 'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
'artists': ['youtube-dl "\'/\\ä↭'], 'artists': ['youtube-dl "\'/\\ä↭'],
'album_artists': ['youtube-dl "\'/\\ä↭'], 'album_artists': ['youtube-dl "\'/\\ä↭'],
}, },
@ -53,10 +52,9 @@ class BandcampIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '2650410135', 'id': '2650410135',
'ext': 'm4a', 'ext': 'm4a',
'acodec': r're:[fa]lac',
'title': 'Ben Prunty - Lanius (Battle)', 'title': 'Ben Prunty - Lanius (Battle)',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Ben Prunty', 'uploader': 'Ben Prunty',
'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg',
'timestamp': 1396508491, 'timestamp': 1396508491,
'upload_date': '20140403', 'upload_date': '20140403',
'release_timestamp': 1396483200, 'release_timestamp': 1396483200,
@ -65,11 +63,12 @@ class BandcampIE(InfoExtractor):
'track': 'Lanius (Battle)', 'track': 'Lanius (Battle)',
'track_number': 1, 'track_number': 1,
'track_id': '2650410135', 'track_id': '2650410135',
'artist': 'Ben Prunty',
'album_artist': 'Ben Prunty',
'album': 'FTL: Advanced Edition Soundtrack', 'album': 'FTL: Advanced Edition Soundtrack',
'uploader_url': 'https://benprunty.bandcamp.com', 'uploader_url': 'https://benprunty.bandcamp.com',
'uploader_id': 'benprunty', 'uploader_id': 'benprunty',
'tags': ['soundtrack', 'chiptunes', 'cinematic', 'electronic', 'video game music', 'California'],
'artists': ['Ben Prunty'],
'album_artists': ['Ben Prunty'],
}, },
}, { }, {
# no free download, mp3 128 # no free download, mp3 128
@ -79,8 +78,8 @@ class BandcampIE(InfoExtractor):
'id': '2584466013', 'id': '2584466013',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Mastodon - Hail to Fire', 'title': 'Mastodon - Hail to Fire',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Mastodon', 'uploader': 'Mastodon',
'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg',
'timestamp': 1322005399, 'timestamp': 1322005399,
'upload_date': '20111122', 'upload_date': '20111122',
'release_timestamp': 1076112000, 'release_timestamp': 1076112000,
@ -89,11 +88,12 @@ class BandcampIE(InfoExtractor):
'track': 'Hail to Fire', 'track': 'Hail to Fire',
'track_number': 5, 'track_number': 5,
'track_id': '2584466013', 'track_id': '2584466013',
'artist': 'Mastodon',
'album_artist': 'Mastodon',
'album': 'Call of the Mastodon', 'album': 'Call of the Mastodon',
'uploader_url': 'https://relapsealumni.bandcamp.com', 'uploader_url': 'https://relapsealumni.bandcamp.com',
'uploader_id': 'relapsealumni', 'uploader_id': 'relapsealumni',
'tags': ['Philadelphia'],
'artists': ['Mastodon'],
'album_artists': ['Mastodon'],
}, },
}, { }, {
# track from compilation album (artist/album_artist difference) # track from compilation album (artist/album_artist difference)
@ -103,8 +103,8 @@ class BandcampIE(InfoExtractor):
'id': '1978174799', 'id': '1978174799',
'ext': 'mp3', 'ext': 'mp3',
'title': 'submerse - submerse - Safehouse', 'title': 'submerse - submerse - Safehouse',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'submerse', 'uploader': 'submerse',
'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg',
'timestamp': 1480779297, 'timestamp': 1480779297,
'upload_date': '20161203', 'upload_date': '20161203',
'release_timestamp': 1481068800, 'release_timestamp': 1481068800,
@ -113,11 +113,36 @@ class BandcampIE(InfoExtractor):
'track': 'submerse - Safehouse', 'track': 'submerse - Safehouse',
'track_number': 3, 'track_number': 3,
'track_id': '1978174799', 'track_id': '1978174799',
'artist': 'submerse',
'album_artist': 'Diskotopia',
'album': 'DSK F/W 2016-2017 Free Compilation', 'album': 'DSK F/W 2016-2017 Free Compilation',
'uploader_url': 'https://diskotopia.bandcamp.com', 'uploader_url': 'https://diskotopia.bandcamp.com',
'uploader_id': 'diskotopia', 'uploader_id': 'diskotopia',
'tags': ['Japan'],
'artists': ['submerse'],
'album_artists': ['Diskotopia'],
},
}]
_WEBPAGE_TESTS = [{
# FIXME: Embed detection
'url': 'https://www.punknews.org/article/85809/stay-inside-super-sonic',
'info_dict': {
'id': '2475540375',
'ext': 'mp3',
'title': 'Stay Inside - Super Sonic',
'album': 'Lunger',
'album_artists': ['Stay Inside'],
'artists': ['Stay Inside'],
'duration': 166.157,
'release_date': '20251003',
'release_timestamp': 1759449600.0,
'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg',
'timestamp': 1749473029.0,
'track': 'Super Sonic',
'track_id': '2475540375',
'track_number': 3,
'upload_date': '20250609',
'uploader': 'Stay Inside',
'uploader_id': 'stayinside',
'uploader_url': 'https://stayinside.bandcamp.com',
}, },
}] }]
@ -252,6 +277,7 @@ def _real_extract(self, url):
'album': embed.get('album_title'), 'album': embed.get('album_title'),
'album_artist': album_artist, 'album_artist': album_artist,
'formats': formats, 'formats': formats,
'tags': traverse_obj(webpage, ({find_elements(cls='tag')}, ..., {clean_html})),
} }
@ -268,10 +294,10 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'id': '1353101989', 'id': '1353101989',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Blazo - Intro', 'title': 'Blazo - Intro',
'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg',
'timestamp': 1311756226, 'timestamp': 1311756226,
'upload_date': '20110727', 'upload_date': '20110727',
'uploader': 'Blazo', 'uploader': 'Blazo',
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'album_artists': ['Blazo'], 'album_artists': ['Blazo'],
'uploader_url': 'https://blazo.bandcamp.com', 'uploader_url': 'https://blazo.bandcamp.com',
'release_date': '20110727', 'release_date': '20110727',
@ -291,6 +317,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'id': '38097443', 'id': '38097443',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)', 'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
'thumbnail': r're:https?://f4\.bcbits\.com/img/.+\.jpg',
'timestamp': 1311757238, 'timestamp': 1311757238,
'upload_date': '20110727', 'upload_date': '20110727',
'uploader': 'Blazo', 'uploader': 'Blazo',
@ -304,7 +331,6 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'uploader_id': 'blazo', 'uploader_id': 'blazo',
'album_artists': ['Blazo'], 'album_artists': ['Blazo'],
'artists': ['Blazo'], 'artists': ['Blazo'],
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'release_timestamp': 1311724800.0, 'release_timestamp': 1311724800.0,
}, },
}, },

View File

@ -1,91 +0,0 @@
from .common import InfoExtractor
class BellMediaIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?
(?P<domain>
(?:
ctv|
tsn|
bnn(?:bloomberg)?|
thecomedynetwork|
discovery|
discoveryvelocity|
sciencechannel|
investigationdiscovery|
animalplanet|
bravo|
mtv|
space|
etalk|
marilyn
)\.ca|
(?:much|cp24)\.com
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
'md5': '3e5b8e38370741d5089da79161646635',
'info_dict': {
'id': '1403070',
'ext': 'flv',
'title': 'David Cockfield\'s Top Picks',
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525',
'timestamp': 1527288600,
'season_id': '73997',
'season': '2018',
'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
'tags': [],
'categories': ['ETFs'],
'season_number': 8,
'duration': 272.038,
'series': 'Market Call Tonight',
},
}, {
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
'only_matching': True,
}, {
'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
'only_matching': True,
}, {
'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
'only_matching': True,
}, {
'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
'only_matching': True,
}, {
'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
'only_matching': True,
}, {
'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
'only_matching': True,
}, {
'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
'only_matching': True,
}, {
'url': 'http://www.etalk.ca/video?videoid=663455',
'only_matching': True,
}, {
'url': 'https://www.cp24.com/video?clipId=1982548',
'only_matching': True,
}]
_DOMAINS = {
'thecomedynetwork': 'comedy',
'discoveryvelocity': 'discvel',
'sciencechannel': 'discsci',
'investigationdiscovery': 'invdisc',
'animalplanet': 'aniplan',
'etalk': 'ctv',
'bnnbloomberg': 'bnn',
'marilyn': 'ctv_marilyn',
}
def _real_extract(self, url):
domain, video_id = self._match_valid_url(url).groups()
domain = domain.split('.')[0]
return {
'_type': 'url_transparent',
'id': video_id,
'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}',
'ie_key': 'NineCNineMedia',
}

View File

@ -353,7 +353,7 @@ class BiliBiliIE(BilibiliBaseIE):
'id': 'BV1bK411W797', 'id': 'BV1bK411W797',
'title': '物语中的人物是如何吐槽自己的OP的', 'title': '物语中的人物是如何吐槽自己的OP的',
}, },
'playlist_count': 18, 'playlist_count': 23,
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
'id': 'BV1bK411W797_p1', 'id': 'BV1bK411W797_p1',
@ -373,6 +373,7 @@ class BiliBiliIE(BilibiliBaseIE):
'_old_archive_ids': ['bilibili 498159642_part1'], '_old_archive_ids': ['bilibili 498159642_part1'],
}, },
}], }],
'params': {'playlist_items': '2'},
}, { }, {
'note': 'Specific page of Anthology', 'note': 'Specific page of Anthology',
'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
@ -899,13 +900,26 @@ def _real_extract(self, url):
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id}, 'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
headers=headers)) headers=headers))
geo_blocked = traverse_obj(play_info, ( # play_info can be structured in at least three different ways, e.g.:
('result', ('raw', 'data')), 'plugins', # 1.) play_info['result']['video_info'] and play_info['code']
lambda _, v: v['name'] == 'AreaLimitPanel', # 2.) play_info['raw']['data']['video_info'] and play_info['code']
'config', 'is_block', {bool}, any)) # 3.) play_info['data']['result']['video_info'] and play_info['data']['code']
premium_only = play_info.get('code') == -10403 # So we need to transform any of the above into a common structure
status_code = play_info.get('code')
if 'raw' in play_info:
play_info = play_info['raw']
if 'data' in play_info:
play_info = play_info['data']
if status_code is None:
status_code = play_info.get('code')
if 'result' in play_info:
play_info = play_info['result']
video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {} geo_blocked = traverse_obj(play_info, (
'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
premium_only = status_code == -10403
video_info = traverse_obj(play_info, ('video_info', {dict})) or {}
formats = self.extract_formats(video_info) formats = self.extract_formats(video_info)
if not formats: if not formats:
@ -915,8 +929,8 @@ def _real_extract(self, url):
self.raise_login_required('This video is for premium members only') self.raise_login_required('This video is for premium members only')
if traverse_obj(play_info, (( if traverse_obj(play_info, ((
('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' ('play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE' vs 'PLAY_NONE'
(('result', ('raw', 'data')), 'play_video_type'), # 'preview' vs 'whole' vs 'none' 'play_video_type', # 'preview' vs 'whole' vs 'none'
), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})): ), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
self.report_warning( self.report_warning(
'Only preview format is available, ' 'Only preview format is available, '
@ -1002,6 +1016,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
}, },
}], }],
'params': {'playlist_items': '2'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -1057,6 +1072,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
}, },
}], }],
'params': {'playlist_items': '2'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -1847,7 +1863,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
'thumbnail': r're:^https?://.+\.jpg', 'thumbnail': r're:^https?://.+\.jpg',
'timestamp': 1564836614, 'timestamp': 1564836614,
'upload_date': '20190803', 'upload_date': '20190803',
'uploader': 'tsukimi-つきみぐ', 'uploader': '十六夜tsukimiつきみぐ',
'view_count': int, 'view_count': int,
}, },
} }
@ -1902,10 +1918,10 @@ class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
'url': 'https://www.bilibili.com/audio/am10624', 'url': 'https://www.bilibili.com/audio/am10624',
'info_dict': { 'info_dict': {
'id': '10624', 'id': '10624',
'title': '每日新曲推荐每日11:00更新', 'title': '新曲推荐',
'description': '每天11:00更新为你推送最新音乐', 'description': '每天11:00更新为你推送最新音乐',
}, },
'playlist_count': 19, 'playlist_count': 16,
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,16 +1,27 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_iso8601 from ..utils import (
UnsupportedError,
float_or_none,
int_or_none,
join_nonempty,
jwt_decode_hs256,
mimetype2ext,
parse_iso8601,
parse_qs,
url_or_none,
)
from ..utils.traversal import traverse_obj
class BlackboardCollaborateIE(InfoExtractor): class BlackboardCollaborateIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?P<region>[a-z-]+)\.bbcollab\.com/ (?P<region>[a-z]+)(?:-lti)?\.bbcollab\.com/
(?: (?:
collab/ui/session/playback/load| collab/ui/session/playback/load|
recording recording
)/ )/
(?P<id>[^/]+)''' (?P<id>[^/?#]+)'''
_TESTS = [ _TESTS = [
{ {
'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256', 'url': 'https://us-lti.bbcollab.com/collab/ui/session/playback/load/0a633b6a88824deb8c918f470b22b256',
@ -19,9 +30,55 @@ class BlackboardCollaborateIE(InfoExtractor):
'id': '0a633b6a88824deb8c918f470b22b256', 'id': '0a633b6a88824deb8c918f470b22b256',
'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1', 'title': 'HESI A2 Information Session - Thursday, May 6, 2021 - recording_1',
'ext': 'mp4', 'ext': 'mp4',
'duration': 1896000, 'duration': 1896,
'timestamp': 1620331399, 'timestamp': 1620333295,
'upload_date': '20210506', 'upload_date': '20210506',
'subtitles': {
'live_chat': 'mincount:1',
},
},
},
{
'url': 'https://eu.bbcollab.com/collab/ui/session/playback/load/4bde2dee104f40289a10f8e554270600',
'md5': '108db6a8f83dcb0c2a07793649581865',
'info_dict': {
'id': '4bde2dee104f40289a10f8e554270600',
'title': 'Meeting - Azerbaycanca erize formasi',
'ext': 'mp4',
'duration': 880,
'timestamp': 1671176868,
'upload_date': '20221216',
},
},
{
'url': 'https://eu.bbcollab.com/recording/f83be390ecff46c0bf7dccb9dddcf5f6',
'md5': 'e3b0b88ddf7847eae4b4c0e2d40b83a5',
'info_dict': {
'id': 'f83be390ecff46c0bf7dccb9dddcf5f6',
'title': 'Keynote lecture by Laura Carvalho - recording_1',
'ext': 'mp4',
'duration': 5506,
'timestamp': 1662721705,
'upload_date': '20220909',
'subtitles': {
'live_chat': 'mincount:1',
},
},
},
{
'url': 'https://eu.bbcollab.com/recording/c3e1e7c9e83d4cd9981c93c74888d496',
'md5': 'fdb2d8c43d66fbc0b0b74ef5e604eb1f',
'info_dict': {
'id': 'c3e1e7c9e83d4cd9981c93c74888d496',
'title': 'International Ally User Group - recording_18',
'ext': 'mp4',
'duration': 3479,
'timestamp': 1721919621,
'upload_date': '20240725',
'subtitles': {
'en': 'mincount:1',
'live_chat': 'mincount:1',
},
}, },
}, },
{ {
@ -42,22 +99,81 @@ class BlackboardCollaborateIE(InfoExtractor):
}, },
] ]
def _call_api(self, region, video_id, path=None, token=None, note=None, fatal=False):
    """Fetch JSON from the Collaborate recordings API for one recording.

    The request URL is the recordings endpoint on the `region` subdomain with
    `video_id` and the optional `path` appended as further path segments.
    A bearer `Authorization` header is sent only when `token` is truthy.
    `note` overrides the default progress message, and `fatal` is passed
    straight through to `_download_json`.
    """
    # Ref: https://github.com/blackboard/BBDN-Collab-Postman-REST
    endpoint = join_nonempty(
        f'https://{region}.bbcollab.com/collab/api/csa/recordings', video_id, path, delim='/')
    auth_headers = {'Authorization': f'Bearer {token}'} if token else None
    return self._download_json(
        endpoint, video_id, note or 'Downloading JSON metadata',
        fatal=fatal, headers=auth_headers)
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
region = mobj.group('region') region = mobj.group('region')
video_id = mobj.group('id') video_id = mobj.group('id')
info = self._download_json( token = parse_qs(url).get('authToken', [None])[-1]
f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
duration = info.get('duration') video_info = self._call_api(region, video_id, path='data/secure', token=token, note='Trying auth token')
title = info['name'] if video_info:
upload_date = info.get('created') video_extra = self._call_api(region, video_id, token=token, note='Retrieving extra attributes')
streams = info['streams'] else:
formats = [{'format_id': k, 'url': url} for k, url in streams.items()] video_info = self._call_api(region, video_id, path='data', note='Trying fallback', fatal=True)
video_extra = {}
formats = traverse_obj(video_info, ('extStreams', lambda _, v: url_or_none(v['streamUrl']), {
'url': 'streamUrl',
'ext': ('contentType', {mimetype2ext}),
'aspect_ratio': ('aspectRatio', {float_or_none}),
}))
if filesize := traverse_obj(video_extra, ('storageSize', {int_or_none})):
for fmt in formats:
fmt['filesize'] = filesize
subtitles = {}
for subs in traverse_obj(video_info, ('subtitles', lambda _, v: url_or_none(v['url']))):
subtitles.setdefault(subs.get('lang') or 'und', []).append({
'name': traverse_obj(subs, ('label', {str})),
'url': subs['url'],
})
for live_chat_url in traverse_obj(video_info, ('chats', ..., 'url', {url_or_none})):
subtitles.setdefault('live_chat', []).append({'url': live_chat_url})
return { return {
'duration': duration, **traverse_obj(video_info, {
'title': ('name', {str}),
'timestamp': ('created', {parse_iso8601}),
'duration': ('duration', {int_or_none(scale=1000)}),
}),
'formats': formats, 'formats': formats,
'id': video_id, 'id': video_id,
'timestamp': parse_iso8601(upload_date), 'subtitles': subtitles,
'title': title,
} }
class BlackboardCollaborateLaunchIE(InfoExtractor):
    """Handles `/launch/<JWT>` links and hands the resulting URL to BlackboardCollaborateIE."""

    _VALID_URL = r'https?://[a-z]+\.bbcollab\.com/launch/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://au.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzQwNDE2NDgzLCJpYXQiOjE3NDA0MTYxODMsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3MzI4YzRjZTNmM2U0ZTcwYmY3MTY3N2RkZTgzMzk2NSIsImNvbnN1bWVySWQiOiJhM2Q3NGM0Y2QyZGU0MGJmODFkMjFlODNlMmEzNzM5MCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.xuELw4EafEwUMoYcCHidGn4Tw9O1QCbYHzYGJUl0kKk',
        'only_matching': True,
    }, {
        'url': 'https://us.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNjk0NDgxOTc3LCJpYXQiOjE2OTQ0ODE2NzcsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI3YWU0MTFhNTU3NjU0OWFiOTZlYjVmMTM1YmY3MWU5MCIsImNvbnN1bWVySWQiOiJBRUU2MEI4MDI2QzM3ODU2RjMwMzNEN0ZEOTQzMTFFNSIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.yOhRZNaIjXYoMYMpcTzgjZJCnIFaYf2cAzbco8OAxlY',
        'only_matching': True,
    }, {
        'url': 'https://eu.bbcollab.com/launch/eyJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJiYkNvbGxhYkFwaSIsInN1YiI6ImJiQ29sbGFiQXBpIiwiZXhwIjoxNzUyNjgyODYwLCJpYXQiOjE3NTI2ODI1NjAsInJlc291cmNlQWNjZXNzVGlja2V0Ijp7InJlc291cmNlSWQiOiI4MjQzYjFiODg2Nzk0NTZkYjkwN2NmNDZmZmE1MmFhZiIsImNvbnN1bWVySWQiOiI5ZTY4NzYwZWJiNzM0MzRiYWY3NTQyZjA1YmJkOTMzMCIsInR5cGUiOiJSRUNPUkRJTkciLCJyZXN0cmljdGlvbiI6eyJ0eXBlIjoiVElNRSIsImV4cGlyYXRpb25Ib3VycyI6MCwiZXhwaXJhdGlvbk1pbnV0ZXMiOjUsIm1heFJlcXVlc3RzIjotMX0sImRpc3Bvc2l0aW9uIjoiTEFVTkNIIiwibGF1bmNoVHlwZSI6bnVsbCwibGF1bmNoQ29tcG9uZW50IjpudWxsLCJsYXVuY2hQYXJhbUtleSI6bnVsbH19.Xj4ymojYLwZ1vKPKZ-KxjpqQvFXoJekjRaG0npngwWs',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The path component matched as `id` is a JWT; the recording ID is
        # read from its `resourceAccessTicket.resourceId` claim
        launch_token = self._match_id(url)
        access_ticket = jwt_decode_hs256(launch_token)['resourceAccessTicket']
        video_id = access_ticket['resourceId']

        # Request the launch link and take the URL reported by the response
        landing_url = self._request_webpage(url, video_id).url
        if self.suitable(landing_url):
            # The response URL still matches this extractor, so delegating
            # it would only hand the same kind of URL back to us
            raise UnsupportedError(landing_url)

        return self.url_result(landing_url, BlackboardCollaborateIE, video_id)

View File

@ -19,8 +19,19 @@ class BloggerIE(InfoExtractor):
'id': 'BLOGGER-video-3c740e3a49197e16-796', 'id': 'BLOGGER-video-3c740e3a49197e16-796',
'title': 'BLOGGER-video-3c740e3a49197e16-796', 'title': 'BLOGGER-video-3c740e3a49197e16-796',
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': r're:^https?://.*',
'duration': 76.068, 'duration': 76.068,
'thumbnail': r're:https?://i9\.ytimg\.com/vi_blogger/.+',
},
}]
_WEBPAGE_TESTS = [{
'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html',
'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac',
'info_dict': {
'id': 'BLOGGER-video-3c740e3a49197e16-12203',
'ext': 'mp4',
'title': 'BLOGGER-video-3c740e3a49197e16-12203',
'duration': 76.068,
'thumbnail': r're:https?://i9\.ytimg\.com/vi_blogger/.+',
}, },
}] }]

View File

@ -0,0 +1,73 @@
from .common import InfoExtractor
from ..utils import (
bug_reports_message,
clean_html,
get_element_by_class,
js_to_json,
mimetype2ext,
strip_or_none,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class BTVPlusIE(InfoExtractor):
    """Extractor for btvplus.bg product pages (predavaniya / seriali / novini)."""

    _VALID_URL = r'https?://(?:www\.)?btvplus\.bg/produkt/(?:predavaniya|seriali|novini)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://btvplus.bg/produkt/predavaniya/67271/btv-reporterite/btv-reporterite-12-07-2025-g',
        'info_dict': {
            'ext': 'mp4',
            'id': '67271',
            'title': 'bTV Репортерите - 12.07.2025 г.',
            'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jul2025/2113606319.jpg',
        },
    }, {
        'url': 'https://btvplus.bg/produkt/seriali/66942/sezon-2/plen-sezon-2-epizod-55',
        'info_dict': {
            'ext': 'mp4',
            'id': '66942',
            'title': 'Плен - сезон 2, епизод 55',
            'thumbnail': 'https://cdn.btv.bg/media/images/940x529/Jun2025/2113595104.jpg',
        },
    }, {
        'url': 'https://btvplus.bg/produkt/novini/67270/btv-novinite-centralna-emisija-12-07-2025',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page assigns the player endpoint to a JS variable named `videoUrl`;
        # that endpoint returns JSON whose `config` value embeds a videojs(...) call
        player_path = self._search_regex(
            r'var\s+videoUrl\s*=\s*[\'"]([^\'"]+)[\'"]',
            webpage, 'player URL')
        player_response = self._download_json(
            urljoin('https://btvplus.bg', player_path), video_id)
        videojs_data = self._search_json(
            r'videojs\(["\'][^"\']+["\'],', player_response['config'], 'videojs data',
            video_id, transform_source=js_to_json)

        formats, subtitles = [], {}
        # Only sources carrying a valid `src` URL are considered
        for source in traverse_obj(videojs_data, ('sources', lambda _, v: url_or_none(v['src']))):
            ext = mimetype2ext(source.get('type'))
            if ext != 'm3u8':
                # Anything other than HLS is reported and skipped
                self.report_warning(f'Unknown format type {ext}{bug_reports_message()}')
                continue
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                source['src'], video_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        # Prefer the OpenGraph title; fall back to the `product-title` element
        title = strip_or_none(self._og_search_title(webpage, default=None))
        if not title:
            title = clean_html(get_element_by_class('product-title', webpage))

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'description': self._og_search_description(webpage, default=None),
        }

View File

@ -19,18 +19,16 @@ class CloudflareStreamIE(InfoExtractor):
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg',
},
'params': {
'skip_download': 'm3u8',
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e', 'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e',
'info_dict': { 'info_dict': {
'id': '0e8e040aec776862e1d632a699edf59e', 'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4', 'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e', 'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -54,11 +52,21 @@ class CloudflareStreamIE(InfoExtractor):
'id': 'eaef9dea5159cf968be84241b5cedfe7', 'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4', 'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7', 'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', 'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg',
}, },
'params': { 'params': {
'extractor_args': {'generic': {'impersonate': ['chrome']}},
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, {
# FIXME: Embed detection
'url': 'https://www.cloudflare.com/developer-platform/products/cloudflare-stream/',
'info_dict': {
'id': 'e7bd2dd67e0f8860b4ae81e33a966049',
'ext': 'mp4',
'title': 'e7bd2dd67e0f8860b4ae81e33a966049',
'thumbnail': r're:https?://cloudflarestream\.com/.+\.jpg',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -38,7 +38,6 @@
TransportError, TransportError,
network_exceptions, network_exceptions,
) )
from ..networking.impersonate import ImpersonateTarget
from ..utils import ( from ..utils import (
IDENTITY, IDENTITY,
JSON_LD_RE, JSON_LD_RE,
@ -259,6 +258,11 @@ class InfoExtractor:
* key The key (as hex) used to decrypt fragments. * key The key (as hex) used to decrypt fragments.
If `key` is given, any key URI will be ignored If `key` is given, any key URI will be ignored
* iv The IV (as hex) used to decrypt fragments * iv The IV (as hex) used to decrypt fragments
* impersonate Impersonate target(s). Can be any of the following entities:
* an instance of yt_dlp.networking.impersonate.ImpersonateTarget
* a string in the format of CLIENT[:OS]
* a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
* a boolean value; True means any impersonate target is sufficient
* downloader_options A dictionary of downloader options * downloader_options A dictionary of downloader options
(For internal use only) (For internal use only)
* http_chunk_size Chunk size for HTTP downloads * http_chunk_size Chunk size for HTTP downloads
@ -336,6 +340,7 @@ class InfoExtractor:
* "name": Name or description of the subtitles * "name": Name or description of the subtitles
* "http_headers": A dictionary of additional HTTP headers * "http_headers": A dictionary of additional HTTP headers
to add to the request. to add to the request.
* "impersonate": Impersonate target(s); same as the "formats" field
"ext" will be calculated from URL if missing "ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles'; contains automatically generated automatic_captions: Like 'subtitles'; contains automatically generated
captions instead of normal subtitles captions instead of normal subtitles
@ -392,6 +397,8 @@ class InfoExtractor:
chapters: A list of dictionaries, with the following entries: chapters: A list of dictionaries, with the following entries:
* "start_time" - The start time of the chapter in seconds * "start_time" - The start time of the chapter in seconds
* "end_time" - The end time of the chapter in seconds * "end_time" - The end time of the chapter in seconds
(optional: core code can determine this value from
the next chapter's start_time or the video's duration)
* "title" (optional, string) * "title" (optional, string)
heatmap: A list of dictionaries, with the following entries: heatmap: A list of dictionaries, with the following entries:
* "start_time" - The start time of the data point in seconds * "start_time" - The start time of the data point in seconds
@ -406,7 +413,8 @@ class InfoExtractor:
'unlisted' or 'public'. Use 'InfoExtractor._availability' 'unlisted' or 'public'. Use 'InfoExtractor._availability'
to set it to set it
media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer" media_type: The type of media as classified by the site, e.g. "episode", "clip", "trailer"
_old_archive_ids: A list of old archive ids needed for backward compatibility _old_archive_ids: A list of old archive ids needed for backward
compatibility. Use yt_dlp.utils.make_archive_id to generate ids
_format_sort_fields: A list of fields to use for sorting formats _format_sort_fields: A list of fields to use for sorting formats
__post_extractor: A function to be called just before the metadata is __post_extractor: A function to be called just before the metadata is
written to either disk, logger or console. The function written to either disk, logger or console. The function
@ -884,26 +892,17 @@ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fa
extensions = {} extensions = {}
if impersonate in (True, ''): available_target, requested_targets = self._downloader._parse_impersonate_targets(impersonate)
impersonate = ImpersonateTarget()
requested_targets = [
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
for t in variadic(impersonate)
] if impersonate else []
available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
if available_target: if available_target:
extensions['impersonate'] = available_target extensions['impersonate'] = available_target
elif requested_targets: elif requested_targets:
message = 'The extractor is attempting impersonation, but ' msg = 'The extractor is attempting impersonation'
message += (
'no impersonate target is available' if not str(impersonate)
else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation '
'for information on installing the required dependencies')
if require_impersonation: if require_impersonation:
raise ExtractorError(f'{message}; {info_msg}', expected=True) raise ExtractorError(
self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True) self._downloader._unavailable_targets_message(requested_targets, note=msg, is_error=True),
expected=True)
self.report_warning(
self._downloader._unavailable_targets_message(requested_targets, note=msg), only_once=True)
try: try:
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions)) return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))
@ -1783,6 +1782,59 @@ def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAU
r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data', r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
video_id, end_pattern='</script>', fatal=fatal, default=default, **kw) video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
def _search_nextjs_v13_data(self, webpage, video_id, fatal=True):
"""Parses Next.js app router flight data that was introduced in Next.js v13"""
nextjs_data = {}
if not fatal and not isinstance(webpage, str):
return nextjs_data
def flatten(flight_data):
if not isinstance(flight_data, list):
return
if len(flight_data) == 4 and flight_data[0] == '$':
_, name, _, data = flight_data
if not isinstance(data, dict):
return
children = data.pop('children', None)
if data and isinstance(name, str) and re.fullmatch(r'\$L[0-9a-f]+', name):
# It is useful hydration JSON data
nextjs_data[name[2:]] = data
flatten(children)
return
for f in flight_data:
flatten(f)
flight_text = ''
# The pattern for the surrounding JS/tag should be strict as it's a hardcoded string in the next.js source
# Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L189
for flight_segment in re.findall(r'<script\b[^>]*>self\.__next_f\.push\((\[.+?\])\)</script>', webpage):
segment = self._parse_json(flight_segment, video_id, fatal=fatal, errnote=None if fatal else False)
# Some earlier versions of next.js "optimized" away this array structure; this is unsupported
# Ref: https://github.com/vercel/next.js/commit/0123a9d5c9a9a77a86f135b7ae30b46ca986d761
if not isinstance(segment, list) or len(segment) != 2:
self.write_debug(
f'{video_id}: Unsupported next.js flight data structure detected', only_once=True)
continue
# Only use the relevant payload type (1 == data)
# Ref: https://github.com/vercel/next.js/blob/5a4a08fdc/packages/next/src/server/app-render/use-flight-response.tsx#L11-L14
payload_type, chunk = segment
if payload_type == 1:
flight_text += chunk
for f in flight_text.splitlines():
prefix, _, body = f.lstrip().partition(':')
if not re.fullmatch(r'[0-9a-f]+', prefix):
continue
# The body still isn't guaranteed to be valid JSON, so parsing should always be non-fatal
if body.startswith('[') and body.endswith(']'):
flatten(self._parse_json(body, video_id, fatal=False, errnote=False))
elif body.startswith('{') and body.endswith('}'):
data = self._parse_json(body, video_id, fatal=False, errnote=False)
if data is not None:
nextjs_data[prefix] = data
return nextjs_data
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name) rectx = re.escape(context_name)

View File

@ -96,6 +96,24 @@ class CondeNastIE(InfoExtractor):
'upload_date': '20150916', 'upload_date': '20150916',
'timestamp': 1442434920, 'timestamp': 1442434920,
}, },
}, {
# FIXME: Subtitles
'url': 'https://www.vanityfair.com/video/watch/vf-quiz-show-squid-game-s3',
'info_dict': {
'id': '6862f999c1afbc5ff06b4803',
'ext': 'mp4',
'title': '\'Squid Game\' Cast Tests How Well They Know Each Other',
'categories': ['Arts & Culture', 'Hollywood'],
'description': 'md5:7a9c668a1fc87648e77da13842ec1534',
'duration': 955,
'season': 'Season 1',
'series': 'Quizzing Each Other',
'tags': 'count:2',
'thumbnail': r're:https?://dwgyu36up6iuz\.cloudfront\.net/.+\.jpg',
'timestamp': 1751341306,
'upload_date': '20250701',
'uploader': 'vanityfair',
},
}, { }, {
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
'only_matching': True, 'only_matching': True,

View File

@ -8,7 +8,6 @@
class CrooksAndLiarsIE(InfoExtractor): class CrooksAndLiarsIE(InfoExtractor):
_VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)'
_EMBED_REGEX = [r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1'] _EMBED_REGEX = [r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1']
_TESTS = [{ _TESTS = [{
'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi',
'info_dict': { 'info_dict': {
@ -16,7 +15,7 @@ class CrooksAndLiarsIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:https?://crooksandliars\.com/files/.+',
'timestamp': 1428207000, 'timestamp': 1428207000,
'upload_date': '20150405', 'upload_date': '20150405',
'uploader': 'Heather', 'uploader': 'Heather',
@ -26,6 +25,20 @@ class CrooksAndLiarsIE(InfoExtractor):
'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
'info_dict': {
'id': '8RUoRhRi',
'ext': 'mp4',
'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!',
'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
'duration': 236,
'thumbnail': r're:https?://crooksandliars\.com/files/.+',
'timestamp': 1428207000,
'upload_date': '20150405',
'uploader': 'Heather',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -1,49 +0,0 @@
from .common import InfoExtractor
class CTVIE(InfoExtractor):
    """Resolves ctv.ca show/movie pages to a `9c9media:` URL handled by NineCNineMedia."""

    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
        'info_dict': {
            'id': '2102249',
            'ext': 'flv',
            'title': 'Wednesday, December 23, 2020',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
            'timestamp': 1608732000,
            'upload_date': '20201223',
            'series': 'Your Morning',
            'season': '2020-2021',
            'season_number': 5,
            'episode_number': 88,
            'tags': ['Your Morning'],
            'categories': ['Talk Show'],
            'duration': 7467.126,
        },
    }, {
        'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The matched `id` group is the URL path ("shows/<series>/<episode>" or
        # "movies/<a>/<b>"); it is substituted into the GraphQL query below.
        display_id = self._match_id(url)

        # %-formatting is used because the query text itself is full of braces
        graphql_query = '''{
resolvedPath(path: "/%s") {
lastSegment {
content {
... on AxisContent {
axisId
videoPlayerDestCode
}
}
}
}
}''' % display_id  # noqa: UP031

        response = self._download_json(
            'https://www.ctv.ca/space-graphql/graphql', display_id,
            query={'query': graphql_query})
        content = response['data']['resolvedPath']['lastSegment']['content']

        video_id = content['axisId']
        dest_code = content['videoPlayerDestCode']
        return self.url_result(
            f'9c9media:{dest_code}:{video_id}', 'NineCNineMedia', video_id)

View File

@ -19,11 +19,22 @@ class DailyMailIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
'thumbnail': r're:https?://i\.dailymail\.co\.uk/.+\.jpg',
}, },
}, { }, {
'url': 'http://www.dailymail.co.uk/embed/video/1295863.html', 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://www.daily-news.gr/lifestyle/%ce%b7-%cf%84%cf%81%ce%b1%ce%b3%ce%bf%cf%85%ce%b4%ce%af%cf%83%cf%84%cf%81%ce%b9%ce%b1-jessie-j-%ce%bc%ce%bf%ce%b9%cf%81%ce%ac%cf%83%cf%84%ce%b7%ce%ba%ce%b5-%cf%83%cf%85%ce%b3%ce%ba%ce%bb%ce%bf%ce%bd/',
'info_dict': {
'id': '3463585',
'ext': 'mp4',
'title': 'Jessie J reveals she has undergone surgery as she shares clips',
'description': 'md5:9fa9a25feca5b656b0b4a39c922fad1e',
'thumbnail': r're:https?://i\.dailymail\.co\.uk/.+\.jpg',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -119,13 +119,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
_EMBED_REGEX = [rf'(?ix)<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)["\'](?P<url>{_VALID_URL[5:]})'] _EMBED_REGEX = [rf'(?ix)<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)["\'](?P<url>{_VALID_URL[5:]})']
_TESTS = [{ _TESTS = [{
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
'md5': '074b95bdee76b9e3654137aee9c79dfe',
'info_dict': { 'info_dict': {
'id': 'x5kesuj', 'id': 'x5kesuj',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Office Christmas Party Review Jason Bateman, Olivia Munn, T.J. Miller', 'title': 'Office Christmas Party Review Jason Bateman, Olivia Munn, T.J. Miller',
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller', 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
'duration': 187, 'duration': 187,
'tags': 'count:5',
'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+',
'timestamp': 1493651285, 'timestamp': 1493651285,
'upload_date': '20170501', 'upload_date': '20170501',
'uploader': 'Deadline', 'uploader': 'Deadline',
@ -133,18 +134,17 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'age_limit': 0, 'age_limit': 0,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
}, },
}, { }, {
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true', 'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
'md5': 'e2f9717c6604773f963f069ca53a07f8',
'info_dict': { 'info_dict': {
'id': 'x89eyek', 'id': 'x89eyek',
'ext': 'mp4', 'ext': 'mp4',
'title': "En quête d'esprit du 27/03/2022", 'title': 'En quête d\'esprit du 27/03/2022',
'description': 'md5:66542b9f4df2eb23f314fc097488e553', 'description': 'md5:66542b9f4df2eb23f314fc097488e553',
'duration': 2756, 'duration': 2756,
'tags': 'count:1',
'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+',
'timestamp': 1648383669, 'timestamp': 1648383669,
'upload_date': '20220327', 'upload_date': '20220327',
'uploader': 'CNEWS', 'uploader': 'CNEWS',
@ -152,8 +152,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'age_limit': 0, 'age_limit': 0,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'tags': ['en_quete_d_esprit'],
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
}, },
}, { }, {
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@ -163,8 +161,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
'description': 'Several come bundled with the Steam Controller.', 'description': 'Several come bundled with the Steam Controller.',
'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
'duration': 74, 'duration': 74,
'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+',
'timestamp': 1425657362, 'timestamp': 1425657362,
'upload_date': '20150306', 'upload_date': '20150306',
'uploader': 'IGN', 'uploader': 'IGN',
@ -183,10 +181,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'uploader': 'Katy Perry', 'uploader': 'Katy Perry',
'upload_date': '20130905', 'upload_date': '20130905',
}, },
'params': { 'skip': 'Invalid URL',
'skip_download': True,
},
'skip': 'VEVO is only available in some countries',
}, { }, {
# age-restricted video # age-restricted video
'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
@ -259,9 +254,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'uploader_id': 'x2vtgmm', 'uploader_id': 'x2vtgmm',
'age_limit': 0, 'age_limit': 0,
'tags': [], 'tags': [],
'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'thumbnail': r're:https://\w+.dmcdn.net/v/WnEY61cmvMxt2Fi6d/x1080',
}, },
}, { }, {
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj # https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
@ -276,18 +271,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'info_dict': { 'info_dict': {
'id': 'x8u4owg', 'id': 'x8u4owg',
'ext': 'mp4', 'ext': 'mp4',
'description': 'À bord du « véloto », lalternative à la voiture pour la campagne',
'like_count': int, 'like_count': int,
'uploader': 'Le Parisien', 'uploader': 'Le Parisien',
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
'upload_date': '20240309', 'upload_date': '20240309',
'view_count': int, 'view_count': int,
'tags': 'count:7',
'thumbnail': r're:https?://www\.leparisien\.fr/.+\.jpg',
'timestamp': 1709997866, 'timestamp': 1709997866,
'age_limit': 0, 'age_limit': 0,
'uploader_id': 'x32f7b', 'uploader_id': 'x32f7b',
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes', 'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
'duration': 428.0, 'duration': 428.0,
'description': 'À bord du « véloto », lalternative à la voiture pour la campagne',
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
}, },
}, { }, {
# https://geo.dailymotion.com/player/xry80.html?video=x8vu47w # https://geo.dailymotion.com/player/xry80.html?video=x8vu47w
@ -297,9 +292,9 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'like_count': int, 'like_count': int,
'uploader': 'Metatube', 'uploader': 'Metatube',
'thumbnail': r're:https://\w+.dmcdn.net/v/W1G_S1coGSFTfkTeR/x1080',
'upload_date': '20240326', 'upload_date': '20240326',
'view_count': int, 'view_count': int,
'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+',
'timestamp': 1711496732, 'timestamp': 1711496732,
'age_limit': 0, 'age_limit': 0,
'uploader_id': 'x2xpy74', 'uploader_id': 'x2xpy74',
@ -308,6 +303,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'description': 'Que lindura', 'description': 'Que lindura',
'tags': [], 'tags': [],
}, },
'skip': 'Invalid URL',
}, { }, {
# //geo.dailymotion.com/player/xysxq.html?video=k2Y4Mjp7krAF9iCuINM # //geo.dailymotion.com/player/xysxq.html?video=k2Y4Mjp7krAF9iCuINM
'url': 'https://lcp.fr/programmes/avant-la-catastrophe-la-naissance-de-la-dictature-nazie-1933-1936-346819', 'url': 'https://lcp.fr/programmes/avant-la-catastrophe-la-naissance-de-la-dictature-nazie-1933-1936-346819',
@ -322,11 +318,30 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'like_count': int, 'like_count': int,
'age_limit': 0, 'age_limit': 0,
'duration': 3220, 'duration': 3220,
'thumbnail': 'https://s1.dmcdn.net/v/Xvumk1djJBUZfjj2a/x1080',
'tags': [], 'tags': [],
'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+',
'timestamp': 1739919947, 'timestamp': 1739919947,
'upload_date': '20250218', 'upload_date': '20250218',
}, },
'skip': 'Invalid URL',
}, {
'url': 'https://forum.ionicframework.com/t/ionic-2-jw-player-dailymotion-player/83248',
'info_dict': {
'id': 'xwr14q',
'ext': 'mp4',
'title': 'Macklemore & Ryan Lewis - Thrift Shop (feat. Wanz)',
'age_limit': 0,
'description': 'md5:47fbe168b5a6ddc4a205e20dd6c841b2',
'duration': 234,
'like_count': int,
'tags': 'count:5',
'thumbnail': r're:https?://s[12]\.dmcdn\.net/v/.+',
'timestamp': 1358177670,
'upload_date': '20130114',
'uploader': 'Macklemore Official',
'uploader_id': 'x19qlwr',
'view_count': int,
},
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
_COMMON_MEDIA_FIELDS = '''description _COMMON_MEDIA_FIELDS = '''description
@ -540,7 +555,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
'id': 'king of turtles', 'id': 'king of turtles',
'title': 'king of turtles', 'title': 'king of turtles',
}, },
'playlist_mincount': 90, 'playlist_mincount': 0,
}] }]
_SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } ' _SEARCH_QUERY = 'query SEARCH_QUERY( $query: String! $page: Int $limit: Int ) { search { videos( query: $query first: $limit page: $page ) { edges { node { xid } } } } } '
@ -584,7 +599,7 @@ class DailymotionUserIE(DailymotionPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': 'nqtv', 'id': 'nqtv',
}, },
'playlist_mincount': 152, 'playlist_mincount': 148,
}, { }, {
'url': 'http://www.dailymotion.com/user/UnderProject', 'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': { 'info_dict': {

View File

@ -11,8 +11,14 @@
class DangalPlayBaseIE(InfoExtractor): class DangalPlayBaseIE(InfoExtractor):
_NETRC_MACHINE = 'dangalplay' _NETRC_MACHINE = 'dangalplay'
_REGION = 'IN'
_OTV_USER_ID = None _OTV_USER_ID = None
_LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' _LOGIN_HINT = (
'Pass credentials as -u "token" -p "USER_ID" '
'(where USER_ID is the value of "otv_user_id" in your browser local storage). '
'Your login region can be optionally suffixed to the username as @REGION '
'(where REGION is the two-letter "region" code found in your browser local storage), '
'e.g.: -u "token@IN" -p "USER_ID"')
_API_BASE = 'https://ottapi.dangalplay.com' _API_BASE = 'https://ottapi.dangalplay.com'
_AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from https://www.dangalplay.com/main.48ad19e24eb46acccef3.js
_SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above
@ -20,8 +26,12 @@ class DangalPlayBaseIE(InfoExtractor):
def _perform_login(self, username, password): def _perform_login(self, username, password):
if self._OTV_USER_ID: if self._OTV_USER_ID:
return return
if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): mobj = re.fullmatch(r'token(?:@(?P<region>[A-Z]{2}))?', username)
if not mobj or not re.fullmatch(r'[\da-f]{32}', password):
raise ExtractorError(self._LOGIN_HINT, expected=True) raise ExtractorError(self._LOGIN_HINT, expected=True)
if region := mobj.group('region'):
self._REGION = region
self.write_debug(f'Setting login region to "{self._REGION}"')
self._OTV_USER_ID = password self._OTV_USER_ID = password
def _real_initialize(self): def _real_initialize(self):
@ -52,7 +62,7 @@ def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=Tr
f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, f'{self._API_BASE}/{path}', display_id, note, fatal=fatal,
headers={'Accept': 'application/json'}, query={ headers={'Accept': 'application/json'}, query={
'auth_token': self._AUTH_TOKEN, 'auth_token': self._AUTH_TOKEN,
'region': 'IN', 'region': self._REGION,
**query, **query,
}) })
@ -106,7 +116,7 @@ def _generate_api_data(self, data):
'catalog_id': catalog_id, 'catalog_id': catalog_id,
'content_id': content_id, 'content_id': content_id,
'category': '', 'category': '',
'region': 'IN', 'region': self._REGION,
'auth_token': self._AUTH_TOKEN, 'auth_token': self._AUTH_TOKEN,
'id': self._OTV_USER_ID, 'id': self._OTV_USER_ID,
'md5': hashlib.md5(unhashed.encode()).hexdigest(), 'md5': hashlib.md5(unhashed.encode()).hexdigest(),
@ -129,11 +139,14 @@ def _real_extract(self, url):
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 422: if isinstance(e.cause, HTTPError) and e.cause.status == 422:
error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {}
if error_info.get('code') == '1016': error_code = error_info.get('code')
if error_code == '1016':
self.raise_login_required( self.raise_login_required(
f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None)
elif msg := error_info.get('message'): elif error_code == '4028':
raise ExtractorError(msg) self.raise_login_required(
f'Your login region is unspecified or incorrect. {self._LOGIN_HINT}', method=None)
raise ExtractorError(join_nonempty(error_code, error_info.get('message'), delim=': '))
raise raise
m3u8_url = traverse_obj(details, ( m3u8_url = traverse_obj(details, (

View File

@ -12,13 +12,13 @@ class DBTVIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen', 'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f', 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.+\.jpg',
'upload_date': '20160916', 'upload_date': '20160916',
'duration': 69, 'duration': 69,
'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
'uploader': 'Dagbladet', 'uploader': 'Dagbladet',
}, },
'add_ie': ['Youtube'], 'skip': 'Invalid URL',
}, { }, {
'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
'only_matching': True, 'only_matching': True,
@ -26,6 +26,20 @@ class DBTVIE(InfoExtractor):
'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw', 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
# FIXME: Embed detection
'url': 'https://www.dagbladet.no/nyheter/rekordstort-russisk-angrep/83325693',
'info_dict': {
'id': '1HW7fYry',
'ext': 'mp4',
'title': 'Putin taler - så skjer dette',
'description': 'md5:3e8bacee33de861a9663d9a3fcc54e5e',
'display_id': 'putin-taler-sa-skjer-dette',
'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+',
'timestamp': 1751043600,
'upload_date': '20250627',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups() display_id, video_id = self._match_valid_url(url).groups()

View File

@ -1,215 +0,0 @@
import functools
import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
smuggle_url,
unsmuggle_url,
url_or_none,
)
class EaglePlatformIE(InfoExtractor):
    """Extractor for videos served by EaglePlatform players.

    Matches either a regular ``*.media.eagleplatform.com`` player URL or the
    internal ``eagleplatform:<host>:<id>`` form that the embed detection in
    this class produces.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1']
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '358597369cf8ba56675c1df15e7af624',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
        'skip': 'Georestricted',
    }, {
        # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
        'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return the EaglePlatform embed URLs found in ``webpage``.

        Every returned URL is smuggled with the embedding page ``url`` as
        ``referrer``. Detection order: the ``<iframe>`` matches from the parent
        implementation, then the "basic usage" ``<div class="eagleplayer">``
        embed, then the ``new EaglePlayer(...)`` script embeds.

        Always returns a list (empty when nothing is found).
        """
        add_referer = functools.partial(smuggle_url, data={'referrer': url})

        iframe_urls = tuple(super()._extract_embed_urls(url, webpage))
        if iframe_urls:
            return [add_referer(iframe_url) for iframe_url in iframe_urls]

        # Shared prefix of both script-based patterns below; only ever used
        # inside (?x) patterns, so its whitespace is insignificant
        PLAYER_JS_RE = r'''
                        <script[^>]+
                            src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
                        .+?
                    '''

        # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
        mobj = re.search(
            rf'''(?xs)
                    {PLAYER_JS_RE}
                    <div[^>]+
                        class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
                        data-id=["\'](?P<id>\d+)
            ''', webpage)
        if mobj is not None:
            return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))]

        # Generalization of "Javascript code usage", "Combined usage" and
        # "Usage without attaching to DOM" embeddings (see
        # http://dultonmedia.github.io/eplayer/)
        # %-formatting is used because the pattern contains literal braces
        mobj = re.search(
            r'''(?xs)
                    %s
                    <script>
                    .+?
                    new\s+EaglePlayer\(
                        (?:[^,]+\s*,\s*)?
                        {
                            .+?
                            \bid\s*:\s*["\']?(?P<id>\d+)
                            .+?
                        }
                    \s*\)
                    .+?
                    </script>
            ''' % PLAYER_JS_RE, webpage)  # noqa: UP031
        if mobj is not None:
            return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))]

        return []

    @staticmethod
    def _handle_error(response):
        """Raise an expected ExtractorError if ``response`` reports a non-200 status.

        ``response`` is a decoded JSON payload. Anything that is not a dict is
        ignored, and a missing ``status`` is treated as 200. A missing or
        empty ``errors`` list yields a generic message instead of a KeyError.
        """
        if not isinstance(response, dict):
            return
        status = int_or_none(response.get('status', 200))
        if status != 200:
            errors = response.get('errors') or []
            message = ' '.join(map(str, errors)) or f'API returned status {status}'
            raise ExtractorError(message, expected=True)

    def _download_json(self, url_or_request, video_id, *args, **kwargs):
        """Download JSON like the parent method, surfacing API error messages.

        When the request fails with an HTTP error, the error body is parsed as
        JSON and passed to ``_handle_error``. If the body is not JSON or does
        not describe an error, the original exception is re-raised unchanged.
        """
        try:
            response = super()._download_json(
                url_or_request, video_id, *args, **kwargs)
        except ExtractorError as ee:
            if isinstance(ee.cause, HTTPError):
                # fatal=False: an unparsable error body must not mask the HTTP error
                response = self._parse_json(
                    ee.cause.response.read().decode('utf-8'), video_id, fatal=False)
                self._handle_error(response)
            raise
        return response

    def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
        """Download a JSON document and return the first entry of its ``data`` list."""
        return self._download_json(url_or_request, video_id, note)['data'][0]

    def _real_extract(self, url):
        """Build the info dict for one video from the player_data API.

        HLS formats come from the ``secure_m3u8`` source; direct HTTP formats
        are added best-effort from the matching ``mp4s`` endpoint.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = self._match_valid_url(url)
        host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')

        headers = {}
        query = {
            'id': video_id,
        }

        # The referrer is sent both as a header and as a query parameter
        referrer = smuggled_data.get('referrer')
        if referrer:
            headers['Referer'] = referrer
            query['referrer'] = referrer

        player_data = self._download_json(
            f'http://{host}/api/player_data', video_id,
            headers=headers, query=query)

        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']
        description = media.get('description')
        thumbnail = self._proto_relative_url(media.get('snapshot'), 'http:')
        duration = int_or_none(media.get('duration'))
        view_count = int_or_none(media.get('views'))

        age_restriction = media.get('age_restriction')
        age_limit = None
        if age_restriction:
            age_limit = 0 if age_restriction == 'allow_all' else 18

        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')

        formats = []

        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        formats.extend(m3u8_formats)

        # Index HLS formats by height so HTTP formats can inherit their metadata
        m3u8_formats_dict = {}
        for f in m3u8_formats:
            if f.get('height') is not None:
                m3u8_formats_dict[f['height']] = f

        mp4_data = self._download_json(
            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
            # http://lentaru.media.eagleplatform.com/player/player.js
            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
            video_id, 'Downloading mp4 JSON', fatal=False)
        # This download is best-effort, so ignore payloads of an unexpected shape
        mp4_urls = mp4_data.get('data') if isinstance(mp4_data, dict) else None
        if isinstance(mp4_urls, dict):
            for format_id, format_url in mp4_urls.items():
                if not url_or_none(format_url):
                    continue
                height = int_or_none(format_id)
                if height is not None and m3u8_formats_dict.get(height):
                    f = m3u8_formats_dict[height].copy()
                    f.update({
                        'format_id': f['format_id'].replace('hls', 'http'),
                        'protocol': 'http',
                    })
                else:
                    f = {
                        'format_id': f'http-{format_id}',
                        'height': height,
                    }
                f['url'] = format_url
                formats.append(f)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'age_limit': age_limit,
            'formats': formats,
        }
class ClipYouEmbedIE(InfoExtractor):
    """Embed-only helper that maps ClipYou player iframes to ``eagleplatform:`` URLs."""

    # No URL pattern of its own; the class only contributes embed detection
    _VALID_URL = False

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield one smuggled ``eagleplatform:<host>:<id>`` URL for the first
        ClipYou player iframe in ``webpage``, with ``url`` as the referrer.
        Yields nothing when no such iframe is present.
        """
        iframe = re.search(
            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
        if iframe is None:
            return
        embed_url = 'eagleplatform:{host}:{id}'.format(**iframe.groupdict())
        yield smuggle_url(embed_url, {'referrer': url})

View File

@ -64,14 +64,12 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
_VALID_URL = r'ertflix:(?P<id>[\w-]+)' _VALID_URL = r'ertflix:(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'ertflix:monogramma-praxitelis-tzanoylinos', 'url': 'ertflix:monogramma-praxitelis-tzanoylinos',
'md5': '5b9c2cd171f09126167e4082fc1dd0ef',
'info_dict': { 'info_dict': {
'id': 'monogramma-praxitelis-tzanoylinos', 'id': 'monogramma-praxitelis-tzanoylinos',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:ef0b439902963d56c43ac83c3f41dd0e', 'title': 'monogramma-praxitelis-tzanoylinos',
}, },
}, }]
]
def _extract_formats_and_subs(self, video_id): def _extract_formats_and_subs(self, video_id):
media_info = self._call_api(video_id, codename=video_id) media_info = self._call_api(video_id, codename=video_id)
@ -131,13 +129,14 @@ class ERTFlixIE(ERTFlixBaseIE):
'duration': 3166, 'duration': 3166,
'age_limit': 8, 'age_limit': 8,
}, },
'skip': 'Invalid URL',
}, { }, {
'url': 'https://www.ertflix.gr/series/ser.3448-monogramma', 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma',
'info_dict': { 'info_dict': {
'id': 'ser.3448', 'id': 'ser.3448',
'age_limit': 8, 'age_limit': 8,
'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', 'title': 'Monogramma',
'title': 'Μονόγραμμα', 'description': 'md5:e30cc640e6463da87f210a8ed10b2439',
}, },
'playlist_mincount': 64, 'playlist_mincount': 64,
}, { }, {
@ -145,28 +144,28 @@ class ERTFlixIE(ERTFlixBaseIE):
'info_dict': { 'info_dict': {
'id': 'ser.3448', 'id': 'ser.3448',
'age_limit': 8, 'age_limit': 8,
'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', 'title': 'Monogramma',
'title': 'Μονόγραμμα', 'description': 'md5:e30cc640e6463da87f210a8ed10b2439',
}, },
'playlist_count': 22, 'playlist_mincount': 66,
}, { }, {
'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022', 'url': 'https://www.ertflix.gr/series/ser.3448-monogramma?season=1&season=2021%20-%202022',
'info_dict': { 'info_dict': {
'id': 'ser.3448', 'id': 'ser.3448',
'age_limit': 8, 'age_limit': 8,
'description': 'Η εκπομπή σαράντα ετών που σημάδεψε τον πολιτισμό μας.', 'title': 'Monogramma',
'title': 'Μονόγραμμα', 'description': 'md5:e30cc640e6463da87f210a8ed10b2439',
}, },
'playlist_mincount': 36, 'playlist_mincount': 25,
}, { }, {
'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9', 'url': 'https://www.ertflix.gr/series/ser.164991-to-diktuo-1?season=1-9',
'info_dict': { 'info_dict': {
'id': 'ser.164991', 'id': 'ser.164991',
'age_limit': 8, 'age_limit': 8,
'description': 'Η πρώτη ελληνική εκπομπή με θεματολογία αποκλειστικά γύρω από το ίντερνετ.', 'title': 'The Network',
'title': 'Το δίκτυο', 'description': 'The first Greek show featuring topics exclusively around the internet.',
}, },
'playlist_mincount': 9, 'playlist_mincount': 0,
}, { }, {
'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari', 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
'only_matching': True, 'only_matching': True,
@ -282,6 +281,16 @@ class ERTWebtvEmbedIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg', 'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg',
}, },
'skip': 'Invalid URL',
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/',
'info_dict': {
'id': '2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4',
'ext': 'mp4',
'title': 'VOD - 2022/tv/news-themata-ianouarios/20220114-apotis6-gouales-pita.mp4',
'thumbnail': r're:https?://www\.ert\.gr/themata/photos/.+\.jpg',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -81,13 +81,14 @@ class FacebookIE(InfoExtractor):
'description': 'md5:34675bda53336b1d16400265c2bb9b3b', 'description': 'md5:34675bda53336b1d16400265c2bb9b3b',
'uploader': 'RADIO KICKS FM', 'uploader': 'RADIO KICKS FM',
'upload_date': '20230818', 'upload_date': '20230818',
'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1692346159, 'timestamp': 1692346159,
'thumbnail': r're:^https?://.*',
'uploader_id': '100063551323670', 'uploader_id': '100063551323670',
'duration': 3133.583, 'duration': 3133.583,
'view_count': int, 'view_count': int,
'concurrent_view_count': 0, 'concurrent_view_count': 0,
}, },
'expected_warnings': ['Cannot parse data'],
}, { }, {
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
'md5': '6a40d33c0eccbb1af76cf0485a052659', 'md5': '6a40d33c0eccbb1af76cf0485a052659',
@ -106,17 +107,18 @@ class FacebookIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '274175099429670', 'id': '274175099429670',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Asif', 'title': '119 reactions · 1.4K shares | Asif Nawab Butt on Reels',
'description': '', 'description': '',
'uploader': 'Asif Nawab Butt', 'uploader': 'Asif Nawab Butt',
'upload_date': '20140506', 'upload_date': '20140506',
'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1399398998, 'timestamp': 1399398998,
'thumbnail': r're:^https?://.*', 'uploader_id': 'pfbid028xue38TBXRyNbiqBSV2LFs3QK3yopvKjupbqFoL6U9SKbx4p2SMdJjQSBvnjsHGWl',
'uploader_id': 'pfbid05AzrFTXgY37tqwaSgbFTTEpCLBjjEJHkigogwGiRPtKEpAsJYJpzE94H1RxYXWEtl',
'duration': 131.03, 'duration': 131.03,
'concurrent_view_count': int, 'concurrent_view_count': int,
'view_count': int, 'view_count': int,
}, },
'expected_warnings': ['Cannot parse data'],
}, { }, {
'note': 'Video with DASH manifest', 'note': 'Video with DASH manifest',
'url': 'https://www.facebook.com/video.php?v=957955867617029', 'url': 'https://www.facebook.com/video.php?v=957955867617029',
@ -158,7 +160,7 @@ class FacebookIE(InfoExtractor):
'id': '10153664894881749', 'id': '10153664894881749',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...', 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1456259628, 'timestamp': 1456259628,
'upload_date': '20160223', 'upload_date': '20160223',
'uploader': 'Barack Obama', 'uploader': 'Barack Obama',
@ -168,7 +170,7 @@ class FacebookIE(InfoExtractor):
# have 1080P, but only up to 720p in swf params # have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media # data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': '1659aa21fb3dd1585874f668e81a72c8', 'md5': '70b82ebf5f0e9b91b2a49d3db3563611',
'info_dict': { 'info_dict': {
'id': '10155529876156509', 'id': '10155529876156509',
'ext': 'mp4', 'ext': 'mp4',
@ -177,7 +179,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1477818095, 'timestamp': 1477818095,
'upload_date': '20161030', 'upload_date': '20161030',
'uploader': 'CNN', 'uploader': 'CNN',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'view_count': int, 'view_count': int,
'uploader_id': '100059479812265', 'uploader_id': '100059479812265',
'concurrent_view_count': int, 'concurrent_view_count': int,
@ -198,13 +200,11 @@ class FacebookIE(InfoExtractor):
'uploader': 'Yaroslav Korpan', 'uploader': 'Yaroslav Korpan',
'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl', 'uploader_id': 'pfbid06AScABAWcW91qpiuGrLt99Ef9tvwHoXP6t8KeFYEqkSfreMtfa9nTveh8b2ZEVSWl',
'concurrent_view_count': int, 'concurrent_view_count': int,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'view_count': int, 'view_count': int,
'duration': 11736.446, 'duration': 11736.446,
}, },
'params': { 'skip': 'Invalid URL',
'skip_download': True,
},
}, { }, {
# FIXME: Cannot parse data error # FIXME: Cannot parse data error
'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471', 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
@ -215,7 +215,7 @@ class FacebookIE(InfoExtractor):
'timestamp': 1477305000, 'timestamp': 1477305000,
'upload_date': '20161024', 'upload_date': '20161024',
'uploader': 'La Guía Del Varón', 'uploader': 'La Guía Del Varón',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
}, },
'skip': 'Requires logging in', 'skip': 'Requires logging in',
}, { }, {
@ -244,9 +244,10 @@ class FacebookIE(InfoExtractor):
'upload_date': '20171124', 'upload_date': '20171124',
'uploader': 'Vickie Gentry', 'uploader': 'Vickie Gentry',
'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l', 'uploader_id': 'pfbid0FkkycT95ySNNyfCw4Cho6u5G7WbbZEcxT496Hq8rtx1K3LcTCATpR3wnyYhmyGC5l',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'duration': 148.224, 'duration': 148.224,
}, },
'skip': 'Invalid URL',
}, { }, {
# data.node.comet_sections.content.story.attachments[].styles.attachment.media # data.node.comet_sections.content.story.attachments[].styles.attachment.media
'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl', 'url': 'https://www.facebook.com/attn/posts/pfbid0j1Czf2gGDVqeQ8KiMLFm3pWN8GxsQmeRrVhimWDzMuKQoR8r4b1knNsejELmUgyhl',
@ -260,7 +261,7 @@ class FacebookIE(InfoExtractor):
'duration': 132.675, 'duration': 132.675,
'uploader_id': '100064451419378', 'uploader_id': '100064451419378',
'view_count': int, 'view_count': int,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1701975646, 'timestamp': 1701975646,
}, },
}, { }, {
@ -271,9 +272,9 @@ class FacebookIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Lela Evans', 'title': 'Lela Evans',
'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...', 'description': 'Today Makkovik\'s own Pilot Mandy Smith made her inaugural landing on the airstrip in her hometown. What a proud moment as we all cheered and...',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'uploader': 'Lela Evans', 'uploader': 'Lela Evans',
'uploader_id': 'pfbid0swT2y7t6TAsZVBvcyeYPdhTMefGaS26mzUwML3vd1ma6ndGZKxsyS4Ssu3jitZLXl', 'uploader_id': 'pfbid02wjMpknobSMnyynK3TNKN4Ww1StcpAKXgowqTyge3bz7LwHZMQ68uiXzzbu7xeryBl',
'upload_date': '20231228', 'upload_date': '20231228',
'timestamp': 1703804085, 'timestamp': 1703804085,
'duration': 394.347, 'duration': 394.347,
@ -326,28 +327,27 @@ class FacebookIE(InfoExtractor):
'uploader_id': '100066514874195', 'uploader_id': '100066514874195',
'duration': 4524.001, 'duration': 4524.001,
'view_count': int, 'view_count': int,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'concurrent_view_count': int, 'concurrent_view_count': int,
}, },
'params': { 'params': {'skip_download': True},
'skip_download': True,
},
}, { }, {
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
'info_dict': { 'info_dict': {
'id': '106560053808006', 'id': '106560053808006',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Josef', 'title': 'Josef Novak on Reels',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'concurrent_view_count': int, 'concurrent_view_count': int,
'uploader_id': 'pfbid02gpfwRM2XvdEJfsERupwQiNmBiDArc38RMRYZnap372q6Vs7MtFTVy72mmFWpJBTKl', 'uploader_id': 'pfbid0cjYJYXpePWqhZ9DgpB6gKXrN2q3obwducdKm4wT7K5nkhbfKg5cneocYbsdaji7fl',
'timestamp': 1549275572, 'timestamp': 1549275572,
'duration': 3.283, 'duration': 3.283,
'uploader': 'Josef Novak', 'uploader': 'Josef Novak',
'description': '', 'description': '',
'upload_date': '20190204', 'upload_date': '20190204',
}, },
'expected_warnings': ['Cannot parse data'],
}, { }, {
# data.video.story.attachments[].media # data.video.story.attachments[].media
'url': 'https://www.facebook.com/watch/?v=647537299265662', 'url': 'https://www.facebook.com/watch/?v=647537299265662',
@ -406,7 +406,7 @@ class FacebookIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"', 'title': 'ANALISI IN CAMPO OSCURO " Coaguli nel sangue dei vaccinati"',
'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022', 'description': 'Other event by Comitato Liberi Pensatori on Tuesday, October 18 2022',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'uploader': 'Comitato Liberi Pensatori', 'uploader': 'Comitato Liberi Pensatori',
'uploader_id': '100065709540881', 'uploader_id': '100065709540881',
}, },
@ -414,6 +414,56 @@ class FacebookIE(InfoExtractor):
'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl', 'url': 'https://www.facebook.com/groups/1513990329015294/posts/d41d8cd9/2013209885760000/?app=fbl',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
# <iframe> embed
'url': 'http://www.unique-almeria.com/mini-hollywood.html',
'md5': 'cba5d8c5021e9340dcefe925255e2c3e',
'info_dict': {
'id': '1529066599879',
'ext': 'mp4',
'title': 'Facebook video #1529066599879',
},
'expected_warnings': ['unable to extract uploader'],
}, {
# FIXME: Embed detection
# <iframe> embed, plugin video
'url': 'https://www.newsmemory.com/eedition/e-publishing-solutions/2-in-one-app/',
'md5': 'ae97d4a44f8cc9a8b1a4c03b9ed793af',
'info_dict': {
'id': '10155710648695814',
'ext': 'mp4',
'title': 'Download the all new and improved Trinidad Express App',
'concurrent_view_count': int,
'description': 'md5:4806195c99908e4189b45b1c23bd4f89',
'duration': 69.408,
'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1533919195,
'upload_date': '20180810',
'uploader': 'Trinidad Express Newspapers',
'uploader_id': '100064446413648',
'view_count': int,
},
'expected_warnings': ['Cannot parse data'],
}, {
# API embed
'url': 'https://www.curs.md/ro',
'md5': '090bae53b9bff2be993c896edc2ea205',
'info_dict': {
'id': '334484292523563',
'ext': 'mp4',
'title': 'md5:9abffe1c86cdd967ffa224e1ccc13b90',
'concurrent_view_count': int,
'description': 'md5:0ba98567a61c640f9fabf1882235b33d',
'duration': 8789.891,
'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1700603114,
'upload_date': '20231121',
'uploader': 'Istoria Moldovei',
'uploader_id': '100063529778592',
'view_count': int,
},
'params': {'extractor_args': {'generic': {'impersonate': ['chrome']}}},
}]
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)' _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
_api_config = { _api_config = {
'graphURI': '/api/graphql/', 'graphURI': '/api/graphql/',
@ -898,20 +948,24 @@ def _real_extract(self, url):
class FacebookPluginsVideoIE(InfoExtractor): class FacebookPluginsVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)' _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560', 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
'md5': '5954e92cdfe51fe5782ae9bda7058a07', 'md5': 'af83aeae1d595f377c6e47a450828155',
'info_dict': { 'info_dict': {
'id': '10154383743583686', 'id': '10154383743583686',
'ext': 'mp4', 'ext': 'mp4',
# TODO: Fix title, uploader
'title': 'What to do during the haze?', 'title': 'What to do during the haze?',
'uploader': 'Gov.sg', 'concurrent_view_count': int,
'upload_date': '20160826', 'description': 'md5:81839c0979803a014b20798df255ed0b',
'duration': 65.087,
'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1472184808, 'timestamp': 1472184808,
'upload_date': '20160826',
'uploader': 'gov.sg',
'uploader_id': '100064718678925',
'view_count': int,
}, },
'add_ie': [FacebookIE.ie_key()], 'expected_warnings': ['Cannot parse data'],
}, { }, {
'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104', 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
'only_matching': True, 'only_matching': True,
@ -945,7 +999,7 @@ class FacebookRedirectURLIE(InfoExtractor):
'tags': 'count:11', 'tags': 'count:11',
'duration': 3332, 'duration': 3332,
'live_status': 'not_live', 'live_status': 'not_live',
'thumbnail': 'https://i.ytimg.com/vi/pO8h3EaFRdo/maxresdefault.jpg', 'thumbnail': r're:https?://i\.ytimg\.com/vi/.+',
'channel_url': 'https://www.youtube.com/channel/UCGBpxWJr9FNOcFYA5GkKrMg', 'channel_url': 'https://www.youtube.com/channel/UCGBpxWJr9FNOcFYA5GkKrMg',
'availability': 'public', 'availability': 'public',
'uploader_url': 'http://www.youtube.com/user/brtvofficial', 'uploader_url': 'http://www.youtube.com/user/brtvofficial',
@ -954,8 +1008,7 @@ class FacebookRedirectURLIE(InfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
}, },
'add_ie': ['Youtube'], 'skip': 'Youtube video is now private',
'params': {'skip_download': 'Youtube'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -968,22 +1021,20 @@ def _real_extract(self, url):
class FacebookReelIE(InfoExtractor): class FacebookReelIE(InfoExtractor):
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/reel/(?P<id>\d+)' _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/reel/(?P<id>\d+)'
IE_NAME = 'facebook:reel' IE_NAME = 'facebook:reel'
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/reel/1195289147628387', 'url': 'https://www.facebook.com/reel/1195289147628387',
'md5': 'a53256d10fc2105441fe0c4212ed8cea', 'md5': 'aeb0153ecb2eaacdf2dc2bf88f593fef',
'info_dict': { 'info_dict': {
'id': '1195289147628387', 'id': '1195289147628387',
'ext': 'mp4', 'ext': 'mp4',
'title': r're:9\.6K views · 355 reactions .+ Let the “Slapathon” commence!! .+ LL COOL J · Mama Said Knock You Out$', 'title': '9.7K views · 352 reactions | When your trying to help your partner out with an arrest and #FAAFO games begin. Let the “Slapathon” commence!! 👊👋 | Beast Camp Training',
'description': r're:When your trying to help your partner .+ LL COOL J · Mama Said Knock You Out$', 'description': 'md5:5a767dc7e78718667b150a7facc4a34f',
'uploader': 'Beast Camp Training', 'uploader': '9.7K views &#xb7; 352 reactions | When your trying to help your partner out with an arrest and #FAAFO games begin. Let the &#x201c;Slapathon&#x201d; commence!! &#x1f44a;&#x1f44b; | Beast Camp Training',
'uploader_id': '100040874179269', 'uploader_id': '100040874179269',
'duration': 9.579, 'duration': 9.579,
'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1637502609, 'timestamp': 1637502609,
'upload_date': '20211121', 'upload_date': '20211121',
'thumbnail': r're:^https?://.*',
'like_count': int,
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
}, },
@ -998,7 +1049,6 @@ def _real_extract(self, url):
class FacebookAdsIE(InfoExtractor): class FacebookAdsIE(InfoExtractor):
_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)' _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/ads/library/?\?(?:[^#]+&)?id=(?P<id>\d+)'
IE_NAME = 'facebook:ads' IE_NAME = 'facebook:ads'
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/ads/library/?id=899206155126718', 'url': 'https://www.facebook.com/ads/library/?id=899206155126718',
'info_dict': { 'info_dict': {
@ -1008,12 +1058,13 @@ class FacebookAdsIE(InfoExtractor):
'description': 'md5:0822724069e3aca97cbed5dabbab282e', 'description': 'md5:0822724069e3aca97cbed5dabbab282e',
'uploader': 'Kandao', 'uploader': 'Kandao',
'uploader_id': '774114102743284', 'uploader_id': '774114102743284',
'uploader_url': r're:^https?://.*', 'uploader_url': 'https://facebook.com/KandaoVR',
'timestamp': 1702548330, 'timestamp': 1702548330,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'upload_date': '20231214', 'upload_date': '20231214',
'like_count': int, 'like_count': int,
}, },
'skip': 'Invalid URL',
}, { }, {
# key 'watermarked_video_sd_url' missing # key 'watermarked_video_sd_url' missing
'url': 'https://www.facebook.com/ads/library/?id=501152689226254', 'url': 'https://www.facebook.com/ads/library/?id=501152689226254',
@ -1024,9 +1075,9 @@ class FacebookAdsIE(InfoExtractor):
'description': 'md5:02a446ace7ff8c3c37a2892922492490', 'description': 'md5:02a446ace7ff8c3c37a2892922492490',
'uploader': 'mat.nawrocki', 'uploader': 'mat.nawrocki',
'uploader_id': '148586968341456', 'uploader_id': '148586968341456',
'uploader_url': r're:^https?://.*', 'uploader_url': 'https://www.instagram.com/_u/mat.nawrocki',
'thumbnail': r're:https?://scontent\.fitm\d-1\.fna\.fbcdn\.net/.+',
'timestamp': 1723452305, 'timestamp': 1723452305,
'thumbnail': r're:^https?://.*',
'upload_date': '20240812', 'upload_date': '20240812',
'like_count': int, 'like_count': int,
}, },
@ -1037,12 +1088,13 @@ class FacebookAdsIE(InfoExtractor):
'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ', 'title': 'Jusqu\u2019\u00e0 -25% sur une s\u00e9lection de vins p\u00e9tillants italiens ',
'uploader': 'Eataly Paris Marais', 'uploader': 'Eataly Paris Marais',
'uploader_id': '2086668958314152', 'uploader_id': '2086668958314152',
'uploader_url': r're:^https?://.*', 'uploader_url': 'https://facebook.com/EatalyParisMarais',
'timestamp': 1703571529, 'timestamp': 1703571529,
'upload_date': '20231226', 'upload_date': '20231226',
'like_count': int, 'like_count': int,
}, },
'playlist_count': 3, 'playlist_count': 3,
'skip': 'Invalid URL',
}, { }, {
'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569', 'url': 'https://es-la.facebook.com/ads/library/?id=901230958115569',
'only_matching': True, 'only_matching': True,

105
yt_dlp/extractor/faulio.py Normal file
View File

@ -0,0 +1,105 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import js_to_json, url_or_none
from ..utils.traversal import traverse_obj
class FaulioLiveIE(InfoExtractor):
    # Live-channel extractor for the sites listed in _DOMAINS. Each site embeds
    # a Nuxt config in its page that names the API base used to list channels.
    _DOMAINS = (
        'aloula.sba.sa',
        'bahry.com',
        'maraya.sba.net.ae',
        'sat7plus.org',
    )
    # Matches /live/<slug> on any supported domain, with an optional
    # two-letter language prefix (en, ar or fa) in front of "live".
    _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P<id>[a-zA-Z0-9-]+)'
    _TESTS = [{
        'url': 'https://aloula.sba.sa/live/saudiatv',
        'info_dict': {
            'id': 'aloula.faulio.com_saudiatv',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'Livestream'},
    }, {
        'url': 'https://bahry.com/live/1',
        'info_dict': {
            'id': 'bahry.faulio.com_1',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'Livestream'},
    }, {
        'url': 'https://maraya.sba.net.ae/live/1',
        'info_dict': {
            'id': 'maraya.faulio.com_1',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'Livestream'},
    }, {
        'url': 'https://sat7plus.org/live/pars',
        'info_dict': {
            'id': 'sat7.faulio.com_pars',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {'skip_download': 'Livestream'},
    }, {
        'url': 'https://sat7plus.org/fa/live/arabic',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page assigns its runtime config to window.__NUXT__.config as a
        # JS object literal; js_to_json turns it into parseable JSON.
        nuxt_config = self._search_json(
            r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json)
        api_base = nuxt_config['public']['TRANSLATIONS_API_URL']

        # Select the first channel entry whose 'url' field equals the slug
        # taken from the page URL.
        channel_listing = self._download_json(f'{api_base}/channels', video_id)
        channel = traverse_obj(
            channel_listing, (lambda _, entry: entry['url'] == video_id, any))

        formats, subtitles = [], {}

        # HLS first, then DASH; both are optional and non-fatal.
        hls_url = traverse_obj(channel, ('streams', 'hls', {url_or_none}))
        if hls_url:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        mpd_url = traverse_obj(channel, ('streams', 'mpd', {url_or_none}))
        if mpd_url:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                mpd_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subtitles, target=subtitles)

        # The ID is the API hostname plus the slug (see the IDs in _TESTS).
        channel_id = f'{urllib.parse.urlparse(api_base).hostname}_{video_id}'
        metadata = traverse_obj(channel, {
            'title': ('title', {str}),
            'description': ('description', {str}),
        })

        return {
            'id': channel_id,
            **metadata,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }

View File

@ -22,8 +22,23 @@ class FC2IE(InfoExtractor):
'md5': 'a6ebe8ebe0396518689d963774a54eb7', 'md5': 'a6ebe8ebe0396518689d963774a54eb7',
'info_dict': { 'info_dict': {
'id': '20121103kUan1KHs', 'id': '20121103kUan1KHs',
'ext': 'flv',
'title': 'Boxing again with Puff', 'title': 'Boxing again with Puff',
'ext': 'mp4',
'thumbnail': r're:https?://.+\.jpe?g',
},
'params': {
'skip_download': 'm3u8',
},
}, {
# Direct video url
'url': 'https://video.fc2.com/content/20121209FP73fxDx',
'md5': '066bdb9b3a56a97f49cbf0d0b8a75a1f',
'info_dict': {
'id': '20121209FP73fxDx',
'title': 'Farewelling The Wiggles Live in Sydney Dec 8 2012',
'ext': 'mp4',
'thumbnail': r're:https?://.+\.jpe?g',
'description': 'Saying goodbye to the Wiggles at their Celebration Concert in Sydney, and what a concert that was!',
}, },
}, { }, {
'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/', 'url': 'http://video.fc2.com/en/content/20150125cEva0hDn/',
@ -104,7 +119,7 @@ def _real_extract(self, url):
'title': title, 'title': title,
'url': vid_url, 'url': vid_url,
'ext': 'mp4', 'ext': 'mp4',
'protocol': 'm3u8_native', 'protocol': 'm3u8_native' if vidplaylist.get('type') == 2 else 'https',
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
} }

View File

@ -1,9 +1,7 @@
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError,
float_or_none, float_or_none,
url_or_none, url_or_none,
) )
@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = urllib.parse.unquote(self._match_id(url)) display_id = urllib.parse.unquote(self._match_id(url))
webpage = self._download_webpage(url, display_id)
try: # yt-dlp's default user-agents are too old and blocked by the site
webpage = self._download_webpage(url, display_id, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
})
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
raise
# Retry with impersonation if hardcoded UA is insufficient
webpage = self._download_webpage(url, display_id, impersonate=True)
data = self._search_json( data = self._search_json(
r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>', r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>',

View File

@ -1,4 +1,3 @@
import json
import re import re
import urllib.parse import urllib.parse
@ -19,7 +18,11 @@
unsmuggle_url, unsmuggle_url,
url_or_none, url_or_none,
) )
from ..utils.traversal import find_element, traverse_obj from ..utils.traversal import (
find_element,
get_first,
traverse_obj,
)
class FranceTVBaseInfoExtractor(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
@ -121,9 +124,10 @@ def _extract_video(self, video_id, hostname=None):
elif code := traverse_obj(dinfo, ('code', {int})): elif code := traverse_obj(dinfo, ('code', {int})):
if code == 2009: if code == 2009:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES) self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif code in (2015, 2017): elif code in (2015, 2017, 2019):
# 2015: L'accès à cette vidéo est impossible. (DRM-only) # 2015: L'accès à cette vidéo est impossible. (DRM-only)
# 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM) # 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM)
# 2019: L'accès à cette vidéo est incompatible avec votre configuration. (DRM-only)
drm_formats = True drm_formats = True
continue continue
self.report_warning( self.report_warning(
@ -258,7 +262,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
'info_dict': { 'info_dict': {
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', # old: c5bda21d-2c6f-4470-8849-3d8327adb2ba' 'id': 'b2cf9fd8-e971-4757-8651-848f2772df61', # old: ec217ecc-0733-48cf-ac06-af1347b849d1
'ext': 'mp4', 'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus', 'title': '13h15, le dimanche... - Les mystères de Jésus',
'timestamp': 1502623500, 'timestamp': 1502623500,
@ -269,7 +273,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [FranceTVIE.ie_key()], 'skip': 'Unfortunately, this video is no longer available',
}, { }, {
# geo-restricted # geo-restricted
'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html', 'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html',
@ -287,7 +291,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 1441, 'duration': 1441,
}, },
'skip': 'No longer available', 'skip': 'Unfortunately, this video is no longer available',
}, { }, {
# geo-restricted livestream (workflow == 'token-akamai') # geo-restricted livestream (workflow == 'token-akamai')
'url': 'https://www.france.tv/france-4/direct.html', 'url': 'https://www.france.tv/france-4/direct.html',
@ -308,6 +312,19 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'live_status': 'is_live', 'live_status': 'is_live',
}, },
'params': {'skip_download': 'livestream'}, 'params': {'skip_download': 'livestream'},
}, {
# Not geo-restricted
'url': 'https://www.france.tv/france-2/la-maison-des-maternelles/5574051-nous-sommes-amis-et-nous-avons-fait-un-enfant-ensemble.html',
'info_dict': {
'id': 'b448bfe4-9fe7-11ee-97d8-2ba3426fa3df',
'ext': 'mp4',
'title': 'Nous sommes amis et nous avons fait un enfant ensemble - Émission du jeudi 21 décembre 2023',
'duration': 1065,
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1703147921,
'upload_date': '20231221',
},
'params': {'skip_download': 'm3u8'},
}, { }, {
# france3 # france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
@ -342,30 +359,16 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.goplay
def _find_json(self, s):
return self._search_json(
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
nextjs_data = traverse_obj( if get_first(nextjs_data, ('isLive', {bool})):
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
(..., {json.loads}, ..., {self._find_json}, ..., 'children', ..., ..., 'children', ..., ..., 'children'))
if traverse_obj(nextjs_data, (..., ..., 'children', ..., 'isLive', {bool}, any)):
# For livestreams we need the id of the stream instead of the currently airing episode id # For livestreams we need the id of the stream instead of the currently airing episode id
video_id = traverse_obj(nextjs_data, ( video_id = get_first(nextjs_data, ('options', 'id', {str}))
..., ..., 'children', ..., 'children', ..., 'children', ..., 'children', ..., ...,
'children', ..., ..., 'children', ..., ..., 'children', (..., (..., ...)),
'options', 'id', {str}, any))
else: else:
video_id = traverse_obj(nextjs_data, ( video_id = get_first(nextjs_data, ('video', ('playerReplayId', 'siId'), {str}))
..., ..., ..., 'children',
lambda _, v: v['video']['url'] == urllib.parse.urlparse(url).path,
'video', ('playerReplayId', 'siId'), {str}, any))
if not video_id: if not video_id:
raise ExtractorError('Unable to extract video ID') raise ExtractorError('Unable to extract video ID')

File diff suppressed because it is too large Load Diff

View File

@ -112,16 +112,17 @@ class GlomexIE(GlomexBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'https://video.glomex.com/sport/v-cb24uwg77hgh-nach-2-0-sieg-guardiola-mit-mancity-vor-naechstem-titel', 'url': 'https://video.glomex.com/sport/v-cb24uwg77hgh-nach-2-0-sieg-guardiola-mit-mancity-vor-naechstem-titel',
'md5': 'cec33a943c4240c9cb33abea8c26242e',
'info_dict': { 'info_dict': {
'id': 'v-cb24uwg77hgh', 'id': 'v-cb24uwg77hgh',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:38a90cedcfadd72982c81acf13556e0c', 'title': 'Nach 2:0-Sieg: Guardiola mit ManCity vor nächstem Titel',
'description': 'md5:1ea6b6caff1443fcbbba159e432eedb8', 'description': 'md5:1ea6b6caff1443fcbbba159e432eedb8',
'duration': 29600, 'duration': 29600,
'thumbnail': r're:https?://i[a-z0-9]thumbs\.glomex\.com/.+',
'timestamp': 1619895017, 'timestamp': 1619895017,
'upload_date': '20210501', 'upload_date': '20210501',
}, },
'params': {'skip_download': 'm3u8'},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -140,16 +141,17 @@ class GlomexEmbedIE(GlomexBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf', 'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf',
'md5': '68f259b98cc01918ac34180142fce287',
'info_dict': { 'info_dict': {
'id': 'v-cfa6lye0dkdd-sf', 'id': 'v-cfa6lye0dkdd-sf',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Φώφη Γεννηματά: Ο επικήδειος λόγος του 17χρονου γιου της, Γιώργου',
'thumbnail': r're:https?://i[a-z0-9]thumbs\.glomex\.com/.+',
'timestamp': 1635337199, 'timestamp': 1635337199,
'duration': 133080, 'duration': 133080,
'upload_date': '20211027', 'upload_date': '20211027',
'description': 'md5:e741185fc309310ff5d0c789b437be66', 'description': 'md5:e741185fc309310ff5d0c789b437be66',
'title': 'md5:35647293513a6c92363817a0fb0a7961',
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://player.glomex.com/integration/1/iframe-player.html?origin=fullpage&integrationId=19syy24xjn1oqlpc&playlistId=rl-vcb49w1fb592p&playlistIndex=0', 'url': 'https://player.glomex.com/integration/1/iframe-player.html?origin=fullpage&integrationId=19syy24xjn1oqlpc&playlistId=rl-vcb49w1fb592p&playlistIndex=0',
'info_dict': { 'info_dict': {
@ -157,12 +159,27 @@ class GlomexEmbedIE(GlomexBaseIE):
}, },
'playlist_count': 100, 'playlist_count': 100,
}, { }, {
# Geo-restricted
'url': 'https://player.glomex.com/integration/1/iframe-player.html?playlistId=cl-bgqaata6aw8x&integrationId=19syy24xjn1oqlpc', 'url': 'https://player.glomex.com/integration/1/iframe-player.html?playlistId=cl-bgqaata6aw8x&integrationId=19syy24xjn1oqlpc',
'info_dict': { 'info_dict': {
'id': 'cl-bgqaata6aw8x', 'id': 'cl-bgqaata6aw8x',
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://www.skai.gr/news/world/iatrikos-syllogos-tourkias-to-turkovac-aplo-dialyma-erntogan-eiste-apateones-kai-pseytes',
'info_dict': {
'id': 'v-ch2nkhcirwc9-sf',
'ext': 'mp4',
'title': 'Ιατρικός Σύλλογος Τουρκίας: Το Turkovac είναι ένα απλό διάλυμα –Ερντογάν: Είστε απατεώνες και ψεύτες',
'description': 'md5:8b517a61d577efe7e36fde72fd535995',
'duration': 460000,
'thumbnail': r're:https?://i[a-z0-9]thumbs\.glomex\.com/.+',
'timestamp': 1641885019,
'upload_date': '20220111',
},
'params': {'skip_download': 'm3u8'},
}]
@classmethod @classmethod
def build_player_url(cls, video_id, integration, origin_url=None): def build_player_url(cls, video_id, integration, origin_url=None):

View File

@ -5,16 +5,11 @@
import hmac import hmac
import json import json
import os import os
import re
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import ExtractorError, int_or_none
ExtractorError, from ..utils.traversal import get_first, traverse_obj
int_or_none,
remove_end,
traverse_obj,
)
class GoPlayIE(InfoExtractor): class GoPlayIE(InfoExtractor):
@ -27,10 +22,10 @@ class GoPlayIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '2baa4560-87a0-421b-bffc-359914e3c387', 'id': '2baa4560-87a0-421b-bffc-359914e3c387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S22 - Aflevering 1', 'title': 'De Slimste Mens ter Wereld - S22 - Aflevering 1',
'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
'series': 'De Slimste Mens ter Wereld', 'series': 'De Slimste Mens ter Wereld',
'episode': 'Episode 1', 'episode': 'Wordt aangekondigd',
'season_number': 22, 'season_number': 22,
'episode_number': 1, 'episode_number': 1,
'season': 'Season 22', 'season': 'Season 22',
@ -52,7 +47,7 @@ class GoPlayIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee', 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S11 - Aflevering 1', 'title': 'De Mol - S11 - Aflevering 1',
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}', 'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
'episode': 'Episode 1', 'episode': 'Episode 1',
'series': 'De Mol', 'series': 'De Mol',
@ -75,21 +70,13 @@ def _real_initialize(self):
if not self._id_token: if not self._id_token:
raise self.raise_login_required(method='password') raise self.raise_login_required(method='password')
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv
def _find_json(self, s):
return self._search_json(
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
nextjs_data = traverse_obj( nextjs_data = self._search_nextjs_v13_data(webpage, display_id)
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage), meta = get_first(nextjs_data, (
(..., {json.loads}, ..., {self._find_json}, ...)) lambda k, v: k in ('video', 'meta') and v['path'] == urllib.parse.urlparse(url).path))
meta = traverse_obj(nextjs_data, (
..., ..., 'children', ..., ..., 'children',
lambda _, v: v['video']['path'] == urllib.parse.urlparse(url).path, 'video', any))
video_id = meta['uuid'] video_id = meta['uuid']
info_dict = traverse_obj(meta, { info_dict = traverse_obj(meta, {
@ -98,19 +85,18 @@ def _real_extract(self, url):
}) })
if traverse_obj(meta, ('program', 'subtype')) != 'movie': if traverse_obj(meta, ('program', 'subtype')) != 'movie':
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)): for season_data in traverse_obj(nextjs_data, (..., 'playlists', ..., {dict})):
episode_data = traverse_obj( episode_data = traverse_obj(season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
if not episode_data: if not episode_data:
continue continue
episode_title = traverse_obj( season_number = traverse_obj(season_data, ('season', {int_or_none}))
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
info_dict.update({ info_dict.update({
'title': episode_title or info_dict.get('title'), 'episode': traverse_obj(episode_data, ('episodeTitle', {str})),
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})), 'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
'season_number': season_number,
'series': self._search_regex(
fr'^(.+)? - S{season_number} - ', info_dict.get('title'), 'series', default=None),
}) })
break break

View File

@ -12,8 +12,11 @@
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
determine_ext, determine_ext,
filter_dict,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
jwt_decode_hs256,
parse_iso8601,
str_or_none, str_or_none,
url_or_none, url_or_none,
) )
@ -21,35 +24,49 @@
class HotStarBaseIE(InfoExtractor): class HotStarBaseIE(InfoExtractor):
_TOKEN_NAME = 'userUP'
_BASE_URL = 'https://www.hotstar.com' _BASE_URL = 'https://www.hotstar.com'
_API_URL = 'https://api.hotstar.com' _API_URL = 'https://api.hotstar.com'
_API_URL_V2 = 'https://apix.hotstar.com/v2' _API_URL_V2 = 'https://apix.hotstar.com/v2'
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
_FREE_HEADERS = {
'user-agent': 'Hotstar;in.startv.hotstar/25.06.30.0.11580 (Android/12)',
'x-hs-client': 'platform:android;app_id:in.startv.hotstar;app_version:25.06.30.0;os:Android;os_version:12;schema_version:0.0.1523',
'x-hs-platform': 'android',
}
_SUB_HEADERS = {
'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)',
'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970',
'x-hs-platform': 'androidtv',
}
def _has_active_subscription(self, cookies, server_time):
    # Returns True when the latest subscription expiry found in the user
    # token cookie is later than the reference time.
    # The reference time is `server_time` coerced by int_or_none, or the
    # local clock when that yields a falsy value.
    now = int_or_none(server_time) or int(time.time())
    # Path: token cookie value -> decoded JWT -> JSON in the 'sub' claim ->
    # every 'expiry' under subscriptions/in -> parsed -> maximum.
    # NOTE(review): assumes parse_iso8601 yields a number comparable to `now` - confirm.
    expiry_path = (
        self._TOKEN_NAME, 'value', {jwt_decode_hs256}, 'sub', {json.loads},
        'subscriptions', 'in', ..., 'expiry', {parse_iso8601}, all, {max})
    # A missing or empty traversal result counts as expiry 0.
    latest_expiry = traverse_obj(cookies, expiry_path) or 0
    return latest_expiry > now
def _call_api_v1(self, path, *args, **kwargs): def _call_api_v1(self, path, *args, **kwargs):
return self._download_json( return self._download_json(
f'{self._API_URL}/o/v1/{path}', *args, **kwargs, f'{self._API_URL}/o/v1/{path}', *args, **kwargs,
headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'}) headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'})
def _call_api_impl(self, path, video_id, query, st=None, cookies=None): def _call_api_impl(self, path, video_id, query, cookies=None, st=None):
if not cookies or not cookies.get('userUP'):
self.raise_login_required()
st = int_or_none(st) or int(time.time()) st = int_or_none(st) or int(time.time())
exp = st + 6000 exp = st + 6000
auth = f'st={st}~exp={exp}~acl=/*' auth = f'st={st}~exp={exp}~acl=/*'
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
response = self._download_json( response = self._download_json(
f'{self._API_URL_V2}/{path}', video_id, query=query, f'{self._API_URL_V2}/{path}', video_id, query=query,
headers={ headers=filter_dict({
'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)', **(self._SUB_HEADERS if self._has_active_subscription(cookies, st) else self._FREE_HEADERS),
'hotstarauth': auth, 'hotstarauth': auth,
'x-hs-usertoken': cookies['userUP'].value, 'x-hs-usertoken': traverse_obj(cookies, (self._TOKEN_NAME, 'value')),
'x-hs-device-id': traverse_obj(cookies, ('deviceId', 'value')) or str(uuid.uuid4()), 'x-hs-device-id': traverse_obj(cookies, ('deviceId', 'value')) or str(uuid.uuid4()),
'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970',
'x-hs-platform': 'androidtv',
'content-type': 'application/json', 'content-type': 'application/json',
}) }))
if not traverse_obj(response, ('success', {dict})): if not traverse_obj(response, ('success', {dict})):
raise ExtractorError('API call was unsuccessful') raise ExtractorError('API call was unsuccessful')
@ -61,21 +78,22 @@ def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None):
'filters': f'content_type={content_type}', 'filters': f'content_type={content_type}',
'client_capabilities': json.dumps({ 'client_capabilities': json.dumps({
'package': ['dash', 'hls'], 'package': ['dash', 'hls'],
'container': ['fmp4br', 'fmp4'], 'container': ['fmp4', 'fmp4br', 'ts'],
'ads': ['non_ssai', 'ssai'], 'ads': ['non_ssai', 'ssai'],
'audio_channel': ['atmos', 'dolby51', 'stereo'], 'audio_channel': ['stereo', 'dolby51', 'atmos'],
'encryption': ['plain', 'widevine'], # wv only so we can raise appropriate error 'encryption': ['plain', 'widevine'], # wv only so we can raise appropriate error
'video_codec': ['h265', 'h264'], 'video_codec': ['h264', 'h265'],
'ladder': ['tv', 'full'], 'video_codec_non_secure': ['h264', 'h265', 'vp9'],
'resolution': ['4k', 'hd'], 'ladder': ['phone', 'tv', 'full'],
'true_resolution': ['4k', 'hd'], 'resolution': ['hd', '4k'],
'dynamic_range': ['hdr', 'sdr'], 'true_resolution': ['hd', '4k'],
'dynamic_range': ['sdr', 'hdr'],
}, separators=(',', ':')), }, separators=(',', ':')),
'drm_parameters': json.dumps({ 'drm_parameters': json.dumps({
'widevine_security_level': ['SW_SECURE_DECODE', 'SW_SECURE_CRYPTO'], 'widevine_security_level': ['SW_SECURE_DECODE', 'SW_SECURE_CRYPTO'],
'hdcp_version': ['HDCP_V2_2', 'HDCP_V2_1', 'HDCP_V2', 'HDCP_V1'], 'hdcp_version': ['HDCP_V2_2', 'HDCP_V2_1', 'HDCP_V2', 'HDCP_V1'],
}, separators=(',', ':')), }, separators=(',', ':')),
}, st=st, cookies=cookies) }, cookies=cookies, st=st)
@staticmethod @staticmethod
def _parse_metadata_v1(video_data): def _parse_metadata_v1(video_data):
@ -274,6 +292,8 @@ def _real_extract(self, url):
video_id, video_type = self._match_valid_url(url).group('id', 'type') video_id, video_type = self._match_valid_url(url).group('id', 'type')
video_type = self._TYPE[video_type] video_type = self._TYPE[video_type]
cookies = self._get_cookies(url) # Cookies before any request cookies = self._get_cookies(url) # Cookies before any request
if not cookies or not cookies.get(self._TOKEN_NAME):
self.raise_login_required()
video_data = traverse_obj( video_data = traverse_obj(
self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query={ self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query={
@ -292,7 +312,7 @@ def _real_extract(self, url):
# See https://github.com/yt-dlp/yt-dlp/issues/396 # See https://github.com/yt-dlp/yt-dlp/issues/396
st = self._request_webpage( st = self._request_webpage(
f'{self._BASE_URL}/in', video_id, 'Fetching server time').get_header('x-origin-date') f'{self._BASE_URL}/in', video_id, 'Fetching server time').get_header('x-origin-date')
watch = self._call_api_v2('pages/watch', video_id, content_type, cookies=cookies, st=st) watch = self._call_api_v2('pages/watch', video_id, content_type, cookies, st)
player_config = traverse_obj(watch, ( player_config = traverse_obj(watch, (
'page', 'spaces', 'player', 'widget_wrappers', lambda _, v: v['template'] == 'PlayerWidget', 'page', 'spaces', 'player', 'widget_wrappers', lambda _, v: v['template'] == 'PlayerWidget',
'widget', 'data', 'player_config', {dict}, any, {require('player config')})) 'widget', 'data', 'player_config', {dict}, any, {require('player config')}))
@ -364,10 +384,13 @@ def _real_extract(self, url):
formats.extend(current_formats) formats.extend(current_formats)
subs = self._merge_subtitles(subs, current_subs) subs = self._merge_subtitles(subs, current_subs)
if not formats and geo_restricted: if not formats:
self.raise_geo_restricted(countries=['IN'], metadata_available=True) if geo_restricted:
elif not formats and has_drm: self.raise_geo_restricted(countries=['IN'], metadata_available=True)
self.report_drm(video_id) elif has_drm:
self.report_drm(video_id)
elif not self._has_active_subscription(cookies, st):
self.raise_no_formats('Your account does not have access to this content', expected=True)
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
for f in formats: for f in formats:
f.setdefault('http_headers', {}).update(headers) f.setdefault('http_headers', {}).update(headers)

View File

@ -11,7 +11,6 @@
class IndavideoEmbedIE(InfoExtractor): class IndavideoEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)' _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
# Some example URLs covered by generic extractor: # Some example URLs covered by generic extractor:
# https://indavideo.hu/video/Vicces_cica_1
# https://index.indavideo.hu/video/Hod_Nemetorszagban # https://index.indavideo.hu/video/Hod_Nemetorszagban
# https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko # https://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
# https://film.indavideo.hu/video/f_farkaslesen # https://film.indavideo.hu/video/f_farkaslesen
@ -25,14 +24,14 @@ class IndavideoEmbedIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Cicatánc', 'title': 'Cicatánc',
'description': '', 'description': '',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'cukiajanlo', 'uploader': 'cukiajanlo',
'uploader_id': '83729', 'uploader_id': '83729',
'thumbnail': r're:https?://pics\.indavideo\.hu/videos/.+\.jpg',
'timestamp': 1439193826, 'timestamp': 1439193826,
'upload_date': '20150810', 'upload_date': '20150810',
'duration': 72, 'duration': 72,
'age_limit': 0, 'age_limit': 0,
'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'], 'tags': 'count:5',
}, },
}, { }, {
'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1', 'url': 'https://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
@ -45,14 +44,30 @@ class IndavideoEmbedIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Vicces cica', 'title': 'Vicces cica',
'description': 'Játszik a tablettel. :D', 'description': 'Játszik a tablettel. :D',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://pics\.indavideo\.hu/videos/.+\.jpg',
'uploader': 'Jet_Pack', 'uploader': 'Jet_Pack',
'uploader_id': '491217', 'uploader_id': '491217',
'timestamp': 1390821212, 'timestamp': 1390821212,
'upload_date': '20140127', 'upload_date': '20140127',
'duration': 7, 'duration': 7,
'age_limit': 0, 'age_limit': 0,
'tags': ['cica', 'Jet_Pack'], 'tags': 'count:2',
},
}, {
'url': 'https://palyazat.indavideo.hu/video/RUSH_1',
'info_dict': {
'id': '3808180',
'ext': 'mp4',
'title': 'RUSH',
'age_limit': 0,
'description': '',
'duration': 650,
'tags': 'count:2',
'thumbnail': r're:https?://pics\.indavideo\.hu/videos/.+\.jpg',
'timestamp': 1729136266,
'upload_date': '20241017',
'uploader': '7summerfilms',
'uploader_id': '1628496',
}, },
}] }]

View File

@ -22,18 +22,17 @@ class JojIE(InfoExtractor):
'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932', 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
'ext': 'mp4', 'ext': 'mp4',
'title': 'NOVÉ BÝVANIE', 'title': 'NOVÉ BÝVANIE',
'thumbnail': r're:^https?://.*?$',
'duration': 3118, 'duration': 3118,
'thumbnail': r're:https?://img\.joj\.sk/.+',
}, },
}, { }, {
'url': 'https://media.joj.sk/embed/CSM0Na0l0p1', 'url': 'https://media.joj.sk/embed/CSM0Na0l0p1',
'info_dict': { 'info_dict': {
'id': 'CSM0Na0l0p1', 'id': 'CSM0Na0l0p1',
'ext': 'mp4', 'ext': 'mp4',
'height': 576,
'title': 'Extrémne rodiny 2 - POKRAČOVANIE (2012/04/09 21:30:00)', 'title': 'Extrémne rodiny 2 - POKRAČOVANIE (2012/04/09 21:30:00)',
'duration': 3937, 'duration': 3937,
'thumbnail': r're:^https?://.*?$', 'thumbnail': r're:https?://img\.joj\.sk/.+',
}, },
}, { }, {
'url': 'https://media.joj.sk/embed/9i1cxv', 'url': 'https://media.joj.sk/embed/9i1cxv',
@ -45,6 +44,15 @@ class JojIE(InfoExtractor):
'url': 'joj:9i1cxv', 'url': 'joj:9i1cxv',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
# FIXME: Embed detection
'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
'info_dict': {
'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
'title': 'Slovenskom sa prehnala vlna silných búrok',
},
'playlist_mincount': 5,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

View File

@ -1,112 +0,0 @@
import datetime as dt
import urllib.parse
from .common import InfoExtractor
from ..utils import (
clean_html,
datetime_from_str,
unified_timestamp,
urljoin,
)
class JoqrAgIE(InfoExtractor):
    # Extractor for the single live stream identified as 'live'; every URL
    # matched by _VALID_URL resolves to that same stream.
    IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)'
    _VALID_URL = [
        r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php',
        r'https?://(?:www\.)?joqr\.co\.jp/ag/',
        r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])',
    ]
    _TESTS = [{
        'url': 'https://www.uniqueradio.jp/agplayer5/player.php',
        'info_dict': {
            'id': 'live',
            'title': str,
            'channel': '超!A&G+',
            'description': str,
            'live_status': 'is_live',
            'release_timestamp': int,
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php',
        'only_matching': True,
    }, {
        'url': 'https://www.joqr.co.jp/ag/article/103760/',
        'only_matching': True,
    }, {
        'url': 'http://www.joqr.co.jp/qr/agdailyprogram/',
        'only_matching': True,
    }, {
        'url': 'http://www.joqr.co.jp/qr/agregularprogram/',
        'only_matching': True,
    }]

    def _extract_metadata(self, variable, html):
        """Return the cleaned value of the quoted JS assignment ``var <variable> = '...'``.

        The captured value is URL-unquoted (``+`` becomes a space) and passed
        through ``clean_html``. Returns None when the variable is absent or
        the cleaned value is empty.
        """
        quoted_value = self._search_regex(
            rf'var\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
            html, 'metadata', group='value', default='')
        unquoted_value = urllib.parse.unquote_plus(quoted_value)
        return clean_html(unquoted_value) or None

    def _extract_start_timestamp(self, video_id, is_live):
        """Look up a programme start timestamp from the daily programme list.

        Returns None when no start time can be found for today. When
        ``is_live`` is true and today's start time is not yet in the past,
        yesterday's list is consulted instead.
        """

        def lookup_start(day_expr):
            # Shift by nine hours, matching the +09:00 offset applied to the
            # timestamp below (assumes datetime_from_str yields UTC - TODO confirm)
            shifted_day = datetime_from_str(day_expr) + dt.timedelta(hours=9)
            date = shifted_day.strftime('%Y%m%d')
            # Non-fatal download: a failed request is searched as an empty page
            program_list = self._download_webpage(
                f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id,
                note=f'Downloading program list of {date}', fatal=False,
                errnote=f'Failed to download program list of {date}')
            start_time = self._search_regex(
                r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+\s*(\d{1,2}:\d{1,2})',
                program_list or '', 'start time', default=None)
            if not start_time:
                return None
            return unified_timestamp(f'{shifted_day.strftime("%Y/%m/%d")} {start_time} +09:00')

        todays_start = lookup_start('today')
        if not todays_start:
            return None

        # A live stream cannot have started in the future, so fall back to
        # the previous day's list in that case
        if is_live and todays_start >= datetime_from_str('now').timestamp():
            return lookup_start('yesterday')
        return todays_start

    def _real_extract(self, url):
        """Build the info dict for the live stream, or report that it is off air."""
        video_id = 'live'

        metadata = self._download_webpage(
            'https://www.uniqueradio.jp/aandg', video_id,
            note='Downloading metadata', errnote='Failed to download metadata')
        title = self._extract_metadata('Program_name', metadata)

        # A missing title or the '放送休止' placeholder is treated as "not live"
        on_air = bool(title) and title != '放送休止'
        if on_air:
            player_page = self._download_webpage(
                'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id,
                note='Downloading player data', errnote='Failed to download player data')
            m3u8_path = self._search_regex(
                r'<source\s[^>]*\bsrc="([^"]+)"', player_page, 'm3u8 url')
            formats = self._extract_m3u8_formats(
                urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id)
            live_status = 'is_live'
            release_timestamp = self._extract_start_timestamp(video_id, True)
        else:
            formats = []
            live_status = 'is_upcoming'
            release_timestamp = self._extract_start_timestamp(video_id, False)
            msg = 'This stream is not currently live'
            if release_timestamp:
                starts_at = dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S')
                msg += ' and will start at ' + starts_at
            self.raise_no_formats(msg, expected=True)

        return {
            'id': video_id,
            'title': title,
            'channel': '超!A&G+',
            'description': self._extract_metadata('Program_text', metadata),
            'formats': formats,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
        }

View File

@ -8,7 +8,6 @@ class JWPlatformIE(InfoExtractor):
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})' _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{ _TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js', 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': '3aa16e4f6860e6e78b7df5829519aed3',
'info_dict': { 'info_dict': {
'id': 'nPripu9l', 'id': 'nPripu9l',
'ext': 'mp4', 'ext': 'mp4',
@ -17,13 +16,12 @@ class JWPlatformIE(InfoExtractor):
'upload_date': '20081127', 'upload_date': '20081127',
'timestamp': 1227796140, 'timestamp': 1227796140,
'duration': 32.0, 'duration': 32.0,
'thumbnail': 'https://cdn.jwplayer.com/v2/media/nPripu9l/poster.jpg?width=720', 'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+',
}, },
}, { }, {
'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js', 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
# JWPlatform iframe # JWPlatform iframe
'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved', 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
@ -33,10 +31,11 @@ class JWPlatformIE(InfoExtractor):
'upload_date': '20160719', 'upload_date': '20160719',
'timestamp': 1468923808, 'timestamp': 1468923808,
'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4', 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720', 'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+',
'description': '', 'description': '',
'duration': 294.0, 'duration': 294.0,
}, },
'skip': 'Site no longer embeds JWPlatform',
}, { }, {
# Player url not surrounded by quotes # Player url not surrounded by quotes
'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/school-trip', 'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/school-trip',
@ -45,12 +44,12 @@ class JWPlatformIE(InfoExtractor):
'title': 'Klassenfahrt', 'title': 'Klassenfahrt',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20230109', 'upload_date': '20230109',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/jUxh5uin/poster.jpg?width=720', 'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+',
'timestamp': 1673270298, 'timestamp': 1673270298,
'description': '', 'description': '',
'duration': 5193.0, 'duration': 5193.0,
}, },
'params': {'allowed_extractors': ['generic', 'jwplatform']}, 'skip': 'Site no longer embeds JWPlatform',
}, { }, {
# iframe src attribute includes backslash before URL string # iframe src attribute includes backslash before URL string
'url': 'https://www.elespectador.com/colombia/video-asi-se-evito-la-fuga-de-john-poulos-presunto-feminicida-de-valentina-trespalacios-explicacion', 'url': 'https://www.elespectador.com/colombia/video-asi-se-evito-la-fuga-de-john-poulos-presunto-feminicida-de-valentina-trespalacios-explicacion',
@ -59,11 +58,24 @@ class JWPlatformIE(InfoExtractor):
'title': 'Así se evitó la fuga de John Poulos, presunto feminicida de Valentina Trespalacios', 'title': 'Así se evitó la fuga de John Poulos, presunto feminicida de Valentina Trespalacios',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20230127', 'upload_date': '20230127',
'thumbnail': 'https://cdn.jwplayer.com/v2/media/QD3gsexj/poster.jpg?width=720', 'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+',
'timestamp': 1674862986, 'timestamp': 1674862986,
'description': 'md5:128fd74591c4e1fc2da598c5cb6f5ce4', 'description': 'md5:128fd74591c4e1fc2da598c5cb6f5ce4',
'duration': 263.0, 'duration': 263.0,
}, },
}, {
'url': 'https://www.skimag.com/video/ski-people-1980',
'info_dict': {
'id': 'YTmgRiNU',
'ext': 'mp4',
'title': 'Ski People (1980)',
'channel': 'snow',
'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
'duration': 5688.0,
'thumbnail': r're:https?://cdn\.jwplayer\.com/v2/media/.+',
'timestamp': 1610407738,
'upload_date': '20210111',
},
}] }]
@classmethod @classmethod

View File

@ -41,149 +41,188 @@ class KalturaIE(InfoExtractor):
2: 'ttml', 2: 'ttml',
3: 'vtt', 3: 'vtt',
} }
_TESTS = [ _TESTS = [{
{ 'url': 'kaltura:269692:1_1jc2y3e4',
'url': 'kaltura:269692:1_1jc2y3e4', 'md5': '3adcbdb3dcc02d647539e53f284ba171',
'md5': '3adcbdb3dcc02d647539e53f284ba171', 'info_dict': {
'info_dict': { 'id': '1_1jc2y3e4',
'id': '1_1jc2y3e4', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Straight from the Heart',
'title': 'Straight from the Heart', 'upload_date': '20131219',
'upload_date': '20131219', 'uploader_id': 'mlundberg@wolfgangsvault.com',
'uploader_id': 'mlundberg@wolfgangsvault.com', 'description': 'The Allman Brothers Band, 12/16/1981',
'description': 'The Allman Brothers Band, 12/16/1981', 'thumbnail': r're:https?://.+/thumbnail/.+',
'thumbnail': 're:^https?://.*/thumbnail/.*', 'timestamp': int,
'timestamp': int, },
'skip': 'The access to this service is forbidden since the specified partner is blocked',
}, {
'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
'only_matching': True,
}, {
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
'only_matching': True,
}, {
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
'only_matching': True,
}, {
# video with subtitles
'url': 'kaltura:111032:1_cw786r8q',
'only_matching': True,
}, {
# video with ttml subtitles (no fileExt)
'url': 'kaltura:1926081:0_l5ye1133',
'info_dict': {
'id': '0_l5ye1133',
'ext': 'mp4',
'title': 'What Can You Do With Python?',
'upload_date': '20160221',
'uploader_id': 'stork',
'thumbnail': r're:https?://.+/thumbnail/.+',
'timestamp': int,
'subtitles': {
'en': [{
'ext': 'ttml',
}],
}, },
'skip': 'The access to this service is forbidden since the specified partner is blocked',
}, },
{ 'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4', 'params': {'skip_download': True},
'only_matching': True, }, {
'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
'only_matching': True,
}, {
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
'only_matching': True,
}, {
# unavailable source format
'url': 'kaltura:513551:1_66x4rg7o',
'only_matching': True,
}, {
# html5lib URL using kwidget player
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.46/mwEmbedFrame.php/p/691292/uiconf_id/20499062/entry_id/0_c076mna6?wid=_691292&iframeembed=true&playerId=kaltura_player_1420508608&entry_id=0_c076mna6&flashvars%5BakamaiHD.loadingPolicy%5D=preInitialize&flashvars%5BakamaiHD.asyncInit%5D=true&flashvars%5BstreamerType%5D=hdnetwork',
'info_dict': {
'id': '0_c076mna6',
'ext': 'mp4',
'title': 'md5:4883e7acbcbf42583a2dddc97dee4855',
'duration': 3608,
'uploader_id': 'commons@swinburne.edu.au',
'timestamp': 1408086874,
'view_count': int,
'upload_date': '20140815',
'thumbnail': r're:https?://cfvod\.kaltura\.com/.+',
}, },
{ }, {
'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3', # html5lib playlist URL using kwidget player
'only_matching': True, 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.89/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_4j3m32cv&iframeembed=true&playerId=kaltura_player_&flashvars[playlistAPI.kpl0Id]=1_jovey5nu&flashvars[ks]=&&flashvars[imageDefaultDuration]=30&flashvars[localizationCode]=en&flashvars[leadWithHTML5]=true&flashvars[forceMobileHTML5]=true&flashvars[nextPrevBtn.plugin]=true&flashvars[hotspots.plugin]=true&flashvars[sideBarContainer.plugin]=true&flashvars[sideBarContainer.position]=left&flashvars[sideBarContainer.clickToClose]=true&flashvars[chapters.plugin]=true&flashvars[chapters.layout]=vertical&flashvars[chapters.thumbnailRotator]=false&flashvars[streamSelector.plugin]=true&flashvars[EmbedPlayer.SpinnerTarget]=videoHolder&flashvars[dualScreen.plugin]=true&flashvars[playlistAPI.playlistUrl]=https://canvasgatechtest.kaf.kaltura.com/playlist/details/{playlistAPI.kpl0Id}/categoryid/126428551',
'info_dict': {
'id': '1_jovey5nu',
'title': '00-00 Introduction',
}, },
{ 'playlist': [
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342', {
'only_matching': True, 'info_dict': {
}, 'id': '1_b1y5hlvx',
{ 'ext': 'mp4',
# video with subtitles 'title': 'CS7646_00-00 Introductio_Introduction',
'url': 'kaltura:111032:1_cw786r8q', 'duration': 91,
'only_matching': True, 'thumbnail': r're:https?://cfvod\.kaltura\.com/.+',
}, 'view_count': int,
{ 'timestamp': 1533154447,
# video with ttml subtitles (no fileExt) 'upload_date': '20180801',
'url': 'kaltura:1926081:0_l5ye1133', 'uploader_id': 'djoyner3',
'info_dict': { },
'id': '0_l5ye1133', }, {
'ext': 'mp4', 'info_dict': {
'title': 'What Can You Do With Python?', 'id': '1_jfb7mdpn',
'upload_date': '20160221', 'ext': 'mp4',
'uploader_id': 'stork', 'title': 'CS7646_00-00 Introductio_Three parts to the course',
'thumbnail': 're:^https?://.*/thumbnail/.*', 'duration': 63,
'timestamp': int, 'thumbnail': r're:https?://cfvod\.kaltura\.com/.+',
'subtitles': { 'view_count': int,
'en': [{ 'timestamp': 1533154489,
'ext': 'ttml', 'upload_date': '20180801',
}], 'uploader_id': 'djoyner3',
},
}, {
'info_dict': {
'id': '1_8xflxdp7',
'ext': 'mp4',
'title': 'CS7646_00-00 Introductio_Textbooks',
'duration': 37,
'thumbnail': r're:https?://cfvod\.kaltura\.com/.+',
'view_count': int,
'timestamp': 1533154512,
'upload_date': '20180801',
'uploader_id': 'djoyner3',
},
}, {
'info_dict': {
'id': '1_3hqew8kn',
'ext': 'mp4',
'title': 'CS7646_00-00 Introductio_Prerequisites',
'duration': 49,
'thumbnail': r're:https?://cfvod\.kaltura\.com/.+',
'view_count': int,
'timestamp': 1533154536,
'upload_date': '20180801',
'uploader_id': 'djoyner3',
}, },
}, },
'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/', ],
'params': { }]
'skip_download': True, _WEBPAGE_TESTS = [{
}, 'url': 'https://www.cornell.edu/VIDEO/nima-arkani-hamed-standard-models-of-particle-physics',
'info_dict': {
'id': '1_sgtvehim',
'ext': 'mp4',
'title': 'Our "Standard Models" of particle physics and cosmology',
'duration': 5420,
'thumbnail': r're:https?://cdnsecakmi\.kaltura\.com/.+',
'timestamp': 1321158993,
'upload_date': '20111113',
'uploader_id': 'kps1',
'view_count': int,
}, },
{ }, {
'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
'only_matching': True, 'info_dict': {
'id': '0_utuok90b',
'ext': 'mp4',
'title': '06_matthew_brender_raj_dutt',
'duration': 331,
'thumbnail': r're:https?://cfvod\.kaltura\.com/.+',
'timestamp': 1466638791,
'upload_date': '20160622',
'uploader_id': '',
'view_count': int,
}, },
{ }, {
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto', 'url': 'https://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
'only_matching': True, 'info_dict': {
'id': '0_izeg5utt',
'ext': 'mp4',
'title': '35871',
'duration': 3403,
'thumbnail': r're:https?://cfvod\.kaltura\.com/.+',
'timestamp': 1355743100,
'upload_date': '20121217',
'uploader_id': 'cplapp@learn360.com',
'view_count': int,
}, },
{ }, {
# unavailable source format 'url': 'https://www.cns.nyu.edu/~eero/math-tools17/Videos/lecture-05sep2017.html',
'url': 'kaltura:513551:1_66x4rg7o', 'info_dict': {
'only_matching': True, 'id': '1_9gzouybz',
'ext': 'mp4',
'title': 'lecture-05sep2017',
'duration': 7219,
'thumbnail': r're:https?://cfvod\.kaltura\.com/.+',
'timestamp': 1505340777,
'upload_date': '20170913',
'uploader_id': 'eps2',
'view_count': int,
}, },
{ }]
# html5lib URL using kwidget player
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.46/mwEmbedFrame.php/p/691292/uiconf_id/20499062/entry_id/0_c076mna6?wid=_691292&iframeembed=true&playerId=kaltura_player_1420508608&entry_id=0_c076mna6&flashvars%5BakamaiHD.loadingPolicy%5D=preInitialize&flashvars%5BakamaiHD.asyncInit%5D=true&flashvars%5BstreamerType%5D=hdnetwork',
'info_dict': {
'id': '0_c076mna6',
'ext': 'mp4',
'title': 'md5:4883e7acbcbf42583a2dddc97dee4855',
'duration': 3608,
'uploader_id': 'commons@swinburne.edu.au',
'timestamp': 1408086874,
'view_count': int,
'upload_date': '20140815',
'thumbnail': 'http://cfvod.kaltura.com/p/691292/sp/69129200/thumbnail/entry_id/0_c076mna6/version/100022',
},
},
{
# html5lib playlist URL using kwidget player
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.89/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_4j3m32cv&iframeembed=true&playerId=kaltura_player_&flashvars[playlistAPI.kpl0Id]=1_jovey5nu&flashvars[ks]=&&flashvars[imageDefaultDuration]=30&flashvars[localizationCode]=en&flashvars[leadWithHTML5]=true&flashvars[forceMobileHTML5]=true&flashvars[nextPrevBtn.plugin]=true&flashvars[hotspots.plugin]=true&flashvars[sideBarContainer.plugin]=true&flashvars[sideBarContainer.position]=left&flashvars[sideBarContainer.clickToClose]=true&flashvars[chapters.plugin]=true&flashvars[chapters.layout]=vertical&flashvars[chapters.thumbnailRotator]=false&flashvars[streamSelector.plugin]=true&flashvars[EmbedPlayer.SpinnerTarget]=videoHolder&flashvars[dualScreen.plugin]=true&flashvars[playlistAPI.playlistUrl]=https://canvasgatechtest.kaf.kaltura.com/playlist/details/{playlistAPI.kpl0Id}/categoryid/126428551',
'info_dict': {
'id': '1_jovey5nu',
'title': '00-00 Introduction',
},
'playlist': [
{
'info_dict': {
'id': '1_b1y5hlvx',
'ext': 'mp4',
'title': 'CS7646_00-00 Introductio_Introduction',
'duration': 91,
'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_b1y5hlvx/version/100001',
'view_count': int,
'timestamp': 1533154447,
'upload_date': '20180801',
'uploader_id': 'djoyner3',
},
}, {
'info_dict': {
'id': '1_jfb7mdpn',
'ext': 'mp4',
'title': 'CS7646_00-00 Introductio_Three parts to the course',
'duration': 63,
'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_jfb7mdpn/version/100001',
'view_count': int,
'timestamp': 1533154489,
'upload_date': '20180801',
'uploader_id': 'djoyner3',
},
}, {
'info_dict': {
'id': '1_8xflxdp7',
'ext': 'mp4',
'title': 'CS7646_00-00 Introductio_Textbooks',
'duration': 37,
'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_8xflxdp7/version/100001',
'view_count': int,
'timestamp': 1533154512,
'upload_date': '20180801',
'uploader_id': 'djoyner3',
},
}, {
'info_dict': {
'id': '1_3hqew8kn',
'ext': 'mp4',
'title': 'CS7646_00-00 Introductio_Prerequisites',
'duration': 49,
'thumbnail': 'http://cfvod.kaltura.com/p/2019031/sp/201903100/thumbnail/entry_id/1_3hqew8kn/version/100001',
'view_count': int,
'timestamp': 1533154536,
'upload_date': '20180801',
'uploader_id': 'djoyner3',
},
},
],
},
]
@classmethod @classmethod
def _extract_embed_urls(cls, url, webpage): def _extract_embed_urls(cls, url, webpage):

View File

@ -89,6 +89,15 @@ class KinjaEmbedIE(InfoExtractor):
'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE', 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
'info_dict': {
'id': '106351',
'ext': 'mp4',
'title': 'Dont Understand Bitcoin? This Man Will Mumble An Explanation At You',
},
'skip': 'Invalid URL',
}]
_JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform') _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
_PROVIDER_MAP = { _PROVIDER_MAP = {
'fb': ('facebook.com/video.php?v=', 'Facebook'), 'fb': ('facebook.com/video.php?v=', 'Facebook'),

View File

@ -18,12 +18,10 @@ class LibsynIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '6385796', 'id': '6385796',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Champion Minded - Developing a Growth Mindset', 'title': 'The Allistair McCaw Podcast - Developing a Growth Mindset',
# description fetched using another request: 'duration': 834.0,
# http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796 'thumbnail': r're:https?://assets\.libsyn\.com/.+',
# 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
'upload_date': '20180320', 'upload_date': '20180320',
'thumbnail': 're:^https?://.*',
}, },
}, { }, {
'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/', 'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
@ -32,8 +30,32 @@ class LibsynIE(InfoExtractor):
'id': '3727166', 'id': '3727166',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career', 'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
'thumbnail': r're:https?://assets\.libsyn\.com/.+',
'upload_date': '20150818', 'upload_date': '20150818',
'thumbnail': 're:^https?://.*', },
'skip': 'Invalid URL',
}]
_WEBPAGE_TESTS = [{
'url': 'https://html5-player.libsyn.com/',
'md5': '50cff329596b8f674d4449ed077ef2f9',
'info_dict': {
'id': '2378831',
'ext': 'mp3',
'title': 'md5:54108b15f98e1b4056612c10b50106b2',
'duration': 3561.0,
'thumbnail': r're:https?://assets\.libsyn\.com/.+',
'upload_date': '20130630',
},
}, {
'url': 'https://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
'md5': '23576952577f9604520a730d90371761',
'info_dict': {
'id': '3793998',
'ext': 'mp3',
'title': 'Underground Wellness Radio - Jack Tips: 5 Steps to Permanent Gut Healing',
'duration': 3989.0,
'thumbnail': r're:https?://assets\.libsyn\.com/.+',
'upload_date': '20141126',
}, },
}] }]

View File

@ -1,358 +0,0 @@
import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
float_or_none,
int_or_none,
smuggle_url,
try_get,
unsmuggle_url,
)
class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Collect Limelight player embeds found in ``webpage``.

    Three embed styles are recognised: ``LimelightPlayer.doLoad*()`` calls,
    Flash ``<object>`` players carrying the ID in ``flashVars``, and
    ``LimelightPlayerUtil.embed({...})`` calls.

    @param url      URL of the page that contains the embeds
    @param webpage  HTML of that page
    @returns        list of URL results pointing at ``limelight:`` URLs, each
                    smuggled with the embedding page as ``source_url``
    """
    # Player kind (as spelled in doLoad<Kind>) -> ``limelight:`` URL segment
    lm = {
        'Media': 'media',
        'Channel': 'channel',
        'ChannelList': 'channel_list',
    }

    def smuggle(embed_url):
        # The parameter must not be named ``url``: shadowing the page URL made
        # ``source_url`` equal to the limelight: URL itself
        return smuggle_url(embed_url, {'source_url': url})

    entries = []
    for kind, video_id in re.findall(
            r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
            webpage):
        entries.append(cls.url_result(
            smuggle(f'limelight:{lm[kind]}:{video_id}'),
            f'Limelight{kind}', video_id))
    for mobj in re.finditer(
            # As per [1] class attribute should be exactly equal to
            # LimelightEmbeddedPlayerFlash but numerous examples seen
            # that don't exactly match it (e.g. [2]).
            # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
            # 2. http://www.sedona.com/FacilitatorTraining2017
            r'''(?sx)
                <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
                    <param[^>]+
                        name=(["\'])flashVars\2[^>]+
                        value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
            ''', webpage):
        video_id = mobj.group('id')
        # flashVars spell the kind in lowerCamelCase (media/channel/channelList).
        # Only the first letter is upper-cased: str.capitalize() would lowercase
        # the rest and turn 'channelList' into 'Channellist', which matches
        # neither the ``lm`` keys nor the naming used by the doLoad loop above
        kind = mobj.group('kind')
        kind = kind[0].upper() + kind[1:]
        entries.append(cls.url_result(
            smuggle(f'limelight:{lm[kind]}:{video_id}'),
            f'Limelight{kind}', video_id))
    # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
    for video_id in re.findall(
            r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
            webpage):
        entries.append(cls.url_result(
            smuggle(f'limelight:media:{video_id}'),
            LimelightMediaIE.ie_key(), video_id))
    return entries
def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
    """Download the PlaylistService JSON for ``item_id`` using ``method``.

    @param item_id  ID substituted into the PlaylistService URL
    @param method   PlaylistService method name, also used in the progress note
    @param fatal    forwarded to ``_download_json``
    @param referer  optional value for the ``Referer`` request header
    @returns        whatever ``_download_json`` returns
    @raises ExtractorError  with the service's ``contentAccessPermission``
                    value on an HTTP 403 that carries one; a geo-restriction
                    error is raised instead when that value is
                    ``CountryDisabled``
    """
    headers = {}
    if referer:
        headers['Referer'] = referer
    try:
        return self._download_json(
            self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
            item_id, f'Downloading PlaylistService {method} JSON',
            fatal=fatal, headers=headers)
    except ExtractorError as e:
        if isinstance(e.cause, HTTPError) and e.cause.status == 403:
            # Parse leniently: a 403 body that is not JSON or lacks the expected
            # keys must not hide the HTTP error behind a KeyError/TypeError
            detail = self._parse_json(
                e.cause.response.read().decode(), item_id, fatal=False)
            error = try_get(
                detail, lambda x: x['detail']['contentAccessPermission'], str)
            if error == 'CountryDisabled':
                self.raise_geo_restricted()
            if error:
                raise ExtractorError(error, expected=True)
        # No usable reason in the response: surface the original failure
        raise
def _extract(self, item_id, pc_method, mobile_method, referer=None):
pc = self._call_playlist_service(item_id, pc_method, referer=referer)
mobile = self._call_playlist_service(
item_id, mobile_method, fatal=False, referer=referer)
return pc, mobile
def _extract_info(self, pc, mobile, i, referer):
    """Build the info dict for item ``i`` of a desktop/mobile playlist pair.

    ``pc`` and ``mobile`` are the two PlaylistService responses. Item ``i``
    is read from ``pc['playlistItems']`` and ``mobile['mediaList']``; a
    lookup that fails or does not yield a dict is treated as an empty item.
    The media id and title are taken from the desktop item first and must be
    present in the mobile item otherwise (``KeyError`` if missing from both).
    ``referer`` is forwarded to the closed-captions request.

    Formats are collected from the desktop item's ``streams`` and the mobile
    item's ``mobileUrls``; each URL is used at most once. Streams flagged
    ``drmProtected`` and mobile URLs targeting ``Widevine`` or
    ``SmoothStreaming`` are skipped unless the ``allow_unplayable_formats``
    param is set.

    Returns a dict with ``id``, ``title``, ``description``, ``formats``,
    ``duration``, ``thumbnail`` and ``subtitles``.
    """
    get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
    pc_item = get_item(pc, 'playlistItems')
    mobile_item = get_item(mobile, 'mediaList')
    video_id = pc_item.get('mediaId') or mobile_item['mediaId']
    title = pc_item.get('title') or mobile_item['title']

    formats = []
    urls = []  # every URL already turned into (or probed as) a format
    # ``or []`` also covers a key that is present but set to null.
    for stream in pc_item.get('streams') or []:
        stream_url = stream.get('url')
        if not stream_url or stream_url in urls:
            continue
        if not self.get_param('allow_unplayable_formats') and stream.get('drmProtected'):
            continue
        urls.append(stream_url)
        ext = determine_ext(stream_url)
        if ext == 'f4m':
            formats.extend(self._extract_f4m_formats(
                stream_url, video_id, f4m_id='hds', fatal=False))
        else:
            fmt = {
                'url': stream_url,
                'abr': float_or_none(stream.get('audioBitRate')),
                'fps': float_or_none(stream.get('videoFrameRate')),
                'ext': ext,
            }
            width = int_or_none(stream.get('videoWidthInPixels'))
            height = int_or_none(stream.get('videoHeightInPixels'))
            vbr = float_or_none(stream.get('videoBitRate'))
            if width or height or vbr:
                fmt.update({
                    'width': width,
                    'height': height,
                    'vbr': vbr,
                })
            else:
                # No video dimensions or bitrate reported: mark as audio-only.
                fmt['vcodec'] = 'none'
            rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
            if rtmp:
                format_id = 'rtmp'
                # int_or_none() may return None (presumably for a value it
                # cannot convert); '%d' cannot format None, so only append
                # the suffix when a real integer came back.
                video_bitrate = int_or_none(stream.get('videoBitRate'))
                if stream.get('videoBitRate') and video_bitrate is not None:
                    format_id += '-%d' % video_bitrate
                http_format_id = format_id.replace('rtmp', 'http')

                # (substring of the RTMP host, HTTP host to try instead)
                CDN_HOSTS = (
                    ('delvenetworks.com', 'cpl.delvenetworks.com'),
                    ('video.llnw.net', 's2.content.video.llnw.net'),
                )
                for cdn_host, http_host in CDN_HOSTS:
                    if cdn_host not in rtmp.group('host').lower():
                        continue
                    # [4:] drops the 'mp3:'/'mp4:' prefix of the play path.
                    http_url = 'http://{}/{}'.format(http_host, rtmp.group('playpath')[4:])
                    urls.append(http_url)
                    if self._is_valid_url(http_url, video_id, http_format_id):
                        http_fmt = fmt.copy()
                        http_fmt.update({
                            'url': http_url,
                            'format_id': http_format_id,
                        })
                        formats.append(http_fmt)
                        break

                fmt.update({
                    'url': rtmp.group('url'),
                    'play_path': rtmp.group('playpath'),
                    'app': rtmp.group('app'),
                    'ext': 'flv',
                    'format_id': format_id,
                })
            formats.append(fmt)

    for mobile_url in mobile_item.get('mobileUrls') or []:
        media_url = mobile_url.get('mobileUrl')
        format_id = mobile_url.get('targetMediaPlatform')
        if not media_url or media_url in urls:
            continue
        if (format_id in ('Widevine', 'SmoothStreaming')
                and not self.get_param('allow_unplayable_formats', False)):
            continue
        urls.append(media_url)
        ext = determine_ext(media_url)
        if ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                media_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id=format_id, fatal=False))
        elif ext == 'f4m':
            # Must use this loop's ``media_url``; ``stream_url`` belongs to
            # the desktop loop above and is stale (or undefined) here.
            formats.extend(self._extract_f4m_formats(
                media_url, video_id, f4m_id=format_id, fatal=False))
        else:
            formats.append({
                'url': media_url,
                'format_id': format_id,
                'quality': -10,
                'ext': ext,
            })

    subtitles = {}
    # ``flags`` is absent whenever the mobile item is missing, so never
    # iterate over a bare ``.get('flags')``.
    for flag in mobile_item.get('flags') or []:
        if flag == 'ClosedCaptions':
            closed_captions = self._call_playlist_service(
                video_id, 'getClosedCaptionsDetailsByMediaId',
                False, referer) or []
            for cc in closed_captions:
                cc_url = cc.get('webvttFileUrl')
                if not cc_url:
                    continue
                lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
                subtitles.setdefault(lang, []).append({
                    'url': cc_url,
                })
            # One captions request is enough; stop scanning the flags.
            break

    # Prefer the desktop item's metadata, fall back to the mobile item's.
    get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)

    return {
        'id': video_id,
        'title': title,
        'description': get_meta('description'),
        'formats': formats,
        'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
        'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
        'subtitles': subtitles,
    }
class LimelightMediaIE(LimelightBaseIE):
    """Extractor for a single Limelight media item.

    Accepts the internal ``limelight:media:<id>`` form as well as
    ``link.videoplatform.limelight.com`` and ``assets.delvenetworks.com``
    URLs carrying a ``mediaId`` query parameter.
    """
    IE_NAME = 'limelight'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:media:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bmediaId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
        'info_dict': {
            'id': '3ffd040b522b4485b6d84effc750cd86',
            'ext': 'mp4',
            'title': 'HaP and the HB Prince Trailer',
            'description': 'md5:8005b944181778e313d95c1237ddb640',
            'thumbnail': r're:^https?://.*\.jpeg$',
            'duration': 144.23,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # video with subtitles
        'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
        'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
        'info_dict': {
            'id': 'a3e00274d4564ec4a9b29b9466432335',
            'ext': 'mp4',
            'title': '3Play Media Overview Video',
            'thumbnail': r're:^https?://.*\.jpeg$',
            'duration': 78.101,
            # TODO: extract all languages that were accessible via API
            # 'subtitles': 'mincount:9',
            'subtitles': 'mincount:1',
        },
    }, {
        'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
        'only_matching': True,
    }]
    _PLAYLIST_SERVICE_PATH = 'media'

    def _real_extract(self, url):
        """Resolve ``url`` to the info dict of the media item it names."""
        # Options attached by the embedding page travel with the URL.
        url, smuggled = unsmuggle_url(url, {})
        media_id = self._match_id(url)
        referer = smuggled.get('source_url')

        geo_options = {'countries': smuggled.get('geo_countries')}
        self._initialize_geo_bypass(geo_options)

        playlists = self._extract(
            media_id, 'getPlaylistByMediaId',
            'getMobilePlaylistByMediaId', referer)
        pc_playlist, mobile_playlist = playlists

        # A media playlist describes exactly one item, at index 0.
        return self._extract_info(pc_playlist, mobile_playlist, 0, referer)
class LimelightChannelIE(LimelightBaseIE):
    """Extractor for a Limelight channel, returned as a playlist.

    Accepts the internal ``limelight:channel:<id>`` form as well as
    ``link.videoplatform.limelight.com`` and ``assets.delvenetworks.com``
    URLs carrying a ``channelId`` query parameter.
    """
    IE_NAME = 'limelight:channel'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:channel:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bchannelId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
        'info_dict': {
            'id': 'ab6a524c379342f9b23642917020c082',
            'title': 'Javascript Sample Code',
            'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
        'only_matching': True,
    }]
    _PLAYLIST_SERVICE_PATH = 'channel'

    def _real_extract(self, url):
        """Return a playlist with one entry per item of the channel.

        The entry count is driven by the desktop playlist's
        ``playlistItems``. The playlist title comes from the desktop
        response and the description from the mobile response when that
        one is available.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        channel_id = self._match_id(url)
        source_url = smuggled_data.get('source_url')

        pc, mobile = self._extract(
            channel_id, 'getPlaylistByChannelId',
            'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
            source_url)

        entries = [
            self._extract_info(pc, mobile, i, source_url)
            for i in range(len(pc['playlistItems']))]

        # ``_extract`` requests the mobile playlist with fatal=False, so
        # ``mobile`` is not guaranteed to be a dict (presumably a falsy value
        # when that request fails). Guard the lookup so a missing mobile
        # playlist only costs the description instead of crashing.
        description = (mobile or {}).get('description')

        return self.playlist_result(
            entries, channel_id, pc.get('title'), description)
class LimelightChannelListIE(LimelightBaseIE):
    """Extractor for a Limelight channel list, returned as a playlist of channels.

    Accepts the internal ``limelight:channel_list:<id>`` form as well as
    ``link.videoplatform.limelight.com`` and ``assets.delvenetworks.com``
    URLs carrying a ``channelListId`` query parameter.
    """
    IE_NAME = 'limelight:channel_list'
    _VALID_URL = r'''(?x)
                        (?:
                            limelight:channel_list:|
                            https?://
                                (?:
                                    link\.videoplatform\.limelight\.com/media/|
                                    assets\.delvenetworks\.com/player/loader\.swf
                                )
                                \?.*?\bchannelListId=
                        )
                        (?P<id>[a-z0-9]{32})
                    '''
    _TESTS = [{
        'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
        'info_dict': {
            'id': '301b117890c4465c8179ede21fd92e2b',
            'title': 'Website - Hero Player',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
        'only_matching': True,
    }]
    _PLAYLIST_SERVICE_PATH = 'channel_list'

    def _real_extract(self, url):
        """Return a playlist whose entries point at each channel of the list."""
        list_id = self._match_id(url)
        listing = self._call_playlist_service(
            list_id, 'getMobileChannelListById')

        # Each channel is handed off as an internal ``limelight:channel:`` URL.
        entries = []
        for channel in listing['channelList']:
            channel_url = f'limelight:channel:{channel["id"]}'
            entries.append(self.url_result(channel_url, 'LimelightChannel'))

        return self.playlist_result(entries, list_id, listing['title'])

View File

@ -3,6 +3,7 @@
class LiveJournalIE(InfoExtractor): class LiveJournalIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)' _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',

View File

@ -134,7 +134,7 @@ class LRTRadioIE(LRTBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id, path = self._match_valid_url(url).group('id', 'path') video_id, path = self._match_valid_url(url).group('id', 'path')
media = self._download_json( media = self._download_json(
'https://www.lrt.lt/radioteka/api/media', video_id, 'https://www.lrt.lt/rest-api/media', video_id,
query={'url': f'/mediateka/irasas/{video_id}/{path}'}) query={'url': f'/mediateka/irasas/{video_id}/{path}'})
return { return {

View File

@ -16,91 +16,103 @@ class MainStreamingIE(InfoExtractor):
_EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?'] _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
IE_DESC = 'MainStreaming Player' IE_DESC = 'MainStreaming Player'
_TESTS = [ _TESTS = [{
{ # Live stream offline, has alternative content id
# Live stream offline, has alternative content id 'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/53EN6GxbWaJC',
'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/53EN6GxbWaJC', 'info_dict': {
'info_dict': { 'id': '53EN6GxbWaJC',
'id': '53EN6GxbWaJC', 'title': 'Diretta homepage 2021-12-31 12:00',
'title': 'Diretta homepage 2021-12-31 12:00', 'description': '',
'description': '', 'live_status': 'was_live',
'live_status': 'was_live', 'ext': 'mp4',
'ext': 'mp4', 'thumbnail': r're:https?://[\w-]+\.msvdn\.net/image/\w+/poster',
'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
},
'expected_warnings': [
'Ignoring alternative content ID: WDAF1KOWUpH3',
'MainStreaming said: Live event is OFFLINE',
],
'skip': 'live stream offline',
}, {
# playlist
'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3',
'info_dict': {
'id': 'WDAF1KOWUpH3',
'title': 'Playlist homepage',
},
'playlist_mincount': 2,
}, {
# livestream
'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw',
'info_dict': {
'id': 'tDoFkZD3T1Lw',
'title': r're:Class CNBC Live \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'live_status': 'is_live',
'ext': 'mp4',
'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
},
'skip': 'live stream',
}, {
'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false',
'info_dict': {
'id': 'EUlZfGWkGpOd',
'title': 'La Settimana ',
'description': '03 Ottobre ore 02:00',
'ext': 'mp4',
'live_status': 'not_live',
'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
'duration': 1512,
},
}, {
# video without webtools- prefix
'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445',
'info_dict': {
'id': 'MfuWmzL2lGkA',
'title': 'TG Mattina',
'description': '06 Ottobre ore 08:00',
'ext': 'mp4',
'live_status': 'not_live',
'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
'duration': 789.04,
},
}, {
# always-on livestream with DVR
'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy',
'info_dict': {
'id': 'HVvPMzy',
'title': r're:^Diretta LaC News24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'description': 'canale all news',
'live_status': 'is_live',
'ext': 'mp4',
'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
},
'params': {
'skip_download': True,
},
}, {
# no host
'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA',
'only_matching': True,
}, {
'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw',
'only_matching': True,
}, {
'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#',
'only_matching': True,
}, },
] 'expected_warnings': [
'Ignoring alternative content ID: WDAF1KOWUpH3',
'MainStreaming said: Live event is OFFLINE',
],
'skip': 'live stream offline',
}, {
# playlist
'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3',
'info_dict': {
'id': 'WDAF1KOWUpH3',
'title': 'Playlist homepage',
},
'playlist_mincount': 2,
}, {
# livestream
'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw',
'info_dict': {
'id': 'tDoFkZD3T1Lw',
'title': str,
'live_status': 'is_live',
'ext': 'mp4',
'thumbnail': r're:https?://[\w-]+\.msvdn\.net/image/\w+/poster',
},
'skip': 'live stream',
}, {
'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false',
'info_dict': {
'id': 'EUlZfGWkGpOd',
'title': 'La Settimana ',
'description': '03 Ottobre ore 02:00',
'ext': 'mp4',
'live_status': 'not_live',
'thumbnail': r're:https?://[\w-]+\.msvdn\.net/image/\w+/poster',
'duration': 1512,
},
'skip': 'Invalid URL',
}, {
# video without webtools- prefix
'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445',
'info_dict': {
'id': 'MfuWmzL2lGkA',
'title': 'TG Mattina',
'description': '06 Ottobre ore 08:00',
'ext': 'mp4',
'live_status': 'not_live',
'thumbnail': r're:https?://[\w-]+\.msvdn\.net/image/\w+/poster',
'duration': 789.04,
},
'skip': 'Invalid URL',
}, {
# always-on livestream with DVR
'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy',
'info_dict': {
'id': 'HVvPMzy',
'title': str,
'description': 'canale all news',
'live_status': 'is_live',
'ext': 'mp4',
'thumbnail': r're:https?://[\w-]+\.msvdn\.net/image/\w+/poster',
},
'params': {'skip_download': 'm3u8'},
}, {
# no host
'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA',
'only_matching': True,
}, {
'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw',
'only_matching': True,
}, {
'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
# FIXME: Embed detection
'url': 'https://www.lacplay.it/video/in-evidenza_728/lac-storie-p-250-i-santi-pietro-e-paolo_77297/',
'info_dict': {
'id': 'u7kiX5DUaHYr',
'ext': 'mp4',
'title': 'I Santi Pietro e Paolo',
'description': 'md5:ff6be24916ba6b9ae990bf5f3df4911e',
'duration': 1700.0,
'thumbnail': r're:https?://.+',
'tags': '06/07/2025',
'live_status': 'not_live',
},
}]
def _playlist_entries(self, host, playlist_content): def _playlist_entries(self, host, playlist_content):
for entry in playlist_content: for entry in playlist_content:

View File

@ -42,6 +42,7 @@ class MedialaanIE(InfoExtractor):
'id': '193993', 'id': '193993',
'ext': 'mp4', 'ext': 'mp4',
'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?', 'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
'thumbnail': r're:https?://images\.mychannels\.video/imgix/.+',
'timestamp': 1611663540, 'timestamp': 1611663540,
'upload_date': '20210126', 'upload_date': '20210126',
'duration': 238, 'duration': 238,
@ -68,6 +69,19 @@ class MedialaanIE(InfoExtractor):
'url': 'https://embed.mychannels.video/embed/193993', 'url': 'https://embed.mychannels.video/embed/193993',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://www.demorgen.be/snelnieuws/tom-waes-promoot-alcoholtesten-op-werchter-ik-ben-de-laatste-persoon-die-met-de-vinger-moet-wijzen~b7457c0d/',
'info_dict': {
'id': '1576607',
'ext': 'mp4',
'title': 'Tom Waes blaastest',
'duration': 62,
'thumbnail': r're:https?://video-images\.persgroep\.be/aws_generated.+\.jpg',
'timestamp': 1751730795,
'upload_date': '20250705',
},
'params': {'extractor_args': {'generic': {'impersonate': ['chrome']}}},
}]
@classmethod @classmethod
def _extract_embed_urls(cls, url, webpage): def _extract_embed_urls(cls, url, webpage):

View File

@ -31,10 +31,9 @@ class MegaTVComIE(MegaTVComBaseIE):
IE_NAME = 'megatvcom' IE_NAME = 'megatvcom'
IE_DESC = 'megatv.com videos' IE_DESC = 'megatv.com videos'
_VALID_URL = r'https?://(?:www\.)?megatv\.com/(?:\d{4}/\d{2}/\d{2}|[^/]+/(?P<id>\d+))/(?P<slug>[^/]+)' _VALID_URL = r'https?://(?:www\.)?megatv\.com/(?:\d{4}/\d{2}/\d{2}|[^/]+/(?P<id>\d+))/(?P<slug>[^/]+)'
_TESTS = [{ _TESTS = [{
# FIXME: Unable to extract article id
'url': 'https://www.megatv.com/2021/10/23/egkainia-gia-ti-nea-skini-omega-tou-dimotikou-theatrou-peiraia/', 'url': 'https://www.megatv.com/2021/10/23/egkainia-gia-ti-nea-skini-omega-tou-dimotikou-theatrou-peiraia/',
'md5': '6546a1a37fff0dd51c9dce5f490b7d7d',
'info_dict': { 'info_dict': {
'id': '520979', 'id': '520979',
'ext': 'mp4', 'ext': 'mp4',
@ -43,20 +42,19 @@ class MegaTVComIE(MegaTVComBaseIE):
'timestamp': 1634975747, 'timestamp': 1634975747,
'upload_date': '20211023', 'upload_date': '20211023',
'display_id': 'egkainia-gia-ti-nea-skini-omega-tou-dimotikou-theatrou-peiraia', 'display_id': 'egkainia-gia-ti-nea-skini-omega-tou-dimotikou-theatrou-peiraia',
'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/10/ΠΕΙΡΑΙΑΣ-1024x450.jpg', 'thumbnail': r're:https?://www\.megatv\.com/wp-content/uploads/.+\.jpg',
}, },
}, { }, {
'url': 'https://www.megatv.com/tvshows/527800/epeisodio-65-12/', 'url': 'https://www.megatv.com/tvshows/527800/epeisodio-65-12/',
'md5': 'cba2085d45c1abeb8e7e9b7e1d6c0072',
'info_dict': { 'info_dict': {
'id': '527800', 'id': '527800',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:fc322cb51f682eecfe2f54cd5ab3a157', 'title': 'Η Γη της Ελιάς: Επεισόδιο 65 - A\' ΚΥΚΛΟΣ ',
'description': 'md5:b2b7ed3690a78f2a0156eb790fdc00df', 'description': 'md5:b2b7ed3690a78f2a0156eb790fdc00df',
'timestamp': 1636048859, 'timestamp': 1636048859,
'upload_date': '20211104', 'upload_date': '20211104',
'display_id': 'epeisodio-65-12', 'display_id': 'epeisodio-65-12',
'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/11/16-1-1.jpg', 'thumbnail': r're:https?://www\.megatv\.com/wp-content/uploads/.+\.jpg',
}, },
}] }]
@ -104,8 +102,8 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
IE_DESC = 'megatv.com embedded videos' IE_DESC = 'megatv.com embedded videos'
_VALID_URL = r'(?:https?:)?//(?:www\.)?megatv\.com/embed/?\?p=(?P<id>\d+)' _VALID_URL = r'(?:https?:)?//(?:www\.)?megatv\.com/embed/?\?p=(?P<id>\d+)'
_EMBED_REGEX = [rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)'''] _EMBED_REGEX = [rf'''<iframe[^>]+?src=(?P<_q1>["'])(?P<url>{_VALID_URL})(?P=_q1)''']
_TESTS = [{ _TESTS = [{
# FIXME: Unable to extract article id
'url': 'https://www.megatv.com/embed/?p=2020520979', 'url': 'https://www.megatv.com/embed/?p=2020520979',
'md5': '6546a1a37fff0dd51c9dce5f490b7d7d', 'md5': '6546a1a37fff0dd51c9dce5f490b7d7d',
'info_dict': { 'info_dict': {
@ -119,6 +117,7 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/10/ΠΕΙΡΑΙΑΣ-1024x450.jpg', 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/10/ΠΕΙΡΑΙΑΣ-1024x450.jpg',
}, },
}, { }, {
# FIXME: Unable to extract article id
'url': 'https://www.megatv.com/embed/?p=2020534081', 'url': 'https://www.megatv.com/embed/?p=2020534081',
'md5': '6ac8b3ce4dc6120c802f780a1e6b3812', 'md5': '6ac8b3ce4dc6120c802f780a1e6b3812',
'info_dict': { 'info_dict': {
@ -132,6 +131,15 @@ class MegaTVComEmbedIE(MegaTVComBaseIE):
'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/11/Capture-266.jpg', 'thumbnail': 'https://www.megatv.com/wp-content/uploads/2021/11/Capture-266.jpg',
}, },
}] }]
_WEBPAGE_TESTS = [{
# FIXME: Unable to extract article id
'url': 'https://www.in.gr/2021/12/18/greece/apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize/',
'info_dict': {
'id': 'apokalypsi-mega-poios-parelave-tin-ereyna-tsiodra-ek-merous-tis-kyvernisis-o-prothypourgos-telika-gnorize',
'title': 'md5:5e569cf996ec111057c2764ec272848f',
},
'playlist_count': 2,
}]
def _match_canonical_url(self, webpage): def _match_canonical_url(self, webpage):
LINK_RE = r'''(?x) LINK_RE = r'''(?x)

View File

@ -18,7 +18,7 @@ class MirrativIE(MirrativBaseIE):
IE_NAME = 'mirrativ' IE_NAME = 'mirrativ'
_VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?mirrativ\.com/live/(?P<id>[^/?#&]+)'
TESTS = [{ _TESTS = [{
'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw', 'url': 'https://mirrativ.com/live/UQomuS7EMgHoxRHjEhNiHw',
'info_dict': { 'info_dict': {
'id': 'UQomuS7EMgHoxRHjEhNiHw', 'id': 'UQomuS7EMgHoxRHjEhNiHw',

View File

@ -79,7 +79,7 @@ class MiTeleIE(TelecincoBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_akamai_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
pre_player = self._search_json( pre_player = self._search_json(
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
webpage, 'Pre Player', display_id)['prePlayer'] webpage, 'Pre Player', display_id)['prePlayer']

134
yt_dlp/extractor/mixlr.py Normal file
View File

@ -0,0 +1,134 @@
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import int_or_none, parse_iso8601, url_or_none, urlhandle_detect_ext
from ..utils.traversal import traverse_obj
class MixlrIE(InfoExtractor):
    """Extractor for Mixlr event pages (``<user>.mixlr.com/events/<id>``)."""
    _VALID_URL = r'https?://(?:www\.)?(?P<username>[\w-]+)\.mixlr\.com/events/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://suncity-104-9fm.mixlr.com/events/4387115',
        'info_dict': {
            'id': '4387115',
            'ext': 'mp3',
            'title': r're:SUNCITY 104.9FM\'s live audio \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'uploader': 'suncity-104-9fm',
            'like_count': int,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/cd5b34d05fa2cee72d80477724a2f02e.png',
            'timestamp': 1751943773,
            'upload_date': '20250708',
            'release_timestamp': 1751943764,
            'release_date': '20250708',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://brcountdown.mixlr.com/events/4395480',
        'info_dict': {
            'id': '4395480',
            'ext': 'aac',
            'title': r're:Beats Revolution Countdown Episodio 461 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'md5:5cacd089723f7add3f266bd588315bb3',
            'uploader': 'brcountdown',
            'like_count': int,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/c48727a59f690b87a55d47d123ba0d6d.jpg',
            'timestamp': 1752354007,
            'upload_date': '20250712',
            'release_timestamp': 1752354000,
            'release_date': '20250712',
            'live_status': 'is_live',
        },
    }, {
        'url': 'https://www.brcountdown.mixlr.com/events/4395480',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the event from the Mixlr API and build its info dict.

        The progressive stream URL of the first ``included`` object is
        probed with a HEAD request and becomes the only format when the
        probe answers with status 200. If no format is found and the event
        has a ``starts_at`` value, a "will start at" no-formats error is
        reported.
        """
        username, event_id = self._match_valid_url(url).group('username', 'id')
        event = self._download_json(
            f'https://api.mixlr.com/v3/channels/{username}/events/{event_id}', event_id)

        stream_url = traverse_obj(
            event, ('included', 0, 'attributes', 'progressive_stream_url', {url_or_none}))

        # Probe the stream only when the API actually advertised one.
        probe = None
        if stream_url:
            probe = self._request_webpage(
                HEADRequest(stream_url), event_id, fatal=False, note='Checking stream')

        formats = []
        if probe and probe.status == 200:
            ext = urlhandle_detect_ext(probe)
            if ext == 'octet-stream':
                # Generic binary content type: fall back to mp3 and say so.
                self.report_warning(
                    'The server did not return a valid file extension for the stream URL. '
                    'Assuming an mp3 stream; postprocessing may fail if this is incorrect')
                ext = 'mp3'
            formats.append({
                'url': stream_url,
                'ext': ext,
                'vcodec': 'none',
            })

        starts_at = traverse_obj(
            event, ('data', 'attributes', 'starts_at', {str}))
        if starts_at and not formats:
            self.raise_no_formats(f'This event will start at {starts_at}', expected=True)

        # Metadata of the first ``included`` object.
        included_meta = traverse_obj(event, ('included', 0, 'attributes', {
            'title': ('title', {str}),
            'timestamp': ('started_at', {parse_iso8601}),
            'concurrent_view_count': ('concurrent_view_count', {int_or_none}),
            'like_count': ('heart_count', {int_or_none}),
            'is_live': ('live', {bool}),
        }))
        # Metadata of the event itself; merged last so it wins on shared keys.
        event_meta = traverse_obj(event, ('data', 'attributes', {
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('started_at', {parse_iso8601}),
            'concurrent_view_count': ('concurrent_view_count', {int_or_none}),
            'like_count': ('heart_count', {int_or_none}),
            'thumbnail': ('artwork_url', {url_or_none}),
            'uploader_id': ('broadcaster_id', {str}),
        }))

        return {
            'id': event_id,
            'uploader': username,
            'formats': formats,
            'release_timestamp': parse_iso8601(starts_at),
            **included_meta,
            **event_meta,
        }
class MixlrRecoringIE(InfoExtractor):
    """Extractor for Mixlr recording pages (``<user>.mixlr.com/recordings/<id>``)."""
    _VALID_URL = r'https?://(?:www\.)?(?P<username>[\w-]+)\.mixlr\.com/recordings/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://biblewayng.mixlr.com/recordings/2375193',
        'info_dict': {
            'id': '2375193',
            'ext': 'mp3',
            'title': "God's Jewels and Their Resting Place Bro. Adeniji",
            'description': 'Preached February 21, 2024 in the evening',
            'uploader_id': '8659190',
            'duration': 10968,
            'thumbnail': r're:https://imagecdn\.mixlr\.com/cdn-cgi/image/[^/?#]+/ceca120ef707f642abeea6e29cd74238.jpg',
            'timestamp': 1708544542,
            'upload_date': '20240221',
        },
    }]

    def _real_extract(self, url):
        """Fetch the recording from the Mixlr API and map its attributes to an info dict."""
        username, recording_id = self._match_valid_url(url).group('username', 'id')
        api_url = f'https://api.mixlr.com/v3/channels/{username}/recordings/{recording_id}'
        recording = self._download_json(api_url, recording_id)

        # All fields, including the media URL, come from ``data.attributes``.
        attributes = traverse_obj(recording, ('data', 'attributes', {
            'ext': ('file_format', {str}),
            'url': ('url', {url_or_none}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'duration': ('duration', {int_or_none}),
            'thumbnail': ('artwork_url', {url_or_none}),
            'uploader_id': ('user_id', {str}),
        }))

        return {'id': recording_id, **attributes}

View File

@ -105,89 +105,85 @@ class MLBIE(MLBBaseIE):
r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)', r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
] ]
_TESTS = [ _TESTS = [{
{ 'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933', 'info_dict': {
'md5': '632358dacfceec06bad823b83d21df2d', 'id': '34698933',
'info_dict': { 'ext': 'mp4',
'id': '34698933', 'title': 'Ackley\'s spectacular catch',
'ext': 'mp4', 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
'title': "Ackley's spectacular catch", 'duration': 66,
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0', 'timestamp': 1405995000,
'duration': 66, 'upload_date': '20140722',
'timestamp': 1405995000, 'thumbnail': r're:https?://.+\.jpg',
'upload_date': '20140722',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, },
{ }, {
'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663', 'url': 'https://www.mlb.com/video/stanton-prepares-for-derby/c-34496663',
'md5': 'bf2619bf9cacc0a564fc35e6aeb9219f', 'info_dict': {
'info_dict': { 'id': '34496663',
'id': '34496663', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Stanton prepares for Derby',
'title': 'Stanton prepares for Derby', 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57', 'duration': 46,
'duration': 46, 'timestamp': 1405120200,
'timestamp': 1405120200, 'upload_date': '20140711',
'upload_date': '20140711', 'thumbnail': r're:https?://.+\.jpg',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, },
{ }, {
'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115', 'url': 'https://www.mlb.com/video/cespedes-repeats-as-derby-champ/c-34578115',
'md5': '99bb9176531adc600b90880fb8be9328', 'info_dict': {
'info_dict': { 'id': '34578115',
'id': '34578115', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Cespedes repeats as Derby champ',
'title': 'Cespedes repeats as Derby champ', 'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
'description': 'md5:08df253ce265d4cf6fb09f581fafad07', 'duration': 488,
'duration': 488, 'timestamp': 1405414336,
'timestamp': 1405414336, 'upload_date': '20140715',
'upload_date': '20140715', 'thumbnail': r're:https?://.+\.jpg',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, },
{ }, {
'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915', 'url': 'https://www.mlb.com/video/bautista-on-home-run-derby/c-34577915',
'md5': 'da8b57a12b060e7663ee1eebd6f330ec', 'info_dict': {
'info_dict': { 'id': '34577915',
'id': '34577915', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Bautista on Home Run Derby',
'title': 'Bautista on Home Run Derby', 'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
'description': 'md5:b80b34031143d0986dddc64a8839f0fb', 'duration': 52,
'duration': 52, 'timestamp': 1405405122,
'timestamp': 1405405122, 'upload_date': '20140715',
'upload_date': '20140715', 'thumbnail': r're:https?://.+\.jpg',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, },
{ }, {
'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694', 'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
'only_matching': True,
}, {
'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
'only_matching': True,
}, {
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
'only_matching': True,
}, {
'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
'only_matching': True,
}, {
# From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.mlbdailydish.com/2013/2/25/4028804/mlb-classic-video-vault-open-watch-embed-share',
'info_dict': {
'id': 'mlb-classic-video-vault-open-watch-embed-share',
'title': 'MLB Classic vault is open! Don\'t avert your eyes!',
'age_limit': 0,
'description': 'All the video needed to hold you over until real baseball starts next month.',
'thumbnail': r're:https?://cdn\.vox-cdn\.com/thumbor/.+\.jpg',
}, },
{ 'playlist_count': 3,
'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb', }]
'only_matching': True,
},
{
'url': 'http://mlb.mlb.com/shared/video/embed/embed.html?content_id=36599553',
'only_matching': True,
},
{
'url': 'http://mlb.mlb.com/es/video/play.jsp?content_id=36599553',
'only_matching': True,
},
{
'url': 'https://www.mlb.com/cardinals/video/piscottys-great-sliding-catch/c-51175783',
'only_matching': True,
},
{
# From http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
'only_matching': True,
},
]
_TIMESTAMP_KEY = 'date' _TIMESTAMP_KEY = 'date'
@staticmethod @staticmethod
@ -215,20 +211,19 @@ def _download_video_data(self, display_id):
class MLBVideoIE(MLBBaseIE): class MLBVideoIE(MLBBaseIE):
_VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
_TEST = { _TESTS = [{
'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933', 'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
'md5': '632358dacfceec06bad823b83d21df2d',
'info_dict': { 'info_dict': {
'id': 'c04a8863-f569-42e6-9f87-992393657614', 'id': 'c04a8863-f569-42e6-9f87-992393657614',
'ext': 'mp4', 'ext': 'mp4',
'title': "Ackley's spectacular catch", 'title': 'Ackley\'s spectacular catch',
'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0', 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
'duration': 66, 'duration': 66,
'timestamp': 1405995000, 'timestamp': 1405995000,
'upload_date': '20140722', 'upload_date': '20140722',
'thumbnail': r're:^https?://.+', 'thumbnail': r're:https?://.+',
}, },
} }]
_TIMESTAMP_KEY = 'timestamp' _TIMESTAMP_KEY = 'timestamp'
@classmethod @classmethod
@ -457,12 +452,9 @@ def _extract_formats_and_subtitles(self, broadcast, video_id):
self.report_warning(f'No formats available for {format_id} broadcast; skipping') self.report_warning(f'No formats available for {format_id} broadcast; skipping')
return [], {} return [], {}
cdn_headers = {'x-cdn-token': token}
fmts, subs = self._extract_m3u8_formats_and_subtitles( fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4', m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
m3u8_id=format_id, fatal=False, headers=cdn_headers)
for fmt in fmts: for fmt in fmts:
fmt['http_headers'] = cdn_headers
fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' ')) fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' '))
fmt.setdefault('language', language) fmt.setdefault('language', language)
if fmt.get('vcodec') == 'none' and fmt['language'] == 'en': if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':

View File

@ -1,4 +1,5 @@
import re import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -38,7 +39,7 @@ class N1InfoIIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:(?:\w+\.)?n1info\.\w+|nova\.rs)/(?:[^/?#]+/){1,2}(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
# YouTube embedded # YouTube embedded
'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/', 'url': 'https://sportklub.n1info.rs/tenis/us-open/glava-telo-igra-kako-je-novak-ispustio-istorijsku-sansu/',
'md5': '987ce6fd72acfecc453281e066b87973', 'md5': '987ce6fd72acfecc453281e066b87973',
'info_dict': { 'info_dict': {
'id': 'L5Hd4hQVUpk', 'id': 'L5Hd4hQVUpk',
@ -67,36 +68,24 @@ class N1InfoIIE(InfoExtractor):
'playable_in_embed': True, 'playable_in_embed': True,
'availability': 'public', 'availability': 'public',
'live_status': 'not_live', 'live_status': 'not_live',
'media_type': 'video',
}, },
}, { }, {
'url': 'https://rs.n1info.com/vesti/djilas-los-plan-za-metro-nece-resiti-nijedan-saobracajni-problem/', 'url': 'https://n1info.si/novice/svet/v-srbiji-samo-ta-konec-tedna-vec-kot-200-pozarov/',
'info_dict': { 'info_dict': {
'id': 'bgmetrosot2409zta20210924174316682-n1info-rs-worldwide', 'id': '2182656',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Đilas: Predlog izgradnje metroa besmislen; SNS odbacuje navode', 'title': 'V Srbiji samo ta konec tedna več kot 200 požarov',
'upload_date': '20210924', 'timestamp': 1753611983,
'timestamp': 1632481347, 'upload_date': '20250727',
'thumbnail': 'http://n1info.rs/wp-content/themes/ucnewsportal-n1/dist/assets/images/placeholder-image-video.jpg', 'thumbnail': 'https://n1info.si/media/images/2025/7/1753611048_Pozar.width-1200.webp',
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://n1info.si/novice/slovenija/zadnji-dnevi-na-kopaliscu-ilirija-ilirija-ni-umrla-ubili-so-jo/',
'info_dict': {
'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
'ext': 'mp4',
'title': 'Zadnji dnevi na kopališču Ilirija: “Ilirija ni umrla, ubili so jo”',
'timestamp': 1632567630,
'upload_date': '20210925',
'thumbnail': 'https://n1info.si/wp-content/uploads/2021/09/06/1630945843-tomaz3.png',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
# Reddit embedded # Reddit embedded
'url': 'https://ba.n1info.com/lifestyle/vucic-bolji-od-tita-ako-izgubi-ja-cu-da-crknem-jugoslavija-je-gotova/', 'url': 'https://nova.rs/vesti/drustvo/ako-vucic-izgubi-izbore-ja-cu-da-crknem-jugoslavija-je-gotova/',
'info_dict': { 'info_dict': {
'id': '2wmfee9eycp71', 'id': '2wmfee9eycp71',
'ext': 'mp4', 'ext': 'mp4',
@ -113,9 +102,6 @@ class N1InfoIIE(InfoExtractor):
'duration': 134, 'duration': 134,
'thumbnail': 'https://external-preview.redd.it/5nmmawSeGx60miQM3Iq-ueC9oyCLTLjjqX-qqY8uRsc.png?format=pjpg&auto=webp&s=2f973400b04d23f871b608b178e47fc01f9b8f1d', 'thumbnail': 'https://external-preview.redd.it/5nmmawSeGx60miQM3Iq-ueC9oyCLTLjjqX-qqY8uRsc.png?format=pjpg&auto=webp&s=2f973400b04d23f871b608b178e47fc01f9b8f1d',
}, },
'params': {
'skip_download': True,
},
}, { }, {
'url': 'https://nova.rs/vesti/politika/zaklina-tatalovic-ani-brnabic-pricate-lazi-video/', 'url': 'https://nova.rs/vesti/politika/zaklina-tatalovic-ani-brnabic-pricate-lazi-video/',
'info_dict': { 'info_dict': {
@ -126,6 +112,9 @@ class N1InfoIIE(InfoExtractor):
'timestamp': 1635861677, 'timestamp': 1635861677,
'thumbnail': 'https://nova.rs/wp-content/uploads/2021/11/02/1635860298-TNJG_Ana_Brnabic_i_Zaklina_Tatalovic_100_dana_Vlade_GP.jpg', 'thumbnail': 'https://nova.rs/wp-content/uploads/2021/11/02/1635860298-TNJG_Ana_Brnabic_i_Zaklina_Tatalovic_100_dana_Vlade_GP.jpg',
}, },
'params': {
'skip_download': True,
},
}, { }, {
'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/', 'url': 'https://n1info.rs/vesti/cuta-biti-u-kosovskoj-mitrovici-znaci-da-te-docekaju-eksplozivnim-napravama/',
'info_dict': { 'info_dict': {
@ -155,12 +144,15 @@ def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title') title = self._og_search_title(webpage) or self._html_extract_title(webpage)
timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage)) timestamp = unified_timestamp(self._og_search_property('published_time', webpage, default=None) or self._html_search_meta('article:published_time', webpage))
plugin_data = re.findall(r'\$bp\("(?:Brid|TargetVideo)_\d+",\s(.+)\);', webpage) plugin_data = re.findall(r'\$bp\("(?:Brid|TargetVideo)_\d+",\s(.+)\);', webpage)
entries = [] entries = []
if plugin_data: if plugin_data:
site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id') site_id = self._html_search_regex(r'site:(\d+)', webpage, 'site id', default=None)
if site_id is None:
site_id = self._search_regex(
r'partners/(\d+)', self._html_search_meta('contentUrl', webpage, fatal=True), 'site ID')
for video_data in plugin_data: for video_data in plugin_data:
video_id = self._parse_json(video_data, title)['video'] video_id = self._parse_json(video_data, title)['video']
entries.append({ entries.append({
@ -191,10 +183,13 @@ def _real_extract(self, url):
for embedded_video in embedded_videos: for embedded_video in embedded_videos:
video_data = extract_attributes(embedded_video) video_data = extract_attributes(embedded_video)
url = video_data.get('src') or '' url = video_data.get('src') or ''
if url.startswith('https://www.youtube.com'): hostname = urllib.parse.urlparse(url).hostname
if hostname == 'www.youtube.com':
entries.append(self.url_result(url, ie='Youtube')) entries.append(self.url_result(url, ie='Youtube'))
elif url.startswith('https://www.redditmedia.com'): elif hostname == 'www.redditmedia.com':
entries.append(self.url_result(url, ie='Reddit')) entries.append(self.url_result(url, ie='Reddit'))
elif hostname == 'www.facebook.com' and 'plugins/video' in url:
entries.append(self.url_result(url, ie='FacebookPluginsVideo'))
return { return {
'_type': 'playlist', '_type': 'playlist',

View File

@ -138,95 +138,88 @@ def _extract_nbcu_video(self, url, display_id, old_ie_key=None):
class NBCIE(NBCUniversalBaseIE): class NBCIE(NBCUniversalBaseIE):
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/?#]+/video/[^/?#]+/(?P<id>\w+))' _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/?#]+/video/[^/?#]+/(?P<id>\w+))'
_TESTS = [ _TESTS = [{
{ 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237', 'info_dict': {
'info_dict': { 'id': '2848237',
'id': '2848237', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', 'timestamp': 1424246400,
'timestamp': 1424246400, 'upload_date': '20150218',
'upload_date': '20150218', 'uploader': 'NBCU-COM',
'uploader': 'NBCU-COM', 'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
'episode': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', 'episode_number': 86,
'episode_number': 86, 'season': 'Season 2',
'season': 'Season 2', 'season_number': 2,
'season_number': 2, 'series': 'Tonight',
'series': 'Tonight', 'duration': 236.504,
'duration': 236.504, 'tags': 'count:2',
'tags': 'count:2', 'thumbnail': r're:https?://.+\.jpg',
'thumbnail': r're:https?://.+\.jpg', 'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
'categories': ['Series/The Tonight Show Starring Jimmy Fallon'], 'media_type': 'Full Episode',
'media_type': 'Full Episode', 'age_limit': 14,
'age_limit': 14, '_old_archive_ids': ['theplatform 2848237'],
'_old_archive_ids': ['theplatform 2848237'],
},
'params': {
'skip_download': 'm3u8',
},
}, },
{ 'params': {
'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439', 'skip_download': 'm3u8',
'info_dict': {
'id': '3646439',
'ext': 'mp4',
'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
'episode_number': 1,
'season': 'Season 75',
'season_number': 75,
'series': 'Golden Globes',
'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
'uploader': 'NBCU-COM',
'upload_date': '20180107',
'timestamp': 1515312000,
'duration': 569.703,
'tags': 'count:8',
'thumbnail': r're:https?://.+\.jpg',
'media_type': 'Highlight',
'age_limit': 0,
'categories': ['Series/The Golden Globe Awards'],
'_old_archive_ids': ['theplatform 3646439'],
},
'params': {
'skip_download': 'm3u8',
},
}, },
{ }, {
# Needs to be extracted from webpage instead of GraphQL 'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
'url': 'https://www.nbc.com/paris2024/video/ali-truwit-found-purpose-pool-after-her-life-changed/para24_sww_alitruwittodayshow_240823', 'info_dict': {
'info_dict': { 'id': '3646439',
'id': 'para24_sww_alitruwittodayshow_240823', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
'title': 'Ali Truwit found purpose in the pool after her life changed', 'episode': 'Oprah Winfrey Receives Cecil B. de Mille Award at the 2018 Golden Globes',
'description': 'md5:c16d7489e1516593de1cc5d3f39b9bdb', 'episode_number': 1,
'uploader': 'NBCU-SPORTS', 'season': 'Season 75',
'duration': 311.077, 'season_number': 75,
'thumbnail': r're:https?://.+\.jpg', 'series': 'Golden Globes',
'episode': 'Ali Truwit found purpose in the pool after her life changed', 'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
'timestamp': 1724435902.0, 'uploader': 'NBCU-COM',
'upload_date': '20240823', 'upload_date': '20180107',
'_old_archive_ids': ['theplatform para24_sww_alitruwittodayshow_240823'], 'timestamp': 1515312000,
}, 'duration': 569.703,
'params': { 'tags': 'count:8',
'skip_download': 'm3u8', 'thumbnail': r're:https?://.+\.jpg',
}, 'media_type': 'Highlight',
'age_limit': 0,
'categories': ['Series/The Golden Globe Awards'],
'_old_archive_ids': ['theplatform 3646439'],
}, },
{ 'params': {
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978', 'skip_download': 'm3u8',
'only_matching': True,
}, },
{ }, {
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310', # Needs to be extracted from webpage instead of GraphQL
'only_matching': True, 'url': 'https://www.nbc.com/paris2024/video/ali-truwit-found-purpose-pool-after-her-life-changed/para24_sww_alitruwittodayshow_240823',
'info_dict': {
'id': 'para24_sww_alitruwittodayshow_240823',
'ext': 'mp4',
'title': 'Ali Truwit found purpose in the pool after her life changed',
'description': 'md5:c16d7489e1516593de1cc5d3f39b9bdb',
'uploader': 'NBCU-SPORTS',
'duration': 311.077,
'thumbnail': r're:https?://.+\.jpg',
'episode': 'Ali Truwit found purpose in the pool after her life changed',
'timestamp': 1724435902.0,
'upload_date': '20240823',
'_old_archive_ids': ['theplatform para24_sww_alitruwittodayshow_240823'],
}, },
{ 'params': {
# Percent escaped url 'skip_download': 'm3u8',
'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
'only_matching': True,
}, },
] }, {
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
'only_matching': True,
}, {
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
'only_matching': True,
}, {
# Percent escaped url
'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
'only_matching': True,
}]
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1Yzg2YjdkYy04NDI3LTRjNDUtOGQwZi1iNDkzYmE3MmQwYjQiLCJuYmYiOjE1Nzg3MDM2MzEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTc4NzAzNjMxfQ.QQKIsBhAjGQTMdAqRTqhcz2Cddr4Y2hEjnSiOeKKki4nLrkDOsjQMmqeTR0hSRarraxH54wBgLvsxI7LHwKMvr7G8QpynNAxylHlQD3yhN9tFhxt4KR5wW3as02B-W2TznK9bhNWPKIyHND95Uo2Mi6rEQoq8tM9O09WPWaanE5BX_-r6Llr6dPq5F0Lpx2QOn2xYRb1T4nFxdFTNoss8GBds8OvChTiKpXMLHegLTc1OS4H_1a8tO_37jDwSdJuZ8iTyRLV4kZ2cpL6OL5JPMObD4-HQiec_dfcYgMKPiIfP9ZqdXpec2SVaCLsWEk86ZYvD97hLIQrK5rrKd1y-A' _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1Yzg2YjdkYy04NDI3LTRjNDUtOGQwZi1iNDkzYmE3MmQwYjQiLCJuYmYiOjE1Nzg3MDM2MzEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTc4NzAzNjMxfQ.QQKIsBhAjGQTMdAqRTqhcz2Cddr4Y2hEjnSiOeKKki4nLrkDOsjQMmqeTR0hSRarraxH54wBgLvsxI7LHwKMvr7G8QpynNAxylHlQD3yhN9tFhxt4KR5wW3as02B-W2TznK9bhNWPKIyHND95Uo2Mi6rEQoq8tM9O09WPWaanE5BX_-r6Llr6dPq5F0Lpx2QOn2xYRb1T4nFxdFTNoss8GBds8OvChTiKpXMLHegLTc1OS4H_1a8tO_37jDwSdJuZ8iTyRLV4kZ2cpL6OL5JPMObD4-HQiec_dfcYgMKPiIfP9ZqdXpec2SVaCLsWEk86ZYvD97hLIQrK5rrKd1y-A'
def _real_extract(self, url): def _real_extract(self, url):
@ -378,6 +371,15 @@ class NBCSportsIE(InfoExtractor):
'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen', 'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
'info_dict': {
'id': 'ln7x1qSThw4k',
'ext': 'flv',
'title': "PFT Live: New leader in the 'new-look' defense",
},
'skip': 'Invalid URL',
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -389,7 +391,7 @@ def _real_extract(self, url):
class NBCSportsStreamIE(AdobePassIE): class NBCSportsStreamIE(AdobePassIE):
_WORKING = False _WORKING = False
_VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)' _VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559', 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
'info_dict': { 'info_dict': {
'id': '206559', 'id': '206559',
@ -402,7 +404,7 @@ class NBCSportsStreamIE(AdobePassIE):
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Requires Adobe Pass Authentication', 'skip': 'Requires Adobe Pass Authentication',
} }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -449,98 +451,100 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)' _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1'] _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1']
_TESTS = [ _TESTS = [{
{ 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880', 'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610', # md5 tends to fluctuate
'md5': 'fb3dcd2d7b1dd9804305fa2fc95ab610', # md5 tends to fluctuate 'info_dict': {
'info_dict': { 'id': '269389891880',
'id': '269389891880', 'ext': 'mp4',
'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview',
'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', 'timestamp': 1401363060,
'timestamp': 1401363060, 'upload_date': '20140529',
'upload_date': '20140529', 'duration': 46.0,
'duration': 46.0, 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg',
'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/140529/p_tweet_snow_140529.jpg',
},
}, },
{ }, {
'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156', 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
'md5': 'fdbf39ab73a72df5896b6234ff98518a', 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
'info_dict': { 'info_dict': {
'id': '529953347624', 'id': '529953347624',
'ext': 'mp4', 'ext': 'mp4',
'title': 'FULL EPISODE: Family Business', 'title': 'FULL EPISODE: Family Business',
'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
},
'skip': 'This page is unavailable.',
}, },
{ 'skip': 'This page is unavailable.',
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', }, {
'md5': '40d0e48c68896359c80372306ece0fc3', 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
'info_dict': { 'md5': '40d0e48c68896359c80372306ece0fc3',
'id': '394064451844', 'info_dict': {
'ext': 'mp4', 'id': '394064451844',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', 'ext': 'mp4',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'timestamp': 1423104900, 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
'upload_date': '20150205', 'timestamp': 1423104900,
'duration': 1236.0, 'upload_date': '20150205',
'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg', 'duration': 1236.0,
}, 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/nn_netcast_150204.jpg',
}, },
{ }, {
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456', 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939', 'md5': 'ffb59bcf0733dc3c7f0ace907f5e3939',
'info_dict': { 'info_dict': {
'id': 'n431456', 'id': 'n431456',
'ext': 'mp4', 'ext': 'mp4',
'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'", 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'",
'description': 'md5:d22d1281a24f22ea0880741bb4dd6301', 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
'upload_date': '20150922', 'upload_date': '20150922',
'timestamp': 1442917800, 'timestamp': 1442917800,
'duration': 37.0, 'duration': 37.0,
'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg', 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/__NEW/x_lon_vwhorn_150922.jpg',
},
}, },
{ }, {
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
'md5': '693d1fa21d23afcc9b04c66b227ed9ff', 'md5': '693d1fa21d23afcc9b04c66b227ed9ff',
'info_dict': { 'info_dict': {
'id': '669831235788', 'id': '669831235788',
'ext': 'mp4', 'ext': 'mp4',
'title': 'See the aurora borealis from space in stunning new NASA video', 'title': 'See the aurora borealis from space in stunning new NASA video',
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
'upload_date': '20160420', 'upload_date': '20160420',
'timestamp': 1461152093, 'timestamp': 1461152093,
'duration': 69.0, 'duration': 69.0,
'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg', 'thumbnail': 'https://media-cldnry.s-nbcnews.com/image/upload/MSNBC/Components/Video/201604/2016-04-20T11-35-09-133Z--1280x720.jpg',
},
}, },
{ 'skip': 'Invalid URL',
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', }, {
'md5': '6d236bf4f3dddc226633ce6e2c3f814d', 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'info_dict': { 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'id': '314487875924', 'info_dict': {
'ext': 'mp4', 'id': '314487875924',
'title': 'The chaotic GOP immigration vote', 'ext': 'mp4',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', 'title': 'The chaotic GOP immigration vote',
'thumbnail': r're:^https?://.*\.jpg$', 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
'timestamp': 1406937606, 'thumbnail': r're:https?://.+\.jpg',
'upload_date': '20140802', 'timestamp': 1406937606,
'duration': 940.0, 'upload_date': '20140802',
}, 'duration': 940.0,
}, },
{ 'skip': 'Invalid URL',
'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952', }, {
'only_matching': True, 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
'only_matching': True,
}, {
# From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
'info_dict': {
'id': 'x_dtl_oa_LettermanliftPR_160608',
'ext': 'mp4',
'title': 'David Letterman: A Preview',
}, },
{ 'skip': 'Invalid URL',
# From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html }]
'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
'only_matching': True,
},
]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -610,10 +614,10 @@ class NBCOlympicsIE(InfoExtractor):
'display_id': 'watch-final-minutes-team-usas-mens-basketball-gold', 'display_id': 'watch-final-minutes-team-usas-mens-basketball-gold',
'title': 'Watch the final minutes of Team USA\'s men\'s basketball gold', 'title': 'Watch the final minutes of Team USA\'s men\'s basketball gold',
'description': 'md5:f704f591217305c9559b23b877aa8d31', 'description': 'md5:f704f591217305c9559b23b877aa8d31',
'episode': 'Watch the final minutes of Team USA\'s men\'s basketball gold',
'uploader': 'NBCU-SPORTS', 'uploader': 'NBCU-SPORTS',
'duration': 387.053, 'duration': 387.053,
'thumbnail': r're:https://.+/.+\.jpg', 'thumbnail': r're:https?://.+\.jpg',
'chapters': [],
'timestamp': 1723346984, 'timestamp': 1723346984,
'upload_date': '20240811', 'upload_date': '20240811',
}, },
@ -652,33 +656,31 @@ class NBCOlympicsStreamIE(AdobePassIE):
_WORKING = False _WORKING = False
IE_NAME = 'nbcolympics:stream' IE_NAME = 'nbcolympics:stream'
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)' _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
_TESTS = [ _TESTS = [{
{ 'note': 'Tokenized m3u8 source URL',
'note': 'Tokenized m3u8 source URL', 'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11',
'url': 'https://stream.nbcolympics.com/womens-soccer-group-round-11', 'info_dict': {
'info_dict': { 'id': '2019740',
'id': '2019740', 'ext': 'mp4',
'ext': 'mp4', 'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
'title': r"re:Women's Group Stage - Netherlands vs\. Brazil [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$",
},
'params': {
'skip_download': 'm3u8',
},
'skip': 'Livestream',
}, {
'note': 'Plain m3u8 source URL',
'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
'info_dict': {
'id': '2021729',
'ext': 'mp4',
'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
},
'params': {
'skip_download': 'm3u8',
},
'skip': 'Livestream',
}, },
] 'params': {
'skip_download': 'm3u8',
},
'skip': 'Livestream',
}, {
'note': 'Plain m3u8 source URL',
'url': 'https://stream.nbcolympics.com/gymnastics-event-finals-mens-floor-pommel-horse-womens-vault-bars',
'info_dict': {
'id': '2021729',
'ext': 'mp4',
'title': r're:Event Finals: M Floor, W Vault, M Pommel, W Uneven Bars [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
},
'params': {
'skip_download': 'm3u8',
},
'skip': 'Livestream',
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@ -758,9 +760,7 @@ class NBCStationsIE(InfoExtractor):
'channel_id': 'KNBC', 'channel_id': 'KNBC',
'channel': 'nbclosangeles', 'channel': 'nbclosangeles',
}, },
'params': { 'skip': 'Site changed',
'skip_download': 'm3u8',
},
}, { }, {
'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/', 'url': 'https://www.telemundoarizona.com/responde/huracan-complica-reembolso-para-televidente-de-tucson/2247002/',
'info_dict': { 'info_dict': {

View File

@ -34,7 +34,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac 'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac
) )
_API_BASE = 'http://music.163.com/api/' _API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False
def _create_eapi_cipher(self, api_path, query_body, cookies): def _create_eapi_cipher(self, api_path, query_body, cookies):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
@ -64,6 +63,8 @@ def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
'MUSIC_U': ('MUSIC_U', {lambda i: i.value}), 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
}), }),
} }
if self._x_forwarded_for_ip:
headers.setdefault('X-Real-IP', self._x_forwarded_for_ip)
return self._download_json( return self._download_json(
urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id, urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id,
data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={ data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={

View File

@ -18,7 +18,6 @@ class NewsPicksIE(InfoExtractor):
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】', 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
'cast': 'count:4', 'cast': 'count:4',
'description': 'md5:09397aad46d6ded6487ff13f138acadf', 'description': 'md5:09397aad46d6ded6487ff13f138acadf',
'duration': 2940,
'release_date': '20220117', 'release_date': '20220117',
'release_timestamp': 1642424400, 'release_timestamp': 1642424400,
'series': 'HORIE ONE', 'series': 'HORIE ONE',
@ -35,7 +34,6 @@ class NewsPicksIE(InfoExtractor):
'title': '【検証】専門家は、KADOKAWAをどう見るか', 'title': '【検証】専門家は、KADOKAWAをどう見るか',
'cast': 'count:3', 'cast': 'count:3',
'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d', 'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d',
'duration': 1320,
'release_date': '20240622', 'release_date': '20240622',
'release_timestamp': 1719088080, 'release_timestamp': 1719088080,
'series': 'NPレポート', 'series': 'NPレポート',

View File

@ -3,7 +3,6 @@
import itertools import itertools
import json import json
import re import re
import time
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
@ -16,12 +15,12 @@
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_bitrate, parse_bitrate,
parse_duration,
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
parse_resolution, parse_resolution,
qualities, qualities,
str_or_none, str_or_none,
time_seconds,
truncate_string, truncate_string,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -38,8 +37,14 @@
class NiconicoBaseIE(InfoExtractor): class NiconicoBaseIE(InfoExtractor):
_API_BASE = 'https://nvapi.nicovideo.jp'
_BASE_URL = 'https://www.nicovideo.jp'
_GEO_BYPASS = False _GEO_BYPASS = False
_GEO_COUNTRIES = ['JP'] _GEO_COUNTRIES = ['JP']
_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0',
}
_LOGIN_BASE = 'https://account.nicovideo.jp' _LOGIN_BASE = 'https://account.nicovideo.jp'
_NETRC_MACHINE = 'niconico' _NETRC_MACHINE = 'niconico'
@ -99,146 +104,266 @@ class NiconicoIE(NiconicoBaseIE):
IE_NAME = 'niconico' IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画' IE_DESC = 'ニコニコ動画'
_VALID_URL = r'https?://(?:(?:embed|sp|www)\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?\d+)'
_ERROR_MAP = {
'FORBIDDEN': {
'ADMINISTRATOR_DELETE_VIDEO': 'Video unavailable, possibly removed by admins',
'CHANNEL_MEMBER_ONLY': 'Channel members only',
'DELETED_CHANNEL_VIDEO': 'Video unavailable, channel was closed',
'DELETED_COMMUNITY_VIDEO': 'Video unavailable, community deleted or missing',
'DEFAULT': 'Page unavailable, check the URL',
'HARMFUL_VIDEO': 'Sensitive content, login required',
'HIDDEN_VIDEO': 'Video unavailable, set to private',
'NOT_ALLOWED': 'No permission',
'PPV_VIDEO': 'PPV video, payment information required',
'PREMIUM_ONLY': 'Premium members only',
},
'INVALID_PARAMETER': {
'DEFAULT': 'Video unavailable, may not exist or was deleted',
},
'MAINTENANCE': {
'DEFAULT': 'Maintenance is in progress',
},
'NOT_FOUND': {
'DEFAULT': 'Video unavailable, may not exist or was deleted',
'RIGHT_HOLDER_DELETE_VIDEO': 'Removed by rights-holder request',
},
'UNAUTHORIZED': {
'DEFAULT': 'Invalid session, re-login required',
},
'UNKNOWN': {
'DEFAULT': 'Failed to fetch content',
},
}
_STATUS_MAP = {
'needs_auth': 'PPV video, payment information required',
'premium_only': 'Premium members only',
'subscriber_only': 'Channel members only',
}
_TESTS = [{ _TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215', 'url': 'https://www.nicovideo.jp/watch/1173108780',
'info_dict': { 'info_dict': {
'id': 'sm22312215', 'id': 'sm9',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Big Buck Bunny', 'title': '新・豪血寺一族 -煩悩解放 - レッツゴー!陰陽師',
'thumbnail': r're:https?://.*', 'availability': 'public',
'uploader': 'takuya0301', 'channel': '中の',
'uploader_id': '2698420', 'channel_id': '4',
'upload_date': '20131123',
'timestamp': int, # timestamp is unstable
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
'view_count': int,
'comment_count': int, 'comment_count': int,
'description': 'md5:b7f6d3e6c29552cc19fdea6a4b7dc194',
'display_id': '1173108780',
'duration': 320,
'genres': ['未設定'], 'genres': ['未設定'],
'tags': [], 'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1173108780,
'upload_date': '20070305',
'uploader': '中の',
'uploader_id': '4',
'view_count': int,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
# File downloaded with and without credentials are different, so omit 'url': 'https://www.nicovideo.jp/watch/sm8628149',
# the md5 field 'info_dict': {
'url': 'http://www.nicovideo.jp/watch/nm14296458', 'id': 'sm8628149',
'ext': 'mp4',
'title': '【東方】Bad Apple!!\u3000PV【影絵】',
'availability': 'public',
'channel': 'あにら',
'channel_id': '10731211',
'comment_count': int,
'description': 'md5:1999669158cb77a45bab123c4fafe1d7',
'display_id': 'sm8628149',
'duration': 219,
'genres': ['ゲーム'],
'like_count': int,
'tags': 'mincount:3',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1256580802,
'upload_date': '20091026',
'uploader': 'あにら',
'uploader_id': '10731211',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.nicovideo.jp/watch/nm14296458',
'info_dict': { 'info_dict': {
'id': 'nm14296458', 'id': 'nm14296458',
'ext': 'mp4', 'ext': 'mp4',
'title': '【Kagamine Rin】Dance on media【Original】take2!', 'title': '【鏡音リン】Dance on media【オリジナル】take2!',
'availability': 'public',
'channel': 'りょうた',
'channel_id': '18822557',
'comment_count': int,
'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5', 'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
'thumbnail': r're:https?://.*', 'display_id': 'nm14296458',
'duration': 208,
'genres': ['音楽・サウンド'],
'like_count': int,
'tags': 'mincount:1',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1304065916,
'upload_date': '20110429',
'uploader': 'りょうた', 'uploader': 'りょうた',
'uploader_id': '18822557', 'uploader_id': '18822557',
'upload_date': '20110429',
'timestamp': 1304065916,
'duration': 208.0,
'comment_count': int,
'view_count': int, 'view_count': int,
'genres': ['音楽・サウンド'],
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
# 'video exists but is marked as "deleted" 'url': 'https://www.nicovideo.jp/watch/nl1872567',
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm10000',
'info_dict': { 'info_dict': {
'id': 'sm10000', 'id': 'nl1872567',
'ext': 'unknown_video',
'description': 'deleted',
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」前編',
'thumbnail': r're:https?://.*',
'upload_date': '20071224',
'timestamp': int, # timestamp field has different value if logged in
'duration': 304,
'view_count': int,
},
'skip': 'Requires an account',
}, {
'url': 'http://www.nicovideo.jp/watch/so22543406',
'info_dict': {
'id': '1388129933',
'ext': 'mp4', 'ext': 'mp4',
'title': '【第1回】RADIOアニメロミックス ラブライブのぞえりRadio Garden', 'title': '【12/25放送分】『生対談!!ひろゆきと戀塚のニコニコを作った人 』前半',
'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1', 'availability': 'public',
'thumbnail': r're:https?://.*', 'channel': 'nicolive',
'timestamp': 1388851200, 'channel_id': '394',
'upload_date': '20140104',
'uploader': 'アニメロチャンネル',
'uploader_id': '312',
},
'skip': 'The viewing period of the video you were searching for has expired.',
}, {
# video not available via `getflv`; "old" HTML5 video
'url': 'http://www.nicovideo.jp/watch/sm1151009',
'info_dict': {
'id': 'sm1151009',
'ext': 'mp4',
'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
'description': 'md5:f95a3d259172667b293530cc2e41ebda',
'thumbnail': r're:https?://.*',
'duration': 184,
'timestamp': 1190835883,
'upload_date': '20070926',
'uploader': 'denden2',
'uploader_id': '1392194',
'view_count': int,
'comment_count': int,
'genres': ['ゲーム'],
'tags': [],
},
'params': {'skip_download': 'm3u8'},
}, {
# "New" HTML5 video
'url': 'http://www.nicovideo.jp/watch/sm31464864',
'info_dict': {
'id': 'sm31464864',
'ext': 'mp4',
'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
'timestamp': 1498481660,
'upload_date': '20170626',
'uploader': 'no-namamae',
'uploader_id': '40826363',
'thumbnail': r're:https?://.*',
'duration': 198,
'view_count': int,
'comment_count': int,
'genres': ['アニメ'],
'tags': [],
},
'params': {'skip_download': 'm3u8'},
}, {
# Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488',
'info_dict': {
'id': 'sm18238488',
'ext': 'mp4',
'title': '【実写版】ミュータントタートルズ',
'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
'timestamp': 1341128008,
'upload_date': '20120701',
'thumbnail': r're:https?://.*',
'duration': 5271,
'view_count': int,
'comment_count': int, 'comment_count': int,
'description': 'md5:79fc3a54cfdc93ecc2b883285149e548',
'display_id': 'nl1872567',
'duration': 586,
'genres': ['エンターテイメント'], 'genres': ['エンターテイメント'],
'tags': [], 'like_count': int,
'tags': 'mincount:3',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1198637246,
'upload_date': '20071226',
'uploader': 'nicolive',
'uploader_id': '394',
'view_count': int,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True,
}, {
'note': 'a video that is only served as an ENCRYPTED HLS.',
'url': 'https://www.nicovideo.jp/watch/so38016254', 'url': 'https://www.nicovideo.jp/watch/so38016254',
'only_matching': True, 'info_dict': {
'id': 'so38016254',
'ext': 'mp4',
'title': '「のんのんびより のんすとっぷ」 PV',
'availability': 'public',
'channel': 'のんのんびより のんすとっぷ',
'channel_id': 'ch2647028',
'comment_count': int,
'description': 'md5:6e2ff55b33e3645d59ef010869cde6a2',
'display_id': 'so38016254',
'duration': 114,
'genres': ['アニメ'],
'like_count': int,
'tags': 'mincount:4',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1609146000,
'upload_date': '20201228',
'uploader': 'のんのんびより のんすとっぷ',
'uploader_id': 'ch2647028',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
# smile official, but marked as user video
'url': 'https://www.nicovideo.jp/watch/so37602536',
'info_dict': {
'id': 'so37602536',
'ext': 'mp4',
'title': '田中有紀とゆきだるまと! 限定放送アーカイブ第12回',
'availability': 'subscriber_only',
'channel': 'あみあみ16',
'channel_id': '91072761',
'comment_count': int,
'description': 'md5:2ee357ec4e76d7804fb59af77107ab67',
'display_id': 'so37602536',
'duration': 980,
'genres': ['エンターテイメント'],
'like_count': int,
'tags': 'count:4',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1601377200,
'upload_date': '20200929',
'uploader': 'あみあみ16',
'uploader_id': '91072761',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only',
}, {
'url': 'https://www.nicovideo.jp/watch/so41370536',
'info_dict': {
'id': 'so41370536',
'ext': 'mp4',
'title': 'ZUN【出演者別】超パーティー2022',
'availability': 'premium_only',
'channel': 'ニコニコ超会議チャンネル',
'channel_id': 'ch2607134',
'comment_count': int,
'description': 'md5:5692db5ac40d3a374fc5ec182d0249c3',
'display_id': 'so41370536',
'duration': 63,
'genres': ['音楽・サウンド'],
'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1668394800,
'upload_date': '20221114',
'uploader': 'ニコニコ超会議チャンネル',
'uploader_id': 'ch2607134',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Premium members only',
}, {
'url': 'https://www.nicovideo.jp/watch/so37574174',
'info_dict': {
'id': 'so37574174',
'ext': 'mp4',
'title': 'ひぐらしのなく頃に 廿回し編\u3000第1回',
'availability': 'subscriber_only',
'channel': '「ひぐらしのなく頃に」オフィシャルチャンネル',
'channel_id': 'ch2646036',
'comment_count': int,
'description': 'md5:5296196d51d9c0b7272b73f9a99c236a',
'display_id': 'so37574174',
'duration': 1931,
'genres': ['ラジオ'],
'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1601028000,
'upload_date': '20200925',
'uploader': '「ひぐらしのなく頃に」オフィシャルチャンネル',
'uploader_id': 'ch2646036',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only',
}, {
'url': 'https://www.nicovideo.jp/watch/so44060088',
'info_dict': {
'id': 'so44060088',
'ext': 'mp4',
'title': '松田的超英雄電波。《仮面ライダーガッチャード 放送終了記念特別番組》',
'availability': 'subscriber_only',
'channel': 'あみあみチャンネル',
'channel_id': 'ch2638921',
'comment_count': int,
'description': 'md5:9dec5bb9a172b6d20a255ecb64fbd03e',
'display_id': 'so44060088',
'duration': 1881,
'genres': ['ラジオ'],
'like_count': int,
'tags': 'mincount:7',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1725361200,
'upload_date': '20240903',
'uploader': 'あみあみチャンネル',
'uploader_id': 'ch2638921',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only; specified continuous membership period required',
}] }]
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)' def _extract_formats(self, api_data, video_id):
def _yield_dms_formats(self, api_data, video_id):
fmt_filter = lambda _, v: v['isAvailable'] and v['id'] fmt_filter = lambda _, v: v['isAvailable'] and v['id']
videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter)) videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter)) audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
@ -247,164 +372,135 @@ def _yield_dms_formats(self, api_data, video_id):
if not all((videos, audios, access_key, track_id)): if not all((videos, audios, access_key, track_id)):
return return
dms_m3u8_url = self._download_json( m3u8_url = self._download_json(
f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id, f'{self._API_BASE}/v1/watch/{video_id}/access-rights/hls',
data=json.dumps({ video_id, headers={
'Accept': 'application/json;charset=utf-8',
'Content-Type': 'application/json',
'X-Access-Right-Key': access_key,
'X-Request-With': self._BASE_URL,
**self._HEADERS,
}, query={
'actionTrackId': track_id,
}, data=json.dumps({
'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))), 'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))),
}).encode(), query={'actionTrackId': track_id}, headers={ }).encode(),
'x-access-right-key': access_key, )['data']['contentUrl']
'x-frontend-id': 6, raw_fmts = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
'x-frontend-version': 0,
'x-request-with': 'https://www.nicovideo.jp',
})['data']['contentUrl']
# Getting all audio formats results in duplicate video formats which we filter out later
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix formats = []
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'): for a_fmt in traverse_obj(raw_fmts, lambda _, v: v['vcodec'] == 'none'):
yield { formats.append({
**audio_fmt, **a_fmt,
**traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), { **traverse_obj(audios, (lambda _, v: a_fmt['format_id'].startswith(v['id']), {
'format_id': ('id', {str}),
'abr': ('bitRate', {float_or_none(scale=1000)}), 'abr': ('bitRate', {float_or_none(scale=1000)}),
'asr': ('samplingRate', {int_or_none}), 'asr': ('samplingRate', {int_or_none}),
'format_id': ('id', {str}),
'quality': ('qualityLevel', {int_or_none}), 'quality': ('qualityLevel', {int_or_none}),
}), get_all=False), }, any)),
'acodec': 'aac', 'acodec': 'aac',
} })
# Sort before removing dupes to keep the format dicts with the lowest tbr # Sort first, keeping the lowest-tbr formats
video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr']) v_fmts = sorted((fmt for fmt in raw_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
self._remove_duplicate_formats(video_fmts) self._remove_duplicate_formats(v_fmts)
# Calculate the true vbr/tbr by subtracting the lowest abr # Calculate the true vbr/tbr by subtracting the lowest abr
min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000 min_abr = traverse_obj(audios, (..., 'bitRate', {float_or_none(scale=1000)}, all, {min})) or 0
for video_fmt in video_fmts: for v_fmt in v_fmts:
video_fmt['tbr'] -= min_abr v_fmt['format_id'] = url_basename(v_fmt['url']).rpartition('.')[0]
video_fmt['format_id'] = url_basename(video_fmt['url']).rpartition('.')[0] v_fmt['quality'] = traverse_obj(videos, (
video_fmt['quality'] = traverse_obj(videos, ( lambda _, v: v['id'] == v_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1 v_fmt['tbr'] -= min_abr
yield video_fmt formats.extend(v_fmts)
def _extract_server_response(self, webpage, video_id, fatal=True): return formats
try:
return traverse_obj(
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
('data', 'response', {dict}, {require('server response')}))
except ExtractorError:
if not fatal:
return {}
raise
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
try: path = 'v3' if self.is_logged_in else 'v3_guest'
webpage, handle = self._download_webpage_handle( api_resp = self._download_json(
f'https://www.nicovideo.jp/watch/{video_id}', video_id, f'{self._BASE_URL}/api/watch/{path}/{video_id}', video_id,
headers=self.geo_verification_headers()) 'Downloading API JSON', 'Unable to fetch data', headers={
if video_id.startswith('so'): **self._HEADERS,
video_id = self._match_id(handle.url) **self.geo_verification_headers(),
}, query={
'actionTrackId': f'AAAAAAAAAA_{round(time_seconds() * 1000)}',
}, expected_status=[400, 404])
api_data = self._extract_server_response(webpage, video_id) api_data = api_resp['data']
except ExtractorError as e: scheduled_time = traverse_obj(api_data, ('publishScheduledAt', {str}))
try: status = traverse_obj(api_resp, ('meta', 'status', {int}))
api_data = self._download_json(
f'https://www.nicovideo.jp/api/watch/v3/{video_id}', video_id,
'Downloading API JSON', 'Unable to fetch data', query={
'_frontendId': '6',
'_frontendVersion': '0',
'actionTrackId': f'AAAAAAAAAA_{round(time.time() * 1000)}',
}, headers=self.geo_verification_headers())['data']
except ExtractorError:
if not isinstance(e.cause, HTTPError):
# Raise if original exception was from _parse_json or utils.traversal.require
raise
# The webpage server response has more detailed error info than the API response
webpage = e.cause.response.read().decode('utf-8', 'replace')
reason_code = self._extract_server_response(
webpage, video_id, fatal=False).get('reasonCode')
if not reason_code:
raise
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif reason_code == 'HIDDEN_VIDEO':
raise ExtractorError(
'The viewing period of this video has expired', expected=True)
elif reason_code == 'DELETED_VIDEO':
raise ExtractorError('This video has been deleted', expected=True)
raise ExtractorError(f'Niconico says: {reason_code}')
availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', { if status != 200:
'needs_premium': ('isPremium', {bool}), err_code = traverse_obj(api_resp, ('meta', 'errorCode', {str.upper}))
reason_code = traverse_obj(api_data, ('reasonCode', {str_or_none}))
err_msg = traverse_obj(self._ERROR_MAP, (err_code, (reason_code, 'DEFAULT'), {str}, any))
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif reason_code == 'HARMFUL_VIDEO' and traverse_obj(api_data, (
'viewer', 'allowSensitiveContents', {bool},
)) is False:
err_msg = 'Sensitive content, adjust display settings to watch'
elif reason_code == 'HIDDEN_VIDEO' and scheduled_time:
err_msg = f'This content is scheduled to be released at {scheduled_time}'
elif reason_code in ('CHANNEL_MEMBER_ONLY', 'HARMFUL_VIDEO', 'HIDDEN_VIDEO', 'PPV_VIDEO', 'PREMIUM_ONLY'):
self.raise_login_required(err_msg)
if err_msg:
raise ExtractorError(err_msg, expected=True)
if status and status >= 500:
raise ExtractorError('Service temporarily unavailable', expected=True)
raise ExtractorError(f'API returned error status {status}')
availability = self._availability(**traverse_obj(api_data, ('payment', 'video', {
'needs_auth': (('isContinuationBenefit', 'isPpv'), {bool}, any),
'needs_subscription': ('isAdmission', {bool}), 'needs_subscription': ('isAdmission', {bool}),
})) or {'needs_auth': True})) 'needs_premium': ('isPremium', {bool}),
}))) or 'public'
formats = list(self._yield_dms_formats(api_data, video_id)) formats = self._extract_formats(api_data, video_id)
if not formats: err_msg = self._STATUS_MAP.get(availability)
fail_msg = clean_html(self._html_search_regex( if not formats and err_msg:
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>', self.raise_login_required(err_msg, metadata_available=True)
webpage, 'fail message', default=None, group='msg'))
if fail_msg:
self.to_screen(f'Niconico said: {fail_msg}')
if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
availability = None
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
elif availability == 'premium_only':
self.raise_login_required('This video requires premium', metadata_available=True)
elif availability == 'subscriber_only':
self.raise_login_required('This video is for members only', metadata_available=True)
elif availability == 'needs_auth':
self.raise_login_required(metadata_available=False)
# Start extracting information
tags = None
if webpage:
# use og:video:tag (not logged in)
og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
if not tags:
# use keywords and split with comma (not logged in)
kwds = self._html_search_meta('keywords', webpage, default=None)
if kwds:
tags = [x for x in kwds.split(',') if x]
if not tags:
# find in json (logged in)
tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp']) thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
def get_video_info(*items, get_first=True, **kwargs):
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
return { return {
'id': video_id,
'_api_data': api_data,
'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
'formats': formats,
'availability': availability, 'availability': availability,
'thumbnails': [{ 'display_id': video_id,
'id': key, 'formats': formats,
'url': url, 'genres': traverse_obj(api_data, ('genre', 'label', {str}, filter, all, filter)),
'ext': 'jpg', 'release_timestamp': parse_iso8601(scheduled_time),
'preference': thumb_prefs(key),
**parse_resolution(url, lenient=True),
} for key, url in (get_video_info('thumbnail') or {}).items() if url],
'description': clean_html(get_video_info('description')),
'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
'view_count': int_or_none(get_video_info('count', 'view')),
'tags': tags,
'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
'comment_count': get_video_info('count', 'comment', expected_type=int),
'duration': (
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
or get_video_info('duration')),
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
'subtitles': self.extract_subtitles(video_id, api_data), 'subtitles': self.extract_subtitles(video_id, api_data),
'tags': traverse_obj(api_data, ('tag', 'items', ..., 'name', {str}, filter, all, filter)),
'thumbnails': [{
'ext': 'jpg',
'id': key,
'preference': thumb_prefs(key),
'url': url,
**parse_resolution(url, lenient=True),
} for key, url in traverse_obj(api_data, (
'video', 'thumbnail', {dict}), default={}).items()],
**traverse_obj(api_data, (('channel', 'owner'), any, {
'channel': (('name', 'nickname'), {str}, any),
'channel_id': ('id', {str_or_none}),
'uploader': (('name', 'nickname'), {str}, any),
'uploader_id': ('id', {str_or_none}),
})),
**traverse_obj(api_data, ('video', {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'description': ('description', {clean_html}, filter),
'duration': ('duration', {int_or_none}),
'timestamp': ('registeredAt', {parse_iso8601}),
})),
**traverse_obj(api_data, ('video', 'count', {
'comment_count': ('comment', {int_or_none}),
'like_count': ('like', {int_or_none}),
'view_count': ('view', {int_or_none}),
})),
} }
def _get_subtitles(self, video_id, api_data): def _get_subtitles(self, video_id, api_data):
@ -413,21 +509,19 @@ def _get_subtitles(self, video_id, api_data):
return return
danmaku = traverse_obj(self._download_json( danmaku = traverse_obj(self._download_json(
f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({ f'{comments_info["server"]}/v1/threads', video_id,
'Downloading comments', 'Failed to download comments', headers={
'Content-Type': 'text/plain;charset=UTF-8',
'Origin': self._BASE_URL,
'Referer': f'{self._BASE_URL}/',
'X-Client-Os-Type': 'others',
**self._HEADERS,
}, data=json.dumps({
'additionals': {}, 'additionals': {},
'params': comments_info.get('params'), 'params': comments_info.get('params'),
'threadKey': comments_info.get('threadKey'), 'threadKey': comments_info.get('threadKey'),
}).encode(), fatal=False, }).encode(), fatal=False,
headers={ ), ('data', 'threads', ..., 'comments', ...))
'Referer': 'https://www.nicovideo.jp/',
'Origin': 'https://www.nicovideo.jp',
'Content-Type': 'text/plain;charset=UTF-8',
'x-client-os-type': 'others',
'x-frontend-id': '6',
'x-frontend-version': '0',
},
note='Downloading comments', errnote='Failed to download comments'),
('data', 'threads', ..., 'comments', ...))
return { return {
'comments': [{ 'comments': [{

View File

@ -1,6 +1,3 @@
import json
import re
from .brightcove import BrightcoveNewIE from .brightcove import BrightcoveNewIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -11,7 +8,12 @@
str_or_none, str_or_none,
url_or_none, url_or_none,
) )
from ..utils.traversal import require, traverse_obj, value from ..utils.traversal import (
get_first,
require,
traverse_obj,
value,
)
class NineNowIE(InfoExtractor): class NineNowIE(InfoExtractor):
@ -101,20 +103,11 @@ class NineNowIE(InfoExtractor):
}] }]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId={}'
# XXX: For parsing next.js v15+ data; see also yt_dlp.extractor.francetv and yt_dlp.extractor.goplay
def _find_json(self, s):
return self._search_json(
r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_type = self._match_valid_url(url).group('id', 'type') display_id, video_type = self._match_valid_url(url).group('id', 'type')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
common_data = traverse_obj( common_data = get_first(self._search_nextjs_v13_data(webpage, display_id), ('payload', {dict}))
re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
(..., {json.loads}, ..., {self._find_json},
lambda _, v: v['payload'][video_type]['slug'] == display_id,
'payload', any, {require('video data')}))
if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})): if traverse_obj(common_data, (video_type, 'video', 'drm', {bool})):
self.report_drm(display_id) self.report_drm(display_id)

View File

@ -1,100 +0,0 @@
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
smuggle_url,
try_get,
)
class NoovoIE(InfoExtractor):
    """Extractor for noovo.ca video pages.

    The page only supplies metadata and a Brightcove video id; the actual
    media extraction is handed off to BrightcoveNewIE through a
    ``url_transparent`` result.
    """

    _VALID_URL = r'https?://(?:[^/]+\.)?noovo\.ca/videos/(?P<id>[^/]+/[^/?#&]+)'
    _TESTS = [{
        # clip
        'url': 'http://noovo.ca/videos/rpm-plus/chrysler-imperial',
        'info_dict': {
            'id': '5386045029001',
            'ext': 'mp4',
            'title': 'Chrysler Imperial',
            'description': 'md5:de3c898d1eb810f3e6243e08c8b4a056',
            'timestamp': 1491399228,
            'upload_date': '20170405',
            'uploader_id': '618566855001',
            'series': 'RPM+',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # episode
        'url': 'http://noovo.ca/videos/l-amour-est-dans-le-pre/episode-13-8',
        'info_dict': {
            'id': '5395865725001',
            'title': 'Épisode 13 : Les retrouvailles',
            'description': 'md5:888c3330f0c1b4476c5bc99a1c040473',
            'ext': 'mp4',
            'timestamp': 1492019320,
            'upload_date': '20170412',
            'uploader_id': '618566855001',
            'series': "L'amour est dans le pré",
            'season_number': 5,
            'episode': 'Épisode 13',
            'episode_number': 13,
        },
        'params': {
            'skip_download': True,
        },
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/618566855001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build a url_transparent result pointing at the Brightcove player.

        The Brightcove id is mandatory (the regex search is fatal). All other
        metadata is best-effort: it is read from the page's
        ``dataLayer.push({...})`` payload when that parses, with HTML
        meta tags / markup as the fallback for title and series.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        brightcove_id = self._search_regex(
            r'data-video-id=["\'](\d+)', webpage, 'brightcove id')

        # The dataLayer payload is a JS object literal, hence js_to_json.
        # A missing payload falls back to '{}'; a malformed one is tolerated
        # because parsing is non-fatal.
        data_layer_src = self._search_regex(
            r'(?s)dataLayer\.push\(\s*({.+?})\s*\);', webpage, 'data',
            default='{}')
        page_data = self._parse_json(
            data_layer_src, video_id, transform_source=js_to_json, fatal=False)

        title = try_get(page_data, lambda d: d['video']['nom'], str)
        if not title:
            title = self._html_search_meta(
                'dcterms.Title', webpage, 'title', fatal=True)

        description = self._html_search_meta(
            ('dcterms.Description', 'description'), webpage, 'description')

        series = try_get(page_data, lambda d: d['emission']['nom'])
        if not series:
            series = self._search_regex(
                r'<div[^>]+class="banner-card__subtitle h4"[^>]*>([^<]+)',
                webpage, 'series', default=None)

        # Season info is nested under the show ("emission"), and the episode
        # is nested under the season; each level degrades to an empty dict.
        season_info = try_get(
            page_data, lambda d: d['emission']['saison'], dict) or {}
        episode_info = try_get(
            season_info, lambda s: s['episode'], dict) or {}

        info = {
            '_type': 'url_transparent',
            'ie_key': BrightcoveNewIE.ie_key(),
            'url': smuggle_url(
                self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
                {'geo_countries': ['CA']}),
            'id': brightcove_id,
            'title': title,
            'description': description,
            'series': series,
        }
        for prefix, node in (('season', season_info), ('episode', episode_info)):
            # "nom" must be a string to be used; "numero" is coerced to int
            info[prefix] = try_get(node, lambda n: n['nom'], str)
            info[f'{prefix}_number'] = int_or_none(node.get('numero'))
        return info

View File

@ -73,163 +73,179 @@ def _parse_fragment(url):
class PanoptoIE(PanoptoBaseIE): class PanoptoIE(PanoptoBaseIE):
_VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)' _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
_EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{PanoptoBaseIE.BASE_URL_RE}/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)'] _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{PanoptoBaseIE.BASE_URL_RE}/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)']
_TESTS = [ _TESTS = [{
{ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', 'info_dict': {
'info_dict': { 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', 'title': 'Panopto for Business - Use Cases',
'title': 'Panopto for Business - Use Cases', 'timestamp': 1459184200,
'timestamp': 1459184200, 'thumbnail': r're:https?://demo\.hosted\.panopto\.com/.+',
'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+', 'upload_date': '20160328',
'upload_date': '20160328', 'ext': 'mp4',
'ext': 'mp4', 'cast': [],
'cast': [], 'chapters': [],
'chapters': [], 'duration': 88.17099999999999,
'duration': 88.17099999999999, 'average_rating': int,
'average_rating': int, 'tags': [],
'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e', 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', 'channel_id': 'bb0b58ff-b31b-47a0-9aa2-af6f0113613a',
'channel': 'Showcase Videos', 'channel': 'Product',
},
}, },
{ }, {
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59', 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
'info_dict': { 'info_dict': {
'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59', 'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
'title': 'Overcoming Top 4 Challenges of Enterprise Video', 'title': 'Overcoming Top 4 Challenges of Enterprise Video',
'uploader': 'Panopto Support', 'uploader': 'Panopto Support',
'timestamp': 1449409251, 'timestamp': 1449409251,
'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+', 'thumbnail': r're:https?://demo\.hosted\.panopto\.com/.+',
'upload_date': '20151206', 'upload_date': '20151206',
'ext': 'mp4', 'ext': 'mp4',
'chapters': 'count:12', 'chapters': 'count:13',
'cast': ['Panopto Support'], 'cast': ['Panopto Support'],
'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c', 'tags': [],
'average_rating': int, 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
'description': 'md5:4391837802b3fc856dadf630c4b375d1', 'average_rating': int,
'duration': 1088.2659999999998, 'description': 'md5:4391837802b3fc856dadf630c4b375d1',
'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546', 'duration': 1088.2659999999998,
'channel': 'Webcasts', 'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546',
}, 'channel': 'Webcasts',
}, },
{ }, {
# Extra params in URL # Extra params in URL
'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true', 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
'info_dict': { 'info_dict': {
'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed', 'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed',
'ext': 'mp4', 'ext': 'mp4',
'duration': 129.513, 'duration': 129.513,
'cast': ['Kathryn Kelly'], 'cast': ['Kathryn Kelly'],
'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56', 'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
'timestamp': 1569845768, 'timestamp': 1569845768,
'tags': ['Viewer', 'Enterprise'], 'tags': ['Viewer', 'Enterprise'],
'chapters': [], 'chapters': [],
'upload_date': '20190930', 'upload_date': '20190930',
'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/.+', 'thumbnail': r're:https?://howtovideos\.hosted\.panopto\.com/.+',
'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f', 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f',
'title': 'Getting Started: View a Video', 'title': 'Getting Started: View a Video',
'average_rating': int, 'average_rating': int,
'uploader': 'Kathryn Kelly', 'uploader': 'Kathryn Kelly',
'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3', 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3',
'channel': 'Getting Started', 'channel': 'Getting Started',
},
}, },
{ 'skip': 'Invalid URL',
# Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream. }, {
'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4', # Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream.
'info_dict': { 'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4', 'info_dict': {
'ext': 'mp4', 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
'cast': ['LTS CLI Script'], 'ext': 'mp4',
'chapters': [], 'cast': ['LTS CLI Script'],
'duration': 2178.45, 'chapters': [],
'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa', 'duration': 2178.45,
'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8', 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa',
'average_rating': int, 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8',
'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7', 'average_rating': int,
'uploader': 'LTS CLI Script', 'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7',
'timestamp': 1572458134, 'uploader': 'LTS CLI Script',
'title': 'WW2 Vets Interview 3 Ronald Stanley George', 'tags': [],
'thumbnail': r're:https://unisa\.au\.panopto\.com/.+', 'timestamp': 1572458134,
'channel': 'World War II Veteran Interviews', 'title': 'WW2 Vets Interview 3 Ronald Stanley George',
'upload_date': '20191030', 'thumbnail': r're:https?://unisa\.au\.panopto\.com/.+',
}, 'channel': 'World War II Veteran Interviews',
'upload_date': '20191030',
}, },
{ }, {
# Slides/storyboard # Slides/storyboard
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=a7f12f1d-3872-4310-84b0-f8d8ab15326b', 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=a7f12f1d-3872-4310-84b0-f8d8ab15326b',
'info_dict': { 'info_dict': {
'id': 'a7f12f1d-3872-4310-84b0-f8d8ab15326b', 'id': 'a7f12f1d-3872-4310-84b0-f8d8ab15326b',
'ext': 'mhtml', 'ext': 'mhtml',
'timestamp': 1448798857, 'timestamp': 1448798857,
'duration': 4712.681, 'duration': 4712.681,
'title': 'Cache Memory - CompSci 15-213, Lecture 12', 'title': 'Cache Memory - CompSci 15-213, Lecture 12',
'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', 'channel_id': '0202d932-6d28-4fb2-b373-af6f0121c8f0',
'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c', 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
'upload_date': '20151129', 'upload_date': '20151129',
'average_rating': 0, 'average_rating': 0,
'uploader': 'Panopto Support', 'uploader': 'Panopto Support',
'channel': 'Showcase Videos', 'channel': 'Customer Demonstrations',
'description': 'md5:55e51d54233ddb0e6c2ed388ca73822c', 'description': 'md5:55e51d54233ddb0e6c2ed388ca73822c',
'cast': ['ISR Videographer', 'Panopto Support'], 'cast': ['ISR Videographer', 'Panopto Support'],
'chapters': 'count:28', 'chapters': 'count:28',
'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+', 'tags': [],
}, 'thumbnail': r're:https?://demo\.hosted\.panopto\.com/.+',
'params': {'format': 'mhtml', 'skip_download': True},
}, },
{ 'params': {'format': 'mhtml', 'skip_download': True},
'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9', }, {
'info_dict': { 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9',
'id': '8285224a-9a2b-4957-84f2-acb0000c4ea9', 'info_dict': {
'ext': 'mp4', 'id': '8285224a-9a2b-4957-84f2-acb0000c4ea9',
'chapters': [], 'ext': 'mp4',
'title': 'Company Policy', 'chapters': [],
'average_rating': 0, 'title': 'Company Policy',
'timestamp': 1615058901, 'average_rating': 0,
'channel': 'Human Resources', 'timestamp': 1615058901,
'tags': ['HumanResources'], 'channel': 'Human Resources',
'duration': 1604.243, 'tags': ['HumanResources'],
'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+', 'duration': 1604.243,
'uploader_id': '8e8ba0a3-424f-40df-a4f1-ab3a01375103', 'thumbnail': r're:https?://na-training-1\.hosted\.panopto\.com/.+',
'uploader': 'Cait M.', 'uploader_id': '8e8ba0a3-424f-40df-a4f1-ab3a01375103',
'upload_date': '20210306', 'uploader': 'Cait M.',
'cast': ['Cait M.'], 'upload_date': '20210306',
'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}], 'cast': ['Cait M.'],
'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}]}, },
}, 'params': {'writesubtitles': True, 'skip_download': True},
'params': {'writesubtitles': True, 'skip_download': True}, }, {
}, { # On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped.
# On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped. 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b',
'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b', 'info_dict': {
'info_dict': { 'id': '940cbd41-f616-4a45-b13e-aaf1000c915b',
'id': '940cbd41-f616-4a45-b13e-aaf1000c915b', 'ext': 'mp4',
'ext': 'mp4', 'subtitles': 'count:1',
'subtitles': 'count:1', 'title': 'HR Benefits Review Meeting*',
'title': 'HR Benefits Review Meeting*', 'cast': ['Panopto Support'],
'cast': ['Panopto Support'], 'chapters': [],
'chapters': [], 'timestamp': 1575024251,
'timestamp': 1575024251, 'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+',
'thumbnail': r're:https://na-training-1\.hosted\.panopto\.com/.+', 'channel': 'Zoom',
'channel': 'Zoom', 'description': 'md5:04f90a9c2c68b7828144abfb170f0106',
'description': 'md5:04f90a9c2c68b7828144abfb170f0106', 'uploader': 'Panopto Support',
'uploader': 'Panopto Support', 'average_rating': 0,
'average_rating': 0, 'duration': 409.34499999999997,
'duration': 409.34499999999997, 'tags': [],
'uploader_id': 'b6ac04ad-38b8-4724-a004-a851004ea3df', 'uploader_id': 'b6ac04ad-38b8-4724-a004-a851004ea3df',
'upload_date': '20191129', 'upload_date': '20191129',
},
'params': {'writesubtitles': True, 'skip_download': True},
}, },
{ 'params': {'writesubtitles': True, 'skip_download': True},
'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb', }, {
'only_matching': True, 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb',
'only_matching': True,
}, {
'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
'info_dict': {
'id': '0bd3f16c-824a-436a-8486-ac5900693aef',
'ext': 'mp4',
'title': 'Quizzes in Panopto',
'average_rating': 0,
'cast': ['Stephanie Luo'],
'chapters': 'count:8',
'channel': 'Panopto',
'description': 'md5:731ce802eee75808b1181db1ff1b5002',
'duration': 185.833,
'tags': [],
'thumbnail': r're:https?://monash\.au\.panopto\.com/.+',
'timestamp': 1607562188,
'upload_date': '20201210',
'uploader': 'Stephanie Luo',
'uploader_id': 'b18ca46d-20df-4ff5-b0b3-aa7a00085617',
}, },
{ 'params': {'extractor_args': {'generic': {'impersonate': ['chrome']}}},
'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638', }]
'only_matching': True,
},
]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
@ -423,27 +439,23 @@ def _real_extract(self, url):
class PanoptoPlaylistIE(PanoptoBaseIE): class PanoptoPlaylistIE(PanoptoBaseIE):
_VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)' _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)'
_TESTS = [ _TESTS = [{
{ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true', 'info_dict': {
'info_dict': { 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36',
'title': 'Featured Video Tutorials', 'title': 'Featured Video Tutorials',
'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36', 'description': '',
'description': '',
},
'playlist_mincount': 36,
}, },
{ 'playlist_mincount': 19,
'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190', }, {
'info_dict': { 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190',
'title': 'Library Website Introduction Playlist', 'info_dict': {
'id': 'e2900555-3ad4-4bdb-854d-ad2401686190', 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190',
'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb', 'title': 'Library Website Introduction Playlist',
}, 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb',
'playlist_mincount': 4,
}, },
'playlist_mincount': 4,
] }]
def _entries(self, base_url, playlist_id, session_list_id): def _entries(self, base_url, playlist_id, session_list_id):
session_list_info = self._call_api( session_list_info = self._call_api(
@ -486,35 +498,29 @@ def _real_extract(self, url):
class PanoptoListIE(PanoptoBaseIE): class PanoptoListIE(PanoptoBaseIE):
_VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/Sessions/List\.aspx' _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/Sessions/List\.aspx'
_PAGE_SIZE = 250 _PAGE_SIZE = 250
_TESTS = [ _TESTS = [{
{ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22',
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22', 'info_dict': {
'info_dict': { 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', 'title': 'Showcase Videos',
'title': 'Showcase Videos',
},
'playlist_mincount': 140,
}, },
{ 'playlist_mincount': 8,
'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250', }, {
'info_dict': { 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250',
'id': 'panopto_list', 'info_dict': {
'title': 'panopto_list', 'id': 'panopto_list',
}, 'title': 'panopto_list',
'playlist_mincount': 300,
}, },
{ 'playlist_mincount': 300,
# Folder that contains 8 folders and a playlist }, {
'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22', # Folder that contains 8 folders and a playlist
'info_dict': { 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22',
'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e', 'info_dict': {
'title': 'Video Tutorials', 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e',
}, 'title': 'Video Tutorials',
'playlist_mincount': 9,
}, },
'playlist_mincount': 9,
] }]
def _fetch_page(self, base_url, query_params, display_id, page): def _fetch_page(self, base_url, query_params, display_id, page):

View File

@ -1,63 +1,63 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import parse_duration, parse_iso8601, url_or_none
int_or_none, from ..utils.traversal import traverse_obj
try_get,
unified_timestamp,
)
class ParlviewIE(InfoExtractor): class ParlviewIE(InfoExtractor):
_WORKING = False _VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
_TESTS = [{ _TESTS = [{
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661', 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614',
'info_dict': { 'info_dict': {
'id': '542661', 'id': '3406614',
'ext': 'mp4', 'ext': 'mp4',
'title': "Australia's Family Law System [Part 2]", 'title': 'Senate Chamber',
'duration': 5799, 'description': 'Official Recording of Senate Proceedings from the Australian Parliament',
'description': 'md5:7099883b391619dbae435891ca871a62', 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg',
'timestamp': 1621430700, 'upload_date': '20250325',
'upload_date': '20210519', 'duration': 17999,
'uploader': 'Joint Committee', 'timestamp': 1742939400,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936', 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv',
'only_matching': True, 'info_dict': {
'id': 'SV1394.dv',
'ext': 'mp4',
'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]',
'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament',
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg',
'upload_date': '19960822',
'duration': 14765,
'timestamp': 840754200,
},
'params': {
'skip_download': True,
},
}] }]
_API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
_MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) video_details = self._download_json(
media = self._download_json(self._API_URL % video_id, video_id).get('media') f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails']
timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/'
stream = try_get(media, lambda x: x['renditions'][0], dict) formats, subtitles = self._extract_m3u8_formats_and_subtitles(
if not stream: video_details['files']['file']['url'], video_id, 'mp4')
self.raise_no_formats('No streams were detected')
elif stream.get('streamType') != 'VOD':
self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType'))))
formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
media_info = self._download_webpage( DURATION_RE = re.compile(r'(?P<duration>\d+:\d+:\d+):\d+')
self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
return { return {
'id': video_id, 'id': video_id,
'url': url,
'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
'formats': formats, 'formats': formats,
'duration': int_or_none(media.get('duration')), 'subtitles': subtitles,
'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')), **traverse_obj(video_details, {
'description': self._html_search_regex( 'title': (('parlViewTitle', 'title'), {str}, any),
r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)', 'description': ('parlViewDescription', {str}),
webpage, 'description', fatal=False), 'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}),
'uploader': self._html_search_regex( 'timestamp': ('recordingFrom', {parse_iso8601}),
r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False), 'thumbnail': ('thumbUrl', {url_or_none}),
'thumbnail': media.get('staticImage'), }),
} }

View File

@ -19,7 +19,7 @@
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj, value from ..utils.traversal import require, traverse_obj, value
class PatreonBaseIE(InfoExtractor): class PatreonBaseIE(InfoExtractor):
@ -462,7 +462,7 @@ class PatreonCampaignIE(PatreonBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:www\.)?patreon\.com/(?: https?://(?:www\.)?patreon\.com/(?:
(?:m|api/campaigns)/(?P<campaign_id>\d+)| (?:m|api/campaigns)/(?P<campaign_id>\d+)|
(?:c/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+) (?:cw?/)?(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
)(?:/posts)?/?(?:$|[?#])''' )(?:/posts)?/?(?:$|[?#])'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.patreon.com/dissonancepod/', 'url': 'https://www.patreon.com/dissonancepod/',
@ -531,6 +531,28 @@ class PatreonCampaignIE(PatreonBaseIE):
'age_limit': 0, 'age_limit': 0,
}, },
'playlist_mincount': 331, 'playlist_mincount': 331,
'skip': 'Channel removed',
}, {
# next.js v13 data, see https://github.com/yt-dlp/yt-dlp/issues/13622
'url': 'https://www.patreon.com/c/anythingelse/posts',
'info_dict': {
'id': '9631148',
'title': 'Anything Else?',
'description': 'md5:2ee1db4aed2f9460c2b295825a24aa08',
'uploader': 'dan ',
'uploader_id': '13852412',
'uploader_url': 'https://www.patreon.com/anythingelse',
'channel': 'Anything Else?',
'channel_id': '9631148',
'channel_url': 'https://www.patreon.com/anythingelse',
'channel_follower_count': int,
'age_limit': 0,
'thumbnail': r're:https?://.+/.+',
},
'playlist_mincount': 151,
}, {
'url': 'https://www.patreon.com/cw/anythingelse',
'only_matching': True,
}, { }, {
'url': 'https://www.patreon.com/c/OgSog/posts', 'url': 'https://www.patreon.com/c/OgSog/posts',
'only_matching': True, 'only_matching': True,
@ -572,8 +594,11 @@ def _real_extract(self, url):
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
if campaign_id is None: if campaign_id is None:
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent}) webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent})
campaign_id = self._search_nextjs_data( campaign_id = traverse_obj(self._search_nextjs_data(webpage, vanity, default=None), (
webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] 'props', 'pageProps', 'bootstrapEnvelope', 'pageBootstrap', 'campaign', 'data', 'id', {str}))
if not campaign_id:
campaign_id = traverse_obj(self._search_nextjs_v13_data(webpage, vanity), (
lambda _, v: v['type'] == 'campaign', 'id', {str}, any, {require('campaign ID')}))
params = { params = {
'json-api-use-default-includes': 'false', 'json-api-use-default-includes': 'false',

View File

@ -1331,7 +1331,7 @@ class PeerTubeIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'What is PeerTube?', 'title': 'What is PeerTube?',
'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10', 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
'thumbnail': r're:https?://.*\.(?:jpg|png)', 'thumbnail': r're:https?://framatube\.org/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1538391166, 'timestamp': 1538391166,
'upload_date': '20181001', 'upload_date': '20181001',
'uploader': 'Framasoft', 'uploader': 'Framasoft',
@ -1346,19 +1346,34 @@ class PeerTubeIE(InfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
'tags': ['framasoft', 'peertube'], 'tags': 'count:2',
'categories': ['Science & Technology'], 'categories': ['Science & Technology'],
}, },
'expected_warnings': ['HTTP Error 400: Bad Request'],
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e', 'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
'info_dict': { 'info_dict': {
'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e', 'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
'ext': 'mp4', 'ext': 'mp4',
'title': 'E2E tests', 'title': 'E2E tests',
'uploader_id': '37855', 'categories': ['Unknown'],
'channel': 'Main chocobozzz channel',
'channel_id': '5187',
'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
'description': 'md5:67daf92c833c41c95db874e18fcb2786',
'dislike_count': int,
'duration': 52,
'license': 'Unknown',
'like_count': int,
'tags': [],
'thumbnail': r're:https?://peertube2\.cpy\.re/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1589276219, 'timestamp': 1589276219,
'upload_date': '20200512', 'upload_date': '20200512',
'uploader': 'chocobozzz', 'uploader': 'chocobozzz',
'uploader_id': '37855',
'uploader_url': 'https://peertube2.cpy.re/accounts/chocobozzz',
'view_count': int,
}, },
}, { }, {
'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd', 'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
@ -1366,10 +1381,23 @@ class PeerTubeIE(InfoExtractor):
'id': '3fbif9S3WmtTP8gGsC5HBd', 'id': '3fbif9S3WmtTP8gGsC5HBd',
'ext': 'mp4', 'ext': 'mp4',
'title': 'E2E tests', 'title': 'E2E tests',
'uploader_id': '37855', 'categories': ['Unknown'],
'channel': 'Main chocobozzz channel',
'channel_id': '5187',
'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
'description': 'md5:67daf92c833c41c95db874e18fcb2786',
'dislike_count': int,
'duration': 52,
'license': 'Unknown',
'like_count': int,
'tags': [],
'thumbnail': r're:https?://peertube2\.cpy\.re/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1589276219, 'timestamp': 1589276219,
'upload_date': '20200512', 'upload_date': '20200512',
'uploader': 'chocobozzz', 'uploader': 'chocobozzz',
'uploader_id': '37855',
'uploader_url': 'https://peertube2.cpy.re/accounts/chocobozzz',
'view_count': int,
}, },
}, { }, {
'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd', 'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
@ -1377,13 +1405,26 @@ class PeerTubeIE(InfoExtractor):
'id': '3fbif9S3WmtTP8gGsC5HBd', 'id': '3fbif9S3WmtTP8gGsC5HBd',
'ext': 'mp4', 'ext': 'mp4',
'title': 'E2E tests', 'title': 'E2E tests',
'uploader_id': '37855', 'categories': ['Unknown'],
'channel': 'Main chocobozzz channel',
'channel_id': '5187',
'channel_url': 'https://peertube2.cpy.re/video-channels/chocobozzz_channel',
'description': 'md5:67daf92c833c41c95db874e18fcb2786',
'dislike_count': int,
'duration': 52,
'license': 'Unknown',
'like_count': int,
'tags': [],
'thumbnail': r're:https?://peertube2\.cpy\.re/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1589276219, 'timestamp': 1589276219,
'upload_date': '20200512', 'upload_date': '20200512',
'uploader': 'chocobozzz', 'uploader': 'chocobozzz',
'uploader_id': '37855',
'uploader_url': 'https://peertube2.cpy.re/accounts/chocobozzz',
'view_count': int,
}, },
}, { }, {
# Issue #26002 # https://github.com/ytdl-org/youtube-dl/issues/26002
'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc', 'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
'info_dict': { 'info_dict': {
'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc', 'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
@ -1394,6 +1435,7 @@ class PeerTubeIE(InfoExtractor):
'upload_date': '20200420', 'upload_date': '20200420',
'uploader': 'Drew DeVault', 'uploader': 'Drew DeVault',
}, },
'skip': 'Invalid URL',
}, { }, {
'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', 'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
'only_matching': True, 'only_matching': True,
@ -1411,6 +1453,33 @@ class PeerTubeIE(InfoExtractor):
'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205', 'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://video.macver.org/w/6gvhZpUGQVd4SQ6oYDc9pC',
'info_dict': {
'id': '6gvhZpUGQVd4SQ6oYDc9pC',
'ext': 'mp4',
'title': 'Minecraft, but if you say a block, it gets deleted',
'categories': ['Gaming'],
'channel': 'Waffle Irons Gaming',
'channel_id': '4',
'channel_url': 'https://video.macver.org/video-channels/waffle_irons',
'description': 'md5:eda8daf64b0dadd00cc248f28eef213c',
'dislike_count': int,
'duration': 1643,
'license': 'Attribution - Non Commercial',
'like_count': int,
'tags': 'count:1',
'thumbnail': r're:https?://video\.macver\.org/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1751142352,
'upload_date': '20250628',
'uploader': 'Bog',
'uploader_id': '3',
'uploader_url': 'https://video.macver.org/accounts/bog',
'view_count': int,
},
'expected_warnings': ['HTTP Error 400: Bad Request', 'Ignoring subtitle tracks found in the HLS manifest'],
'params': {'skip_download': 'm3u8'},
}]
@staticmethod @staticmethod
def _extract_peertube_url(webpage, source_url): def _extract_peertube_url(webpage, source_url):
@ -1580,31 +1649,47 @@ class PeerTubePlaylistIE(InfoExtractor):
'id': 'hFdJoTuyhNJVa1cDWd1d12', 'id': 'hFdJoTuyhNJVa1cDWd1d12',
'description': 'Diversas palestras do Richard Stallman no Brasil.', 'description': 'Diversas palestras do Richard Stallman no Brasil.',
'title': 'Richard Stallman no Brasil', 'title': 'Richard Stallman no Brasil',
'channel': 'debianbrazilteam',
'channel_id': 1522,
'thumbnail': r're:https?://peertube\.debian\.social/lazy-static/thumbnails/.+\.jpg',
'timestamp': 1599676222, 'timestamp': 1599676222,
'upload_date': '20200909',
}, },
'playlist_mincount': 9, 'playlist_mincount': 9,
}, { }, {
'url': 'https://peertube2.cpy.re/a/chocobozzz/videos', 'url': 'https://peertube2.cpy.re/a/chocobozzz/videos',
'info_dict': { 'info_dict': {
'id': 'chocobozzz', 'id': 'chocobozzz',
'timestamp': 1553874564,
'title': 'chocobozzz', 'title': 'chocobozzz',
'channel': 'chocobozzz',
'channel_id': 37855,
'thumbnail': '',
'timestamp': 1553874564,
'upload_date': '20190329',
}, },
'playlist_mincount': 2, 'playlist_mincount': 2,
}, { }, {
'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos', 'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos',
'info_dict': { 'info_dict': {
'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8', 'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8',
'timestamp': 1519917377,
'title': 'Les vidéos de Framasoft', 'title': 'Les vidéos de Framasoft',
'channel': 'framasoft',
'channel_id': 3,
'thumbnail': '',
'timestamp': 1519917377,
'upload_date': '20180301',
}, },
'playlist_mincount': 345, 'playlist_mincount': 345,
}, { }, {
'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos', 'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos',
'info_dict': { 'info_dict': {
'id': 'blender_open_movies@video.blender.org', 'id': 'blender_open_movies@video.blender.org',
'timestamp': 1542287810,
'title': 'Official Blender Open Movies', 'title': 'Official Blender Open Movies',
'channel': 'blender',
'channel_id': 1926,
'thumbnail': '',
'timestamp': 1540472902,
'upload_date': '20181025',
}, },
'playlist_mincount': 11, 'playlist_mincount': 11,
}] }]

View File

@ -0,0 +1,70 @@
from .common import InfoExtractor
from ..utils import clean_html, clean_podcast_url, int_or_none, str_or_none, url_or_none
from ..utils.traversal import traverse_obj
class PlayerFmIE(InfoExtractor):
    # The whole matched URL is captured as "url" so the JSON endpoint can be
    # derived from it; the last path segment is captured as "id"
    _VALID_URL = r'(?P<url>https?://(?:www\.)?player\.fm/(?:series/)?[\w-]+/(?P<id>[\w-]+))'
    _TESTS = [{
        'url': 'https://player.fm/series/chapo-trap-house/movie-mindset-33-casino-feat-felix',
        'info_dict': {
            'ext': 'mp3',
            'id': '478606546',
            'display_id': 'movie-mindset-33-casino-feat-felix',
            'thumbnail': r're:^https://.*\.(jpg|png)',
            'title': 'Movie Mindset 33 - Casino feat. Felix',
            'creators': ['Chapo Trap House'],
            'description': r're:The first episode of this season of Movie Mindset is free .+ we feel about it\.',
            'duration': 6830,
            'timestamp': 1745406000,
            'upload_date': '20250423',
        },
    }, {
        'url': 'https://player.fm/series/nbc-nightly-news-with-tom-llamas/thursday-april-17-2025',
        'info_dict': {
            'ext': 'mp3',
            'id': '477635490',
            'display_id': 'thursday-april-17-2025',
            'title': 'Thursday, April 17, 2025',
            'thumbnail': r're:^https://.*\.(jpg|png)',
            'duration': 1143,
            'description': 'md5:4890b8cf9a55a787561cd5d59dfcda82',
            'creators': ['NBC News'],
            'timestamp': 1744941374,
            'upload_date': '20250418',
        },
    }, {
        'url': 'https://player.fm/series/soccer-101/ep-109-its-kicking-off-how-have-the-rules-for-kickoff-changed-what-are-the-best-approaches-to-getting-the-game-underway-and-how-could-we-improve-on-the-present-system-ack3NzL3yibvs4pf',
        'info_dict': {
            'ext': 'mp3',
            'id': '481418710',
            'thumbnail': r're:^https://.*\.(jpg|png)',
            'title': r're:#109 It\'s kicking off! How have the rules for kickoff changed, .+ the present system\?',
            'creators': ['TSS'],
            'duration': 1510,
            'display_id': 'md5:b52ecacaefab891b59db69721bfd9b13',
            'description': 'md5:52a39e36d08d8919527454f152ad3c25',
            'timestamp': 1659102055,
            'upload_date': '20220729',
        },
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        # Episode metadata is fetched from the matched page URL with ".json" appended
        episode = self._download_json(f'{mobj.group("url")}.json', display_id)

        # 'vcodec': 'none' marks the result as having no video stream
        info = {
            'display_id': display_id,
            'vcodec': 'none',
        }
        info.update(traverse_obj(episode, {
            'id': ('id', {int}, {str_or_none}),
            'url': ('url', {clean_podcast_url}),
            'title': ('title', {str}),
            'description': ('description', {clean_html}),
            'duration': ('duration', {int_or_none}),
            # Prefer the episode image, fall back to the series image
            'thumbnail': (('image', ('series', 'image')), 'url', {url_or_none}, any),
            'filesize': ('size', {int_or_none}),
            'timestamp': ('publishedAt', {int_or_none}),
            'creators': ('series', 'author', {str}, filter, all, filter),
        }))
        return info

View File

@ -19,6 +19,7 @@ class PlaywireIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.png$', 'thumbnail': r're:^https?://.*\.png$',
'duration': 145.94, 'duration': 145.94,
}, },
'skip': 'Invalid URL',
}, { }, {
# m3u8 in f4m # m3u8 in f4m
'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
@ -27,10 +28,7 @@ class PlaywireIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'ITV EL SHOW FULL', 'title': 'ITV EL SHOW FULL',
}, },
'params': { 'skip': 'Invalid URL',
# m3u8 download
'skip_download': True,
},
}, { }, {
# Multiple resolutions while bitrates missing # Multiple resolutions while bitrates missing
'url': 'http://cdn.playwire.com/11625/embed/85228.html', 'url': 'http://cdn.playwire.com/11625/embed/85228.html',
@ -42,6 +40,15 @@ class PlaywireIE(InfoExtractor):
'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json', 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
'info_dict': {
'id': '3519514',
'ext': 'mp4',
'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
},
'skip': 'Site no longer embeds Playwire',
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)

104
yt_dlp/extractor/plyr.py Normal file
View File

@ -0,0 +1,104 @@
import re
from .common import InfoExtractor
from .vimeo import VimeoIE
class PlyrEmbedIE(InfoExtractor):
    # Embed-only extractor: it has no URL pattern of its own and only
    # contributes embed URLs found in other webpages
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1"
        'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/',
        'info_dict': {
            'id': '522319456',
            'ext': 'mp4',
            'title': '200.000.000 Mouths (195051)',
            'uploader': 'Zeughauskino',
            'uploader_url': '',
            'comment_count': int,
            'like_count': int,
            'duration': 963,
            'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d',
            'timestamp': 1615467405,
            'upload_date': '20210311',
            'release_timestamp': 1615467405,
            'release_date': '20210311',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-provider="vimeo" data-plyr-embed-id="803435276"
        'url': 'https://www.inarcassa.it/',
        'info_dict': {
            'id': '803435276',
            'ext': 'mp4',
            'title': 'HOME_Moto_Perpetuo',
            'uploader': 'Inarcassa',
            'uploader_url': '',
            'duration': 38,
            'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI"
        'url': 'https://www.profile.nl',
        'info_dict': {
            'id': 'GF-BjYKoAqI',
            'ext': 'mp4',
            'title': 'PROFILE: Recruitment Profile',
            'description': '',
            'media_type': 'video',
            'uploader': 'Profile Nederland',
            'uploader_id': '@profilenederland',
            'uploader_url': 'https://www.youtube.com/@profilenederland',
            'channel': 'Profile Nederland',
            'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_follower_count': int,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 39,
            'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg',
            'categories': ['Autos & Vehicles'],
            'tags': [],
            'timestamp': 1675692990,
            'upload_date': '20230206',
            'playable_in_embed': True,
            'availability': 'public',
            'live_status': 'not_live',
        },
    }, {
        # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube"
        'url': 'https://www.vnis.edu.vn',
        'info_dict': {
            'id': 'vnis.edu',
            'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ',
            'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e',
            'age_limit': 0,
            'thumbnail': 'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png',
            'timestamp': 1753233356,
            'upload_date': '20250723',
        },
        'playlist_count': 3,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # A <div> must carry both data-plyr-embed-id and data-plyr-provider;
        # the two alternatives cover either attribute order
        for match in re.finditer(r'''(?x)
                <div[^>]+(?:
                    data-plyr-embed-id="(?P<id1>[^"]+)"[^>]+data-plyr-provider="(?P<provider1>[^"]+)"|
                    data-plyr-provider="(?P<provider2>[^"]+)"[^>]+data-plyr-embed-id="(?P<id2>[^"]+)"
                )[^>]*>''', webpage):
            embed_id = match.group('id1') or match.group('id2')
            provider = match.group('provider1') or match.group('provider2')
            # The embed id is either a full URL or a bare video id
            is_full_url = re.match(r'https?://', embed_id) is not None

            if provider == 'vimeo':
                vimeo_url = embed_id if is_full_url else f'https://player.vimeo.com/video/{embed_id}'
                # Pass the embedding page along with the Vimeo URL
                yield VimeoIE._smuggle_referrer(vimeo_url, url)
            elif provider == 'youtube':
                yield embed_id if is_full_url else f'https://youtube.com/watch?v={embed_id}'
            # Any other provider value yields nothing

View File

@ -81,7 +81,7 @@ def fix_cdata(s):
# geo flag is a bit unreliable and not properly set all the time # geo flag is a bit unreliable and not properly set all the time
geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y' geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y'
ext = determine_ext(media_url) ext = determine_ext(media_url).lower()
formats = [] formats = []
if ext == 'mp3': if ext == 'mp3':
@ -108,7 +108,7 @@ def fix_cdata(s):
'format_id': join_nonempty('https', bitrate, delim='-'), 'format_id': join_nonempty('https', bitrate, delim='-'),
}) })
else: else:
raise ExtractorError('Unrecognized media file found') raise ExtractorError(f'Unrecognized media extension "{ext}"')
if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url: if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
@ -503,6 +503,28 @@ class RaiPlaySoundIE(RaiBaseIE):
'upload_date': '20211201', 'upload_date': '20211201',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, {
# case-sensitivity test for uppercase extension
'url': 'https://www.raiplaysound.it/audio/2020/05/Storia--Lunita-dItalia-e-lunificazione-della-Germania-b4c16390-7f3f-4282-b353-d94897dacb7c.html',
'md5': 'c69ebd69282f0effd7ef67b7e2f6c7d8',
'info_dict': {
'id': 'b4c16390-7f3f-4282-b353-d94897dacb7c',
'ext': 'mp3',
'title': "Storia | 01 L'unità d'Italia e l'unificazione della Germania",
'alt_title': 'md5:ed4ed82585c52057b71b43994a59b705',
'description': 'md5:92818b6f31b2c150567d56b75db2ea7f',
'uploader': 'rai radio 3',
'duration': 2439.0,
'thumbnail': 'https://www.raiplaysound.it/dl/img/2023/09/07/1694084898279_Maturadio-LOGO-2048x1152.jpg',
'creators': ['rai radio 3'],
'series': 'Maturadio',
'season': 'Season 9',
'season_number': 9,
'episode': "01. L'unità d'Italia e l'unificazione della Germania",
'episode_number': 1,
'timestamp': 1590400740,
'upload_date': '20200525',
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -765,7 +787,7 @@ class RaiCulturaIE(RaiNewsIE): # XXX: Do not subclass from concrete IE
class RaiSudtirolIE(RaiBaseIE): class RaiSudtirolIE(RaiBaseIE):
_VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P<id>\w+)' _VALID_URL = r'https?://rai(?:bz|sudtirol)\.rai\.it/.+media=(?P<id>\w+)'
_TESTS = [{ _TESTS = [{
# mp4 file # mp4 file
'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460', 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460',
@ -791,6 +813,9 @@ class RaiSudtirolIE(RaiBaseIE):
'formats': 'count:6', 'formats': 'count:6',
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, {
'url': 'https://raibz.rai.it/de/index.php?media=Ptv1751660400',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -3,9 +3,9 @@
class RoyaLiveIE(InfoExtractor): class RoyaLiveIE(InfoExtractor):
_VALID_URL = r'https?://roya\.tv/live-stream/(?P<id>\d+)' _VALID_URL = r'https?://(?:en\.)?roya\.tv/live-stream/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://roya.tv/live-stream/1', 'url': 'https://en.roya.tv/live-stream/1',
'info_dict': { 'info_dict': {
'id': '1', 'id': '1',
'title': r're:Roya TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'title': r're:Roya TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}',

View File

@ -6,9 +6,11 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
InAdvancePagedList,
clean_html, clean_html,
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none,
make_archive_id, make_archive_id,
parse_iso8601, parse_iso8601,
qualities, qualities,
@ -371,3 +373,62 @@ def _real_extract(self, url):
raise ExtractorError('The webpage doesn\'t contain any video', expected=True) raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key()) return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
class RTVEProgramIE(RTVEBaseIE):
    IE_NAME = 'rtve.es:program'
    IE_DESC = 'RTVE.es programs'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.rtve.es/play/videos/saber-vivir/',
        'info_dict': {
            'id': '111570',
            'title': 'Saber vivir - Programa de ciencia y futuro en RTVE Play',
        },
        'playlist_mincount': 400,
    }]
    # Number of videos requested per API page. The same value is handed to
    # InAdvancePagedList in _real_extract, so both must stay in sync
    _PAGE_SIZE = 60

    def _fetch_page(self, program_id, page_num):
        """Download one page (1-based page_num) of the program's video listing as JSON"""
        return self._download_json(
            f'https://www.rtve.es/api/programas/{program_id}/videos',
            program_id, note=f'Downloading page {page_num}',
            query={
                # NOTE(review): meaning of type 39816 is not visible here - confirm against the API
                'type': 39816,
                'page': page_num,
                # Use the class constant instead of a duplicated literal
                'size': self._PAGE_SIZE,
            })

    def _entries(self, page_data):
        """Yield a url_transparent result for every listed item that has a valid htmlUrl"""
        for video in traverse_obj(page_data, ('page', 'items', lambda _, v: url_or_none(v['htmlUrl']))):
            yield self.url_result(
                video['htmlUrl'], RTVEALaCartaIE, url_transparent=True,
                **traverse_obj(video, {
                    'id': ('id', {str}),
                    'title': ('longTitle', {str}),
                    'description': ('shortDescription', {str}),
                    # presumably the API reports milliseconds - TODO confirm
                    'duration': ('duration', {float_or_none(scale=1000)}),
                    # NOTE(review): this branches over the top-level keys 'programInfo' and
                    # 'title' and takes the first string; verify that this is intended
                    # rather than the nested path programInfo -> title
                    'series': (('programInfo', 'title'), {str}, any),
                    'season_number': ('temporadaOrden', {int_or_none}),
                    'season_id': ('temporadaId', {str}),
                    'season': ('temporada', {str}),
                    'episode_number': ('episode', {int_or_none}),
                    'episode': ('title', {str}),
                    'thumbnail': ('thumbnail', {url_or_none}),
                }),
            )

    def _real_extract(self, url):
        program_slug = self._match_id(url)
        program_page = self._download_webpage(url, program_slug)

        # The numeric program ID used by the API is read from the page's DC.identifier meta tag
        program_id = self._html_search_meta('DC.identifier', program_page, 'Program ID', fatal=True)

        # The first page is fetched eagerly to learn the page count; fall back to a single page
        first_page = self._fetch_page(program_id, 1)
        page_count = traverse_obj(first_page, ('page', 'totalPages', {int})) or 1

        # Page indices are 0-based here while the API is 1-based;
        # index 0 reuses the already downloaded first page
        entries = InAdvancePagedList(
            lambda idx: self._entries(self._fetch_page(program_id, idx + 1) if idx else first_page),
            page_count, self._PAGE_SIZE)

        return self.playlist_result(entries, program_id, self._html_extract_title(program_page))

View File

@ -115,7 +115,6 @@ class RutubeIE(RutubeBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
'md5': '3d73fdfe5bb81b9aef139e22ef3de26a',
'info_dict': { 'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e', 'id': '3eac3b4561676c17df9132a9a1e62e3e',
'ext': 'mp4', 'ext': 'mp4',
@ -128,10 +127,11 @@ class RutubeIE(RutubeBaseIE):
'upload_date': '20131016', 'upload_date': '20131016',
'age_limit': 0, 'age_limit': 0,
'view_count': int, 'view_count': int,
'thumbnail': 'https://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'categories': ['Новости и СМИ'], 'categories': ['Новости и СМИ'],
'chapters': [], 'chapters': [],
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661',
'only_matching': True, 'only_matching': True,
@ -146,7 +146,6 @@ class RutubeIE(RutubeBaseIE):
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg', 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg',
'md5': '4fce7b4fcc7b1bcaa3f45eb1e1ad0dd7',
'info_dict': { 'info_dict': {
'id': '884fb55f07a97ab673c7d654553e0f48', 'id': '884fb55f07a97ab673c7d654553e0f48',
'ext': 'mp4', 'ext': 'mp4',
@ -163,6 +162,7 @@ class RutubeIE(RutubeBaseIE):
'categories': ['Видеоигры'], 'categories': ['Видеоигры'],
'chapters': [], 'chapters': [],
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/', 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/',
'info_dict': { 'info_dict': {
@ -171,7 +171,7 @@ class RutubeIE(RutubeBaseIE):
'chapters': 'count:4', 'chapters': 'count:4',
'categories': ['Бизнес и предпринимательство'], 'categories': ['Бизнес и предпринимательство'],
'description': 'md5:252feac1305257d8c1bab215cedde75d', 'description': 'md5:252feac1305257d8c1bab215cedde75d',
'thumbnail': 'https://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'duration': 782, 'duration': 782,
'age_limit': 0, 'age_limit': 0,
'uploader_id': '23491359', 'uploader_id': '23491359',
@ -181,6 +181,7 @@ class RutubeIE(RutubeBaseIE):
'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1', 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1',
'uploader': 'Стас Быков', 'uploader': 'Стас Быков',
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/', 'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/',
'info_dict': { 'info_dict': {
@ -188,16 +189,17 @@ class RutubeIE(RutubeBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'categories': ['Телепередачи'], 'categories': ['Телепередачи'],
'description': '', 'description': '',
'thumbnail': 'https://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg', 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'live_status': 'is_live', 'live_status': 'is_live',
'age_limit': 0, 'age_limit': 0,
'uploader_id': '23460655', 'uploader_id': '23460655',
'timestamp': 1652972968, 'timestamp': 1652972968,
'view_count': int, 'view_count': int,
'upload_date': '20220519', 'upload_date': '20220519',
'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': str,
'uploader': 'Первый канал', 'uploader': 'Первый канал',
}, },
'skip': 'Invalid URL',
}, { }, {
'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/', 'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/',
'info_dict': { 'info_dict': {
@ -211,11 +213,12 @@ class RutubeIE(RutubeBaseIE):
'duration': 293, 'duration': 293,
'uploader': 'MOEX - Московская биржа', 'uploader': 'MOEX - Московская биржа',
'timestamp': 1724946628, 'timestamp': 1724946628,
'thumbnail': 'https://pic.rutubelist.ru/video/2e/24/2e241fddb459baf0fa54acfca44874f4.jpg', 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'view_count': int, 'view_count': int,
'uploader_id': '38420507', 'uploader_id': '38420507',
'categories': ['Интервью'], 'categories': ['Интервью'],
}, },
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/', 'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/',
'only_matching': True, 'only_matching': True,
@ -223,6 +226,26 @@ class RutubeIE(RutubeBaseIE):
'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/', 'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'https://novate.ru/blogs/170625/73644/',
'info_dict': {
'id': 'b0c96c75a4e5b274721bbced6ed8fb64',
'ext': 'mp4',
'title': 'Где в России находится единственная в своем роде скальная торпедная батарея',
'age_limit': 0,
'categories': ['Наука'],
'chapters': [],
'description': 'md5:2ed82e6b81958a43da6fb4d56f949e1f',
'duration': 182,
'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'timestamp': 1749950158,
'upload_date': '20250615',
'uploader': 'Novate',
'uploader_id': '24044809',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -256,12 +279,10 @@ class RutubeEmbedIE(RutubeBaseIE):
'chapters': [], 'chapters': [],
'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b', 'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b',
'view_count': int, 'view_count': int,
'thumbnail': 'https://pic.rutubelist.ru/video/d3/03/d3031f4670a6e6170d88fb3607948418.jpg', 'thumbnail': r're:https?://pic\.rutubelist\.ru/video/.+\.(?:jpg|png)',
'categories': ['Сериалы'], 'categories': ['Сериалы'],
}, },
'params': { 'params': {'skip_download': 'm3u8'},
'skip_download': True,
},
}, { }, {
'url': 'https://rutube.ru/play/embed/8083783', 'url': 'https://rutube.ru/play/embed/8083783',
'only_matching': True, 'only_matching': True,

View File

@ -16,96 +16,88 @@ class RUTVIE(InfoExtractor):
) )
(?P<id>\d+) (?P<id>\d+)
''' '''
_EMBED_URLS = [ _EMBED_REGEX = [
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1',
r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)',
] ]
_TESTS = [ _TESTS = [{
{ 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724',
'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', 'info_dict': {
'info_dict': { 'id': '774471',
'id': '774471', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Монологи на все времена. Концерт',
'title': 'Монологи на все времена', 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5',
'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', 'duration': 2906,
'duration': 2906, 'thumbnail': r're:https?://cdn-st2\.smotrim\.ru/.+\.jpg',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, },
{ 'params': {'skip_download': 'm3u8'},
'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', }, {
'info_dict': { 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638',
'id': '774016', 'info_dict': {
'ext': 'mp4', 'id': '774016',
'title': 'Чужой в семье Сталина', 'ext': 'mp4',
'description': '', 'title': 'Чужой в семье Сталина',
'duration': 2539, 'description': '',
}, 'duration': 2539,
'params': {
# m3u8 download
'skip_download': True,
},
}, },
{ 'skip': 'Invalid URL',
'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', }, {
'info_dict': { 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000',
'id': '766888', 'info_dict': {
'ext': 'mp4', 'id': '766888',
'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', 'ext': 'mp4',
'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"',
'duration': 279, 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995',
}, 'duration': 279,
'params': { 'thumbnail': r're:https?://cdn-st2\.smotrim\.ru/.+\.jpg',
# m3u8 download
'skip_download': True,
},
}, },
{ 'params': {'skip_download': 'm3u8'},
'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', }, {
'info_dict': { 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169',
'id': '771852', 'info_dict': {
'ext': 'mp4', 'id': '771852',
'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', 'ext': 'mp4',
'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет',
'duration': 3096, 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8',
}, 'duration': 3096,
'params': { 'thumbnail': r're:https?://cdn-st2\.smotrim\.ru/.+\.jpg',
# m3u8 download
'skip_download': True,
},
}, },
{ 'params': {'skip_download': 'm3u8'},
'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', }, {
'info_dict': { 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014',
'id': '51499', 'info_dict': {
'ext': 'flv', 'id': '51499',
'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', 'ext': 'flv',
'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ',
}, 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c',
'skip': 'Translation has finished',
}, },
{ 'skip': 'Invalid URL',
'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/', }, {
'info_dict': { 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/',
'id': '21', 'info_dict': {
'ext': 'mp4', 'id': '21',
'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'ext': 'mp4',
'is_live': True, 'title': str,
}, 'is_live': True,
'params': {
# m3u8 download
'skip_download': True,
},
}, },
{ 'skip': 'Invalid URL',
'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/', }, {
'only_matching': True, 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
'url': 'http://istoriya-teatra.ru/news/item/f00/s05/n0000545/index.shtml',
'info_dict': {
'id': '1952012',
'ext': 'mp4',
'title': 'Новости культуры. Эфир от 10.10.2019 (23:30). Театр Сатиры отмечает день рождения премьерой',
'description': 'md5:fced27112ff01ff8fc4a452fc088bad6',
'duration': 191,
'thumbnail': r're:https?://cdn-st2\.smotrim\.ru/.+\.jpg',
}, },
] 'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)

View File

@ -18,6 +18,7 @@
class RuutuIE(InfoExtractor): class RuutuIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
@ -26,112 +27,111 @@ class RuutuIE(InfoExtractor):
) )
(?P<id>\d+) (?P<id>\d+)
''' '''
_TESTS = [ _TESTS = [{
{ 'url': 'http://www.ruutu.fi/video/2058907',
'url': 'http://www.ruutu.fi/video/2058907', 'md5': 'ab2093f39be1ca8581963451b3c0234f',
'md5': 'ab2093f39be1ca8581963451b3c0234f', 'info_dict': {
'info_dict': { 'id': '2058907',
'id': '2058907', 'ext': 'mp4',
'ext': 'mp4', 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!',
'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6',
'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', 'thumbnail': r're:^https?://.*\.jpg$',
'thumbnail': r're:^https?://.*\.jpg$', 'duration': 114,
'duration': 114, 'age_limit': 0,
'age_limit': 0, 'upload_date': '20150508',
'upload_date': '20150508',
},
}, },
{ }, {
'url': 'http://www.ruutu.fi/video/2057306', 'url': 'http://www.ruutu.fi/video/2057306',
'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9', 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9',
'info_dict': { 'info_dict': {
'id': '2057306', 'id': '2057306',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Superpesis: katso koko kausi Ruudussa', 'title': 'Superpesis: katso koko kausi Ruudussa',
'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23', 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 40, 'duration': 40,
'age_limit': 0, 'age_limit': 0,
'upload_date': '20150507', 'upload_date': '20150507',
'series': 'Superpesis', 'series': 'Superpesis',
'categories': ['Urheilu'], 'categories': ['Urheilu'],
},
}, },
{ }, {
'url': 'http://www.supla.fi/supla/2231370', 'url': 'http://www.supla.fi/supla/2231370',
'md5': 'df14e782d49a2c0df03d3be2a54ef949', 'md5': 'df14e782d49a2c0df03d3be2a54ef949',
'info_dict': { 'info_dict': {
'id': '2231370', 'id': '2231370',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Osa 1: Mikael Jungner', 'title': 'Osa 1: Mikael Jungner',
'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 0, 'age_limit': 0,
'upload_date': '20151012', 'upload_date': '20151012',
'series': 'Läpivalaisu', 'series': 'Läpivalaisu',
},
}, },
}, {
# Episode where <SourceFile> is "NOT-USED", but has other # Episode where <SourceFile> is "NOT-USED", but has other
# downloadable sources available. # downloadable sources available.
{ 'url': 'http://www.ruutu.fi/video/3193728',
'url': 'http://www.ruutu.fi/video/3193728', 'only_matching': True,
'only_matching': True, }, {
# audio podcast
'url': 'https://www.supla.fi/supla/3382410',
'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
'info_dict': {
'id': '3382410',
'ext': 'mp3',
'title': 'Mikä ihmeen poltergeist?',
'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 0,
'upload_date': '20190320',
'series': 'Mysteeritarinat',
'duration': 1324,
}, },
{ 'expected_warnings': [
# audio podcast 'HTTP Error 502: Bad Gateway',
'url': 'https://www.supla.fi/supla/3382410', 'Failed to download m3u8 information',
'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', ],
'info_dict': { }, {
'id': '3382410', 'url': 'http://www.supla.fi/audio/2231370',
'ext': 'mp3', 'only_matching': True,
'title': 'Mikä ihmeen poltergeist?', }, {
'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', 'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790',
'thumbnail': r're:^https?://.*\.jpg$', 'only_matching': True,
'age_limit': 0, }, {
'upload_date': '20190320', # episode
'series': 'Mysteeritarinat', 'url': 'https://www.ruutu.fi/video/3401964',
'duration': 1324, 'info_dict': {
}, 'id': '3401964',
'expected_warnings': [ 'ext': 'mp4',
'HTTP Error 502: Bad Gateway', 'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
'Failed to download m3u8 information', 'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
], 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2582,
'age_limit': 12,
'upload_date': '20190508',
'series': 'Temptation Island Suomi',
'season_number': 5,
'episode_number': 17,
'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
}, },
{ 'params': {
'url': 'http://www.supla.fi/audio/2231370', 'skip_download': True,
'only_matching': True,
}, },
{ }, {
'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790', # premium
'only_matching': True, 'url': 'https://www.ruutu.fi/video/3618715',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
# FIXME: Broken IE
'url': 'https://www.hs.fi/maailma/art-2000011353059.html',
'info_dict': {
'id': '4746675',
'ext': 'mp4',
'title': 'Yhdysvaltojen Texasin osavaltiota ovat koetelleet tuhoisat tulvat',
}, },
{ }]
# episode
'url': 'https://www.ruutu.fi/video/3401964',
'info_dict': {
'id': '3401964',
'ext': 'mp4',
'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17',
'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2582,
'age_limit': 12,
'upload_date': '20190508',
'series': 'Temptation Island Suomi',
'season_number': 5,
'episode_number': 17,
'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'],
},
'params': {
'skip_download': True,
},
},
{
# premium
'url': 'https://www.ruutu.fi/video/3618715',
'only_matching': True,
},
]
_API_BASE = 'https://gatling.nelonenmedia.fi' _API_BASE = 'https://gatling.nelonenmedia.fi'
@classmethod @classmethod

View File

@ -23,13 +23,10 @@ class SenateISVPIE(InfoExtractor):
'id': 'judiciary031715', 'id': 'judiciary031715',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ISVP', 'title': 'ISVP',
'thumbnail': r're:^https?://.*\.(?:jpg|png)$', 'thumbnail': r're:https?://.+\.(?:jpe?g|png)',
'_old_archive_ids': ['senategov judiciary031715'], '_old_archive_ids': ['senategov judiciary031715'],
}, },
'params': { 'params': {'skip_download': 'm3u8'},
# m3u8 download
'skip_download': True,
},
'expected_warnings': ['Failed to download m3u8 information'], 'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
@ -39,10 +36,6 @@ class SenateISVPIE(InfoExtractor):
'title': 'Integrated Senate Video Player', 'title': 'Integrated Senate Video Player',
'_old_archive_ids': ['senategov commerce011514'], '_old_archive_ids': ['senategov commerce011514'],
}, },
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'This video is not available.', 'skip': 'This video is not available.',
}, { }, {
'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi', 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
@ -60,7 +53,7 @@ class SenateISVPIE(InfoExtractor):
'id': 'help090920', 'id': 'help090920',
'ext': 'mp4', 'ext': 'mp4',
'title': 'ISVP', 'title': 'ISVP',
'thumbnail': 'https://www.help.senate.gov/assets/images/video-poster.png', 'thumbnail': r're:https?://.+\.(?:jpe?g|png)',
'_old_archive_ids': ['senategov help090920'], '_old_archive_ids': ['senategov help090920'],
}, },
}, { }, {
@ -68,6 +61,17 @@ class SenateISVPIE(InfoExtractor):
'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
# FIXME: Embed detection
'url': 'https://www.hsgac.senate.gov/subcommittees/bmfwra/hearings/match-ready-oversight-of-the-federal-governments-border-management-and-personnel-readiness-efforts-for-the-decade-of-sports/',
'info_dict': {
'id': 'govtaff061025',
'ext': 'mp4',
'title': 'ISVP',
'thumbnail': r're:https?://.+\.(?:jpe?g|png)',
'_old_archive_ids': ['senategov govtaff061025'],
},
}]
_COMMITTEES = { _COMMITTEES = {
'ag': ('76440', 'https://ag-f.akamaihd.net', '2036803', 'agriculture'), 'ag': ('76440', 'https://ag-f.akamaihd.net', '2036803', 'agriculture'),
@ -150,10 +154,10 @@ class SenateGovIE(InfoExtractor):
'id': 'help090920', 'id': 'help090920',
'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', 'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health', 'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions', 'description': 'Full Committee Hearing on September 9, 2020 at 6:00 AM',
'ext': 'mp4', 'ext': 'mp4',
'age_limit': 0, 'age_limit': 0,
'thumbnail': 'https://www.help.senate.gov/assets/images/sharelogo.jpg', 'thumbnail': r're:https?://.+\.(?:jpe?g|png)',
'_old_archive_ids': ['senategov help090920'], '_old_archive_ids': ['senategov help090920'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
@ -165,7 +169,7 @@ class SenateGovIE(InfoExtractor):
'title': 'Review of the FY2019 Budget Request for the U.S. Army', 'title': 'Review of the FY2019 Budget Request for the U.S. Army',
'ext': 'mp4', 'ext': 'mp4',
'age_limit': 0, 'age_limit': 0,
'thumbnail': 'https://www.appropriations.senate.gov/themes/appropriations/images/video-poster-flash-fit.png', 'thumbnail': r're:https?://.+\.(?:jpe?g|png)',
'_old_archive_ids': ['senategov appropsA051518'], '_old_archive_ids': ['senategov appropsA051518'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
@ -178,7 +182,7 @@ class SenateGovIE(InfoExtractor):
'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization', 'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs', 'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': 'https://www.banking.senate.gov/themes/banking/images/sharelogo.jpg', 'thumbnail': r're:https?://.+\.(?:jpe?g|png)',
'age_limit': 0, 'age_limit': 0,
'_old_archive_ids': ['senategov banking041521'], '_old_archive_ids': ['senategov banking041521'],
}, },

View File

@ -76,17 +76,18 @@ class SimplecastIE(SimplecastBaseIE):
'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876', 'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Errant Signal - Chris Franklin & New Wave Video Essays', 'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
'channel_url': 'https://the-re-bind-io-podcast.simplecast.com',
'episode': 'Episode 1',
'episode_number': 1, 'episode_number': 1,
'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876', 'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
'description': 'md5:34752789d3d2702e2d2c975fbd14f357', 'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
'season': 'Season 1',
'season_number': 1, 'season_number': 1,
'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13', 'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
'series': 'The RE:BIND.io Podcast', 'series': 'The RE:BIND.io Podcast',
'duration': 5343, 'duration': 5343,
'timestamp': 1580979475, 'timestamp': 1580979475,
'upload_date': '20200206', 'upload_date': '20200206',
'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
} }
_TESTS = [{ _TESTS = [{
'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876', 'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
@ -96,6 +97,29 @@ class SimplecastIE(SimplecastBaseIE):
'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876', 'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
# FIXME: Embed detection
'url': 'https://poddtoppen.se/podcast/1498417306/the-rebindio-podcast/errant-signal-chris-franklin-new-wave-video-essays',
'md5': '8c93be7be54251bf29ee97464eabe61c',
'info_dict': {
'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
'ext': 'mp3',
'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
'channel_url': 'https://the-re-bind-io-podcast.simplecast.com',
'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
'duration': 5343,
'episode': 'Episode 1',
'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
'episode_number': 1,
'season': 'Season 1',
'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
'season_number': 1,
'series': 'The RE:BIND.io Podcast',
'timestamp': 1580979475,
'upload_date': '20200206',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
episode_id = self._match_id(url) episode_id = self._match_id(url)
@ -106,11 +130,11 @@ def _real_extract(self, url):
class SimplecastEpisodeIE(SimplecastBaseIE): class SimplecastEpisodeIE(SimplecastBaseIE):
IE_NAME = 'simplecast:episode' IE_NAME = 'simplecast:episode'
_VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
_TEST = { _TESTS = [{
'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays', 'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
'md5': '8c93be7be54251bf29ee97464eabe61c', 'md5': '8c93be7be54251bf29ee97464eabe61c',
'info_dict': SimplecastIE._COMMON_TEST_INFO, 'info_dict': SimplecastIE._COMMON_TEST_INFO,
} }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
@ -124,7 +148,7 @@ class SimplecastPodcastIE(SimplecastBaseIE):
_VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)' _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://the-re-bind-io-podcast.simplecast.com', 'url': 'https://the-re-bind-io-podcast.simplecast.com',
'playlist_mincount': 33, 'playlist_mincount': 32,
'info_dict': { 'info_dict': {
'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c', 'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
'title': 'The RE:BIND.io Podcast', 'title': 'The RE:BIND.io Podcast',

View File

@ -1,140 +1,118 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class SkebIE(InfoExtractor): class SkebIE(InfoExtractor):
_VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P<id>\d+)' _VALID_URL = r'https?://skeb\.jp/@(?P<uploader_id>[^/?#]+)/works/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://skeb.jp/@riiru_wm/works/10', 'url': 'https://skeb.jp/@riiru_wm/works/10',
'info_dict': { 'info_dict': {
'id': '466853', 'id': '466853',
'title': '内容はおまかせします! by 姫ノ森りぃる@一周年',
'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
'uploader': '姫ノ森りぃる@一周年',
'uploader_id': 'riiru_wm',
'age_limit': 0,
'tags': [],
'url': r're:https://skeb.+',
'thumbnail': r're:https://skeb.+',
'subtitles': {
'jpn': [{
'url': r're:https://skeb.+',
'ext': 'vtt',
}],
},
'width': 720,
'height': 405,
'duration': 313,
'fps': 30,
'ext': 'mp4', 'ext': 'mp4',
'title': '10-1',
'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
'duration': 313,
'genres': ['video'],
'thumbnail': r're:https?://.+',
'uploader': '姫ノ森りぃる@ひとづま',
'uploader_id': 'riiru_wm',
}, },
}, { }, {
'url': 'https://skeb.jp/@furukawa_nob/works/3', 'url': 'https://skeb.jp/@furukawa_nob/works/3',
'info_dict': { 'info_dict': {
'id': '489408', 'id': '489408',
'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
'uploader': '古川ノブ@音楽とVlogのVtuber',
'uploader_id': 'furukawa_nob',
'age_limit': 0,
'tags': [
'よろしく', '大丈夫', 'お願い', 'でした',
'是非', 'O', 'バー', '遊び', 'おはよう',
'オーバ', 'ボイス',
],
'url': r're:https://skeb.+',
'thumbnail': r're:https://skeb.+',
'subtitles': {
'jpn': [{
'url': r're:https://skeb.+',
'ext': 'vtt',
}],
},
'duration': 98,
'ext': 'mp3', 'ext': 'mp3',
'vcodec': 'none', 'title': '3-1',
'abr': 128, 'description': 'md5:6de1f8f876426a6ac321c123848176a8',
'duration': 98,
'genres': ['voice'],
'tags': 'count:11',
'thumbnail': r're:https?://.+',
'uploader': '古川ノブ@宮城の動画勢Vtuber',
'uploader_id': 'furukawa_nob',
}, },
}, { }, {
'url': 'https://skeb.jp/@mollowmollow/works/6', 'url': 'https://skeb.jp/@Rizu_panda_cube/works/626',
'info_dict': { 'info_dict': {
'id': '6', 'id': '626',
'title': 'ヒロ。\n\n私のキャラク... by 諸々', 'description': 'md5:834557b39ca56960c5f77dd6ddabe775',
'description': 'md5:aa6cbf2ba320b50bce219632de195f07', 'uploader': 'りづ100億%',
'_type': 'playlist', 'uploader_id': 'Rizu_panda_cube',
'entries': [{ 'tags': 'count:57',
'id': '486430', 'genres': ['video'],
'title': 'ヒロ。\n\n私のキャラク... by 諸々',
'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
}, {
'id': '486431',
'title': 'ヒロ。\n\n私のキャラク... by 諸々',
}],
}, },
'playlist_count': 2,
'expected_warnings': ['Skipping unsupported extension'],
}] }]
def _real_extract(self, url): def _call_api(self, uploader_id, work_id):
video_id = self._match_id(url) return self._download_json(
nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id) f'https://skeb.jp/api/users/{uploader_id}/works/{work_id}', work_id, headers={
'Accept': 'application/json',
'Authorization': 'Bearer null',
})
parent = { def _real_extract(self, url):
'id': video_id, uploader_id, work_id = self._match_valid_url(url).group('uploader_id', 'id')
'title': nuxt_data.get('title'), try:
'description': nuxt_data.get('description'), works = self._call_api(uploader_id, work_id)
'uploader': traverse_obj(nuxt_data, ('creator', 'name')), except ExtractorError as e:
'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')), if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
'age_limit': 18 if nuxt_data.get('nsfw') else 0, raise
'tags': nuxt_data.get('tag_list'), webpage = e.cause.response.read().decode()
value = self._search_regex(
r'document\.cookie\s*=\s*["\']request_key=([^;"\']+)', webpage, 'request key')
self._set_cookie('skeb.jp', 'request_key', value)
works = self._call_api(uploader_id, work_id)
info = {
'uploader_id': uploader_id,
**traverse_obj(works, {
'age_limit': ('nsfw', {bool}, {lambda x: 18 if x else None}),
'description': (('source_body', 'body'), {clean_html}, filter, any),
'genres': ('genre', {str}, filter, all, filter),
'tags': ('tag_list', ..., {str}, filter, all, filter),
'uploader': ('creator', 'name', {str}),
}),
} }
entries = [] entries = []
for item in nuxt_data.get('previews') or []: for idx, preview in enumerate(traverse_obj(works, ('previews', lambda _, v: url_or_none(v['url']))), 1):
vid_url = item.get('url') ext = traverse_obj(preview, ('information', 'extension', {str}))
given_ext = traverse_obj(item, ('information', 'extension')) if ext not in ('mp3', 'mp4'):
preview_ext = determine_ext(vid_url, default_ext=None) self.report_warning(f'Skipping unsupported extension "{ext}"')
if not preview_ext:
content_disposition = parse_qs(vid_url)['response-content-disposition'][0]
preview_ext = self._search_regex(
r'filename="[^"]+\.([^\.]+?)"', content_disposition,
'preview file extension', fatal=False, group=1)
if preview_ext not in ('mp4', 'mp3'):
continue continue
if not vid_url or not item.get('id'):
continue
width, height = traverse_obj(item, ('information', 'width')), traverse_obj(item, ('information', 'height'))
if width is not None and height is not None:
# the longest side is at most 720px for non-client viewers
max_size = max(width, height)
width, height = (x * 720 // max_size for x in (width, height))
entries.append({ entries.append({
**parent, 'ext': ext,
'id': str(item['id']), 'title': f'{work_id}-{idx}',
'url': vid_url,
'thumbnail': item.get('poster_url'),
'subtitles': { 'subtitles': {
'jpn': [{ 'ja': [{
'url': item.get('vtt_url'),
'ext': 'vtt', 'ext': 'vtt',
'url': preview['vtt_url'],
}], }],
} if item.get('vtt_url') else None, } if url_or_none(preview.get('vtt_url')) else None,
'width': width, 'vcodec': 'none' if ext == 'mp3' else None,
'height': height, **info,
'duration': traverse_obj(item, ('information', 'duration')), **traverse_obj(preview, {
'fps': traverse_obj(item, ('information', 'frame_rate')), 'id': ('id', {str_or_none}),
'ext': preview_ext or given_ext, 'thumbnail': ('poster_url', {url_or_none}),
'vcodec': 'none' if preview_ext == 'mp3' else None, 'url': ('url', {url_or_none}),
# you'll always get 128kbps MP3 for non-client viewers }),
'abr': 128 if preview_ext == 'mp3' else None, **traverse_obj(preview, ('information', {
'duration': ('duration', {int_or_none}),
'fps': ('frame_rate', {int_or_none}),
'height': ('height', {int_or_none}),
'width': ('width', {int_or_none}),
})),
}) })
if not entries: return self.playlist_result(entries, work_id, **info)
raise ExtractorError('No video/audio attachment found in this commission.', expected=True)
elif len(entries) == 1:
return entries[0]
else:
parent.update({
'_type': 'playlist',
'entries': entries,
})
return parent

View File

@ -26,11 +26,47 @@
class SoundcloudEmbedIE(InfoExtractor): class SoundcloudEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)' _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1'] _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1']
_TEST = { _TESTS = [{
# from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/ # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey', 'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
'only_matching': True, 'only_matching': True,
} }]
_WEBPAGE_TESTS = [{
'url': 'https://news.sophos.com/en-us/2023/08/10/s3-ep147-what-if-you-type-in-your-password-during-a-meeting/',
'info_dict': {
'id': '1588847423',
'ext': 'm4a',
'title': 'S3 Ep147: What if you type in your password during a meeting?',
'artists': ['Naked Security'],
'description': 'md5:6931a0630b920413c8c904407bf4b3b2',
'duration': 942.762,
'genres': ['Technology'],
'license': 'all-rights-reserved',
'repost_count': int,
'tags': 'count:4',
'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'timestamp': 1691624365,
'track': 'S3 Ep147: What if you type in your password during a meeting?',
'upload_date': '20230809',
'uploader': 'Naked Security',
'uploader_id': '61390843',
'uploader_url': 'https://soundcloud.com/sophossecurity',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.guitarplayer.com/lessons/november-2023-guitar-player-lesson-audio',
'info_dict': {
'id': '1695754080',
'title': 'A Tribute to Brian Setzers Guitar Mastery',
'album': 'A Tribute to Brian Setzers Guitar Mastery',
'album_artists': ['Guitar Player'],
'album_type': 'playlist',
'description': '',
'uploader': 'Guitar Player',
'uploader_id': '489924156',
},
'playlist_mincount': 7,
}]
def _real_extract(self, url): def _real_extract(self, url):
query = parse_qs(url) query = parse_qs(url)
@ -242,7 +278,7 @@ def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_f
format_urls.add(format_url) format_urls.add(format_url)
formats.append({ formats.append({
'format_id': 'download', 'format_id': 'download',
'ext': urlhandle_detect_ext(urlh, default='mp3'), 'ext': urlhandle_detect_ext(urlh),
'filesize': int_or_none(urlh.headers.get('Content-Length')), 'filesize': int_or_none(urlh.headers.get('Content-Length')),
'url': format_url, 'url': format_url,
'quality': 10, 'quality': 10,
@ -407,269 +443,256 @@ class SoundcloudIE(SoundcloudBaseIE):
) )
''' '''
IE_NAME = 'soundcloud' IE_NAME = 'soundcloud'
_TESTS = [ _TESTS = [{
{ 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', 'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
'md5': 'de9bac153e7427a7333b4b0c1b6a18d2', 'info_dict': {
'info_dict': { 'id': '62986583',
'id': '62986583', 'ext': 'opus',
'ext': 'opus', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'track': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'description': 'md5:7b6074e00887ad79f59b647c8fb6d5ae',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'uploader': 'E.T. ExTerrestrial Music',
'uploader': 'E.T. ExTerrestrial Music', 'uploader_id': '1571244',
'uploader_id': '1571244', 'timestamp': 1349920598,
'timestamp': 1349920598, 'upload_date': '20121011',
'upload_date': '20121011', 'duration': 143.216,
'duration': 143.216, 'license': 'all-rights-reserved',
'license': 'all-rights-reserved', 'view_count': int,
'view_count': int, 'like_count': int,
'like_count': int, 'comment_count': int,
'comment_count': int, 'repost_count': int,
'repost_count': int, 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', 'uploader_url': 'https://soundcloud.com/ethmusic',
'uploader_url': 'https://soundcloud.com/ethmusic', 'tags': 'count:14',
'tags': 'count:14',
},
}, },
# geo-restricted }, {
{ # Geo-restricted
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': { 'info_dict': {
'id': '47127627', 'id': '47127627',
'ext': 'opus', 'ext': 'opus',
'title': 'Goldrushed', 'title': 'Goldrushed',
'track': 'Goldrushed', 'track': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'description': 'md5:c0080b79a3710811d60234f94f391a40',
'uploader': 'The Royal Concept', 'uploader': 'The Royal Concept',
'uploader_id': '9615865', 'uploader_id': '9615865',
'timestamp': 1337635207, 'timestamp': 1337635207,
'upload_date': '20120521', 'upload_date': '20120521',
'duration': 227.103, 'duration': 227.103,
'license': 'all-rights-reserved', 'license': 'all-rights-reserved',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
'repost_count': int, 'repost_count': int,
'uploader_url': 'https://soundcloud.com/the-concept-band', 'uploader_url': 'https://soundcloud.com/the-concept-band',
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg', 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'genres': ['Alternative'], 'genres': ['Alternative'],
'artists': ['The Royal Concept'], 'artists': ['The Royal Concept'],
'tags': [], 'tags': [],
},
}, },
}, {
# private link # private link
{ 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', 'info_dict': {
'info_dict': { 'id': '123998367',
'id': '123998367', 'ext': 'mp3',
'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'track': 'Youtube - Dl Test Video \'\' Ä↭',
'track': 'Youtube - Dl Test Video \'\' Ä↭', 'description': 'md5:610b729ee06ac4cedaa28607212948f3',
'description': 'test chars: "\'/\\ä↭', 'uploader': 'jaimeMF',
'uploader': 'jaimeMF', 'uploader_id': '69767071',
'uploader_id': '69767071', 'timestamp': 1386604920,
'timestamp': 1386604920, 'upload_date': '20131209',
'upload_date': '20131209', 'duration': 9.927,
'duration': 9.927, 'license': 'all-rights-reserved',
'license': 'all-rights-reserved', 'view_count': int,
'view_count': int, 'like_count': int,
'like_count': int, 'comment_count': int,
'comment_count': int, 'repost_count': int,
'repost_count': int, 'uploader_url': 'https://soundcloud.com/jaimemf',
'uploader_url': 'https://soundcloud.com/jaimemf', 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png', 'genres': ['youtubedl'],
'genres': ['youtubedl'], 'tags': [],
'tags': [],
},
}, },
}, {
# private link (alt format) # private link (alt format)
{ 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp', 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', 'info_dict': {
'info_dict': { 'id': '123998367',
'id': '123998367', 'ext': 'mp3',
'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭',
'title': 'Youtube - Dl Test Video \'\' Ä↭', 'track': 'Youtube - Dl Test Video \'\' Ä↭',
'track': 'Youtube - Dl Test Video \'\' Ä↭', 'description': 'md5:610b729ee06ac4cedaa28607212948f3',
'description': 'test chars: "\'/\\ä↭', 'uploader': 'jaimeMF',
'uploader': 'jaimeMF', 'uploader_id': '69767071',
'uploader_id': '69767071', 'timestamp': 1386604920,
'timestamp': 1386604920, 'upload_date': '20131209',
'upload_date': '20131209', 'duration': 9.927,
'duration': 9.927, 'license': 'all-rights-reserved',
'license': 'all-rights-reserved', 'view_count': int,
'view_count': int, 'like_count': int,
'like_count': int, 'comment_count': int,
'comment_count': int, 'repost_count': int,
'repost_count': int, 'uploader_url': 'https://soundcloud.com/jaimemf',
'uploader_url': 'https://soundcloud.com/jaimemf', 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png', 'genres': ['youtubedl'],
'genres': ['youtubedl'], 'tags': [],
'tags': [],
},
}, },
}, {
# downloadable song # downloadable song
{ 'url': 'https://soundcloud.com/the80m/the-following',
'url': 'https://soundcloud.com/the80m/the-following', 'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04'
'md5': 'ecb87d7705d5f53e6c02a63760573c75', # wav: '9ffcddb08c87d74fb5808a3c183a1d04' 'info_dict': {
'info_dict': { 'id': '343609555',
'id': '343609555', 'ext': 'opus', # wav original available with auth
'ext': 'opus', # wav original available with auth 'title': 'The Following',
'title': 'The Following', 'track': 'The Following',
'track': 'The Following', 'description': '',
'description': '', 'uploader': '80M',
'uploader': '80M', 'uploader_id': '312384765',
'uploader_id': '312384765', 'uploader_url': 'https://soundcloud.com/the80m',
'uploader_url': 'https://soundcloud.com/the80m', 'upload_date': '20170922',
'upload_date': '20170922', 'timestamp': 1506120436,
'timestamp': 1506120436, 'duration': 397.228,
'duration': 397.228, 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg', 'license': 'all-rights-reserved',
'license': 'all-rights-reserved', 'like_count': int,
'like_count': int, 'comment_count': int,
'comment_count': int, 'repost_count': int,
'repost_count': int, 'view_count': int,
'view_count': int, 'genres': ['Dance & EDM'],
'genres': ['Dance & EDM'], 'artists': ['80M'],
'artists': ['80M'], 'tags': 'count:4',
'tags': ['80M', 'EDM', 'Dance', 'Music'],
},
'expected_warnings': ['Original download format is only available for registered users'],
}, },
'expected_warnings': ['Original download format is only available for registered users'],
}, {
# private link, downloadable format # private link, downloadable format
# tags with spaces (e.g. "Uplifting Trance", "Ori Uplift") # tags with spaces (e.g. "Uplifting Trance", "Ori Uplift")
{ 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', 'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e'
'md5': '2e1530d0e9986a833a67cb34fc90ece0', # wav: '64a60b16e617d41d0bef032b7f55441e' 'info_dict': {
'info_dict': { 'id': '340344461',
'id': '340344461', 'ext': 'opus', # wav original available with auth
'ext': 'opus', # wav original available with auth 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'track': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', 'uploader': 'Ori Uplift Music',
'uploader': 'Ori Uplift Music', 'uploader_id': '12563093',
'uploader_id': '12563093', 'timestamp': 1504206263,
'timestamp': 1504206263, 'upload_date': '20170831',
'upload_date': '20170831', 'duration': 7449.096,
'duration': 7449.096, 'license': 'all-rights-reserved',
'license': 'all-rights-reserved', 'view_count': int,
'view_count': int, 'like_count': int,
'like_count': int, 'comment_count': int,
'comment_count': int, 'repost_count': int,
'repost_count': int, 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg', 'uploader_url': 'https://soundcloud.com/oriuplift',
'uploader_url': 'https://soundcloud.com/oriuplift', 'genres': ['Trance'],
'genres': ['Trance'], 'artists': ['Ori Uplift'],
'artists': ['Ori Uplift'], 'tags': 'count:6',
'tags': ['Orchestral', 'Emotional', 'Uplifting Trance', 'Trance', 'Ori Uplift', 'UpOnly'],
},
'expected_warnings': ['Original download format is only available for registered users'],
}, },
'expected_warnings': ['Original download format is only available for registered users'],
}, {
# no album art, use avatar pic for thumbnail # no album art, use avatar pic for thumbnail
{ 'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real', 'md5': '59c7872bc44e5d99b7211891664760c2',
'md5': '59c7872bc44e5d99b7211891664760c2', 'info_dict': {
'info_dict': { 'id': '309699954',
'id': '309699954', 'ext': 'mp3',
'ext': 'mp3', 'title': 'Sideways (Prod. Mad Real)',
'title': 'Sideways (Prod. Mad Real)', 'track': 'Sideways (Prod. Mad Real)',
'track': 'Sideways (Prod. Mad Real)', 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', 'uploader': 'garyvee',
'uploader': 'garyvee', 'uploader_id': '2366352',
'uploader_id': '2366352', 'timestamp': 1488152409,
'timestamp': 1488152409, 'upload_date': '20170226',
'upload_date': '20170226', 'duration': 207.012,
'duration': 207.012, 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'thumbnail': r're:https?://.*\.jpg', 'license': 'all-rights-reserved',
'license': 'all-rights-reserved', 'view_count': int,
'view_count': int, 'like_count': int,
'like_count': int, 'comment_count': int,
'comment_count': int, 'repost_count': int,
'repost_count': int, 'uploader_url': 'https://soundcloud.com/garyvee',
'uploader_url': 'https://soundcloud.com/garyvee', 'artists': ['MadReal'],
'artists': ['MadReal'], 'tags': [],
'tags': [],
},
'params': {
'skip_download': True,
},
}, },
{ 'params': {'skip_download': 'm3u8'},
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer', }, {
'md5': '8227c3473a4264df6b02ad7e5b7527ac', 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
'info_dict': { 'md5': '8227c3473a4264df6b02ad7e5b7527ac',
'id': '583011102', 'info_dict': {
'ext': 'opus', 'id': '583011102',
'title': 'Mezzo Valzer', 'ext': 'm4a',
'track': 'Mezzo Valzer', 'title': 'Mezzo Valzer',
'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a', 'track': 'Mezzo Valzer',
'uploader': 'Giovanni Sarani', 'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
'uploader_id': '3352531', 'uploader': 'Giovanni Sarani',
'timestamp': 1551394171, 'uploader_id': '3352531',
'upload_date': '20190228', 'timestamp': 1551394171,
'duration': 180.157, 'upload_date': '20190228',
'thumbnail': r're:https?://.*\.jpg', 'duration': 180.134,
'license': 'all-rights-reserved', 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'view_count': int, 'license': 'all-rights-reserved',
'like_count': int, 'view_count': int,
'comment_count': int, 'like_count': int,
'repost_count': int, 'comment_count': int,
'genres': ['Piano'], 'repost_count': int,
'uploader_url': 'https://soundcloud.com/giovannisarani', 'genres': ['Piano'],
'tags': 'count:10', 'uploader_url': 'https://soundcloud.com/giovannisarani',
}, 'tags': 'count:10',
}, },
'params': {'skip_download': 'm3u8'},
}, {
# .png "original" artwork, 160kbps m4a HLS format # .png "original" artwork, 160kbps m4a HLS format
{ 'url': 'https://soundcloud.com/skorxh/audio-dealer',
'url': 'https://soundcloud.com/skorxh/audio-dealer', 'info_dict': {
'info_dict': { 'id': '2011421339',
'id': '2011421339', 'ext': 'm4a',
'ext': 'm4a', 'title': 'audio dealer',
'title': 'audio dealer', 'description': '',
'description': '', 'uploader': '$KORCH',
'uploader': '$KORCH', 'uploader_id': '150292288',
'uploader_id': '150292288', 'uploader_url': 'https://soundcloud.com/skorxh',
'uploader_url': 'https://soundcloud.com/skorxh', 'comment_count': int,
'comment_count': int, 'view_count': int,
'view_count': int, 'like_count': int,
'like_count': int, 'repost_count': int,
'repost_count': int, 'duration': 213.469,
'duration': 213.469, 'tags': [],
'tags': [], 'artists': ['$KORXH'],
'artists': ['$KORXH'], 'track': 'audio dealer',
'track': 'audio dealer', 'timestamp': 1737143201,
'timestamp': 1737143201, 'upload_date': '20250117',
'upload_date': '20250117', 'license': 'all-rights-reserved',
'license': 'all-rights-reserved', 'thumbnail': r're:https?://[ai]1\.sndcdn\.com/.+\.(?:jpg|png)',
'thumbnail': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-original.png', 'thumbnails': [
'thumbnails': [ {'id': 'mini', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-mini.jpg'},
{'id': 'mini', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-mini.jpg'}, {'id': 'tiny', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-tiny.jpg'},
{'id': 'tiny', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-tiny.jpg'}, {'id': 'small', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-small.jpg'},
{'id': 'small', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-small.jpg'}, {'id': 'badge', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-badge.jpg'},
{'id': 'badge', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-badge.jpg'}, {'id': 't67x67', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t67x67.jpg'},
{'id': 't67x67', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t67x67.jpg'}, {'id': 'large', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-large.jpg'},
{'id': 'large', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-large.jpg'}, {'id': 't300x300', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t300x300.jpg'},
{'id': 't300x300', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t300x300.jpg'}, {'id': 'crop', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-crop.jpg'},
{'id': 'crop', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-crop.jpg'}, {'id': 't500x500', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t500x500.jpg'},
{'id': 't500x500', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-t500x500.jpg'}, {'id': 'original', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-original.png'},
{'id': 'original', 'url': 'https://i1.sndcdn.com/artworks-a1wKGMYNreDLTMrT-fGjRiw-original.png'}, ],
],
},
'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'},
}, },
{ 'params': {'skip_download': 'm3u8', 'format': 'hls_aac_160k'},
# AAC HQ format available (account with active subscription needed) }, {
'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1', # AAC HQ format available (account with active subscription needed)
'only_matching': True, 'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
}, 'only_matching': True,
{ }, {
# Go+ (account with active subscription needed) # Go+ (account with active subscription needed)
'url': 'https://soundcloud.com/taylorswiftofficial/look-what-you-made-me-do', 'url': 'https://soundcloud.com/taylorswiftofficial/look-what-you-made-me-do',
'only_matching': True, 'only_matching': True,
}, }]
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
@ -907,7 +930,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
'id': '7098329', 'id': '7098329',
'title': 'Grynpyret (Spotlight)', 'title': 'Grynpyret (Spotlight)',
}, },
'playlist_mincount': 1, 'playlist_mincount': 0,
}, { }, {
'url': 'https://soundcloud.com/one-thousand-and-one/comments', 'url': 'https://soundcloud.com/one-thousand-and-one/comments',
'info_dict': { 'info_dict': {
@ -998,7 +1021,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE):
'id': '1084577272', 'id': '1084577272',
'title': 'Sexapil - Pingers 5 (Recommended)', 'title': 'Sexapil - Pingers 5 (Recommended)',
}, },
'playlist_mincount': 50, 'playlist_mincount': 49,
}, { }, {
'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/albums', 'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/albums',
'info_dict': { 'info_dict': {
@ -1045,7 +1068,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
'info_dict': { 'info_dict': {
'id': '4110309', 'id': '4110309',
'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
'description': 're:.*?TILT Brass - Bowery Poetry Club', 'description': 'md5:e4373f7177fe3db292a8552b4ec41bc6',
'uploader': 'Non-Site Records', 'uploader': 'Non-Site Records',
'uploader_id': '33660914', 'uploader_id': '33660914',
'album_artists': ['Non-Site Records'], 'album_artists': ['Non-Site Records'],

View File

@ -8,6 +8,7 @@
class SportBoxIE(InfoExtractor): class SportBoxIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
_EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"'] _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"']
_TESTS = [{ _TESTS = [{
@ -17,7 +18,7 @@ class SportBoxIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»', 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:https?://.+\.jpg',
'duration': 292, 'duration': 292,
'view_count': int, 'view_count': int,
'timestamp': 1426237001, 'timestamp': 1426237001,
@ -40,6 +41,15 @@ class SportBoxIE(InfoExtractor):
'url': 'https://matchtv.ru/vdl/player/media/109158', 'url': 'https://matchtv.ru/vdl/player/media/109158',
'only_matching': True, 'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{
'url': 'http://www.vestifinance.ru/articles/25753',
'info_dict': {
'id': '25753',
'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
},
'playlist_count': 3,
'skip': 'Invalid URL',
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)

Some files were not shown because too many files have changed in this diff Show More