mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-07-27 07:38:30 +00:00
Merge branch 'yt-dlp:master' into feat/ie-next-flight
This commit is contained in:
commit
e2f25c0588
41
.github/workflows/signature-tests.yml
vendored
Normal file
41
.github/workflows/signature-tests.yml
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
name: Signature Tests
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- .github/workflows/signature-tests.yml
|
||||
- test/test_youtube_signature.py
|
||||
- yt_dlp/jsinterp.py
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/signature-tests.yml
|
||||
- test/test_youtube_signature.py
|
||||
- yt_dlp/jsinterp.py
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: signature-tests-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
name: Signature Tests
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest]
|
||||
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --only-optional --include test
|
||||
- name: Run tests
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
python3 -m yt_dlp -v || true # Print debug head
|
||||
python3 ./devscripts/run_tests.py test/test_youtube_signature.py
|
@ -779,3 +779,8 @@ brian6932
|
||||
iednod55
|
||||
maxbin123
|
||||
nullpos
|
||||
anlar
|
||||
eason1478
|
||||
ceandreasen
|
||||
chauhantirth
|
||||
helpimnotdrowning
|
||||
|
42
Changelog.md
42
Changelog.md
@ -4,6 +4,48 @@ # Changelog
|
||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2025.06.30
|
||||
|
||||
#### Core changes
|
||||
- **jsinterp**: [Fix `extract_object`](https://github.com/yt-dlp/yt-dlp/commit/958153a226214c86879e36211ac191bf78289578) ([#13580](https://github.com/yt-dlp/yt-dlp/issues/13580)) by [seproDev](https://github.com/seproDev)
|
||||
|
||||
#### Extractor changes
|
||||
- **bilibilispacevideo**: [Extract hidden-mode collections as playlists](https://github.com/yt-dlp/yt-dlp/commit/99b85ac102047446e6adf5b62bfc3c8d80b53778) ([#13533](https://github.com/yt-dlp/yt-dlp/issues/13533)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **hotstar**
|
||||
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b5bd057fe86550f3aa67f2fc8790d1c6a251c57b) ([#13530](https://github.com/yt-dlp/yt-dlp/issues/13530)) by [bashonly](https://github.com/bashonly), [chauhantirth](https://github.com/chauhantirth) (With fixes in [e9f1576](https://github.com/yt-dlp/yt-dlp/commit/e9f157669e24953a88d15ce22053649db7a8e81e) by [bashonly](https://github.com/bashonly))
|
||||
- [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0a6b1044899f452cd10b6c7a6b00fa985a9a8b97) ([#13560](https://github.com/yt-dlp/yt-dlp/issues/13560)) by [bashonly](https://github.com/bashonly)
|
||||
- [Raise for login required](https://github.com/yt-dlp/yt-dlp/commit/5e292baad62c749b6c340621ab2d0f904165ddfb) ([#10405](https://github.com/yt-dlp/yt-dlp/issues/10405)) by [bashonly](https://github.com/bashonly)
|
||||
- series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4bd9a7ade7e0508b9795b3e72a69eeb40788b62b) ([#13564](https://github.com/yt-dlp/yt-dlp/issues/13564)) by [bashonly](https://github.com/bashonly)
|
||||
- **jiocinema**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/7e2504f941a11ea2b0dba00de3f0295cdc253e79) ([#13565](https://github.com/yt-dlp/yt-dlp/issues/13565)) by [bashonly](https://github.com/bashonly)
|
||||
- **kick**: [Support subscriber-only content](https://github.com/yt-dlp/yt-dlp/commit/b16722ede83377f77ea8352dcd0a6ca8e83b8f0f) ([#13550](https://github.com/yt-dlp/yt-dlp/issues/13550)) by [helpimnotdrowning](https://github.com/helpimnotdrowning)
|
||||
- **niconico**: live: [Fix extractor and downloader](https://github.com/yt-dlp/yt-dlp/commit/06c1a8cdffe14050206683253726875144192ef5) ([#13158](https://github.com/yt-dlp/yt-dlp/issues/13158)) by [doe1080](https://github.com/doe1080)
|
||||
- **sauceplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/35fc33fbc51c7f5392fb2300f65abf6cf107ef90) ([#13567](https://github.com/yt-dlp/yt-dlp/issues/13567)) by [bashonly](https://github.com/bashonly), [ceandreasen](https://github.com/ceandreasen)
|
||||
- **sproutvideo**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/11b9416e10cff7513167d76d6c47774fcdd3e26a) ([#13589](https://github.com/yt-dlp/yt-dlp/issues/13589)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**: [Fix premium formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2ba5391cd68ed4f2415c827d2cecbcbc75ace10b) ([#13586](https://github.com/yt-dlp/yt-dlp/issues/13586)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Misc. changes
|
||||
- **ci**: [Add signature tests](https://github.com/yt-dlp/yt-dlp/commit/1b883846347addeab12663fd74317fd544341a1c) ([#13582](https://github.com/yt-dlp/yt-dlp/issues/13582)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**: Miscellaneous: [b018784](https://github.com/yt-dlp/yt-dlp/commit/b0187844988e557c7e1e6bb1aabd4c1176768d86) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2025.06.25
|
||||
|
||||
#### Extractor changes
|
||||
- [Add `_search_nuxt_json` helper](https://github.com/yt-dlp/yt-dlp/commit/51887484e46ab6015c041cb1ab626a55f25a03bd) ([#13386](https://github.com/yt-dlp/yt-dlp/issues/13386)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
|
||||
- **brightcove**: new: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e6bd4a3da295b760ab20b39c18ce8934d312c2bf) ([#13461](https://github.com/yt-dlp/yt-dlp/issues/13461)) by [doe1080](https://github.com/doe1080)
|
||||
- **huya**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2600849badb0d08c55b58dcc77a13af6ba423da6) ([#13520](https://github.com/yt-dlp/yt-dlp/issues/13520)) by [doe1080](https://github.com/doe1080)
|
||||
- **hypergryph**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1722c55400ff30bb5aee5dd7a262f0b7e9ce2f0e) ([#13415](https://github.com/yt-dlp/yt-dlp/issues/13415)) by [doe1080](https://github.com/doe1080), [eason1478](https://github.com/eason1478)
|
||||
- **lsm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c57412d1f9cf0124adc972a47858ac42b740c61d) ([#13126](https://github.com/yt-dlp/yt-dlp/issues/13126)) by [Caesim404](https://github.com/Caesim404)
|
||||
- **mave**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1838a1ce5d4ade80770ba9162eaffc9a1607dc70) ([#13380](https://github.com/yt-dlp/yt-dlp/issues/13380)) by [anlar](https://github.com/anlar)
|
||||
- **sportdeutschland**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ce4327c9836691d3b6b00e44a90b6741601ed8) ([#13519](https://github.com/yt-dlp/yt-dlp/issues/13519)) by [DTrombett](https://github.com/DTrombett)
|
||||
- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5b559d0072b7164daf06bacdc41c6f11283452c8) ([#13544](https://github.com/yt-dlp/yt-dlp/issues/13544)) by [bashonly](https://github.com/bashonly)
|
||||
- **tv8.it**: [Support slugless URLs](https://github.com/yt-dlp/yt-dlp/commit/3bd30291601c47fa4a257983473884103ecab0c7) ([#13478](https://github.com/yt-dlp/yt-dlp/issues/13478)) by [DTrombett](https://github.com/DTrombett)
|
||||
- **youtube**
|
||||
- [Check any `ios` m3u8 formats prior to download](https://github.com/yt-dlp/yt-dlp/commit/8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f) ([#13524](https://github.com/yt-dlp/yt-dlp/issues/13524)) by [bashonly](https://github.com/bashonly)
|
||||
- [Improve player context payloads](https://github.com/yt-dlp/yt-dlp/commit/ff6f94041aeee19c5559e1c1cd693960a1c1dd14) ([#13539](https://github.com/yt-dlp/yt-dlp/issues/13539)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
#### Misc. changes
|
||||
- **test**: `traversal`: [Fix morsel tests for Python 3.14](https://github.com/yt-dlp/yt-dlp/commit/73bf10211668e4a59ccafd790e06ee82d9fea9ea) ([#13471](https://github.com/yt-dlp/yt-dlp/issues/13471)) by [Grub4K](https://github.com/Grub4K)
|
||||
|
||||
### 2025.06.09
|
||||
|
||||
#### Extractor changes
|
||||
|
10
README.md
10
README.md
@ -1156,15 +1156,15 @@ # CONFIGURATION
|
||||
* `/etc/yt-dlp/config`
|
||||
* `/etc/yt-dlp/config.txt`
|
||||
|
||||
E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
|
||||
E.g. with the following configuration file, yt-dlp will always extract the audio, copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
|
||||
```
|
||||
# Lines starting with # are comments
|
||||
|
||||
# Always extract audio
|
||||
-x
|
||||
|
||||
# Do not copy the mtime
|
||||
--no-mtime
|
||||
# Copy the mtime
|
||||
--mtime
|
||||
|
||||
# Use this proxy
|
||||
--proxy 127.0.0.1:3128
|
||||
@ -1799,6 +1799,7 @@ #### youtube
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
||||
* `webpage_skip`: Skip extraction of embedded webpage data. One or both of `player_response`, `initial_data`. These options are for testing purposes and don't skip any network requests
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
|
||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||
@ -2262,6 +2263,7 @@ ### Differences in default behavior
|
||||
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
|
||||
* The sub-modules `swfinterp`, `casefold` are removed.
|
||||
* Passing `--simulate` (or calling `extract_info` with `download=False`) no longer alters the default format selection. See [#9843](https://github.com/yt-dlp/yt-dlp/issues/9843) for details.
|
||||
* yt-dlp no longer applies the server modified time to downloaded files by default. Use `--mtime` or `--compat-options mtime-by-default` to revert this.
|
||||
|
||||
For ease of use, a few more compat options are available:
|
||||
|
||||
@ -2271,7 +2273,7 @@ ### Differences in default behavior
|
||||
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization`
|
||||
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
|
||||
* `--compat-options 2023`: Same as `--compat-options 2024,prefer-vp9-sort`
|
||||
* `--compat-options 2024`: Currently does nothing. Use this to enable all future compat options
|
||||
* `--compat-options 2024`: Same as `--compat-options mtime-by-default`. Use this to enable all future compat options
|
||||
|
||||
The following compat options restore vulnerable behavior from before security patches:
|
||||
|
||||
|
@ -10,9 +10,13 @@ __yt_dlp()
|
||||
diropts="--cache-dir"
|
||||
|
||||
if [[ ${prev} =~ ${fileopts} ]]; then
|
||||
local IFS=$'\n'
|
||||
type compopt &>/dev/null && compopt -o filenames
|
||||
COMPREPLY=( $(compgen -f -- ${cur}) )
|
||||
return 0
|
||||
elif [[ ${prev} =~ ${diropts} ]]; then
|
||||
local IFS=$'\n'
|
||||
type compopt &>/dev/null && compopt -o dirnames
|
||||
COMPREPLY=( $(compgen -d -- ${cur}) )
|
||||
return 0
|
||||
fi
|
||||
|
@ -254,5 +254,13 @@
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "d596824c2f8428362c072518856065070616e348"
|
||||
},
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "7b81634fb1d15999757e7a9883daa6ef09ea785b"
|
||||
},
|
||||
{
|
||||
"action": "remove",
|
||||
"when": "500761e41acb96953a5064e951d41d190c287e46"
|
||||
}
|
||||
]
|
||||
|
@ -75,7 +75,7 @@ dev = [
|
||||
]
|
||||
static-analysis = [
|
||||
"autopep8~=2.0",
|
||||
"ruff~=0.11.0",
|
||||
"ruff~=0.12.0",
|
||||
]
|
||||
test = [
|
||||
"pytest~=8.1",
|
||||
@ -210,10 +210,12 @@ ignore = [
|
||||
"TD001", # invalid-todo-tag
|
||||
"TD002", # missing-todo-author
|
||||
"TD003", # missing-todo-link
|
||||
"PLC0415", # import-outside-top-level
|
||||
"PLE0604", # invalid-all-object (false positives)
|
||||
"PLE0643", # potential-index-error (false positives)
|
||||
"PLW0603", # global-statement
|
||||
"PLW1510", # subprocess-run-without-check
|
||||
"PLW1641", # eq-without-hash
|
||||
"PLW2901", # redefined-loop-name
|
||||
"RUF001", # ambiguous-unicode-character-string
|
||||
"RUF012", # mutable-class-default
|
||||
|
@ -575,9 +575,7 @@ # Supported sites
|
||||
- **HollywoodReporterPlaylist**
|
||||
- **Holodex**
|
||||
- **HotNewHipHop**: (**Currently broken**)
|
||||
- **hotstar**
|
||||
- **hotstar:playlist**
|
||||
- **hotstar:season**
|
||||
- **hotstar**: JioHotstar
|
||||
- **hotstar:series**
|
||||
- **hrfernsehen**
|
||||
- **HRTi**: [*hrti*](## "netrc machine")
|
||||
@ -590,7 +588,7 @@ # Supported sites
|
||||
- **Hungama**
|
||||
- **HungamaAlbumPlaylist**
|
||||
- **HungamaSong**
|
||||
- **huya:live**: huya.com
|
||||
- **huya:live**: 虎牙直播
|
||||
- **huya:video**: 虎牙视频
|
||||
- **Hypem**
|
||||
- **Hytale**
|
||||
@ -647,8 +645,6 @@ # Supported sites
|
||||
- **Jamendo**
|
||||
- **JamendoAlbum**
|
||||
- **JeuxVideo**: (**Currently broken**)
|
||||
- **jiocinema**: [*jiocinema*](## "netrc machine")
|
||||
- **jiocinema:series**: [*jiocinema*](## "netrc machine")
|
||||
- **jiosaavn:album**
|
||||
- **jiosaavn:artist**
|
||||
- **jiosaavn:playlist**
|
||||
@ -776,6 +772,7 @@ # Supported sites
|
||||
- **massengeschmack.tv**
|
||||
- **Masters**
|
||||
- **MatchTV**
|
||||
- **Mave**
|
||||
- **MBN**: mbn.co.kr (매일방송)
|
||||
- **MDR**: MDR.DE
|
||||
- **MedalTV**
|
||||
@ -832,7 +829,7 @@ # Supported sites
|
||||
- **Mojevideo**: mojevideo.sk
|
||||
- **Mojvideo**
|
||||
- **Monstercat**
|
||||
- **MonsterSirenHypergryphMusic**
|
||||
- **monstersiren**: 塞壬唱片
|
||||
- **Motherless**
|
||||
- **MotherlessGallery**
|
||||
- **MotherlessGroup**
|
||||
@ -1298,6 +1295,7 @@ # Supported sites
|
||||
- **SampleFocus**
|
||||
- **Sangiin**: 参議院インターネット審議中継 (archive)
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **SaucePlus**: Sauce+
|
||||
- **SBS**: sbs.com.au
|
||||
- **sbs.co.kr**
|
||||
- **sbs.co.kr:allvod_program**
|
||||
|
@ -36,6 +36,18 @@ def do_GET(self):
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
|
||||
elif self.path == '/fake.m3u8':
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Length', '1024')
|
||||
self.end_headers()
|
||||
self.wfile.write(1024 * b'\x00')
|
||||
elif self.path == '/bipbop.m3u8':
|
||||
with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f:
|
||||
data = f.read()
|
||||
self.send_response(200)
|
||||
self.send_header('Content-Length', str(len(data)))
|
||||
self.end_headers()
|
||||
self.wfile.write(data)
|
||||
else:
|
||||
assert False
|
||||
|
||||
@ -2107,5 +2119,45 @@ def test_search_nuxt_json(self):
|
||||
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
|
||||
|
||||
|
||||
class TestInfoExtractorNetwork(unittest.TestCase):
|
||||
def setUp(self, /):
|
||||
self.httpd = http.server.HTTPServer(
|
||||
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
|
||||
self.port = http_server_port(self.httpd)
|
||||
|
||||
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
|
||||
self.server_thread.daemon = True
|
||||
self.server_thread.start()
|
||||
|
||||
self.called = False
|
||||
|
||||
def require_warning(*args, **kwargs):
|
||||
self.called = True
|
||||
|
||||
self.ydl = FakeYDL()
|
||||
self.ydl.report_warning = require_warning
|
||||
self.ie = DummyIE(self.ydl)
|
||||
|
||||
def tearDown(self, /):
|
||||
self.ydl.close()
|
||||
self.httpd.shutdown()
|
||||
self.httpd.server_close()
|
||||
self.server_thread.join(1)
|
||||
|
||||
def test_extract_m3u8_formats(self):
|
||||
formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
|
||||
f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False)
|
||||
self.assertFalse(self.called)
|
||||
self.assertTrue(formats)
|
||||
self.assertTrue(subtitles)
|
||||
|
||||
def test_extract_m3u8_formats_warning(self):
|
||||
formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
|
||||
f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False)
|
||||
self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file')
|
||||
self.assertFalse(formats)
|
||||
self.assertFalse(subtitles)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
from test.helper import (
|
||||
assertGreaterEqual,
|
||||
assertLessEqual,
|
||||
expect_info_dict,
|
||||
expect_warnings,
|
||||
get_params,
|
||||
@ -121,10 +122,13 @@ def print_skipping(reason):
|
||||
params = get_params(test_case.get('params', {}))
|
||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||
if is_playlist and 'playlist' not in test_case:
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
params.setdefault('playlistend', test_case.get(
|
||||
'playlist_mincount', test_case.get('playlist_count', -2) + 1))
|
||||
params.setdefault('playlistend', max(
|
||||
test_case.get('playlist_mincount', -1),
|
||||
test_case.get('playlist_count', -2) + 1,
|
||||
test_case.get('playlist_maxcount', -2) + 1))
|
||||
params.setdefault('skip_download', True)
|
||||
if 'playlist_duration_sum' not in test_case:
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
|
||||
ydl = YoutubeDL(params, auto_init=False)
|
||||
ydl.add_default_info_extractors()
|
||||
@ -159,6 +163,7 @@ def try_rm_tcs_files(tcs=None):
|
||||
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
|
||||
try_rm_tcs_files()
|
||||
try:
|
||||
test_url = test_case['url']
|
||||
try_num = 1
|
||||
while True:
|
||||
try:
|
||||
@ -166,7 +171,7 @@ def try_rm_tcs_files(tcs=None):
|
||||
# for outside error handling, and returns the exit code
|
||||
# instead of the result dict.
|
||||
res_dict = ydl.extract_info(
|
||||
test_case['url'],
|
||||
test_url,
|
||||
force_generic_extractor=params.get('force_generic_extractor', False))
|
||||
except (DownloadError, ExtractorError) as err:
|
||||
# Check if the exception is not a network related one
|
||||
@ -194,23 +199,23 @@ def try_rm_tcs_files(tcs=None):
|
||||
self.assertTrue('entries' in res_dict)
|
||||
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
|
||||
|
||||
num_entries = len(res_dict.get('entries', []))
|
||||
if 'playlist_mincount' in test_case:
|
||||
mincount = test_case['playlist_mincount']
|
||||
assertGreaterEqual(
|
||||
self,
|
||||
len(res_dict['entries']),
|
||||
test_case['playlist_mincount'],
|
||||
'Expected at least %d in playlist %s, but got only %d' % (
|
||||
test_case['playlist_mincount'], test_case['url'],
|
||||
len(res_dict['entries'])))
|
||||
self, num_entries, mincount,
|
||||
f'Expected at least {mincount} entries in playlist {test_url}, but got only {num_entries}')
|
||||
if 'playlist_count' in test_case:
|
||||
count = test_case['playlist_count']
|
||||
got = num_entries if num_entries <= count else 'more'
|
||||
self.assertEqual(
|
||||
len(res_dict['entries']),
|
||||
test_case['playlist_count'],
|
||||
'Expected %d entries in playlist %s, but got %d.' % (
|
||||
test_case['playlist_count'],
|
||||
test_case['url'],
|
||||
len(res_dict['entries']),
|
||||
))
|
||||
num_entries, count,
|
||||
f'Expected exactly {count} entries in playlist {test_url}, but got {got}')
|
||||
if 'playlist_maxcount' in test_case:
|
||||
maxcount = test_case['playlist_maxcount']
|
||||
assertLessEqual(
|
||||
self, num_entries, maxcount,
|
||||
f'Expected at most {maxcount} entries in playlist {test_url}, but got more')
|
||||
if 'playlist_duration_sum' in test_case:
|
||||
got_duration = sum(e['duration'] for e in res_dict['entries'])
|
||||
self.assertEqual(
|
||||
|
@ -478,6 +478,10 @@ def test_extract_function_with_global_stack(self):
|
||||
func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000})
|
||||
self.assertEqual(func([1]), 1111)
|
||||
|
||||
def test_extract_object(self):
|
||||
jsi = JSInterpreter('var a={};a.xy={};var xy;var zxy={};xy={z:function(){return "abc"}};')
|
||||
self.assertTrue('z' in jsi.extract_object('xy', None))
|
||||
|
||||
def test_increment_decrement(self):
|
||||
self._test('function f() { var x = 1; return ++x; }', 2)
|
||||
self._test('function f() { var x = 1; return x++; }', 1)
|
||||
@ -486,6 +490,57 @@ def test_increment_decrement(self):
|
||||
self._test('function f() { var a = "test--"; return a; }', 'test--')
|
||||
self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--')
|
||||
|
||||
def test_nested_function_scoping(self):
|
||||
self._test(R'''
|
||||
function f() {
|
||||
var g = function() {
|
||||
var P = 2;
|
||||
return P;
|
||||
};
|
||||
var P = 1;
|
||||
g();
|
||||
return P;
|
||||
}
|
||||
''', 1)
|
||||
self._test(R'''
|
||||
function f() {
|
||||
var x = function() {
|
||||
for (var w = 1, M = []; w < 2; w++) switch (w) {
|
||||
case 1:
|
||||
M.push("a");
|
||||
case 2:
|
||||
M.push("b");
|
||||
}
|
||||
return M
|
||||
};
|
||||
var w = "c";
|
||||
var M = "d";
|
||||
var y = x();
|
||||
y.push(w);
|
||||
y.push(M);
|
||||
return y;
|
||||
}
|
||||
''', ['a', 'b', 'c', 'd'])
|
||||
self._test(R'''
|
||||
function f() {
|
||||
var P, Q;
|
||||
var z = 100;
|
||||
var g = function() {
|
||||
var P, Q; P = 2; Q = 15;
|
||||
z = 0;
|
||||
return P+Q;
|
||||
};
|
||||
P = 1; Q = 10;
|
||||
var x = g(), y = 3;
|
||||
return P+Q+x+y+z;
|
||||
}
|
||||
''', 31)
|
||||
|
||||
def test_undefined_varnames(self):
|
||||
jsi = JSInterpreter('function f(){ var a; return [a, b]; }')
|
||||
self._test(jsi, [JS_Undefined, JS_Undefined])
|
||||
self.assertEqual(jsi._undefined_varnames, {'b'})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -22,7 +22,6 @@
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import warnings
|
||||
import zlib
|
||||
@ -223,10 +222,7 @@ def do_GET(self):
|
||||
if encoding == 'br' and brotli:
|
||||
payload = brotli.compress(payload)
|
||||
elif encoding == 'gzip':
|
||||
buf = io.BytesIO()
|
||||
with gzip.GzipFile(fileobj=buf, mode='wb') as f:
|
||||
f.write(payload)
|
||||
payload = buf.getvalue()
|
||||
payload = gzip.compress(payload, mtime=0)
|
||||
elif encoding == 'deflate':
|
||||
payload = zlib.compress(payload)
|
||||
elif encoding == 'unsupported':
|
||||
@ -729,6 +725,17 @@ def test_keep_header_casing(self, handler):
|
||||
|
||||
assert 'X-test-heaDer: test' in res
|
||||
|
||||
def test_partial_read_then_full_read(self, handler):
|
||||
with handler() as rh:
|
||||
for encoding in ('', 'gzip', 'deflate'):
|
||||
res = validate_and_send(rh, Request(
|
||||
f'http://127.0.0.1:{self.http_port}/content-encoding',
|
||||
headers={'ytdl-encoding': encoding}))
|
||||
assert res.headers.get('Content-Encoding') == encoding
|
||||
assert res.read(6) == b'<html>'
|
||||
assert res.read(0) == b''
|
||||
assert res.read() == b'<video src="/vid.mp4" /></html>'
|
||||
|
||||
|
||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
||||
class TestClientCertificate:
|
||||
|
@ -133,6 +133,11 @@
|
||||
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
|
||||
'IAOAOq0QJ8wRAAgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_E2u-m37KtXJoOySqa0',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e12fbea4/player_ias.vflset/en_US/base.js',
|
||||
'gN7a-hudCuAuPH6fByOk1_GNXN0yNMHShjZXS2VOgsEItAJz0tipeavEOmNdYN-wUtcEqD3bCXjc0iyKfAyZxCBGgIARwsSdQfJ2CJtt',
|
||||
'JC2JfQdSswRAIgGBCxZyAfKyi0cjXCb3DqEctUw-NYdNmOEvaepit0zJAtIEsgOV2SXZjhSHMNy0NXNG_1kOyBf6HPuAuCduh-a',
|
||||
),
|
||||
]
|
||||
|
||||
_NSIG_TESTS = [
|
||||
@ -328,6 +333,50 @@
|
||||
'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
|
||||
'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js',
|
||||
'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
@ -482,7 +482,8 @@ class YoutubeDL:
|
||||
The following options do not work when used through the API:
|
||||
filename, abort-on-error, multistreams, no-live-chat,
|
||||
format-sort, no-clean-infojson, no-playlist-metafiles,
|
||||
no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
|
||||
no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort,
|
||||
mtime-by-default.
|
||||
Refer __init__.py for their implementation
|
||||
progress_template: Dictionary of templates for progress outputs.
|
||||
Allowed keys are 'download', 'postprocess',
|
||||
@ -2219,6 +2220,7 @@ def _check_formats(self, formats):
|
||||
self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
|
||||
f['__working'] = success
|
||||
if success:
|
||||
f.pop('__needs_testing', None)
|
||||
yield f
|
||||
else:
|
||||
self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
|
||||
@ -3963,6 +3965,7 @@ def simplified_codec(f, field):
|
||||
self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
|
||||
(self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
|
||||
else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
|
||||
self._format_out('Untested', self.Styles.WARNING) if f.get('__needs_testing') else None,
|
||||
format_field(f, 'format_note'),
|
||||
format_field(f, 'container', ignore=(None, f.get('ext'))),
|
||||
delim=', '), delim=' '),
|
||||
|
@ -159,6 +159,12 @@ def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
|
||||
elif 'prefer-vp9-sort' in opts.compat_opts:
|
||||
opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
|
||||
|
||||
if 'mtime-by-default' in opts.compat_opts:
|
||||
if opts.updatetime is None:
|
||||
opts.updatetime = True
|
||||
else:
|
||||
_unused_compat_opt('mtime-by-default')
|
||||
|
||||
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
|
||||
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
|
||||
if _video_multistreams_set is False and _audio_multistreams_set is False:
|
||||
|
@ -435,7 +435,7 @@ def sub_bytes_inv(data):
|
||||
|
||||
|
||||
def rotate(data):
|
||||
return data[1:] + [data[0]]
|
||||
return [*data[1:], data[0]]
|
||||
|
||||
|
||||
def key_schedule_core(data, rcon_iteration):
|
||||
|
@ -302,7 +302,7 @@ def _finish_frag_download(self, ctx, info_dict):
|
||||
elif to_file:
|
||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||
filetime = ctx.get('fragment_filetime')
|
||||
if self.params.get('updatetime', True) and filetime:
|
||||
if self.params.get('updatetime') and filetime:
|
||||
with contextlib.suppress(Exception):
|
||||
os.utime(ctx['filename'], (time.time(), filetime))
|
||||
|
||||
|
@ -94,12 +94,19 @@ def real_download(self, filename, info_dict):
|
||||
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
|
||||
if can_download:
|
||||
has_ffmpeg = FFmpegFD.available()
|
||||
no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
|
||||
if no_crypto and has_ffmpeg:
|
||||
can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
|
||||
elif no_crypto:
|
||||
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
|
||||
'Decryption will be performed natively, but will be extremely slow')
|
||||
if not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s:
|
||||
# Even if pycryptodomex isn't available, force HlsFD for m3u8s that won't work with ffmpeg
|
||||
ffmpeg_can_dl = not traverse_obj(info_dict, ((
|
||||
'extra_param_to_segment_url', 'extra_param_to_key_url',
|
||||
'hls_media_playlist_data', ('hls_aes', ('uri', 'key', 'iv')),
|
||||
), any))
|
||||
message = 'The stream has AES-128 encryption and {} available'.format(
|
||||
'neither ffmpeg nor pycryptodomex are' if ffmpeg_can_dl and not has_ffmpeg else
|
||||
'pycryptodomex is not')
|
||||
if has_ffmpeg and ffmpeg_can_dl:
|
||||
can_download = False
|
||||
else:
|
||||
message += '; decryption will be performed natively, but will be extremely slow'
|
||||
elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
|
||||
install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
|
||||
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
|
||||
|
@ -348,7 +348,7 @@ def retry(e):
|
||||
self.try_rename(ctx.tmpfilename, ctx.filename)
|
||||
|
||||
# Update file modification time
|
||||
if self.params.get('updatetime', True):
|
||||
if self.params.get('updatetime'):
|
||||
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
|
||||
|
||||
self._hook_progress({
|
||||
|
@ -5,47 +5,46 @@
|
||||
from .common import FileDownloader
|
||||
from .external import FFmpegFD
|
||||
from ..networking import Request
|
||||
from ..utils import DownloadError, str_or_none, try_get
|
||||
from ..networking.websocket import WebSocketResponse
|
||||
from ..utils import DownloadError, str_or_none, truncate_string
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class NiconicoLiveFD(FileDownloader):
|
||||
""" Downloads niconico live without being stopped """
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
ws_url = info_dict['url']
|
||||
ws_extractor = info_dict['ws']
|
||||
ws_origin_host = info_dict['origin']
|
||||
live_quality = info_dict.get('live_quality', 'high')
|
||||
live_latency = info_dict.get('live_latency', 'high')
|
||||
video_id = info_dict['id']
|
||||
opts = info_dict['downloader_options']
|
||||
quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url']
|
||||
dl = FFmpegFD(self.ydl, self.params or {})
|
||||
|
||||
new_info_dict = info_dict.copy()
|
||||
new_info_dict.update({
|
||||
'protocol': 'm3u8',
|
||||
})
|
||||
new_info_dict['protocol'] = 'm3u8'
|
||||
|
||||
def communicate_ws(reconnect):
|
||||
if reconnect:
|
||||
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
|
||||
# Support --load-info-json as if it is a reconnect attempt
|
||||
if reconnect or not isinstance(ws_extractor, WebSocketResponse):
|
||||
ws = self.ydl.urlopen(Request(
|
||||
ws_url, headers={'Origin': 'https://live.nicovideo.jp'}))
|
||||
if self.ydl.params.get('verbose', False):
|
||||
self.to_screen('[debug] Sending startWatching request')
|
||||
self.write_debug('Sending startWatching request')
|
||||
ws.send(json.dumps({
|
||||
'type': 'startWatching',
|
||||
'data': {
|
||||
'reconnect': True,
|
||||
'room': {
|
||||
'commentable': True,
|
||||
'protocol': 'webSocket',
|
||||
},
|
||||
'stream': {
|
||||
'quality': live_quality,
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': live_latency,
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
'latency': 'high',
|
||||
'protocol': 'hls',
|
||||
'quality': quality,
|
||||
},
|
||||
'room': {
|
||||
'protocol': 'webSocket',
|
||||
'commentable': True,
|
||||
},
|
||||
'reconnect': True,
|
||||
},
|
||||
'type': 'startWatching',
|
||||
}))
|
||||
else:
|
||||
ws = ws_extractor
|
||||
@ -58,7 +57,6 @@ def communicate_ws(reconnect):
|
||||
if not data or not isinstance(data, dict):
|
||||
continue
|
||||
if data.get('type') == 'ping':
|
||||
# pong back
|
||||
ws.send(r'{"type":"pong"}')
|
||||
ws.send(r'{"type":"keepSeat"}')
|
||||
elif data.get('type') == 'disconnect':
|
||||
@ -66,12 +64,10 @@ def communicate_ws(reconnect):
|
||||
return True
|
||||
elif data.get('type') == 'error':
|
||||
self.write_debug(data)
|
||||
message = try_get(data, lambda x: x['body']['code'], str) or recv
|
||||
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
|
||||
return DownloadError(message)
|
||||
elif self.ydl.params.get('verbose', False):
|
||||
if len(recv) > 100:
|
||||
recv = recv[:100] + '...'
|
||||
self.to_screen(f'[debug] Server said: {recv}')
|
||||
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
|
||||
|
||||
def ws_main():
|
||||
reconnect = False
|
||||
@ -81,7 +77,8 @@ def ws_main():
|
||||
if ret is True:
|
||||
return
|
||||
except BaseException as e:
|
||||
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
|
||||
self.to_screen(
|
||||
f'[niconico:live] {video_id}: Connection error occured, reconnecting after 10 seconds: {e}')
|
||||
time.sleep(10)
|
||||
continue
|
||||
finally:
|
||||
|
@ -805,9 +805,7 @@
|
||||
from .hotnewhiphop import HotNewHipHopIE
|
||||
from .hotstar import (
|
||||
HotStarIE,
|
||||
HotStarPlaylistIE,
|
||||
HotStarPrefixIE,
|
||||
HotStarSeasonIE,
|
||||
HotStarSeriesIE,
|
||||
)
|
||||
from .hrefli import HrefLiRedirectIE
|
||||
@ -921,10 +919,6 @@
|
||||
ShugiinItvVodIE,
|
||||
)
|
||||
from .jeuxvideo import JeuxVideoIE
|
||||
from .jiocinema import (
|
||||
JioCinemaIE,
|
||||
JioCinemaSeriesIE,
|
||||
)
|
||||
from .jiosaavn import (
|
||||
JioSaavnAlbumIE,
|
||||
JioSaavnArtistIE,
|
||||
@ -1107,6 +1101,7 @@
|
||||
from .massengeschmacktv import MassengeschmackTVIE
|
||||
from .masters import MastersIE
|
||||
from .matchtv import MatchTVIE
|
||||
from .mave import MaveIE
|
||||
from .mbn import MBNIE
|
||||
from .mdr import MDRIE
|
||||
from .medaltv import MedalTVIE
|
||||
@ -1152,6 +1147,7 @@
|
||||
MindsIE,
|
||||
)
|
||||
from .minoto import MinotoIE
|
||||
from .mir24tv import Mir24TvIE
|
||||
from .mirrativ import (
|
||||
MirrativIE,
|
||||
MirrativUserIE,
|
||||
@ -1829,6 +1825,7 @@
|
||||
from .saitosan import SaitosanIE
|
||||
from .samplefocus import SampleFocusIE
|
||||
from .sapo import SapoIE
|
||||
from .sauceplus import SaucePlusIE
|
||||
from .sbs import SBSIE
|
||||
from .sbscokr import (
|
||||
SBSCoKrAllvodProgramIE,
|
||||
|
@ -900,7 +900,9 @@ def _real_extract(self, url):
|
||||
headers=headers))
|
||||
|
||||
geo_blocked = traverse_obj(play_info, (
|
||||
'raw', 'data', 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
|
||||
('result', ('raw', 'data')), 'plugins',
|
||||
lambda _, v: v['name'] == 'AreaLimitPanel',
|
||||
'config', 'is_block', {bool}, any))
|
||||
premium_only = play_info.get('code') == -10403
|
||||
|
||||
video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {}
|
||||
@ -914,7 +916,7 @@ def _real_extract(self, url):
|
||||
|
||||
if traverse_obj(play_info, ((
|
||||
('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE'
|
||||
('raw', 'data', 'play_video_type'), # 'preview' vs 'whole'
|
||||
(('result', ('raw', 'data')), 'play_video_type'), # 'preview' vs 'whole' vs 'none'
|
||||
), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
|
||||
self.report_warning(
|
||||
'Only preview format is available, '
|
||||
@ -1226,6 +1228,26 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
'id': '313580179',
|
||||
},
|
||||
'playlist_mincount': 92,
|
||||
}, {
|
||||
# Hidden-mode collection
|
||||
'url': 'https://space.bilibili.com/3669403/video',
|
||||
'info_dict': {
|
||||
'id': '3669403',
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': '3669403_3958082',
|
||||
'title': '合集·直播回放',
|
||||
'description': '',
|
||||
'uploader': '月路Yuel',
|
||||
'uploader_id': '3669403',
|
||||
'timestamp': int,
|
||||
'upload_date': str,
|
||||
'thumbnail': str,
|
||||
},
|
||||
}],
|
||||
'params': {'playlist_items': '7'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -1282,8 +1304,14 @@ def get_metadata(page_data):
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
|
||||
for entry in traverse_obj(page_data, ('list', 'vlist', ..., {dict})):
|
||||
if traverse_obj(entry, ('meta', 'attribute')) == 156:
|
||||
# hidden-mode collection doesn't show its videos in uploads; extract as playlist instead
|
||||
yield self.url_result(
|
||||
f'https://space.bilibili.com/{entry["mid"]}/lists/{entry["meta"]["id"]}?type=season',
|
||||
BilibiliCollectionListIE, f'{entry["mid"]}_{entry["meta"]["id"]}')
|
||||
else:
|
||||
yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
|
||||
|
||||
metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
|
||||
return self.playlist_result(paged_list, playlist_id)
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
|
||||
class CloudyCDNIE(InfoExtractor):
|
||||
_VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
|
||||
_VALID_URL = r'(?:https?:)?//embed\.(?P<domain>cloudycdn\.services|backscreen\.com)/(?P<site_id>[^/?#]+)/media/(?P<id>[\w-]+)'
|
||||
_EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
|
||||
_TESTS = [{
|
||||
'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?',
|
||||
@ -23,7 +23,7 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': 'D23-6000-105_cetstud',
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1',
|
||||
@ -33,7 +33,7 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'LV-8-5-1',
|
||||
'timestamp': 1669767167,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/488306/placeholder1679423604.jpg',
|
||||
'duration': 1205,
|
||||
'upload_date': '20221130',
|
||||
},
|
||||
@ -48,9 +48,21 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'duration': 1673,
|
||||
'title': 'D24-6000-074-cetstud',
|
||||
'timestamp': 1718902233,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
|
||||
},
|
||||
'params': {'format': 'bv'},
|
||||
}, {
|
||||
'url': 'https://embed.backscreen.com/ltv/media/32j_z25-0600-127?',
|
||||
'md5': '9b6fa09ac1a4de53d4f42b94affc3b42',
|
||||
'info_dict': {
|
||||
'id': '32j_z25-0600-127',
|
||||
'ext': 'mp4',
|
||||
'title': 'Z25-0600-127-DZ',
|
||||
'duration': 1906,
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/977427/placeholder1746633646.jpg',
|
||||
'timestamp': 1746632402,
|
||||
'upload_date': '20250507',
|
||||
},
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
|
||||
@ -60,17 +72,17 @@ class CloudyCDNIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'upload_date': '20230223',
|
||||
'duration': 629,
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/518407/placeholder1678748124.jpg',
|
||||
'timestamp': 1677181513,
|
||||
'title': 'LIB-2',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
site_id, video_id = self._match_valid_url(url).group('site_id', 'id')
|
||||
domain, site_id, video_id = self._match_valid_url(url).group('domain', 'site_id', 'id')
|
||||
|
||||
data = self._download_json(
|
||||
f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/',
|
||||
f'https://player.{domain}/player/{site_id}/media/{video_id}/',
|
||||
video_id, data=urlencode_postdata({
|
||||
'version': '6.4.0',
|
||||
'referer': url,
|
||||
|
@ -1,6 +1,7 @@
|
||||
import base64
|
||||
import binascii
|
||||
import collections
|
||||
import contextlib
|
||||
import functools
|
||||
import getpass
|
||||
import http.client
|
||||
@ -264,6 +265,9 @@ class InfoExtractor:
|
||||
* http_chunk_size Chunk size for HTTP downloads
|
||||
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
|
||||
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
|
||||
* ws (NiconicoLiveFD only) WebSocketResponse
|
||||
* ws_url (NiconicoLiveFD only) Websockets URL
|
||||
* max_quality (NiconicoLiveFD only) Max stream quality string
|
||||
* is_dash_periods Whether the format is a result of merging
|
||||
multiple DASH periods.
|
||||
RTMP formats can also have the additional fields: page_url,
|
||||
@ -2180,21 +2184,33 @@ def _extract_m3u8_formats_and_subtitles(
|
||||
raise ExtractorError(errnote, video_id=video_id)
|
||||
self.report_warning(f'{errnote}{bug_reports_message()}')
|
||||
return [], {}
|
||||
|
||||
res = self._download_webpage_handle(
|
||||
m3u8_url, video_id,
|
||||
note='Downloading m3u8 information' if note is None else note,
|
||||
errnote='Failed to download m3u8 information' if errnote is None else errnote,
|
||||
if note is None:
|
||||
note = 'Downloading m3u8 information'
|
||||
if errnote is None:
|
||||
errnote = 'Failed to download m3u8 information'
|
||||
response = self._request_webpage(
|
||||
m3u8_url, video_id, note=note, errnote=errnote,
|
||||
fatal=fatal, data=data, headers=headers, query=query)
|
||||
|
||||
if res is False:
|
||||
if response is False:
|
||||
return [], {}
|
||||
|
||||
m3u8_doc, urlh = res
|
||||
m3u8_url = urlh.url
|
||||
with contextlib.closing(response):
|
||||
prefix = response.read(512)
|
||||
if not prefix.startswith(b'#EXTM3U'):
|
||||
msg = 'Response data has no m3u header'
|
||||
if fatal:
|
||||
raise ExtractorError(msg, video_id=video_id)
|
||||
self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id)
|
||||
return [], {}
|
||||
|
||||
content = self._webpage_read_content(
|
||||
response, m3u8_url, video_id, note=note, errnote=errnote,
|
||||
fatal=fatal, prefix=prefix, data=data)
|
||||
if content is False:
|
||||
return [], {}
|
||||
|
||||
return self._parse_m3u8_formats_and_subtitles(
|
||||
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
|
||||
content, response.url, ext=ext, entry_protocol=entry_protocol,
|
||||
preference=preference, quality=quality, m3u8_id=m3u8_id,
|
||||
note=note, errnote=errnote, fatal=fatal, live=live, data=data,
|
||||
headers=headers, query=query, video_id=video_id)
|
||||
|
@ -17,8 +17,140 @@
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class FloatplaneIE(InfoExtractor):
|
||||
class FloatplaneBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
|
||||
post_data = self._download_json(
|
||||
f'{self._BASE_URL}/api/v3/content/post', post_id, query={'id': post_id},
|
||||
note='Downloading post data', errnote='Unable to download post data',
|
||||
impersonate=self._IMPERSONATE_TARGET)
|
||||
|
||||
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
|
||||
raise ExtractorError('Post does not contain a video or audio track', expected=True)
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], f'{self._BASE_URL}/channel/%s/home') or None
|
||||
|
||||
common_info = {
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
**traverse_obj(post_data, {
|
||||
'uploader': ('creator', 'title', {str}),
|
||||
'uploader_id': ('creator', 'id', {str}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
'channel_id': ('channel', 'id', {str}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
items = []
|
||||
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
|
||||
media_id = media['id']
|
||||
media_typ = media.get('type') or 'video'
|
||||
|
||||
metadata = self._download_json(
|
||||
f'{self._BASE_URL}/api/v3/content/{media_typ}', media_id, query={'id': media_id},
|
||||
note=f'Downloading {media_typ} metadata', impersonate=self._IMPERSONATE_TARGET)
|
||||
|
||||
stream = self._download_json(
|
||||
f'{self._BASE_URL}/api/v2/cdn/delivery', media_id, query={
|
||||
'type': 'vod' if media_typ == 'video' else 'aod',
|
||||
'guid': metadata['guid'],
|
||||
}, note=f'Downloading {media_typ} stream data',
|
||||
impersonate=self._IMPERSONATE_TARGET)
|
||||
|
||||
path_template = traverse_obj(stream, ('resource', 'uri', {str}))
|
||||
|
||||
def format_path(params):
|
||||
path = path_template
|
||||
for i, val in (params or {}).items():
|
||||
path = path.replace(f'{{qualityLevelParams.{i}}}', val)
|
||||
return path
|
||||
|
||||
formats = []
|
||||
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
|
||||
url = urljoin(stream['cdn'], format_path(traverse_obj(
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
|
||||
format_id = traverse_obj(quality, ('name', {str}))
|
||||
hls_aes = {}
|
||||
m3u8_data = None
|
||||
|
||||
# If we need impersonation for the API, then we need it for HLS keys too: extract in advance
|
||||
if self._IMPERSONATE_TARGET is not None:
|
||||
m3u8_data = self._download_webpage(
|
||||
url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS,
|
||||
note=join_nonempty('Downloading', format_id, 'm3u8 information', delim=' '),
|
||||
errnote=join_nonempty('Failed to download', format_id, 'm3u8 information', delim=' '))
|
||||
if not m3u8_data:
|
||||
continue
|
||||
|
||||
key_url = self._search_regex(
|
||||
r'#EXT-X-KEY:METHOD=AES-128,URI="(https?://[^"]+)"',
|
||||
m3u8_data, 'HLS AES key URI', default=None)
|
||||
if key_url:
|
||||
urlh = self._request_webpage(
|
||||
key_url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS,
|
||||
note=join_nonempty('Downloading', format_id, 'HLS AES key', delim=' '),
|
||||
errnote=join_nonempty('Failed to download', format_id, 'HLS AES key', delim=' '))
|
||||
if urlh:
|
||||
hls_aes['key'] = urlh.read().hex()
|
||||
|
||||
formats.append({
|
||||
**traverse_obj(quality, {
|
||||
'format_note': ('label', {str}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
**parse_codecs(quality.get('codecs')),
|
||||
'url': url,
|
||||
'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'),
|
||||
'format_id': format_id,
|
||||
'hls_media_playlist_data': m3u8_data,
|
||||
'hls_aes': hls_aes or None,
|
||||
})
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
post_info = {
|
||||
**common_info,
|
||||
'id': post_id,
|
||||
'display_id': post_id,
|
||||
**traverse_obj(post_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('text', {clean_html}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'dislike_count': ('dislikes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'http_headers': self._HEADERS,
|
||||
}
|
||||
|
||||
if len(items) > 1:
|
||||
return self.playlist_result(items, **post_info)
|
||||
|
||||
post_info.update(items[0])
|
||||
return post_info
|
||||
|
||||
|
||||
class FloatplaneIE(FloatplaneBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/post/(?P<id>\w+)'
|
||||
_BASE_URL = 'https://www.floatplane.com'
|
||||
_IMPERSONATE_TARGET = None
|
||||
_HEADERS = {
|
||||
'Origin': _BASE_URL,
|
||||
'Referer': f'{_BASE_URL}/',
|
||||
}
|
||||
_TESTS = [{
|
||||
'url': 'https://www.floatplane.com/post/2Yf3UedF7C',
|
||||
'info_dict': {
|
||||
@ -170,105 +302,9 @@ class FloatplaneIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies('https://www.floatplane.com').get('sails.sid'):
|
||||
if not self._get_cookies(self._BASE_URL).get('sails.sid'):
|
||||
self.raise_login_required()
|
||||
|
||||
def _real_extract(self, url):
|
||||
post_id = self._match_id(url)
|
||||
|
||||
post_data = self._download_json(
|
||||
'https://www.floatplane.com/api/v3/content/post', post_id, query={'id': post_id},
|
||||
note='Downloading post data', errnote='Unable to download post data')
|
||||
|
||||
if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
|
||||
raise ExtractorError('Post does not contain a video or audio track', expected=True)
|
||||
|
||||
uploader_url = format_field(
|
||||
post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None
|
||||
|
||||
common_info = {
|
||||
'uploader_url': uploader_url,
|
||||
'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
|
||||
'availability': self._availability(needs_subscription=True),
|
||||
**traverse_obj(post_data, {
|
||||
'uploader': ('creator', 'title', {str}),
|
||||
'uploader_id': ('creator', 'id', {str}),
|
||||
'channel': ('channel', 'title', {str}),
|
||||
'channel_id': ('channel', 'id', {str}),
|
||||
'release_timestamp': ('releaseDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
|
||||
items = []
|
||||
for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
|
||||
media_id = media['id']
|
||||
media_typ = media.get('type') or 'video'
|
||||
|
||||
metadata = self._download_json(
|
||||
f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id},
|
||||
note=f'Downloading {media_typ} metadata')
|
||||
|
||||
stream = self._download_json(
|
||||
'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={
|
||||
'type': 'vod' if media_typ == 'video' else 'aod',
|
||||
'guid': metadata['guid'],
|
||||
}, note=f'Downloading {media_typ} stream data')
|
||||
|
||||
path_template = traverse_obj(stream, ('resource', 'uri', {str}))
|
||||
|
||||
def format_path(params):
|
||||
path = path_template
|
||||
for i, val in (params or {}).items():
|
||||
path = path.replace(f'{{qualityLevelParams.{i}}}', val)
|
||||
return path
|
||||
|
||||
formats = []
|
||||
for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
|
||||
url = urljoin(stream['cdn'], format_path(traverse_obj(
|
||||
stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
|
||||
formats.append({
|
||||
**traverse_obj(quality, {
|
||||
'format_id': ('name', {str}),
|
||||
'format_note': ('label', {str}),
|
||||
'width': ('width', {int}),
|
||||
'height': ('height', {int}),
|
||||
}),
|
||||
**parse_codecs(quality.get('codecs')),
|
||||
'url': url,
|
||||
'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'),
|
||||
})
|
||||
|
||||
items.append({
|
||||
**common_info,
|
||||
'id': media_id,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
'formats': formats,
|
||||
})
|
||||
|
||||
post_info = {
|
||||
**common_info,
|
||||
'id': post_id,
|
||||
'display_id': post_id,
|
||||
**traverse_obj(post_data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('text', {clean_html}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'dislike_count': ('dislikes', {int_or_none}),
|
||||
'comment_count': ('comments', {int_or_none}),
|
||||
'thumbnail': ('thumbnail', 'path', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
||||
if len(items) > 1:
|
||||
return self.playlist_result(items, **post_info)
|
||||
|
||||
post_info.update(items[0])
|
||||
return post_info
|
||||
|
||||
|
||||
class FloatplaneChannelIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P<id>[\w-]+)/home(?:/(?P<channel>[\w-]+))?'
|
||||
|
@ -1,3 +1,4 @@
|
||||
import functools
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
@ -9,18 +10,20 @@
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class HotStarBaseIE(InfoExtractor):
|
||||
_BASE_URL = 'https://www.hotstar.com'
|
||||
_API_URL = 'https://api.hotstar.com'
|
||||
_API_URL_V2 = 'https://apix.hotstar.com/v2'
|
||||
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
||||
|
||||
def _call_api_v1(self, path, *args, **kwargs):
|
||||
@ -29,57 +32,86 @@ def _call_api_v1(self, path, *args, **kwargs):
|
||||
headers={'x-country-code': 'IN', 'x-platform-code': 'PCTV'})
|
||||
|
||||
def _call_api_impl(self, path, video_id, query, st=None, cookies=None):
|
||||
if not cookies or not cookies.get('userUP'):
|
||||
self.raise_login_required()
|
||||
|
||||
st = int_or_none(st) or int(time.time())
|
||||
exp = st + 6000
|
||||
auth = f'st={st}~exp={exp}~acl=/*'
|
||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||
|
||||
if cookies and cookies.get('userUP'):
|
||||
token = cookies.get('userUP').value
|
||||
else:
|
||||
token = self._download_json(
|
||||
f'{self._API_URL}/um/v3/users',
|
||||
video_id, note='Downloading token',
|
||||
data=json.dumps({'device_ids': [{'id': str(uuid.uuid4()), 'type': 'device_id'}]}).encode(),
|
||||
headers={
|
||||
'hotstarauth': auth,
|
||||
'x-hs-platform': 'PCTV', # or 'web'
|
||||
'Content-Type': 'application/json',
|
||||
})['user_identity']
|
||||
|
||||
response = self._download_json(
|
||||
f'{self._API_URL}/{path}', video_id, query=query,
|
||||
f'{self._API_URL_V2}/{path}', video_id, query=query,
|
||||
headers={
|
||||
'user-agent': 'Disney+;in.startv.hotstar.dplus.tv/23.08.14.4.2915 (Android/13)',
|
||||
'hotstarauth': auth,
|
||||
'x-hs-appversion': '6.72.2',
|
||||
'x-hs-platform': 'web',
|
||||
'x-hs-usertoken': token,
|
||||
'x-hs-usertoken': cookies['userUP'].value,
|
||||
'x-hs-device-id': traverse_obj(cookies, ('deviceId', 'value')) or str(uuid.uuid4()),
|
||||
'x-hs-client': 'platform:androidtv;app_id:in.startv.hotstar.dplus.tv;app_version:23.08.14.4;os:Android;os_version:13;schema_version:0.0.970',
|
||||
'x-hs-platform': 'androidtv',
|
||||
'content-type': 'application/json',
|
||||
})
|
||||
|
||||
if response['message'] != "Playback URL's fetched successfully":
|
||||
raise ExtractorError(
|
||||
response['message'], expected=True)
|
||||
return response['data']
|
||||
if not traverse_obj(response, ('success', {dict})):
|
||||
raise ExtractorError('API call was unsuccessful')
|
||||
return response['success']
|
||||
|
||||
def _call_api_v2(self, path, video_id, st=None, cookies=None):
|
||||
return self._call_api_impl(
|
||||
f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={
|
||||
'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265',
|
||||
'device-id': cookies.get('device_id').value if cookies.get('device_id') else str(uuid.uuid4()),
|
||||
'os-name': 'Windows',
|
||||
'os-version': '10',
|
||||
})
|
||||
def _call_api_v2(self, path, video_id, content_type, cookies=None, st=None):
|
||||
return self._call_api_impl(f'{path}', video_id, query={
|
||||
'content_id': video_id,
|
||||
'filters': f'content_type={content_type}',
|
||||
'client_capabilities': json.dumps({
|
||||
'package': ['dash', 'hls'],
|
||||
'container': ['fmp4br', 'fmp4'],
|
||||
'ads': ['non_ssai', 'ssai'],
|
||||
'audio_channel': ['atmos', 'dolby51', 'stereo'],
|
||||
'encryption': ['plain', 'widevine'], # wv only so we can raise appropriate error
|
||||
'video_codec': ['h265', 'h264'],
|
||||
'ladder': ['tv', 'full'],
|
||||
'resolution': ['4k', 'hd'],
|
||||
'true_resolution': ['4k', 'hd'],
|
||||
'dynamic_range': ['hdr', 'sdr'],
|
||||
}, separators=(',', ':')),
|
||||
'drm_parameters': json.dumps({
|
||||
'widevine_security_level': ['SW_SECURE_DECODE', 'SW_SECURE_CRYPTO'],
|
||||
'hdcp_version': ['HDCP_V2_2', 'HDCP_V2_1', 'HDCP_V2', 'HDCP_V1'],
|
||||
}, separators=(',', ':')),
|
||||
}, st=st, cookies=cookies)
|
||||
|
||||
def _playlist_entries(self, path, item_id, root=None, **kwargs):
|
||||
results = self._call_api_v1(path, item_id, **kwargs)['body']['results']
|
||||
for video in traverse_obj(results, (('assets', None), 'items', ...)):
|
||||
if video.get('contentId'):
|
||||
yield self.url_result(
|
||||
HotStarIE._video_url(video['contentId'], root=root), HotStarIE, video['contentId'])
|
||||
@staticmethod
|
||||
def _parse_metadata_v1(video_data):
|
||||
return traverse_obj(video_data, {
|
||||
'id': ('contentId', {str}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'timestamp': (('broadcastDate', 'startDate'), {int_or_none}, any),
|
||||
'release_year': ('year', {int_or_none}),
|
||||
'channel': ('channelName', {str}),
|
||||
'channel_id': ('channelId', {int}, {str_or_none}),
|
||||
'series': ('showName', {str}),
|
||||
'season': ('seasonName', {str}),
|
||||
'season_number': ('seasonNo', {int_or_none}),
|
||||
'season_id': ('seasonId', {int}, {str_or_none}),
|
||||
'episode': ('title', {str}),
|
||||
'episode_number': ('episodeNo', {int_or_none}),
|
||||
})
|
||||
|
||||
def _fetch_page(self, path, item_id, name, query, root, page):
|
||||
results = self._call_api_v1(
|
||||
path, item_id, note=f'Downloading {name} page {page + 1} JSON', query={
|
||||
**query,
|
||||
'tao': page * self._PAGE_SIZE,
|
||||
'tas': self._PAGE_SIZE,
|
||||
})['body']['results']
|
||||
|
||||
for video in traverse_obj(results, (('assets', None), 'items', lambda _, v: v['contentId'])):
|
||||
yield self.url_result(
|
||||
HotStarIE._video_url(video['contentId'], root=root), HotStarIE, **self._parse_metadata_v1(video))
|
||||
|
||||
|
||||
class HotStarIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar'
|
||||
IE_DESC = 'JioHotstar'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/)
|
||||
(?:
|
||||
@ -114,15 +146,16 @@ class HotStarIE(HotStarBaseIE):
|
||||
'upload_date': '20190501',
|
||||
'duration': 1219,
|
||||
'channel': 'StarPlus',
|
||||
'channel_id': '3',
|
||||
'channel_id': '821',
|
||||
'series': 'Ek Bhram - Sarvagun Sampanna',
|
||||
'season': 'Chapter 1',
|
||||
'season_number': 1,
|
||||
'season_id': '6771',
|
||||
'season_id': '1260004607',
|
||||
'episode': 'Janhvi Targets Suman',
|
||||
'episode_number': 8,
|
||||
},
|
||||
}, {
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, { # Metadata call gets HTTP Error 504 with tas=10000
|
||||
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843',
|
||||
'info_dict': {
|
||||
'id': '1000282843',
|
||||
@ -134,14 +167,14 @@ class HotStarIE(HotStarBaseIE):
|
||||
'channel': 'StarPlus',
|
||||
'series': 'Anupama',
|
||||
'season_number': 1,
|
||||
'season_id': '7399',
|
||||
'season_id': '1260022018',
|
||||
'upload_date': '20230307',
|
||||
'episode': 'Anupama, Anuj Share a Moment',
|
||||
'episode_number': 853,
|
||||
'duration': 1272,
|
||||
'channel_id': '3',
|
||||
'duration': 1266,
|
||||
'channel_id': '821',
|
||||
},
|
||||
'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320',
|
||||
'info_dict': {
|
||||
@ -154,14 +187,15 @@ class HotStarIE(HotStarBaseIE):
|
||||
'channel': 'Hotstar Specials',
|
||||
'series': 'Kana Kaanum Kaalangal',
|
||||
'season_number': 1,
|
||||
'season_id': '9441',
|
||||
'season_id': '1260097089',
|
||||
'upload_date': '20220421',
|
||||
'episode': 'Back To School',
|
||||
'episode_number': 1,
|
||||
'duration': 1810,
|
||||
'channel_id': '54',
|
||||
'channel_id': '1260003991',
|
||||
},
|
||||
}, {
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, { # Metadata call gets HTTP Error 504 with tas=10000
|
||||
'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286',
|
||||
'info_dict': {
|
||||
'id': '1000262286',
|
||||
@ -173,6 +207,7 @@ class HotStarIE(HotStarBaseIE):
|
||||
'timestamp': 1622943900,
|
||||
'duration': 5395,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/movies/premam/1000091195',
|
||||
'info_dict': {
|
||||
@ -180,12 +215,13 @@ class HotStarIE(HotStarBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Premam',
|
||||
'release_year': 2015,
|
||||
'description': 'md5:d833c654e4187b5e34757eafb5b72d7f',
|
||||
'description': 'md5:096cd8aaae8dab56524823dc19dfa9f7',
|
||||
'timestamp': 1462149000,
|
||||
'upload_date': '20160502',
|
||||
'episode': 'Premam',
|
||||
'duration': 8994,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
|
||||
'only_matching': True,
|
||||
@ -208,6 +244,13 @@ class HotStarIE(HotStarBaseIE):
|
||||
None: 'content',
|
||||
}
|
||||
|
||||
_CONTENT_TYPE = {
|
||||
'movie': 'MOVIE',
|
||||
'episode': 'EPISODE',
|
||||
'match': 'SPORT',
|
||||
'content': 'CLIPS',
|
||||
}
|
||||
|
||||
_IGNORE_MAP = {
|
||||
'res': 'resolution',
|
||||
'vcodec': 'video_codec',
|
||||
@ -229,38 +272,48 @@ def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||
video_type = self._TYPE.get(video_type, video_type)
|
||||
video_type = self._TYPE[video_type]
|
||||
cookies = self._get_cookies(url) # Cookies before any request
|
||||
|
||||
video_data = traverse_obj(
|
||||
self._call_api_v1(
|
||||
f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}),
|
||||
('body', 'results', 'item', {dict})) or {}
|
||||
if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'):
|
||||
self._call_api_v1(f'{video_type}/detail', video_id, fatal=False, query={
|
||||
'tas': 5, # See https://github.com/yt-dlp/yt-dlp/issues/7946
|
||||
'contentId': video_id,
|
||||
}), ('body', 'results', 'item', {dict})) or {}
|
||||
|
||||
if video_data.get('drmProtected'):
|
||||
self.report_drm(video_id)
|
||||
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/396
|
||||
st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date')
|
||||
|
||||
geo_restricted = False
|
||||
formats, subs = [], {}
|
||||
formats, subs, has_drm = [], {}, False
|
||||
headers = {'Referer': f'{self._BASE_URL}/in'}
|
||||
content_type = traverse_obj(video_data, ('contentType', {str})) or self._CONTENT_TYPE[video_type]
|
||||
|
||||
# change to v2 in the future
|
||||
playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets']
|
||||
for playback_set in playback_sets:
|
||||
if not isinstance(playback_set, dict):
|
||||
continue
|
||||
tags = str_or_none(playback_set.get('tagsCombination')) or ''
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/396
|
||||
st = self._request_webpage(
|
||||
f'{self._BASE_URL}/in', video_id, 'Fetching server time').get_header('x-origin-date')
|
||||
watch = self._call_api_v2('pages/watch', video_id, content_type, cookies=cookies, st=st)
|
||||
player_config = traverse_obj(watch, (
|
||||
'page', 'spaces', 'player', 'widget_wrappers', lambda _, v: v['template'] == 'PlayerWidget',
|
||||
'widget', 'data', 'player_config', {dict}, any, {require('player config')}))
|
||||
|
||||
for playback_set in traverse_obj(player_config, (
|
||||
('media_asset', 'media_asset_v2'),
|
||||
('primary', 'fallback'),
|
||||
all, lambda _, v: url_or_none(v['content_url']),
|
||||
)):
|
||||
tags = str_or_none(playback_set.get('playback_tags')) or ''
|
||||
if any(f'{prefix}:{ignore}' in tags
|
||||
for key, prefix in self._IGNORE_MAP.items()
|
||||
for ignore in self._configuration_arg(key)):
|
||||
continue
|
||||
|
||||
format_url = url_or_none(playback_set.get('playbackUrl'))
|
||||
if not format_url:
|
||||
tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';'))
|
||||
if tag_dict.get('encryption') not in ('plain', None):
|
||||
has_drm = True
|
||||
continue
|
||||
format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url)
|
||||
|
||||
format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', playback_set['content_url'])
|
||||
ext = determine_ext(format_url)
|
||||
|
||||
current_formats, current_subs = [], {}
|
||||
@ -280,14 +333,12 @@ def _real_extract(self, url):
|
||||
'height': int_or_none(playback_set.get('height')),
|
||||
}]
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status in (403, 474):
|
||||
geo_restricted = True
|
||||
else:
|
||||
self.write_debug(e)
|
||||
continue
|
||||
|
||||
tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';'))
|
||||
if tag_dict.get('encryption') not in ('plain', None):
|
||||
for f in current_formats:
|
||||
f['has_drm'] = True
|
||||
for f in current_formats:
|
||||
for k, v in self._TAG_FIELDS.items():
|
||||
if not f.get(k):
|
||||
@ -299,6 +350,11 @@ def _real_extract(self, url):
|
||||
'stereo': 2,
|
||||
'dolby51': 6,
|
||||
}.get(tag_dict.get('audio_channel'))
|
||||
if (
|
||||
'Audio_Description' in f['format_id']
|
||||
or 'Audio Description' in (f.get('format_note') or '')
|
||||
):
|
||||
f['source_preference'] = -99 + (f.get('source_preference') or -1)
|
||||
f['format_note'] = join_nonempty(
|
||||
tag_dict.get('ladder'),
|
||||
tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None,
|
||||
@ -310,27 +366,17 @@ def _real_extract(self, url):
|
||||
|
||||
if not formats and geo_restricted:
|
||||
self.raise_geo_restricted(countries=['IN'], metadata_available=True)
|
||||
elif not formats and has_drm:
|
||||
self.report_drm(video_id)
|
||||
self._remove_duplicate_formats(formats)
|
||||
for f in formats:
|
||||
f.setdefault('http_headers', {}).update(headers)
|
||||
|
||||
return {
|
||||
**self._parse_metadata_v1(video_data),
|
||||
'id': video_id,
|
||||
'title': video_data.get('title'),
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')),
|
||||
'release_year': int_or_none(video_data.get('year')),
|
||||
'formats': formats,
|
||||
'subtitles': subs,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': str_or_none(video_data.get('channelId')),
|
||||
'series': video_data.get('showName'),
|
||||
'season': video_data.get('seasonName'),
|
||||
'season_number': int_or_none(video_data.get('seasonNo')),
|
||||
'season_id': str_or_none(video_data.get('seasonId')),
|
||||
'episode': video_data.get('title'),
|
||||
'episode_number': int_or_none(video_data.get('episodeNo')),
|
||||
}
|
||||
|
||||
|
||||
@ -371,64 +417,6 @@ def _real_extract(self, url):
|
||||
return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id)
|
||||
|
||||
|
||||
class HotStarPlaylistIE(HotStarBaseIE):
    # Playlist extractor for `/list/<name>/t-<id>` tray URLs under /tv/ or /shows/
    IE_NAME = 'hotstar:playlist'
    _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
        'info_dict': {
            'id': '3_2_26',
        },
        'playlist_mincount': 20,
    }, {
        'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
        'only_matching': True,
    }, {
        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
        'only_matching': True,
    }, {
        'url': 'https://www.hotstar.com/in/tv/karthika-deepam/15457/list/popular-clips/t-3_2_1272',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the `tray/find` listing for the tray ID in the URL."""
        tray_id = self._match_id(url)
        # The tray ID doubles as the API `uqId` parameter and the playlist ID
        entries = self._playlist_entries(
            'tray/find', tray_id, query={'tas': 10000, 'uqId': tray_id})
        return self.playlist_result(entries, tray_id)
|
||||
|
||||
|
||||
class HotStarSeasonIE(HotStarBaseIE):
    # Playlist extractor for `/seasons/<name>/ss-<id>` URLs under /tv/ or /shows/
    IE_NAME = 'hotstar:season'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028',
        'info_dict': {
            'id': '8028',
        },
        'playlist_mincount': 35,
    }, {
        'url': 'https://www.hotstar.com/in/tv/ishqbaaz/9567/seasons/season-2/ss-4357',
        'info_dict': {
            'id': '4357',
        },
        'playlist_mincount': 30,
    }, {
        'url': 'https://www.hotstar.com/in/tv/bigg-boss/14714/seasons/season-4/ss-8208/',
        'info_dict': {
            'id': '8208',
        },
        'playlist_mincount': 19,
    }, {
        'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the `season/asset` listing for the season ID in the URL."""
        # The pattern has exactly two capture groups: the show URL prefix and the season ID
        show_url, season_id = self._match_valid_url(url).groups()
        season_query = {'tao': 0, 'tas': 0, 'size': 10000, 'id': season_id}
        # The show URL prefix is passed on as the `root` for the generated video URLs
        entries = self._playlist_entries('season/asset', season_id, show_url, query=season_query)
        return self.playlist_result(entries, season_id)
|
||||
|
||||
|
||||
class HotStarSeriesIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:series'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P<id>\d+))/?(?:[#?]|$)'
|
||||
@ -443,25 +431,29 @@ class HotStarSeriesIE(HotStarBaseIE):
|
||||
'info_dict': {
|
||||
'id': '1260050431',
|
||||
},
|
||||
'playlist_mincount': 43,
|
||||
'playlist_mincount': 42,
|
||||
}, {
|
||||
'url': 'https://www.hotstar.com/in/tv/mahabharat/435/',
|
||||
'info_dict': {
|
||||
'id': '435',
|
||||
},
|
||||
'playlist_mincount': 267,
|
||||
}, {
|
||||
}, { # HTTP Error 504 with tas=10000 (possibly because total size is over 1000 items?)
|
||||
'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/',
|
||||
'info_dict': {
|
||||
'id': '1260022017',
|
||||
},
|
||||
'playlist_mincount': 940,
|
||||
'playlist_mincount': 1601,
|
||||
}]
|
||||
_PAGE_SIZE = 100
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, series_id = self._match_valid_url(url).groups()
|
||||
id_ = self._call_api_v1(
|
||||
url, series_id = self._match_valid_url(url).group('url', 'id')
|
||||
eid = self._call_api_v1(
|
||||
'show/detail', series_id, query={'contentId': series_id})['body']['results']['item']['id']
|
||||
|
||||
return self.playlist_result(self._playlist_entries(
|
||||
'tray/g/1/items', series_id, url, query={'tao': 0, 'tas': 10000, 'etid': 0, 'eid': id_}), series_id)
|
||||
entries = OnDemandPagedList(functools.partial(
|
||||
self._fetch_page, 'tray/g/1/items', series_id,
|
||||
'series', {'etid': 0, 'eid': eid}, url), self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, series_id)
|
||||
|
@ -7,12 +7,13 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
@ -22,8 +23,8 @@
|
||||
class HuyaLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P<id>[^/#?&]+)(?:\D|$)'
|
||||
IE_NAME = 'huya:live'
|
||||
IE_DESC = 'huya.com'
|
||||
TESTS = [{
|
||||
IE_DESC = '虎牙直播'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.huya.com/572329',
|
||||
'info_dict': {
|
||||
'id': '572329',
|
||||
@ -149,63 +150,94 @@ class HuyaVideoIE(InfoExtractor):
|
||||
'id': '1002412640',
|
||||
'ext': 'mp4',
|
||||
'title': '8月3日',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 14,
|
||||
'categories': ['主机游戏'],
|
||||
'duration': 14.0,
|
||||
'uploader': '虎牙-ATS欧卡车队青木',
|
||||
'uploader_id': '1564376151',
|
||||
'upload_date': '20240803',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1722675433,
|
||||
},
|
||||
},
|
||||
{
|
||||
}, {
|
||||
'url': 'https://www.huya.com/video/play/556054543.html',
|
||||
'info_dict': {
|
||||
'id': '556054543',
|
||||
'ext': 'mp4',
|
||||
'title': '我不挑事 也不怕事',
|
||||
'thumbnail': r're:https?://.*\.jpg',
|
||||
'duration': 1864,
|
||||
'categories': ['英雄联盟'],
|
||||
'description': 'md5:58184869687d18ce62dc7b4b2ad21201',
|
||||
'duration': 1864.0,
|
||||
'uploader': '卡尔',
|
||||
'uploader_id': '367138632',
|
||||
'upload_date': '20210811',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'tags': 'count:4',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1628675950,
|
||||
},
|
||||
}, {
|
||||
# Only m3u8 available
|
||||
'url': 'https://www.huya.com/video/play/1063345618.html',
|
||||
'info_dict': {
|
||||
'id': '1063345618',
|
||||
'ext': 'mp4',
|
||||
'title': '峡谷第一中!黑铁上钻石顶级教学对抗elo',
|
||||
'categories': ['英雄联盟'],
|
||||
'comment_count': int,
|
||||
'duration': 21603.0,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1749668803,
|
||||
'upload_date': '20250611',
|
||||
'uploader': '北枫CC',
|
||||
'uploader_id': '2183525275',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url: str):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://liveapi.huya.com/moment/getMomentContent', video_id,
|
||||
query={'videoId': video_id})['data']['moment']['videoInfo']
|
||||
moment = self._download_json(
|
||||
'https://liveapi.huya.com/moment/getMomentContent',
|
||||
video_id, query={'videoId': video_id})['data']['moment']
|
||||
|
||||
formats = []
|
||||
for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))):
|
||||
formats.append({
|
||||
'url': definition['url'],
|
||||
**traverse_obj(definition, {
|
||||
'format_id': ('defName', {str}),
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
for definition in traverse_obj(moment, (
|
||||
'videoInfo', 'definitions', lambda _, v: url_or_none(v['m3u8']),
|
||||
)):
|
||||
fmts = self._extract_m3u8_formats(definition['m3u8'], video_id, 'mp4', fatal=False)
|
||||
for fmt in fmts:
|
||||
fmt.update(**traverse_obj(definition, {
|
||||
'filesize': ('size', {int_or_none}),
|
||||
}),
|
||||
})
|
||||
'format_id': ('defName', {str}),
|
||||
'height': ('height', {int_or_none}),
|
||||
'quality': ('definition', {int_or_none}),
|
||||
'width': ('width', {int_or_none}),
|
||||
}))
|
||||
formats.extend(fmts)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
**traverse_obj(moment, {
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
'description': ('content', {clean_html}, filter),
|
||||
'like_count': ('favorCount', {int_or_none}),
|
||||
'timestamp': ('cTime', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(moment, ('videoInfo', {
|
||||
'title': ('videoTitle', {str}),
|
||||
'thumbnail': ('videoCover', {url_or_none}),
|
||||
'categories': ('category', {str}, filter, all, filter),
|
||||
'duration': ('videoDuration', {parse_duration}),
|
||||
'tags': ('tags', ..., {str}, filter, all, filter),
|
||||
'thumbnail': (('videoBigCover', 'videoCover'), {url_or_none}, {update_url(query=None)}, any),
|
||||
'uploader': ('nickName', {str}),
|
||||
'uploader_id': ('uid', {str_or_none}),
|
||||
'upload_date': ('videoUploadTime', {unified_strdate}),
|
||||
'view_count': ('videoPlayNum', {int_or_none}),
|
||||
'comment_count': ('videoCommentNum', {int_or_none}),
|
||||
'like_count': ('favorCount', {int_or_none}),
|
||||
}),
|
||||
})),
|
||||
}
|
||||
|
@ -1,408 +0,0 @@
|
||||
import base64
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
jwt_decode_hs256,
|
||||
parse_age_limit,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class JioCinemaBaseIE(InfoExtractor):
    # Shared authentication and API plumbing for the JioCinema extractors.
    # Access/refresh tokens and the device ID are stored on this class itself
    # (not on instances), so every subclass instance sees the same login state.
    _NETRC_MACHINE = 'jiocinema'
    _GEO_BYPASS = False
    _ACCESS_TOKEN = None
    _REFRESH_TOKEN = None
    _GUEST_TOKEN = None
    _USER_ID = None
    _DEVICE_ID = None
    _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
    _APP_NAME = {'appName': 'RJIL_JioCinema'}
    _APP_VERSION = {'appVersion': '5.0.0'}
    _API_SIGNATURES = 'o668nxgzwff'
    _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
    _ACCESS_HINT = 'the `accessToken` from your browser local storage'
    _LOGIN_HINT = (
        'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
        f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
        'If you have previously logged in with yt-dlp and your session '
        'has been cached, you can use "-u device -p <DEVICE_ID>"')

    def _cache_token(self, token_type):
        """Store the access and/or refresh token in the cache, keyed by device ID.

        `token_type` must be 'access', 'refresh' or 'all'.
        """
        assert token_type in ('access', 'refresh', 'all')
        tokens = (
            ('access', JioCinemaBaseIE._ACCESS_TOKEN),
            ('refresh', JioCinemaBaseIE._REFRESH_TOKEN),
        )
        for kind, value in tokens:
            if token_type in (kind, 'all'):
                self.cache.store(
                    JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-{kind}', value)

    def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}):
        """Send `data` as compact JSON to `url` and return the decoded JSON response.

        HTTP statuses 400, 403 and 474 are passed as `expected_status`, so callers
        receive the body for those responses as well.
        """
        payload = json.dumps(data, separators=(',', ':')).encode()
        request_headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            **self._API_HEADERS,
            **headers,  # caller-supplied headers take precedence
        }
        return self._download_json(
            url, video_id, note, data=payload, headers=request_headers,
            expected_status=(400, 403, 474))

    def _call_auth_api(self, service, endpoint, note, headers={}, data={}):
        """Call an endpoint of the auth host; `service` is prefixed to 'service' in the path."""
        auth_url = f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}'
        return self._call_api(auth_url, None, note=note, headers=headers, data=data)

    def _refresh_token(self):
        """Exchange the refresh token for a new access token and cache the result.

        Raises an expected ExtractorError when no refresh token or device ID is set.
        """
        if not (JioCinemaBaseIE._REFRESH_TOKEN and JioCinemaBaseIE._DEVICE_ID):
            raise ExtractorError('User token has expired', expected=True)

        request_data = {
            **self._APP_NAME,
            'deviceId': self._DEVICE_ID,
            'refreshToken': self._REFRESH_TOKEN,
            **self._APP_VERSION,
        }
        response = self._call_auth_api(
            'token', 'refreshtoken', 'Refreshing token',
            headers={'accesstoken': self._ACCESS_TOKEN}, data=request_data)

        # The refresh token is only replaced (and re-cached) when the server sent a different one
        new_refresh_token = response.get('refreshTokenId')
        if new_refresh_token and new_refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
            JioCinemaBaseIE._REFRESH_TOKEN = new_refresh_token
            self._cache_token('refresh')
        JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
        self._cache_token('access')

    def _fetch_guest_token(self):
        """Generate a random 10-digit device ID and request a guest token for it.

        The device ID is stored on the class; the guest token and user ID are
        assigned through `self`.
        """
        JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
        request_data = {
            **self._APP_NAME,
            'deviceType': 'phone',
            'os': 'ios',
            'deviceId': self._DEVICE_ID,
            'freshLaunch': False,
            'adId': self._DEVICE_ID,
            **self._APP_VERSION,
        }
        response = self._call_auth_api(
            'token', 'guest', 'Downloading guest token', data=request_data)
        self._GUEST_TOKEN = response['authToken']
        self._USER_ID = response['userId']

    def _call_login_api(self, endpoint, guest_token, data, note):
        """Call a `loginotp/<endpoint>` user-service endpoint using the guest token."""
        # NOTE(review): 'data' and the dict are passed to traverse_obj as two separate
        # path arguments rather than one nested path - confirm this is intended
        token_headers = traverse_obj(guest_token, 'data', {
            'deviceType': ('deviceType', {str}),
            'os': ('os', {str}),
        })
        request_headers = {
            **self.geo_verification_headers(),
            'accesstoken': self._GUEST_TOKEN,
            **self._APP_NAME,
            **token_headers,
        }
        return self._call_auth_api(
            'user', f'loginotp/{endpoint}', note, headers=request_headers, data=data)

    def _is_token_expired(self, token):
        """Return True if the JWT's `exp` claim is at or before (now - 180 seconds).

        A token that cannot be decoded, or that has no `exp`, counts as expired.
        """
        # NOTE(review): the 180 s offset is subtracted from the current time - confirm
        # that this direction is the intended one
        expiry = try_call(lambda: jwt_decode_hs256(token)['exp']) or 0
        return expiry <= int(time.time() - 180)

    def _perform_login(self, username, password):
        """Log in using one of three pseudo-usernames: 'token', 'device' or 'phone'.

        - token: `password` is an access token (JWT); an optional refresh token is
          read from the `refresh_token` extractor argument
        - device: `password` is a device ID whose tokens were cached earlier
        - phone: `password` is a phone number; an OTP is requested and verified
        Anything else raises an expected ExtractorError carrying the login hint.
        """
        if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
            return  # a still-valid access token is already set

        UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
        login_type = username.lower()

        if login_type == 'token':
            if not try_call(lambda: jwt_decode_hs256(password)):
                raise ExtractorError(
                    f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)
            JioCinemaBaseIE._ACCESS_TOKEN = password
            refresh_hint = 'the `refreshToken` UUID from your browser local storage'
            refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
            if not refresh_token:
                self.to_screen(
                    'To extend the life of your login session, in addition to your access token, '
                    'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
                    f'where REFRESH_TOKEN is {refresh_hint}')
            elif re.fullmatch(UUID_RE, refresh_token):
                JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
            else:
                self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')

        elif login_type == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
            JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
            JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
            if not (JioCinemaBaseIE._REFRESH_TOKEN and JioCinemaBaseIE._ACCESS_TOKEN):
                raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)

        elif login_type == 'phone' and re.fullmatch(r'\+?\d+', password):
            self._fetch_guest_token()
            guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
            initial_data = {
                'number': base64.b64encode(password.encode()).decode(),
                **self._APP_VERSION,
            }
            otp_request = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
            if not traverse_obj(otp_request, ('OTPInfo', {dict})):
                raise ExtractorError('There was a problem with the phone number login attempt')

            is_iphone = guest_token.get('os') == 'ios'
            device_info = {
                'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
                'info': {
                    'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
                    'androidId': self._DEVICE_ID,
                    'type': 'iOS' if is_iphone else 'Android',
                },
            }
            # The OTP prompt happens here, after the OTP request above has succeeded
            verify_data = {
                'deviceInfo': device_info,
                **initial_data,
                'otp': self._get_tfa_info('the one-time password sent to your phone'),
            }
            response = self._call_login_api('verify', guest_token, verify_data, 'Submitting OTP')
            if traverse_obj(response, 'code') == 1043:
                raise ExtractorError('Wrong OTP', expected=True)
            JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
            JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']

        else:
            raise ExtractorError(self._LOGIN_HINT, expected=True)

        # User and device IDs are taken from the access token's `data` claim
        user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
        JioCinemaBaseIE._USER_ID = user_token['userId']
        JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']

        # Note: this comparison uses the username exactly as given, not the lowercased form
        if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device':
            self._cache_token('all')
            if self.get_param('cachedir') is not False:
                self.to_screen(
                    f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
        elif not JioCinemaBaseIE._REFRESH_TOKEN:
            # Fall back to a refresh token cached earlier for this device ID
            JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
            if JioCinemaBaseIE._REFRESH_TOKEN:
                self._cache_token('access')

        self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
        if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
            self._refresh_token()
|
||||
|
||||
|
||||
class JioCinemaIE(JioCinemaBaseIE):
    # Single-video extractor for JioCinema movie and TV-show episode pages
    IE_NAME = 'jiocinema'
    _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})'
    _TESTS = [{
        'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
        'info_dict': {
            'id': '3759931',
            'ext': 'mp4',
            'title': 'Pradeep to stop the wedding?',
            'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
            'episode': 'Pradeep to stop the wedding?',
            'episode_number': 89,
            'season': 'Agnisakshi…Ek Samjhauta-S1',
            'season_number': 1,
            'series': 'Agnisakshi Ek Samjhauta',
            'duration': 1238.0,
            'thumbnail': r're:https?://.+\.jpg',
            'age_limit': 13,
            'season_id': '3698031',
            'upload_date': '20230606',
            'timestamp': 1686009600,
            'release_date': '20230607',
            'genres': ['Drama'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
        'info_dict': {
            'id': '3754021',
            'ext': 'mp4',
            'title': 'Bhediya',
            'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
            'episode': 'Bhediya',
            'duration': 8500.0,
            'thumbnail': r're:https?://.+\.jpg',
            'age_limit': 13,
            'upload_date': '20230525',
            'timestamp': 1685026200,
            'release_date': '20230524',
            'genres': ['Comedy'],
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _extract_formats_and_subtitles(self, playback, video_id):
        """Return a dict with `formats` and `subtitles` from the playback response's HLS URL.

        DRM is reported via `report_drm` when the response offers no HLS URL.
        """
        hls_url = traverse_obj(playback, (
            'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
        if not hls_url:  # DRM-only content only serves dash urls
            self.report_drm(video_id)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, m3u8_id='hls')
        self._remove_duplicate_formats(formats)

        # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
        usable_formats = traverse_obj(formats, (
            lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480))
        return {
            'formats': usable_formats,
            'subtitles': subtitles,
        }

    def _real_extract(self, url):
        """Fetch playback data and asset metadata for the video and build its info dict."""
        video_id = self._match_id(url)

        # Without a user login a guest token is used; either kind is renewed when expired
        if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
            self._fetch_guest_token()
        elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
            self._refresh_token()

        playback_headers = {
            **self.geo_verification_headers(),
            'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
            **self._APP_NAME,
            'deviceid': self._DEVICE_ID,
            'uniqueid': self._USER_ID,
            'x-apisignatures': self._API_SIGNATURES,
            'x-platform': 'androidweb',
            'x-platform-token': 'web',
        }
        playback_request = {
            '4k': False,
            'ageGroup': '18+',
            'appVersion': '3.4.0',
            'bitrateProfile': 'xhdpi',
            'capability': {
                'drmCapability': {
                    'aesSupport': 'yes',
                    'fairPlayDrmSupport': 'none',
                    'playreadyDrmSupport': 'none',
                    'widevineDRMSupport': 'none',
                },
                'frameRateCapability': [{
                    'frameRateSupport': '30fps',
                    'videoQuality': '1440p',
                }],
            },
            'continueWatchingRequired': False,
            'dolby': False,
            'downloadRequest': False,
            'hevc': False,
            'kidsSafe': False,
            'manufacturer': 'Windows',
            'model': 'Windows',
            'multiAudioRequired': True,
            'osVersion': '10',
            'parentalPinValid': True,
            'x-apisignatures': self._API_SIGNATURES,
        }
        playback = self._call_api(
            f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
            'Downloading playback JSON', headers=playback_headers, data=playback_request)

        # The status is read from the `code` field of the JSON body
        status_code = traverse_obj(playback, ('code', {int}))
        if status_code == 474:
            self.raise_geo_restricted(countries=['IN'])
        elif status_code == 1008:
            error_msg = 'This content is only available for premium users'
            if self._ACCESS_TOKEN:
                raise ExtractorError(error_msg, expected=True)
            self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
        elif status_code == 400:
            raise ExtractorError('The requested content is not available', expected=True)
        elif status_code is not None and status_code != 200:
            raise ExtractorError(
                f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')

        # Non-fatal: the playback response below still provides fallback metadata
        metadata_query = {
            'ids': f'include:{video_id}',
            'responseType': 'common',
            'devicePlatformType': 'desktop',
        }
        metadata = self._download_json(
            f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
            video_id, fatal=False, query=metadata_query)

        # Later entries override earlier ones, so asset metadata wins over the fallback
        return {
            'id': video_id,
            'http_headers': self._API_HEADERS,
            **self._extract_formats_and_subtitles(playback, video_id),
            **traverse_obj(playback, ('data', {
                # fallback metadata
                'title': ('name', {str}),
                'description': ('fullSynopsis', {str}),
                'series': ('show', 'name', {str}, filter),
                'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
                'season_number': ('episode', 'season', {int_or_none}, filter),
                'episode': ('fullTitle', {str}),
                'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
                'age_limit': ('ageNemonic', {parse_age_limit}),
                'duration': ('totalDuration', {float_or_none}),
                'thumbnail': ('images', {url_or_none}),
            })),
            **traverse_obj(metadata, ('result', 0, {
                'title': ('fullTitle', {str}),
                'description': ('fullSynopsis', {str}),
                'series': ('showName', {str}, filter),
                'season': ('seasonName', {str}, filter),
                'season_number': ('season', {int_or_none}),
                'season_id': ('seasonId', {str}, filter),
                'episode': ('fullTitle', {str}),
                'episode_number': ('episode', {int_or_none}),
                'timestamp': ('uploadTime', {int_or_none}),
                'release_date': ('telecastDate', {str}),
                'age_limit': ('ageNemonic', {parse_age_limit}),
                'duration': ('duration', {float_or_none}),
                'genres': ('genres', ..., {str}),
                'thumbnail': ('seo', 'ogImage', {url_or_none}),
            })),
        }
|
||||
|
||||
|
||||
class JioCinemaSeriesIE(JioCinemaBaseIE):
    # Playlist extractor for JioCinema TV-show pages; every episode is
    # delegated to JioCinemaIE through a URL result.
    IE_NAME = 'jiocinema:series'
    _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
    _TESTS = [{
        'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
        'info_dict': {
            'id': '3499917',
            'title': 'naagin',
        },
        'playlist_mincount': 120,
    }, {
        'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
        'info_dict': {
            'id': '3499820',
            'title': 'mtv-splitsvilla-x5',
        },
        'playlist_mincount': 310,
    }]

    def _entries(self, series_id):
        """Yield one URL result per episode, walking every season tab page by page."""
        series_meta = self._download_json(
            f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
            'Downloading series metadata JSON', query={'responseType': 'common'})
        # Only tabs with a truthy 'id' inside the season tray are kept
        season_tabs = traverse_obj(series_meta, (
            'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
            'trayTabs', lambda _, v: v['id']))

        for position, tab in enumerate(season_tabs, start=1):
            season_id = tab['id']
            # The label is only used in the progress note; fall back to the 1-based position
            label = tab.get('label') or position
            page_num = 0
            while True:
                page_num += 1
                page = self._download_json(
                    f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',
                    season_id, f'Downloading season {label} page {page_num} JSON', query={
                        'sort': 'episode:asc',
                        'id': season_id,
                        'responseType': 'common',
                        'page': page_num,
                    })
                episodes = traverse_obj(
                    page, ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
                # A page without usable episodes ends this season
                if not episodes:
                    break
                yield from (
                    self.url_result(
                        episode['slug'], JioCinemaIE, **traverse_obj(episode, {
                            'video_id': 'id',
                            'video_title': ('fullTitle', {str}),
                            'season_number': ('season', {int_or_none}),
                            'episode_number': ('episode', {int_or_none}),
                        }))
                    for episode in episodes)

    def _real_extract(self, url):
        """Build the playlist result; the URL slug is used as the playlist title."""
        mobj = self._match_valid_url(url)
        series_id = mobj.group('id')
        return self.playlist_result(self._entries(series_id), series_id, mobj.group('slug'))
|
@ -1,12 +1,12 @@
|
||||
import functools
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
UserNotLive,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
@ -16,21 +16,17 @@
|
||||
|
||||
|
||||
class KickBaseIE(InfoExtractor):
|
||||
def _real_initialize(self):
|
||||
self._request_webpage(
|
||||
HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False, impersonate=True)
|
||||
xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
|
||||
if not xsrf_token:
|
||||
self.write_debug('kick.com did not set XSRF-TOKEN cookie')
|
||||
KickBaseIE._API_HEADERS = {
|
||||
'Authorization': f'Bearer {xsrf_token.value}',
|
||||
'X-XSRF-TOKEN': xsrf_token.value,
|
||||
} if xsrf_token else {}
|
||||
@functools.cached_property
|
||||
def _api_headers(self):
|
||||
token = traverse_obj(
|
||||
self._get_cookies('https://kick.com/'),
|
||||
('session_token', 'value', {urllib.parse.unquote}))
|
||||
return {'Authorization': f'Bearer {token}'} if token else {}
|
||||
|
||||
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
|
||||
return self._download_json(
|
||||
f'https://kick.com/api/{path}', display_id, note=note,
|
||||
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
|
||||
headers={**self._api_headers, **headers}, impersonate=True, **kwargs)
|
||||
|
||||
|
||||
class KickIE(KickBaseIE):
|
||||
|
@ -167,11 +167,11 @@ class LSMLTVEmbedIE(InfoExtractor):
|
||||
'duration': 1442,
|
||||
'upload_date': '20231121',
|
||||
'title': 'D23-6000-105_cetstud',
|
||||
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=',
|
||||
'md5': 'a1711e190fe680fdb68fd8413b378e87',
|
||||
'md5': 'f236cef2fd5953612754e4e66be51e7a',
|
||||
'info_dict': {
|
||||
'id': 'wUnFArIPDSY',
|
||||
'ext': 'mp4',
|
||||
@ -198,6 +198,8 @@ class LSMLTVEmbedIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/@LTV16plus',
|
||||
'like_count': int,
|
||||
'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5',
|
||||
'media_type': 'livestream',
|
||||
'timestamp': 1652550741,
|
||||
},
|
||||
}]
|
||||
|
||||
@ -208,7 +210,7 @@ def _real_extract(self, url):
|
||||
r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id)
|
||||
embed_type = traverse_obj(data, ('source', 'name', {str}))
|
||||
|
||||
if embed_type == 'telia':
|
||||
if embed_type in ('backscreen', 'telia'): # 'telia' only for backwards compat
|
||||
ie_key = 'CloudyCDN'
|
||||
embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none}))
|
||||
elif embed_type == 'youtube':
|
||||
@ -226,9 +228,9 @@ def _real_extract(self, url):
|
||||
|
||||
|
||||
class LSMReplayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:skaties/|klausies/)?(?:ieraksts|statja)/[^/?#]+/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'url': 'https://replay.lsm.lv/lv/skaties/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'md5': '64f72a360ca530d5ed89c77646c9eee5',
|
||||
'info_dict': {
|
||||
'id': '46k_d23-6000-105',
|
||||
@ -241,20 +243,23 @@ class LSMReplayIE(InfoExtractor):
|
||||
'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
|
||||
'md5': '719b33875cd1429846eeeaeec6df2830',
|
||||
'url': 'https://replay.lsm.lv/lv/klausies/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam',
|
||||
'md5': '84feb80fd7e6ec07744726a9f01cda4d',
|
||||
'info_dict': {
|
||||
'id': 'a342781',
|
||||
'ext': 'mp3',
|
||||
'id': '183522',
|
||||
'ext': 'm4a',
|
||||
'duration': 1823,
|
||||
'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām',
|
||||
'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg',
|
||||
'upload_date': '20231102',
|
||||
'timestamp': 1698921060,
|
||||
'timestamp': 1698913860,
|
||||
'description': 'md5:7bac3b2dd41e44325032943251c357b1',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'url': 'https://replay.lsm.lv/ru/skaties/statja/ltv/355067/v-kengaragse-nacalas-ukladka-relsov',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -267,12 +272,24 @@ def _real_extract(self, url):
|
||||
|
||||
data = self._search_nuxt_data(
|
||||
self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__')
|
||||
playback_type = data['playback']['type']
|
||||
|
||||
if playback_type == 'playable_audio_lr':
|
||||
playback_data = {
|
||||
'formats': self._extract_m3u8_formats(data['playback']['service']['hls_url'], video_id),
|
||||
}
|
||||
elif playback_type == 'embed':
|
||||
playback_data = {
|
||||
'_type': 'url_transparent',
|
||||
'url': data['playback']['service']['url'],
|
||||
}
|
||||
else:
|
||||
raise ExtractorError(f'Unsupported playback type "{playback_type}"')
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
**playback_data,
|
||||
**traverse_obj(data, {
|
||||
'url': ('playback', 'service', 'url', {url_or_none}),
|
||||
'title': ('mediaItem', 'title'),
|
||||
'description': ('mediaItem', ('lead', 'body')),
|
||||
'duration': ('mediaItem', 'duration', {int_or_none}),
|
||||
|
107
yt_dlp/extractor/mave.py
Normal file
107
yt_dlp/extractor/mave.py
Normal file
@ -0,0 +1,107 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class MaveIE(InfoExtractor):
    # Extractor for single podcast episodes hosted on <channel>.mave.digital
    _VALID_URL = r'https?://(?P<channel>[\w-]+)\.mave\.digital/(?P<id>ep-\d+)'
    _TESTS = [{
        'url': 'https://ochenlichnoe.mave.digital/ep-25',
        'md5': 'aa3e513ef588b4366df1520657cbc10c',
        'info_dict': {
            'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2',
            'ext': 'mp3',
            'display_id': 'ochenlichnoe-ep-25',
            'title': 'Между мной и миром: психология самооценки',
            'description': 'md5:4b7463baaccb6982f326bce5c700382a',
            'uploader': 'Самарский университет',
            'channel': 'Очень личное',
            'channel_id': 'ochenlichnoe',
            'channel_url': 'https://ochenlichnoe.mave.digital/',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'duration': 3744,
            'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg',
            'series': 'Очень личное',
            'series_id': '2e0c3749-6df2-4946-82f4-50691419c065',
            'season': 'Season 3',
            'season_number': 3,
            'episode': 'Episode 3',
            'episode_number': 3,
            'timestamp': 1747817300,
            'upload_date': '20250521',
        },
    }, {
        'url': 'https://budem.mave.digital/ep-12',
        'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f',
        'info_dict': {
            'id': '41898bb5-ff57-4797-9236-37a8e537aa21',
            'ext': 'mp3',
            'display_id': 'budem-ep-12',
            'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана',
            'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19',
            'uploader': 'Полина Цветкова+Евгения Акопова',
            'channel': 'Все там будем',
            'channel_id': 'budem',
            'channel_url': 'https://budem.mave.digital/',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'age_limit': 18,
            'duration': 3664,
            'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg',
            'series': 'Все там будем',
            'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746',
            'season': 'Season 2',
            'season_number': 2,
            'episode': 'Episode 5',
            'episode_number': 5,
            'timestamp': 1735538400,
            'upload_date': '20241230',
        },
    }]
    _API_BASE_URL = 'https://api.mave.digital/'

    def _real_extract(self, url):
        """Read episode and podcast metadata from the page's Nuxt payload."""
        mobj = self._match_valid_url(url)
        channel_id = mobj.group('channel')
        display_id = f'{channel_id}-{mobj.group("id")}'
        webpage = self._download_webpage(url, display_id)

        nuxt_payload = self._search_nuxt_json(webpage, display_id)
        # First 'data' entry carrying 'activeEpisodeData'; require() makes its absence an error
        data = traverse_obj(nuxt_payload, (
            'data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')}))

        # Drop a trailing '_<digits>' suffix before the .jpg/.png extension of the og:image URL
        og_thumbnail = self._og_search_thumbnail(webpage, default='')
        thumbnail = re.sub(r'_\d+(?=\.(?:jpg|png))', '', og_thumbnail) or None

        episode_info = traverse_obj(data, ('activeEpisodeData', {
            'url': ('audio', {urljoin(self._API_BASE_URL)}),
            'id': ('id', {str}),
            'title': ('title', {str}),
            'description': ('description', {clean_html}),
            'duration': ('duration', {int_or_none}),
            'season_number': ('season', {int_or_none}),
            'episode_number': ('number', {int_or_none}),
            'view_count': ('listenings', {int_or_none}),
            'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any),
            'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any),
            'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}),
            'timestamp': ('publish_date', {parse_iso8601}),
        }))
        podcast_info = traverse_obj(data, ('podcast', 'podcast', {
            'series_id': ('id', {str}),
            'series': ('title', {str}),
            'channel': ('title', {str}),
            'uploader': ('author', {str}),
        }))

        return {
            'display_id': display_id,
            'channel_id': channel_id,
            'channel_url': f'https://{channel_id}.mave.digital/',
            'vcodec': 'none',
            'thumbnail': thumbnail,
            **episode_info,
            **podcast_info,
        }
|
37
yt_dlp/extractor/mir24tv.py
Normal file
37
yt_dlp/extractor/mir24tv.py
Normal file
@ -0,0 +1,37 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import parse_qs, url_or_none
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class Mir24TvIE(InfoExtractor):
    # Extractor for mir24.tv news pages whose player iframe carries the HLS URL in its query string
    IE_NAME = 'mir24.tv'
    _VALID_URL = r'https?://(?:www\.)?mir24\.tv/news/(?P<id>[0-9]+)/[^/?#]+'
    _TESTS = [{
        'url': 'https://mir24.tv/news/16635210/dni-kultury-rossii-otkrylis-v-uzbekistane.-na-prazdnichnom-koncerte-vystupili-zvezdy-rossijskoj-estrada',
        'info_dict': {
            'id': '16635210',
            'title': 'Дни культуры России открылись в Узбекистане. На праздничном концерте выступили звезды российской эстрады',
            'ext': 'mp4',
            'thumbnail': r're:https://images\.mir24\.tv/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Locate the player iframe, take its last 'source' query value as the m3u8 URL and extract formats."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, impersonate=True)

        player_url = self._search_regex(
            r'<iframe\b[^>]+\bsrc=["\'](https?://mir24\.tv/players/[^"\']+)',
            webpage, 'iframe URL')

        # require() turns a missing or invalid 'source' parameter into an extraction error
        source_path = (
            {parse_qs}, 'source', -1, {self._proto_relative_url}, {url_or_none}, {require('m3u8 URL')})
        m3u8_url = traverse_obj(player_url, source_path)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, video_id, 'mp4', m3u8_id='hls')

        # Prefer the og:title; fall back to the HTML <title>
        title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'formats': formats,
            'subtitles': subtitles,
        }
|
@ -1,53 +1,72 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NewsPicksIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
|
||||
|
||||
_VALID_URL = r'https?://newspicks\.com/movie-series/(?P<id>[^?/#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://newspicks.com/movie-series/11?movieId=1813',
|
||||
'url': 'https://newspicks.com/movie-series/11/?movieId=1813',
|
||||
'info_dict': {
|
||||
'id': '1813',
|
||||
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
|
||||
'description': 'md5:09397aad46d6ded6487ff13f138acadf',
|
||||
'channel': 'HORIE ONE',
|
||||
'channel_id': '11',
|
||||
'release_date': '20220117',
|
||||
'thumbnail': r're:https://.+jpg',
|
||||
'ext': 'mp4',
|
||||
'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
|
||||
'cast': 'count:4',
|
||||
'description': 'md5:09397aad46d6ded6487ff13f138acadf',
|
||||
'duration': 2940,
|
||||
'release_date': '20220117',
|
||||
'release_timestamp': 1642424400,
|
||||
'series': 'HORIE ONE',
|
||||
'series_id': '11',
|
||||
'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
|
||||
'timestamp': 1642424420,
|
||||
'upload_date': '20220117',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://newspicks.com/movie-series/158/?movieId=3932',
|
||||
'info_dict': {
|
||||
'id': '3932',
|
||||
'ext': 'mp4',
|
||||
'title': '【検証】専門家は、KADOKAWAをどう見るか',
|
||||
'cast': 'count:3',
|
||||
'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d',
|
||||
'duration': 1320,
|
||||
'release_date': '20240622',
|
||||
'release_timestamp': 1719088080,
|
||||
'series': 'NPレポート',
|
||||
'series_id': '158',
|
||||
'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
|
||||
'timestamp': 1719086400,
|
||||
'upload_date': '20240622',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
|
||||
series_id = self._match_id(url)
|
||||
video_id = traverse_obj(parse_qs(url), ('movieId', -1, {str}, {require('movie ID')}))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
entries = self._parse_html5_media_entries(
|
||||
url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
|
||||
if not entries:
|
||||
raise ExtractorError('No HTML5 media elements found')
|
||||
info = entries[0]
|
||||
|
||||
title = self._html_search_meta('og:title', webpage, fatal=False)
|
||||
description = self._html_search_meta(
|
||||
('og:description', 'twitter:title'), webpage, fatal=False)
|
||||
channel = self._html_search_regex(
|
||||
r'value="11".+?<div\s+class="title">(.+?)</div', webpage, 'channel name', fatal=False)
|
||||
if not title or not channel:
|
||||
title, channel = re.split(r'\s*|\s*', self._html_extract_title(webpage))
|
||||
fragment = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['fragment']
|
||||
m3u8_url = traverse_obj(fragment, ('movie', 'movieUrl', {url_or_none}, {require('m3u8 URL')}))
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
|
||||
|
||||
release_date = self._search_regex(
|
||||
r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
|
||||
webpage, 'release date', fatal=False, group=(1, 2, 3))
|
||||
|
||||
info.update({
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'channel': channel,
|
||||
'channel_id': channel_id,
|
||||
'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
|
||||
})
|
||||
return info
|
||||
'formats': formats,
|
||||
'series': traverse_obj(fragment, ('series', 'title', {str})),
|
||||
'series_id': series_id,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(fragment, ('movie', {
|
||||
'title': ('title', {str}),
|
||||
'cast': ('relatedUsers', ..., 'displayName', {str}, filter, all, filter),
|
||||
'description': ('explanation', {clean_html}),
|
||||
'release_timestamp': ('onAirStartDate', {parse_iso8601}),
|
||||
'thumbnail': (('image', 'coverImageUrl'), {url_or_none}, any),
|
||||
'timestamp': ('published', {parse_iso8601}),
|
||||
})),
|
||||
}
|
||||
|
@ -8,6 +8,8 @@
|
||||
get_element_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
orderedSet,
|
||||
parse_duration,
|
||||
remove_end,
|
||||
traverse_obj,
|
||||
@ -16,6 +18,7 @@
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
@ -495,7 +498,7 @@ def _real_extract(self, url):
|
||||
chapters = None
|
||||
if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
|
||||
start_time = chapter_durations
|
||||
end_time = chapter_durations[1:] + [duration]
|
||||
end_time = [*chapter_durations[1:], duration]
|
||||
chapters = [{
|
||||
'start_time': s,
|
||||
'end_time': e,
|
||||
@ -591,102 +594,179 @@ class NhkRadiruIE(InfoExtractor):
|
||||
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
||||
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239',
|
||||
'skip': 'Episode expired on 2024-06-09',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=LG96ZW5KZ4_01_4251382',
|
||||
'skip': 'Episode expires on 2025-07-14',
|
||||
'info_dict': {
|
||||
'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集',
|
||||
'id': '0449_01_4003239',
|
||||
'title': 'クラシックの庭\u3000特集「ドボルザークを聴く」(1)交響曲を中心に',
|
||||
'id': 'LG96ZW5KZ4_01_4251382',
|
||||
'ext': 'm4a',
|
||||
'uploader': 'NHK FM 東京',
|
||||
'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc',
|
||||
'series': 'ジャズ・トゥナイト',
|
||||
'channel': 'NHK FM 東京',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
||||
'upload_date': '20240601',
|
||||
'series_id': '0449_01',
|
||||
'release_date': '20240601',
|
||||
'timestamp': 1717257600,
|
||||
'release_timestamp': 1717250400,
|
||||
'description': 'md5:652d3c38a25b77959c716421eba1617a',
|
||||
'uploader': 'NHK FM・東京',
|
||||
'channel': 'NHK FM・東京',
|
||||
'duration': 6597.0,
|
||||
'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/LG96ZW5KZ4/LG96ZW5KZ4-eyecatch_a67c6e949325016c0724f2ed3eec8a2f.jpg',
|
||||
'categories': ['音楽', 'クラシック・オペラ'],
|
||||
'cast': ['田添菜穂子'],
|
||||
'series': 'クラシックの庭',
|
||||
'series_id': 'LG96ZW5KZ4',
|
||||
'episode': '特集「ドボルザークを聴く」(1)交響曲を中心に',
|
||||
'episode_id': 'QP1Q2ZXZY3',
|
||||
'timestamp': 1751871000,
|
||||
'upload_date': '20250707',
|
||||
'release_timestamp': 1751864403,
|
||||
'release_date': '20250707',
|
||||
},
|
||||
}, {
|
||||
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
||||
'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
|
||||
'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=Z9L1V2M24L_01',
|
||||
'info_dict': {
|
||||
'id': '0458_01',
|
||||
'id': 'Z9L1V2M24L_01',
|
||||
'title': 'ベストオブクラシック',
|
||||
'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
|
||||
'series_id': '0458_01',
|
||||
'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/Z9L1V2M24L/Z9L1V2M24L-eyecatch_83ed28b4782907998875965fee60a351.jpg',
|
||||
'series_id': 'Z9L1V2M24L_01',
|
||||
'uploader': 'NHK FM',
|
||||
'channel': 'NHK FM',
|
||||
'series': 'ベストオブクラシック',
|
||||
},
|
||||
'playlist_mincount': 3,
|
||||
}, {
|
||||
# one with letters in the id
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688',
|
||||
'note': 'Expires on 2025-03-31',
|
||||
'info_dict': {
|
||||
'id': 'F683_01_3910688',
|
||||
'ext': 'm4a',
|
||||
'title': '夏目漱石「文鳥」第1回',
|
||||
'series': '【らじる文庫】夏目漱石「文鳥」(全4回)',
|
||||
'series_id': 'F683_01',
|
||||
'description': '朗読:浅井理アナウンサー',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg',
|
||||
'upload_date': '20240106',
|
||||
'release_date': '20240106',
|
||||
'uploader': 'NHK R1',
|
||||
'release_timestamp': 1704511800,
|
||||
'channel': 'NHK R1',
|
||||
'timestamp': 1704512700,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata',
|
||||
'Failed to get extended metadata. API returned Error 1: Invalid parameters'],
|
||||
}, {
|
||||
# news
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=18439M2W42_02_4251212',
|
||||
'skip': 'Expires on 2025-07-15',
|
||||
'info_dict': {
|
||||
'id': 'F261_01_4012173',
|
||||
'id': '18439M2W42_02_4251212',
|
||||
'ext': 'm4a',
|
||||
'channel': 'NHKラジオ第1',
|
||||
'title': 'マイあさ! 午前5時のNHKニュース 2025年7月8日',
|
||||
'uploader': 'NHKラジオ第1',
|
||||
'channel': 'NHKラジオ第1',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
|
||||
'series': 'NHKラジオニュース',
|
||||
'title': '午前0時のNHKニュース',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
|
||||
'release_timestamp': 1718290800,
|
||||
'release_date': '20240613',
|
||||
'timestamp': 1718291400,
|
||||
'upload_date': '20240613',
|
||||
'timestamp': 1751919420,
|
||||
'upload_date': '20250707',
|
||||
'release_timestamp': 1751918400,
|
||||
'release_date': '20250707',
|
||||
},
|
||||
}, {
|
||||
# fallback when extended metadata fails
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298',
|
||||
'skip': 'Expires on 2024-06-07',
|
||||
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=J8792PY43V_20_4253945',
|
||||
'skip': 'Expires on 2025-09-01',
|
||||
'info_dict': {
|
||||
'id': '2834_01_4009298',
|
||||
'title': 'まち☆キラ!開成町特集',
|
||||
'id': 'J8792PY43V_20_4253945',
|
||||
'ext': 'm4a',
|
||||
'release_date': '20240531',
|
||||
'upload_date': '20240531',
|
||||
'series': 'はま☆キラ!',
|
||||
'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg',
|
||||
'channel': 'NHK R1,FM',
|
||||
'description': '',
|
||||
'timestamp': 1717123800,
|
||||
'uploader': 'NHK R1,FM',
|
||||
'release_timestamp': 1717120800,
|
||||
'series_id': '2834_01',
|
||||
'title': '「後絶たない筋肉増強剤の使用」ワールドリポート',
|
||||
'description': '大濱 敦(ソウル支局)',
|
||||
'uploader': 'NHK R1',
|
||||
'channel': 'NHK R1',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/J8792PY43V/img/corner/box_31_thumbnail.jpg',
|
||||
'series': 'マイあさ! ワールドリポート',
|
||||
'series_id': 'J8792PY43V_20',
|
||||
'timestamp': 1751837100,
|
||||
'upload_date': '20250706',
|
||||
'release_timestamp': 1751835600,
|
||||
'release_date': '20250706',
|
||||
|
||||
},
|
||||
'expected_warnings': ['Failed to get extended metadata. API returned empty list.'],
|
||||
'expected_warnings': ['Failed to download extended metadata: HTTP Error 404: Not Found'],
|
||||
}]
|
||||
|
||||
_API_URL_TMPL = None
|
||||
|
||||
# The `_format_*` and `_make_*` functions are ported from: https://www.nhk.or.jp/radio/assets/js/timetable_detail_new.js
|
||||
|
||||
def _format_act_list(self, act_list):
    """Render the cast list as one string, grouped by each act's 'role'.

    The first act of every non-None role is prefixed with 【role】; an act's
    'title', when present, is put before its name followed by '…'.
    """
    # dict preserves insertion order, so roles appear in first-seen order
    acts_by_role = {}
    for act in traverse_obj(act_list, (..., {dict})):
        acts_by_role.setdefault(act.get('role'), []).append(act)

    formatted = []
    for role, members in acts_by_role.items():
        for position, member in enumerate(members):
            prefix = ''
            if position == 0 and role is not None:
                prefix = f'【{role}】'
            title = member.get('title')
            if title:
                prefix += f'{title}…'
            formatted.append(join_nonempty(prefix, member.get('name'), delim=''))
    return join_nonempty(*formatted, delim=',')
|
||||
|
||||
def _make_artists(self, track, key):
    """Join the artists found under track[key] with '、'; return None when nothing is left."""
    # Per artist: 'role…', '(part)' and the bare name, each only when non-empty
    artist_parts = ((
        ('role', filter, {'{}…'.format}),
        ('part', filter, {'({})'.format}),
        ('name', filter),
    ), {str})
    rendered = (
        join_nonempty(*traverse_obj(artist, artist_parts), delim='')
        for artist in traverse_obj(track, (key, ..., {dict})))
    # Artists that render to an empty string are dropped
    return '、'.join(filter(None, rendered)) or None
|
||||
|
||||
def _make_duration(self, track, key):
    """Format the duration under track[key] as '(H時間M分SS秒)'; return None if it cannot be parsed."""
    total = traverse_obj(track, (key, {parse_duration}))
    if total is None:
        return None

    hours, rest = divmod(total, 3600)
    minutes, seconds = divmod(rest, 60)
    # Hours and minutes are omitted when zero; seconds are always shown, zero-padded to 2 digits
    pieces = ['(']
    if hours > 0:
        pieces.append(f'{int(hours)}時間')
    if minutes > 0:
        pieces.append(f'{int(minutes)}分')
    pieces.append(f'{int(seconds):02}秒)')
    return ''.join(pieces)
|
||||
|
||||
def _format_music_list(self, music_list):
    """Render the track list as text: one line per detail, tracks separated by a blank line."""
    rendered_tracks = []
    for track in traverse_obj(music_list, (..., {dict})):
        lines = traverse_obj(track, ((
            ('name', filter, {'「{}」'.format}),
            ('lyricist', filter, {'{}:作詞'.format}),
            ('composer', filter, {'{}:作曲'.format}),
            ('arranger', filter, {'{}:編曲'.format}),
        ), {str}))

        # Either helper may return None; join_nonempty below skips empty values
        lines.extend((
            self._make_artists(track, 'byArtist'),
            self._make_duration(track, 'duration'),
        ))

        label = join_nonempty('label', 'code', delim=' ', from_dict=track)
        if label:
            lines.append(f'<{label}>')
        location = traverse_obj(track, ('location', {str}))
        if location:
            lines.append(f'~{location}~')
        rendered_tracks.append(join_nonempty(*lines, delim='\n'))
    return '\n\n'.join(rendered_tracks)
|
||||
|
||||
def _format_description(self, response):
    """Combine the EPG texts, the formatted cast list and the formatted music list into one description."""
    epg_texts = traverse_obj(response, ('detailedDescription', {dict})) or {}
    summary = join_nonempty('epg80', 'epg200', delim='\n\n', from_dict=epg_texts)
    cast_text = traverse_obj(response, ('misc', 'actList', {self._format_act_list}))
    music_text = traverse_obj(response, ('misc', 'musicList', {self._format_music_list}))
    # Empty sections are skipped; the rest are separated by a blank line
    return join_nonempty(summary, cast_text, music_text, delim='\n\n')
|
||||
|
||||
def _get_thumbnails(self, data, keys, name=None, preference=-1):
    """Build thumbnail dicts from the mapping found at `keys` inside `data`.

    The 'copyright' entry and entries without a valid URL are skipped. The
    first thumbnail gets `preference`; each later one gets one less than the
    one before it. Ids are `name` and the mapping key joined by join_nonempty.
    """
    sized_images = traverse_obj(data, (
        *variadic(keys, (str, bytes, dict, set)), {dict.items},
        lambda _, v: v[0] != 'copyright' and url_or_none(v[1]['url']),
    ))
    return [{
        'url': image['url'],
        'width': int_or_none(image.get('width')),
        'height': int_or_none(image.get('height')),
        'preference': preference - offset,
        'id': join_nonempty(name, size),
    } for offset, (size, image) in enumerate(sized_images)]
|
||||
|
||||
def _extract_extended_metadata(self, episode_id, aa_vinfo):
|
||||
service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')}))
|
||||
date_id = aa_vinfo[3]
|
||||
|
||||
detail_url = try_call(
|
||||
lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3]))
|
||||
lambda: self._API_URL_TMPL.format(broadcastEventId=join_nonempty(service, area, date_id)))
|
||||
if not detail_url:
|
||||
return {}
|
||||
|
||||
@ -699,36 +779,37 @@ def _extract_extended_metadata(self, episode_id, aa_vinfo):
|
||||
if error := traverse_obj(response, ('error', {dict})):
|
||||
self.report_warning(
|
||||
'Failed to get extended metadata. API returned '
|
||||
f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
|
||||
f'Error {join_nonempty("statuscode", "message", from_dict=error, delim=": ")}')
|
||||
return {}
|
||||
|
||||
full_meta = traverse_obj(response, ('list', service, 0, {dict}))
|
||||
if not full_meta:
|
||||
self.report_warning('Failed to get extended metadata. API returned empty list.')
|
||||
return {}
|
||||
station = traverse_obj(response, ('publishedOn', 'broadcastDisplayName', {str}))
|
||||
|
||||
station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None
|
||||
thumbnails = [{
|
||||
'id': str(id_),
|
||||
'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1,
|
||||
**traverse_obj(thumb, {
|
||||
'url': 'url',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
}),
|
||||
} for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))]
|
||||
thumbnails = []
|
||||
thumbnails.extend(self._get_thumbnails(response, ('about', 'eyecatch')))
|
||||
for num, dct in enumerate(traverse_obj(response, ('about', 'eyecatchList', ...))):
|
||||
thumbnails.extend(self._get_thumbnails(dct, None, join_nonempty('list', num), -2))
|
||||
thumbnails.extend(
|
||||
self._get_thumbnails(response, ('about', 'partOfSeries', 'eyecatch'), 'series', -3))
|
||||
|
||||
return filter_dict({
|
||||
'description': self._format_description(response),
|
||||
'cast': traverse_obj(response, ('misc', 'actList', ..., 'name', {str})),
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(response, {
|
||||
'title': ('name', {str}),
|
||||
'timestamp': ('endDate', {unified_timestamp}),
|
||||
'release_timestamp': ('startDate', {unified_timestamp}),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
}),
|
||||
**traverse_obj(response, ('identifierGroup', {
|
||||
'series': ('radioSeriesName', {str}),
|
||||
'series_id': ('radioSeriesId', {str}),
|
||||
'episode': ('radioEpisodeName', {str}),
|
||||
'episode_id': ('radioEpisodeId', {str}),
|
||||
'categories': ('genre', ..., ['name1', 'name2'], {str}, all, {orderedSet}),
|
||||
})),
|
||||
'channel': station,
|
||||
'uploader': station,
|
||||
'description': join_nonempty(
|
||||
'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta),
|
||||
'thumbnails': thumbnails,
|
||||
**traverse_obj(full_meta, {
|
||||
'title': ('title', {str}),
|
||||
'timestamp': ('end_time', {unified_timestamp}),
|
||||
'release_timestamp': ('start_time', {unified_timestamp}),
|
||||
}),
|
||||
})
|
||||
|
||||
def _extract_episode_info(self, episode, programme_id, series_meta):
|
||||
@ -782,7 +863,9 @@ def _real_extract(self, url):
|
||||
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
||||
programme_id = f'{site_id}_{corner_id}'
|
||||
|
||||
if site_id == 'F261': # XXX: News programmes use old API (for now?)
|
||||
# XXX: News programmes use the old API
|
||||
# Can't move this to NhkRadioNewsPageIE because news items still use the normal URL format
|
||||
if site_id == '18439M2W42':
|
||||
meta = self._download_json(
|
||||
'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main']
|
||||
series_meta = traverse_obj(meta, {
|
||||
@ -843,8 +926,8 @@ class NhkRadioNewsPageIE(InfoExtractor):
|
||||
'url': 'https://www.nhk.or.jp/radionews/',
|
||||
'playlist_mincount': 5,
|
||||
'info_dict': {
|
||||
'id': 'F261_01',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
|
||||
'id': '18439M2W42_01',
|
||||
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
|
||||
'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
|
||||
'channel': 'NHKラジオ第1',
|
||||
'uploader': 'NHKラジオ第1',
|
||||
@ -853,7 +936,7 @@ class NhkRadioNewsPageIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
|
||||
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=18439M2W42_01', NhkRadiruIE)
|
||||
|
||||
|
||||
class NhkRadiruLiveIE(InfoExtractor):
|
||||
@ -863,11 +946,12 @@ class NhkRadiruLiveIE(InfoExtractor):
|
||||
# radio 1, no area specified
|
||||
'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
|
||||
'info_dict': {
|
||||
'id': 'r1-tokyo',
|
||||
'title': 're:^NHKネットラジオ第1 東京.+$',
|
||||
'id': 'bs-r1-130',
|
||||
'title': 're:^NHKラジオ第1・東京.+$',
|
||||
'ext': 'm4a',
|
||||
'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
|
||||
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r1/r1-logo.svg',
|
||||
'live_status': 'is_live',
|
||||
'_old_archive_ids': ['nhkradirulive r1-tokyo'],
|
||||
},
|
||||
}, {
|
||||
# radio 2, area specified
|
||||
@ -875,26 +959,28 @@ class NhkRadiruLiveIE(InfoExtractor):
|
||||
'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
|
||||
'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
|
||||
'info_dict': {
|
||||
'id': 'r2-fukuoka',
|
||||
'title': 're:^NHKネットラジオ第2 福岡.+$',
|
||||
'id': 'bs-r2-400',
|
||||
'title': 're:^NHKラジオ第2.+$',
|
||||
'ext': 'm4a',
|
||||
'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
|
||||
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r2/r2-logo.svg',
|
||||
'live_status': 'is_live',
|
||||
'_old_archive_ids': ['nhkradirulive r2-fukuoka'],
|
||||
},
|
||||
}, {
|
||||
# fm, area specified
|
||||
'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
|
||||
'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
|
||||
'info_dict': {
|
||||
'id': 'fm-sapporo',
|
||||
'title': 're:^NHKネットラジオFM 札幌.+$',
|
||||
'id': 'bs-r3-010',
|
||||
'title': 're:^NHK FM・札幌.+$',
|
||||
'ext': 'm4a',
|
||||
'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
|
||||
'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r3/r3-logo.svg',
|
||||
'live_status': 'is_live',
|
||||
'_old_archive_ids': ['nhkradirulive fm-sapporo'],
|
||||
},
|
||||
}]
|
||||
|
||||
_NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
|
||||
_NOA_STATION_IDS = {'r1': 'r1', 'r2': 'r2', 'fm': 'r3'}
|
||||
|
||||
def _real_extract(self, url):
|
||||
station = self._match_id(url)
|
||||
@ -911,12 +997,15 @@ def _real_extract(self, url):
|
||||
noa_info = self._download_json(
|
||||
f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
|
||||
station, note=f'Downloading {area} station metadata', fatal=False)
|
||||
present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
|
||||
broadcast_service = traverse_obj(noa_info, (self._NOA_STATION_IDS.get(station), 'publishedOn'))
|
||||
|
||||
return {
|
||||
'title': ' '.join(traverse_obj(present_info, (('service', 'area'), 'name', {str}))),
|
||||
'id': join_nonempty(station, area),
|
||||
'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
|
||||
**traverse_obj(broadcast_service, {
|
||||
'title': ('broadcastDisplayName', {str}),
|
||||
'id': ('id', {str}),
|
||||
}),
|
||||
'_old_archive_ids': [make_archive_id(self, join_nonempty(station, area))],
|
||||
'thumbnails': traverse_obj(broadcast_service, ('logo', ..., {
|
||||
'url': 'url',
|
||||
'width': ('width', {int_or_none}),
|
||||
'height': ('height', {int_or_none}),
|
||||
|
@ -4,16 +4,15 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor, SearchInfoExtractor
|
||||
from ..networking import Request
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_bitrate,
|
||||
@ -22,9 +21,8 @@
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
qualities,
|
||||
remove_start,
|
||||
str_or_none,
|
||||
unescapeHTML,
|
||||
truncate_string,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
@ -32,7 +30,11 @@
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, require, traverse_obj
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
require,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class NiconicoBaseIE(InfoExtractor):
|
||||
@ -806,41 +808,39 @@ class NiconicoLiveIE(NiconicoBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
|
||||
webpage = self._download_webpage(url, video_id, expected_status=404)
|
||||
if err_msg := traverse_obj(webpage, ({find_element(cls='message')}, {clean_html})):
|
||||
raise ExtractorError(err_msg, expected=True)
|
||||
|
||||
embedded_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
|
||||
|
||||
ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
|
||||
if not ws_url:
|
||||
raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
|
||||
ws_url = update_url_query(ws_url, {
|
||||
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
|
||||
})
|
||||
|
||||
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
|
||||
embedded_data = traverse_obj(webpage, (
|
||||
{find_element(tag='script', id='embedded-data', html=True)},
|
||||
{extract_attributes}, 'data-props', {json.loads}))
|
||||
frontend_id = traverse_obj(embedded_data, ('site', 'frontendId', {str_or_none}), default='9')
|
||||
|
||||
ws_url = traverse_obj(embedded_data, (
|
||||
'site', 'relive', 'webSocketUrl', {url_or_none}, {require('websocket URL')}))
|
||||
ws_url = update_url_query(ws_url, {'frontend_id': frontend_id})
|
||||
ws = self._request_webpage(
|
||||
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
|
||||
video_id=video_id, note='Connecting to WebSocket server')
|
||||
ws_url, video_id, 'Connecting to WebSocket server',
|
||||
headers={'Origin': 'https://live.nicovideo.jp'})
|
||||
|
||||
self.write_debug('Sending HLS server request')
|
||||
ws.send(json.dumps({
|
||||
'type': 'startWatching',
|
||||
'data': {
|
||||
'reconnect': False,
|
||||
'room': {
|
||||
'commentable': True,
|
||||
'protocol': 'webSocket',
|
||||
},
|
||||
'stream': {
|
||||
'quality': 'abr',
|
||||
'protocol': 'hls',
|
||||
'latency': 'high',
|
||||
'accessRightMethod': 'single_cookie',
|
||||
'chasePlay': False,
|
||||
'latency': 'high',
|
||||
'protocol': 'hls',
|
||||
'quality': 'abr',
|
||||
},
|
||||
'room': {
|
||||
'protocol': 'webSocket',
|
||||
'commentable': True,
|
||||
},
|
||||
'reconnect': False,
|
||||
},
|
||||
'type': 'startWatching',
|
||||
}))
|
||||
|
||||
while True:
|
||||
@ -860,17 +860,15 @@ def _real_extract(self, url):
|
||||
raise ExtractorError('Disconnected at middle of extraction')
|
||||
elif data.get('type') == 'error':
|
||||
self.write_debug(recv)
|
||||
message = traverse_obj(data, ('body', 'code')) or recv
|
||||
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
|
||||
raise ExtractorError(message)
|
||||
elif self.get_param('verbose', False):
|
||||
if len(recv) > 100:
|
||||
recv = recv[:100] + '...'
|
||||
self.write_debug(f'Server said: {recv}')
|
||||
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
|
||||
|
||||
title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
|
||||
('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
|
||||
|
||||
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
|
||||
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail', {dict})) or {}
|
||||
thumbnails = []
|
||||
for name, value in raw_thumbs.items():
|
||||
if not isinstance(value, dict):
|
||||
@ -897,31 +895,30 @@ def _real_extract(self, url):
|
||||
cookie['domain'], cookie['name'], cookie['value'],
|
||||
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
|
||||
|
||||
fmt_common = {
|
||||
'live_latency': 'high',
|
||||
'origin': hostname,
|
||||
'protocol': 'niconico_live',
|
||||
'video_id': video_id,
|
||||
'ws': ws,
|
||||
}
|
||||
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
|
||||
a_map = {96: 'audio_low', 192: 'audio_high'}
|
||||
|
||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
||||
for fmt in formats:
|
||||
fmt['protocol'] = 'niconico_live'
|
||||
if fmt.get('acodec') == 'none':
|
||||
fmt['format_id'] = next(q_iter, fmt['format_id'])
|
||||
elif fmt.get('vcodec') == 'none':
|
||||
abr = parse_bitrate(fmt['url'].lower())
|
||||
fmt.update({
|
||||
'abr': abr,
|
||||
'acodec': 'mp4a.40.2',
|
||||
'format_id': a_map.get(abr, fmt['format_id']),
|
||||
})
|
||||
fmt.update(fmt_common)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'downloader_options': {
|
||||
'max_quality': traverse_obj(embedded_data, ('program', 'stream', 'maxQuality', {str})) or 'normal',
|
||||
'ws': ws,
|
||||
'ws_url': ws_url,
|
||||
},
|
||||
**traverse_obj(embedded_data, {
|
||||
'view_count': ('program', 'statistics', 'watchCount'),
|
||||
'comment_count': ('program', 'statistics', 'commentCount'),
|
||||
|
@ -1,6 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
@ -61,10 +60,10 @@ def _real_extract(self, url):
|
||||
post = self._download_json(
|
||||
'https://9gag.com/v1/post', post_id, query={
|
||||
'id': post_id,
|
||||
})['data']['post']
|
||||
}, impersonate=True)['data']['post']
|
||||
|
||||
if post.get('type') != 'Animated':
|
||||
raise ExtractorError(
|
||||
self.raise_no_formats(
|
||||
'The given url does not contain a video',
|
||||
expected=True)
|
||||
|
||||
|
41
yt_dlp/extractor/sauceplus.py
Normal file
41
yt_dlp/extractor/sauceplus.py
Normal file
@ -0,0 +1,41 @@
|
||||
from .floatplane import FloatplaneBaseIE
|
||||
|
||||
|
||||
class SaucePlusIE(FloatplaneBaseIE):
|
||||
IE_DESC = 'Sauce+'
|
||||
_VALID_URL = r'https?://(?:(?:www|beta)\.)?sauceplus\.com/post/(?P<id>\w+)'
|
||||
_BASE_URL = 'https://www.sauceplus.com'
|
||||
_HEADERS = {
|
||||
'Origin': _BASE_URL,
|
||||
'Referer': f'{_BASE_URL}/',
|
||||
}
|
||||
_IMPERSONATE_TARGET = True
|
||||
_TESTS = [{
|
||||
'url': 'https://www.sauceplus.com/post/YbBwIa2A5g',
|
||||
'info_dict': {
|
||||
'id': 'eit4Ugu5TL',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'YbBwIa2A5g',
|
||||
'title': 'Scare the Coyote - Episode 3',
|
||||
'description': '',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'duration': 2975,
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'release_date': '20250627',
|
||||
'release_timestamp': 1750993500,
|
||||
'uploader': 'Scare The Coyote',
|
||||
'uploader_id': '683e0a3269688656a5a49a44',
|
||||
'uploader_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home',
|
||||
'channel': 'Scare The Coyote',
|
||||
'channel_id': '683e0a326968866ceba49a45',
|
||||
'channel_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home/main',
|
||||
'availability': 'subscriber_only',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'):
|
||||
self.raise_login_required()
|
@ -213,7 +213,7 @@ class CieloTVItIE(SkyItIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
|
||||
IE_NAME = 'tv8.it'
|
||||
_VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/(?:[0-9a-z-]+-)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529',
|
||||
'md5': '9ab906a3f75ea342ed928442f9dabd21',
|
||||
@ -227,6 +227,19 @@ class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
|
||||
'thumbnail': 'https://videoplatform.sky.it/still/2020/11/18/1605717753954_ogni-mattina-ucciso-asino-di-andrea-lo-cicero_videostill_1.jpg',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.tv8.it/video/964361',
|
||||
'md5': '1e58e807154658a16edc29e45be38107',
|
||||
'info_dict': {
|
||||
'id': '964361',
|
||||
'ext': 'mp4',
|
||||
'title': 'GialappaShow - S.4 Ep.2',
|
||||
'description': 'md5:60bb4ff5af18bbeeaedabc1de5f9e1e2',
|
||||
'duration': 8030,
|
||||
'thumbnail': 'https://videoplatform.sky.it/captures/494/2024/11/06/964361/964361_1730888412914_thumb_494.jpg',
|
||||
'timestamp': 1730821499,
|
||||
'upload_date': '20241105',
|
||||
},
|
||||
}]
|
||||
_DOMAIN = 'mtv8'
|
||||
|
||||
|
@ -25,6 +25,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'upload_date': '20230114',
|
||||
'timestamp': 1673733618,
|
||||
},
|
||||
'skip': 'not found',
|
||||
}, {
|
||||
'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
|
||||
'info_dict': {
|
||||
@ -41,6 +42,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'upload_date': '20220309',
|
||||
'timestamp': 1646860727.0,
|
||||
},
|
||||
'skip': 'not found',
|
||||
}, {
|
||||
'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023',
|
||||
'info_dict': {
|
||||
@ -68,6 +70,7 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}],
|
||||
'skip': 'not found',
|
||||
}, {
|
||||
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
|
||||
'info_dict': {
|
||||
@ -82,13 +85,30 @@ class SportDeutschlandIE(InfoExtractor):
|
||||
'live_status': 'is_live',
|
||||
},
|
||||
'skip': 'live',
|
||||
}, {
|
||||
'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
|
||||
'md5': '35c11a19395c938cdd076b93bda54cde',
|
||||
'info_dict': {
|
||||
'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
|
||||
'ext': 'mp4',
|
||||
'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
|
||||
'display_id': 'rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
|
||||
'channel': 'Rostock Griffins',
|
||||
'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
|
||||
'live_status': 'was_live',
|
||||
'description': 'md5:60cb00067e55dafa27b0933a43d72862',
|
||||
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
|
||||
'timestamp': 1749913117,
|
||||
'upload_date': '20250614',
|
||||
},
|
||||
}]
|
||||
|
||||
def _process_video(self, asset_id, video):
|
||||
is_live = video['type'] == 'mux_live'
|
||||
token = self._download_json(
|
||||
f'https://api.sportdeutschland.tv/api/frontend/asset-token/{asset_id}',
|
||||
video['id'], query={'type': video['type'], 'playback_id': video['src']})['token']
|
||||
f'https://api.sportdeutschland.tv/api/web/personal/asset-token/{asset_id}',
|
||||
video['id'], query={'type': video['type'], 'playback_id': video['src']},
|
||||
headers={'Referer': 'https://sportdeutschland.tv/'})['token']
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
f'https://stream.mux.com/{video["src"]}.m3u8?token={token}', video['id'], live=is_live)
|
||||
|
||||
|
@ -41,6 +41,7 @@ class SproutVideoIE(InfoExtractor):
|
||||
'duration': 703,
|
||||
'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
|
||||
},
|
||||
'skip': 'Account Disabled',
|
||||
}, {
|
||||
# http formats 'sd' and 'hd' are available
|
||||
'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
|
||||
@ -98,10 +99,17 @@ def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(
|
||||
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
|
||||
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True)
|
||||
data = self._search_json(
|
||||
r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
|
||||
end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())
|
||||
r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id,
|
||||
contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
|
||||
transform_source=lambda x: base64.b64decode(x).decode())
|
||||
|
||||
# SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e]
|
||||
# e.g. if the user-agent we used with the webpage request is too old
|
||||
video_uid = data['videoUid']
|
||||
if video_id != video_uid:
|
||||
raise ExtractorError(f'{self.IE_NAME} sent the wrong video data ({video_uid})')
|
||||
|
||||
formats, subtitles = [], {}
|
||||
headers = {
|
||||
|
@ -6,6 +6,7 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
UserNotLive,
|
||||
@ -188,19 +189,39 @@ def _get_thumbnails(self, thumbnail):
|
||||
}] if thumbnail else None
|
||||
|
||||
def _extract_twitch_m3u8_formats(self, path, video_id, token, signature, live_from_start=False):
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
|
||||
'allow_source': 'true',
|
||||
'allow_audio_only': 'true',
|
||||
'allow_spectre': 'true',
|
||||
'p': random.randint(1000000, 10000000),
|
||||
'platform': 'web',
|
||||
'player': 'twitchweb',
|
||||
'supported_codecs': 'av1,h265,h264',
|
||||
'playlist_include_framerate': 'true',
|
||||
'sig': signature,
|
||||
'token': token,
|
||||
})
|
||||
try:
|
||||
formats = self._extract_m3u8_formats(
|
||||
f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
|
||||
'allow_source': 'true',
|
||||
'allow_audio_only': 'true',
|
||||
'allow_spectre': 'true',
|
||||
'p': random.randint(1000000, 10000000),
|
||||
'platform': 'web',
|
||||
'player': 'twitchweb',
|
||||
'supported_codecs': 'av1,h265,h264',
|
||||
'playlist_include_framerate': 'true',
|
||||
'sig': signature,
|
||||
'token': token,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if (
|
||||
not isinstance(e.cause, HTTPError)
|
||||
or e.cause.status != 403
|
||||
or e.cause.response.get_header('content-type') != 'application/json'
|
||||
):
|
||||
raise
|
||||
|
||||
error_info = traverse_obj(e.cause.response.read(), ({json.loads}, 0, {dict})) or {}
|
||||
if error_info.get('error_code') in ('vod_manifest_restricted', 'unauthorized_entitlements'):
|
||||
common_msg = 'access to this subscriber-only content'
|
||||
if self._get_cookies('https://gql.twitch.tv').get('auth-token'):
|
||||
raise ExtractorError(f'Your account does not have {common_msg}', expected=True)
|
||||
self.raise_login_required(f'You must be logged into an account that has {common_msg}')
|
||||
|
||||
if error_msg := join_nonempty('error_code', 'error', from_dict=error_info, delim=': '):
|
||||
raise ExtractorError(error_msg, expected=True)
|
||||
raise
|
||||
|
||||
for fmt in formats:
|
||||
if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
|
||||
# mpegts does not yet have proper support for av1
|
||||
|
@ -1,5 +1,6 @@
|
||||
import calendar
|
||||
import copy
|
||||
import dataclasses
|
||||
import datetime as dt
|
||||
import enum
|
||||
import functools
|
||||
@ -38,6 +39,60 @@ class _PoTokenContext(enum.Enum):
|
||||
SUBS = 'subs'
|
||||
|
||||
|
||||
class StreamingProtocol(enum.Enum):
|
||||
HTTPS = 'https'
|
||||
DASH = 'dash'
|
||||
HLS = 'hls'
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class BasePoTokenPolicy:
|
||||
required: bool = False
|
||||
# Try to fetch a PO Token even if it is not required.
|
||||
recommended: bool = False
|
||||
not_required_for_premium: bool = False
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class GvsPoTokenPolicy(BasePoTokenPolicy):
|
||||
not_required_with_player_token: bool = False
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class PlayerPoTokenPolicy(BasePoTokenPolicy):
|
||||
pass
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class SubsPoTokenPolicy(BasePoTokenPolicy):
|
||||
pass
|
||||
|
||||
|
||||
WEB_PO_TOKEN_POLICIES = {
|
||||
'GVS_PO_TOKEN_POLICY': {
|
||||
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_for_premium=True,
|
||||
not_required_with_player_token=False,
|
||||
),
|
||||
StreamingProtocol.DASH: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_for_premium=True,
|
||||
not_required_with_player_token=False,
|
||||
),
|
||||
StreamingProtocol.HLS: GvsPoTokenPolicy(
|
||||
required=False,
|
||||
recommended=True,
|
||||
),
|
||||
},
|
||||
'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False),
|
||||
# In rollout, currently detected via experiment
|
||||
# Premium users DO require a PO Token for subtitles
|
||||
'SUBS_PO_TOKEN_POLICY': SubsPoTokenPolicy(required=False),
|
||||
}
|
||||
|
||||
# any clients starting with _ cannot be explicitly requested by the user
|
||||
INNERTUBE_CLIENTS = {
|
||||
'web': {
|
||||
@ -48,8 +103,9 @@ class _PoTokenContext(enum.Enum):
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
|
||||
'SUPPORTS_COOKIES': True,
|
||||
**WEB_PO_TOKEN_POLICIES,
|
||||
'PLAYER_PARAMS': '8AEB',
|
||||
},
|
||||
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
|
||||
'web_safari': {
|
||||
@ -61,8 +117,9 @@ class _PoTokenContext(enum.Enum):
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
|
||||
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
|
||||
'SUPPORTS_COOKIES': True,
|
||||
**WEB_PO_TOKEN_POLICIES,
|
||||
'PLAYER_PARAMS': '8AEB',
|
||||
},
|
||||
'web_embedded': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
@ -83,7 +140,24 @@ class _PoTokenContext(enum.Enum):
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
|
||||
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
|
||||
'GVS_PO_TOKEN_POLICY': {
|
||||
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_for_premium=True,
|
||||
not_required_with_player_token=False,
|
||||
),
|
||||
StreamingProtocol.DASH: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_for_premium=True,
|
||||
not_required_with_player_token=False,
|
||||
),
|
||||
StreamingProtocol.HLS: GvsPoTokenPolicy(
|
||||
required=False,
|
||||
recommended=True,
|
||||
),
|
||||
},
|
||||
'SUPPORTS_COOKIES': True,
|
||||
},
|
||||
# This client now requires sign-in for every video
|
||||
@ -95,7 +169,24 @@ class _PoTokenContext(enum.Enum):
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
|
||||
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
|
||||
'GVS_PO_TOKEN_POLICY': {
|
||||
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_for_premium=True,
|
||||
not_required_with_player_token=False,
|
||||
),
|
||||
StreamingProtocol.DASH: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_for_premium=True,
|
||||
not_required_with_player_token=False,
|
||||
),
|
||||
StreamingProtocol.HLS: GvsPoTokenPolicy(
|
||||
required=False,
|
||||
recommended=True,
|
||||
),
|
||||
},
|
||||
'REQUIRE_AUTH': True,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
},
|
||||
@ -112,7 +203,24 @@ class _PoTokenContext(enum.Enum):
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
|
||||
'GVS_PO_TOKEN_POLICY': {
|
||||
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_with_player_token=True,
|
||||
),
|
||||
StreamingProtocol.DASH: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_with_player_token=True,
|
||||
),
|
||||
StreamingProtocol.HLS: GvsPoTokenPolicy(
|
||||
required=False,
|
||||
recommended=True,
|
||||
not_required_with_player_token=True,
|
||||
),
|
||||
},
|
||||
'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True),
|
||||
},
|
||||
# YouTube Kids videos aren't returned on this client for some reason
|
||||
'android_vr': {
|
||||
@ -146,7 +254,21 @@ class _PoTokenContext(enum.Enum):
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
|
||||
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
|
||||
'GVS_PO_TOKEN_POLICY': {
|
||||
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_with_player_token=True,
|
||||
),
|
||||
# HLS Livestreams require POT 30 seconds in
|
||||
# TODO: Rolling out
|
||||
StreamingProtocol.HLS: GvsPoTokenPolicy(
|
||||
required=False,
|
||||
recommended=True,
|
||||
not_required_with_player_token=True,
|
||||
),
|
||||
},
|
||||
'PLAYER_PO_TOKEN_POLICY': PlayerPoTokenPolicy(required=False, recommended=True),
|
||||
'REQUIRE_JS_PLAYER': False,
|
||||
},
|
||||
# mweb has 'ultralow' formats
|
||||
@ -161,7 +283,24 @@ class _PoTokenContext(enum.Enum):
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
|
||||
'PO_TOKEN_REQUIRED_CONTEXTS': [_PoTokenContext.GVS],
|
||||
'GVS_PO_TOKEN_POLICY': {
|
||||
StreamingProtocol.HTTPS: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_for_premium=True,
|
||||
not_required_with_player_token=False,
|
||||
),
|
||||
StreamingProtocol.DASH: GvsPoTokenPolicy(
|
||||
required=True,
|
||||
recommended=True,
|
||||
not_required_for_premium=True,
|
||||
not_required_with_player_token=False,
|
||||
),
|
||||
StreamingProtocol.HLS: GvsPoTokenPolicy(
|
||||
required=False,
|
||||
recommended=True,
|
||||
),
|
||||
},
|
||||
'SUPPORTS_COOKIES': True,
|
||||
},
|
||||
'tv': {
|
||||
@ -174,6 +313,7 @@ class _PoTokenContext(enum.Enum):
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
'PLAYER_PARAMS': '8AEB',
|
||||
},
|
||||
'tv_simply': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
@ -224,7 +364,11 @@ def build_innertube_clients():
|
||||
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
|
||||
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
|
||||
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
|
||||
ytcfg.setdefault('PO_TOKEN_REQUIRED_CONTEXTS', [])
|
||||
ytcfg.setdefault('GVS_PO_TOKEN_POLICY', {})
|
||||
for protocol in StreamingProtocol:
|
||||
ytcfg['GVS_PO_TOKEN_POLICY'].setdefault(protocol, GvsPoTokenPolicy())
|
||||
ytcfg.setdefault('PLAYER_PO_TOKEN_POLICY', PlayerPoTokenPolicy())
|
||||
ytcfg.setdefault('SUBS_PO_TOKEN_POLICY', SubsPoTokenPolicy())
|
||||
ytcfg.setdefault('REQUIRE_AUTH', False)
|
||||
ytcfg.setdefault('SUPPORTS_COOKIES', False)
|
||||
ytcfg.setdefault('PLAYER_PARAMS', None)
|
||||
|
@ -317,17 +317,31 @@ def _extract_lockup_view_model(self, view_model):
|
||||
content_id = view_model.get('contentId')
|
||||
if not content_id:
|
||||
return
|
||||
|
||||
content_type = view_model.get('contentType')
|
||||
if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
|
||||
if content_type == 'LOCKUP_CONTENT_TYPE_VIDEO':
|
||||
ie = YoutubeIE
|
||||
url = f'https://www.youtube.com/watch?v={content_id}'
|
||||
thumb_keys = (None,)
|
||||
elif content_type in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
|
||||
ie = YoutubeTabIE
|
||||
url = f'https://www.youtube.com/playlist?list={content_id}'
|
||||
thumb_keys = ('collectionThumbnailViewModel', 'primaryThumbnail')
|
||||
else:
|
||||
self.report_warning(
|
||||
f'Unsupported lockup view model content type "{content_type}"{bug_reports_message()}', only_once=True)
|
||||
f'Unsupported lockup view model content type "{content_type}"{bug_reports_message()}',
|
||||
only_once=True)
|
||||
return
|
||||
|
||||
return self.url_result(
|
||||
f'https://www.youtube.com/playlist?list={content_id}', ie=YoutubeTabIE, video_id=content_id,
|
||||
url, ie, content_id,
|
||||
title=traverse_obj(view_model, (
|
||||
'metadata', 'lockupMetadataViewModel', 'title', 'content', {str})),
|
||||
thumbnails=self._extract_thumbnails(view_model, (
|
||||
'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
|
||||
'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
|
||||
duration=traverse_obj(view_model, (
|
||||
'contentImage', 'thumbnailViewModel', 'overlays', ..., 'thumbnailOverlayBadgeViewModel',
|
||||
'thumbnailBadges', ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)))
|
||||
|
||||
def _rich_entries(self, rich_grid_renderer):
|
||||
if lockup_view_model := traverse_obj(rich_grid_renderer, ('content', 'lockupViewModel', {dict})):
|
||||
|
@ -18,6 +18,9 @@
|
||||
from ._base import (
|
||||
INNERTUBE_CLIENTS,
|
||||
BadgeType,
|
||||
GvsPoTokenPolicy,
|
||||
PlayerPoTokenPolicy,
|
||||
StreamingProtocol,
|
||||
YoutubeBaseInfoExtractor,
|
||||
_PoTokenContext,
|
||||
_split_innertube_client,
|
||||
@ -26,7 +29,7 @@
|
||||
from .pot._director import initialize_pot_director
|
||||
from .pot.provider import PoTokenContext, PoTokenRequest
|
||||
from ..openload import PhantomJSwrapper
|
||||
from ...jsinterp import JSInterpreter
|
||||
from ...jsinterp import JSInterpreter, LocalNameSpace
|
||||
from ...networking.exceptions import HTTPError
|
||||
from ...utils import (
|
||||
NO_DEFAULT,
|
||||
@ -71,9 +74,11 @@
|
||||
from ...utils.networking import clean_headers, clean_proxies, select_proxy
|
||||
|
||||
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
|
||||
STREAMING_DATA_INITIAL_PO_TOKEN = '__yt_dlp_po_token'
|
||||
STREAMING_DATA_FETCH_SUBS_PO_TOKEN = '__yt_dlp_fetch_subs_po_token'
|
||||
STREAMING_DATA_FETCH_GVS_PO_TOKEN = '__yt_dlp_fetch_gvs_po_token'
|
||||
STREAMING_DATA_PLAYER_TOKEN_PROVIDED = '__yt_dlp_player_token_provided'
|
||||
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
|
||||
STREAMING_DATA_IS_PREMIUM_SUBSCRIBER = '__yt_dlp_is_premium_subscriber'
|
||||
|
||||
PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'
|
||||
|
||||
@ -253,6 +258,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
|
||||
_DEFAULT_CLIENTS = ('tv', 'ios', 'web')
|
||||
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web')
|
||||
# Premium does not require POT (except for subtitles)
|
||||
_DEFAULT_PREMIUM_CLIENTS = ('tv', 'web')
|
||||
|
||||
_GEO_BYPASS = False
|
||||
|
||||
@ -1801,6 +1808,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
|
||||
}
|
||||
_INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
|
||||
_NSIG_FUNC_CACHE_ID = 'nsig func'
|
||||
_DUMMY_STRING = 'dlp_wins'
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
@ -1831,7 +1840,8 @@ def refetch_manifest(format_id, delay):
|
||||
if time.time() <= start_time + delay:
|
||||
return
|
||||
|
||||
_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
|
||||
_, _, _, _, prs, player_url = self._initial_extract(
|
||||
url, smuggled_data, webpage_url, 'web', video_id)
|
||||
video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
|
||||
microformats = traverse_obj(
|
||||
prs, (..., 'microformat', 'playerMicroformatRenderer'),
|
||||
@ -2204,7 +2214,7 @@ def _decrypt_nsig(self, s, video_id, player_url):
|
||||
self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
|
||||
|
||||
try:
|
||||
extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
|
||||
extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
|
||||
ret = extract_nsig(jsi, func_code)(s)
|
||||
except JSInterpreter.Exception as e:
|
||||
try:
|
||||
@ -2312,16 +2322,18 @@ def _interpret_player_js_global_var(self, jscode, player_url):
|
||||
|
||||
jsi = JSInterpreter(varcode)
|
||||
interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
|
||||
return varname, interpret_global_var(varvalue, {}, allow_recursion=10)
|
||||
return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
|
||||
|
||||
def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
|
||||
# Fixup global array
|
||||
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||||
if varname and global_list:
|
||||
nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
|
||||
else:
|
||||
varname = 'dlp_wins'
|
||||
varname = self._DUMMY_STRING
|
||||
global_list = []
|
||||
|
||||
# Fixup typeof check
|
||||
undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
|
||||
fixed_code = re.sub(
|
||||
fr'''(?x)
|
||||
@ -2334,6 +2346,32 @@ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
|
||||
self.write_debug(join_nonempty(
|
||||
'No typeof statement found in nsig function code',
|
||||
player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||||
|
||||
# Fixup global funcs
|
||||
jsi = JSInterpreter(fixed_code)
|
||||
cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
|
||||
try:
|
||||
self._cached(
|
||||
self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
|
||||
except JSInterpreter.Exception:
|
||||
self._player_cache.pop(cache_id, None)
|
||||
|
||||
global_funcnames = jsi._undefined_varnames
|
||||
debug_names = []
|
||||
jsi = JSInterpreter(jscode)
|
||||
for func_name in global_funcnames:
|
||||
try:
|
||||
func_args, func_code = jsi.extract_function_code(func_name)
|
||||
fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
|
||||
debug_names.append(func_name)
|
||||
except Exception:
|
||||
self.report_warning(join_nonempty(
|
||||
f'Unable to extract global nsig function {func_name} from player JS',
|
||||
player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||||
|
||||
if debug_names:
|
||||
self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
|
||||
|
||||
return argnames, fixed_code
|
||||
|
||||
def _extract_n_function_code(self, video_id, player_url):
|
||||
@ -2347,7 +2385,7 @@ def _extract_n_function_code(self, video_id, player_url):
|
||||
|
||||
func_name = self._extract_n_function_name(jscode, player_url=player_url)
|
||||
|
||||
# XXX: Workaround for the global array variable and lack of `typeof` implementation
|
||||
# XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
|
||||
func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
|
||||
|
||||
return jsi, player_id, func_code
|
||||
@ -2861,7 +2899,7 @@ def _get_config_po_token(self, client: str, context: _PoTokenContext):
|
||||
only_once=True)
|
||||
continue
|
||||
|
||||
def fetch_po_token(self, client='web', context=_PoTokenContext.GVS, ytcfg=None, visitor_data=None,
|
||||
def fetch_po_token(self, client='web', context: _PoTokenContext = _PoTokenContext.GVS, ytcfg=None, visitor_data=None,
|
||||
data_sync_id=None, session_index=None, player_url=None, video_id=None, webpage=None,
|
||||
required=False, **kwargs):
|
||||
"""
|
||||
@ -2946,7 +2984,6 @@ def _fetch_po_token(self, client, **kwargs):
|
||||
fetch_pot_policy == 'never'
|
||||
or (
|
||||
fetch_pot_policy == 'auto'
|
||||
and _PoTokenContext(context) not in self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']
|
||||
and not kwargs.get('required', False)
|
||||
)
|
||||
):
|
||||
@ -3005,19 +3042,19 @@ def _is_agegated(player_response):
|
||||
def _is_unplayable(player_response):
|
||||
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
|
||||
|
||||
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
|
||||
def _extract_player_response(self, client, video_id, webpage_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
|
||||
headers = self.generate_api_headers(
|
||||
ytcfg=player_ytcfg,
|
||||
default_client=client,
|
||||
visitor_data=visitor_data,
|
||||
session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
|
||||
session_index=self._extract_session_index(webpage_ytcfg, player_ytcfg),
|
||||
delegated_session_id=(
|
||||
self._parse_data_sync_id(data_sync_id)[0]
|
||||
or self._extract_delegated_session_id(master_ytcfg, initial_pr, player_ytcfg)
|
||||
or self._extract_delegated_session_id(webpage_ytcfg, initial_pr, player_ytcfg)
|
||||
),
|
||||
user_session_id=(
|
||||
self._parse_data_sync_id(data_sync_id)[1]
|
||||
or self._extract_user_session_id(master_ytcfg, initial_pr, player_ytcfg)
|
||||
or self._extract_user_session_id(webpage_ytcfg, initial_pr, player_ytcfg)
|
||||
),
|
||||
)
|
||||
|
||||
@ -3033,7 +3070,7 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
||||
if po_token:
|
||||
yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}
|
||||
|
||||
sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
|
||||
sts = self._extract_signature_timestamp(video_id, player_url, webpage_ytcfg, fatal=False) if player_url else None
|
||||
yt_query.update(self._generate_player_context(sts))
|
||||
return self._extract_response(
|
||||
item_id=video_id, ep='player', query=yt_query,
|
||||
@ -3042,10 +3079,14 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
|
||||
note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()),
|
||||
) or None
|
||||
|
||||
def _get_requested_clients(self, url, smuggled_data):
|
||||
def _get_requested_clients(self, url, smuggled_data, is_premium_subscriber):
|
||||
requested_clients = []
|
||||
excluded_clients = []
|
||||
default_clients = self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated else self._DEFAULT_CLIENTS
|
||||
default_clients = (
|
||||
self._DEFAULT_PREMIUM_CLIENTS if is_premium_subscriber
|
||||
else self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated
|
||||
else self._DEFAULT_CLIENTS
|
||||
)
|
||||
allowed_clients = sorted(
|
||||
(client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
|
||||
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
|
||||
@ -3087,11 +3128,12 @@ def _invalid_player_response(self, pr, video_id):
|
||||
if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
|
||||
return pr_id
|
||||
|
||||
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
|
||||
def _extract_player_responses(self, clients, video_id, webpage, webpage_client, webpage_ytcfg, is_premium_subscriber):
|
||||
initial_pr = None
|
||||
if webpage:
|
||||
initial_pr = self._search_json(
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
|
||||
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage,
|
||||
f'{webpage_client} client initial player response', video_id, fatal=False)
|
||||
|
||||
prs = []
|
||||
deprioritized_prs = []
|
||||
@ -3122,11 +3164,11 @@ def append_client(*client_names):
|
||||
while clients:
|
||||
deprioritize_pr = False
|
||||
client, base_client, variant = _split_innertube_client(clients.pop())
|
||||
player_ytcfg = master_ytcfg if client == 'web' else {}
|
||||
if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
|
||||
player_ytcfg = webpage_ytcfg if client == webpage_client else {}
|
||||
if 'configs' not in self._configuration_arg('player_skip') and client != webpage_client:
|
||||
player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
|
||||
|
||||
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
|
||||
player_url = player_url or self._extract_player_url(webpage_ytcfg, player_ytcfg, webpage=webpage)
|
||||
require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
|
||||
if 'js' in self._configuration_arg('player_skip'):
|
||||
require_js_player = False
|
||||
@ -3136,10 +3178,12 @@ def append_client(*client_names):
|
||||
player_url = self._download_player_url(video_id)
|
||||
tried_iframe_fallback = True
|
||||
|
||||
pr = initial_pr if client == 'web' else None
|
||||
pr = None
|
||||
if client == webpage_client and 'player_response' not in self._configuration_arg('webpage_skip'):
|
||||
pr = initial_pr
|
||||
|
||||
visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
|
||||
data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
|
||||
visitor_data = visitor_data or self._extract_visitor_data(webpage_ytcfg, initial_pr, player_ytcfg)
|
||||
data_sync_id = data_sync_id or self._extract_data_sync_id(webpage_ytcfg, initial_pr, player_ytcfg)
|
||||
|
||||
fetch_po_token_args = {
|
||||
'client': client,
|
||||
@ -3148,53 +3192,26 @@ def append_client(*client_names):
|
||||
'data_sync_id': data_sync_id if self.is_authenticated else None,
|
||||
'player_url': player_url if require_js_player else None,
|
||||
'webpage': webpage,
|
||||
'session_index': self._extract_session_index(master_ytcfg, player_ytcfg),
|
||||
'session_index': self._extract_session_index(webpage_ytcfg, player_ytcfg),
|
||||
'ytcfg': player_ytcfg or self._get_default_ytcfg(client),
|
||||
}
|
||||
|
||||
# Don't need a player PO token for WEB if using player response from webpage
|
||||
player_pot_policy: PlayerPoTokenPolicy = self._get_default_ytcfg(client)['PLAYER_PO_TOKEN_POLICY']
|
||||
player_po_token = None if pr else self.fetch_po_token(
|
||||
context=_PoTokenContext.PLAYER, **fetch_po_token_args)
|
||||
context=_PoTokenContext.PLAYER, **fetch_po_token_args,
|
||||
required=player_pot_policy.required or player_pot_policy.recommended)
|
||||
|
||||
gvs_po_token = self.fetch_po_token(
|
||||
context=_PoTokenContext.GVS, **fetch_po_token_args)
|
||||
fetch_gvs_po_token_func = functools.partial(
|
||||
self.fetch_po_token, context=_PoTokenContext.GVS, **fetch_po_token_args)
|
||||
|
||||
fetch_subs_po_token_func = functools.partial(
|
||||
self.fetch_po_token,
|
||||
context=_PoTokenContext.SUBS,
|
||||
**fetch_po_token_args,
|
||||
)
|
||||
|
||||
required_pot_contexts = self._get_default_ytcfg(client)['PO_TOKEN_REQUIRED_CONTEXTS']
|
||||
|
||||
if (
|
||||
not player_po_token
|
||||
and _PoTokenContext.PLAYER in required_pot_contexts
|
||||
):
|
||||
# TODO: may need to skip player response request. Unsure yet..
|
||||
self.report_warning(
|
||||
f'No Player PO Token provided for {client} client, '
|
||||
f'which may be required for working {client} formats. This client will be deprioritized'
|
||||
f'You can manually pass a Player PO Token for this client with --extractor-args "youtube:po_token={client}.player+XXX". '
|
||||
f'For more information, refer to {PO_TOKEN_GUIDE_URL} .', only_once=True)
|
||||
deprioritize_pr = True
|
||||
|
||||
if (
|
||||
not gvs_po_token
|
||||
and _PoTokenContext.GVS in required_pot_contexts
|
||||
and 'missing_pot' in self._configuration_arg('formats')
|
||||
):
|
||||
# note: warning with help message is provided later during format processing
|
||||
self.report_warning(
|
||||
f'No GVS PO Token provided for {client} client, '
|
||||
f'which may be required for working {client} formats. This client will be deprioritized',
|
||||
only_once=True)
|
||||
deprioritize_pr = True
|
||||
self.fetch_po_token, context=_PoTokenContext.SUBS, **fetch_po_token_args)
|
||||
|
||||
try:
|
||||
pr = pr or self._extract_player_response(
|
||||
client, video_id,
|
||||
master_ytcfg=player_ytcfg or master_ytcfg,
|
||||
webpage_ytcfg=player_ytcfg or webpage_ytcfg,
|
||||
player_ytcfg=player_ytcfg,
|
||||
player_url=player_url,
|
||||
initial_pr=initial_pr,
|
||||
@ -3212,12 +3229,16 @@ def append_client(*client_names):
|
||||
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
|
||||
sd = pr.setdefault('streamingData', {})
|
||||
sd[STREAMING_DATA_CLIENT_NAME] = client
|
||||
sd[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
|
||||
sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||||
sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
|
||||
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
|
||||
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
|
||||
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
|
||||
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
|
||||
f[STREAMING_DATA_CLIENT_NAME] = client
|
||||
f[STREAMING_DATA_INITIAL_PO_TOKEN] = gvs_po_token
|
||||
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
|
||||
f[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
|
||||
f[STREAMING_DATA_PLAYER_TOKEN_PROVIDED] = bool(player_po_token)
|
||||
if deprioritize_pr:
|
||||
deprioritized_prs.append(pr)
|
||||
else:
|
||||
@ -3243,6 +3264,10 @@ def append_client(*client_names):
|
||||
# web_creator may work around age-verification for all videos but requires PO token
|
||||
append_client('tv_embedded', 'web_creator')
|
||||
|
||||
status = traverse_obj(pr, ('playabilityStatus', 'status', {str}))
|
||||
if status not in ('OK', 'LIVE_STREAM_OFFLINE', 'AGE_CHECK_REQUIRED', 'AGE_VERIFICATION_REQUIRED'):
|
||||
self.write_debug(f'{video_id}: {client} player response playability status: {status}')
|
||||
|
||||
prs.extend(deprioritized_prs)
|
||||
|
||||
if skipped_clients:
|
||||
@ -3323,6 +3348,15 @@ def build_fragments(f):
|
||||
}),
|
||||
} for range_start in range(0, f['filesize'], CHUNK_SIZE))
|
||||
|
||||
def gvs_pot_required(policy, is_premium_subscriber, has_player_token):
    """Tell whether a GVS PO Token is mandatory under the given policy.

    A token is mandatory when ``policy.required`` is set and neither
    exemption applies:

    * ``policy.not_required_with_player_token`` together with
      ``has_player_token``
    * ``policy.not_required_for_premium`` together with
      ``is_premium_subscriber``

    When ``policy.required`` is falsy that value is returned unchanged;
    otherwise the result is a ``bool``.
    """
    mandatory = policy.required
    if not mandatory:
        # Hand back the policy's own falsy value, exactly as an `and` chain would
        return mandatory
    if policy.not_required_with_player_token and has_player_token:
        return False
    if policy.not_required_for_premium and is_premium_subscriber:
        return False
    return True
|
||||
|
||||
# save pots per client to avoid fetching again
|
||||
gvs_pots = {}
|
||||
|
||||
for fmt in streaming_formats:
|
||||
client_name = fmt[STREAMING_DATA_CLIENT_NAME]
|
||||
if fmt.get('targetDurationSec'):
|
||||
@ -3382,7 +3416,7 @@ def build_fragments(f):
|
||||
encrypted_sig = try_get(sc, lambda x: x['s'][0])
|
||||
if not all((sc, fmt_url, player_url, encrypted_sig)):
|
||||
msg = f'Some {client_name} client https formats have been skipped as they are missing a url. '
|
||||
if client_name == 'web':
|
||||
if client_name in ('web', 'web_safari'):
|
||||
msg += 'YouTube is forcing SABR streaming for this client. '
|
||||
else:
|
||||
msg += (
|
||||
@ -3442,18 +3476,25 @@ def build_fragments(f):
|
||||
self.report_warning(
|
||||
'Some formats are possibly damaged. They will be deprioritized', video_id, only_once=True)
|
||||
|
||||
po_token = fmt.get(STREAMING_DATA_INITIAL_PO_TOKEN)
|
||||
fetch_po_token_func = fmt[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
|
||||
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HTTPS]
|
||||
|
||||
require_po_token = (
|
||||
itag not in ['18']
|
||||
and gvs_pot_required(
|
||||
pot_policy, fmt[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER],
|
||||
fmt[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]))
|
||||
|
||||
po_token = (
|
||||
gvs_pots.get(client_name)
|
||||
or fetch_po_token_func(required=require_po_token or pot_policy.recommended))
|
||||
|
||||
if po_token:
|
||||
fmt_url = update_url_query(fmt_url, {'pot': po_token})
|
||||
if client_name not in gvs_pots:
|
||||
gvs_pots[client_name] = po_token
|
||||
|
||||
# Clients that require PO Token return videoplayback URLs that may return 403
|
||||
require_po_token = (
|
||||
not po_token
|
||||
and _PoTokenContext.GVS in self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
|
||||
and itag not in ['18']) # these formats do not require PO Token
|
||||
|
||||
if require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
|
||||
if not po_token and require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
|
||||
self._report_pot_format_skipped(video_id, client_name, 'https')
|
||||
continue
|
||||
|
||||
@ -3468,7 +3509,7 @@ def build_fragments(f):
|
||||
name, fmt.get('isDrc') and 'DRC',
|
||||
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||
is_damaged and 'DAMAGED', require_po_token and 'MISSING POT',
|
||||
is_damaged and 'DAMAGED', require_po_token and not po_token and 'MISSING POT',
|
||||
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
|
||||
delim=', '),
|
||||
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||||
@ -3531,7 +3572,7 @@ def build_fragments(f):
|
||||
elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
|
||||
skip_manifests.add('dash')
|
||||
|
||||
def process_manifest_format(f, proto, client_name, itag, po_token):
|
||||
def process_manifest_format(f, proto, client_name, itag, missing_pot):
|
||||
key = (proto, f.get('language'))
|
||||
if not all_formats and key in itags[itag]:
|
||||
return False
|
||||
@ -3539,19 +3580,15 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
|
||||
if f.get('source_preference') is None:
|
||||
f['source_preference'] = -1
|
||||
|
||||
# Clients that require PO Token return videoplayback URLs that may return 403
|
||||
# hls does not currently require PO Token
|
||||
if (
|
||||
not po_token
|
||||
and _PoTokenContext.GVS in self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
|
||||
and proto != 'hls'
|
||||
):
|
||||
if 'missing_pot' not in self._configuration_arg('formats'):
|
||||
self._report_pot_format_skipped(video_id, client_name, proto)
|
||||
return False
|
||||
if missing_pot:
|
||||
f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
|
||||
f['source_preference'] -= 20
|
||||
|
||||
# XXX: Check if IOS HLS formats are affected by PO token enforcement; temporary
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/13511
|
||||
if proto == 'hls' and client_name == 'ios':
|
||||
f['__needs_testing'] = True
|
||||
|
||||
itags[itag].add(key)
|
||||
|
||||
if itag and all_formats:
|
||||
@ -3586,39 +3623,62 @@ def process_manifest_format(f, proto, client_name, itag, po_token):
|
||||
subtitles = {}
|
||||
for sd in streaming_data:
|
||||
client_name = sd[STREAMING_DATA_CLIENT_NAME]
|
||||
po_token = sd.get(STREAMING_DATA_INITIAL_PO_TOKEN)
|
||||
fetch_pot_func = sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN]
|
||||
is_premium_subscriber = sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
|
||||
has_player_token = sd[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
|
||||
|
||||
hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
|
||||
if hls_manifest_url:
|
||||
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
|
||||
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.HLS]
|
||||
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
|
||||
po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
|
||||
if po_token:
|
||||
hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
|
||||
for sub in traverse_obj(subs, (..., ..., {dict})):
|
||||
# HLS subs (m3u8) do not need a PO token; save client name for debugging
|
||||
sub[STREAMING_DATA_CLIENT_NAME] = client_name
|
||||
subtitles = self._merge_subtitles(subs, subtitles)
|
||||
for f in fmts:
|
||||
if process_manifest_format(f, 'hls', client_name, self._search_regex(
|
||||
r'/itag/(\d+)', f['url'], 'itag', default=None), po_token):
|
||||
yield f
|
||||
if client_name not in gvs_pots:
|
||||
gvs_pots[client_name] = po_token
|
||||
if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'):
|
||||
self._report_pot_format_skipped(video_id, client_name, 'hls')
|
||||
else:
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
|
||||
for sub in traverse_obj(subs, (..., ..., {dict})):
|
||||
# TODO: If HLS video requires a PO Token, do the subs also require pot?
|
||||
# Save client name for debugging
|
||||
sub[STREAMING_DATA_CLIENT_NAME] = client_name
|
||||
subtitles = self._merge_subtitles(subs, subtitles)
|
||||
for f in fmts:
|
||||
if process_manifest_format(f, 'hls', client_name, self._search_regex(
|
||||
r'/itag/(\d+)', f['url'], 'itag', default=None), require_po_token and not po_token):
|
||||
yield f
|
||||
|
||||
dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
|
||||
if dash_manifest_url:
|
||||
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(
|
||||
client_name)['GVS_PO_TOKEN_POLICY'][StreamingProtocol.DASH]
|
||||
require_po_token = gvs_pot_required(pot_policy, is_premium_subscriber, has_player_token)
|
||||
po_token = gvs_pots.get(client_name, fetch_pot_func(required=require_po_token or pot_policy.recommended))
|
||||
if po_token:
|
||||
dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
|
||||
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
|
||||
for sub in traverse_obj(subs, (..., ..., {dict})):
|
||||
# TODO: Investigate if DASH subs ever need a PO token; save client name for debugging
|
||||
sub[STREAMING_DATA_CLIENT_NAME] = client_name
|
||||
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
|
||||
for f in formats:
|
||||
if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
|
||||
f['filesize'] = int_or_none(self._search_regex(
|
||||
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
|
||||
if needs_live_processing:
|
||||
f['is_from_start'] = True
|
||||
if client_name not in gvs_pots:
|
||||
gvs_pots[client_name] = po_token
|
||||
if require_po_token and not po_token and 'missing_pot' not in self._configuration_arg('formats'):
|
||||
self._report_pot_format_skipped(video_id, client_name, 'dash')
|
||||
else:
|
||||
formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
|
||||
for sub in traverse_obj(subs, (..., ..., {dict})):
|
||||
# TODO: If DASH video requires a PO Token, do the subs also require pot?
|
||||
# Save client name for debugging
|
||||
sub[STREAMING_DATA_CLIENT_NAME] = client_name
|
||||
subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
|
||||
for f in formats:
|
||||
if process_manifest_format(f, 'dash', client_name, f['format_id'], require_po_token and not po_token):
|
||||
f['filesize'] = int_or_none(self._search_regex(
|
||||
r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
|
||||
if needs_live_processing:
|
||||
f['is_from_start'] = True
|
||||
|
||||
yield f
|
||||
yield f
|
||||
yield subtitles
|
||||
|
||||
def _extract_storyboard(self, player_responses, duration):
|
||||
@ -3659,22 +3719,22 @@ def _extract_storyboard(self, player_responses, duration):
|
||||
} for j in range(math.ceil(fragment_count))],
|
||||
}
|
||||
|
||||
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
|
||||
def _download_initial_webpage(self, webpage_url, webpage_client, video_id):
|
||||
webpage = None
|
||||
if 'webpage' not in self._configuration_arg('player_skip'):
|
||||
if webpage_url and 'webpage' not in self._configuration_arg('player_skip'):
|
||||
query = {'bpctr': '9999999999', 'has_verified': '1'}
|
||||
pp = self._configuration_arg('player_params', [None], casesense=True)[0]
|
||||
pp = (
|
||||
self._configuration_arg('player_params', [None], casesense=True)[0]
|
||||
or traverse_obj(INNERTUBE_CLIENTS, (webpage_client, 'PLAYER_PARAMS', {str}))
|
||||
)
|
||||
if pp:
|
||||
query['pp'] = pp
|
||||
webpage = self._download_webpage_with_retries(webpage_url, video_id, query=query)
|
||||
|
||||
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
|
||||
|
||||
player_responses, player_url = self._extract_player_responses(
|
||||
self._get_requested_clients(url, smuggled_data),
|
||||
video_id, webpage, master_ytcfg, smuggled_data)
|
||||
|
||||
return webpage, master_ytcfg, player_responses, player_url
|
||||
webpage = self._download_webpage_with_retries(
|
||||
webpage_url, video_id, query=query,
|
||||
headers=traverse_obj(self._get_default_ytcfg(webpage_client), {
|
||||
'User-Agent': ('INNERTUBE_CONTEXT', 'client', 'userAgent', {str}),
|
||||
}))
|
||||
return webpage
|
||||
|
||||
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
|
||||
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
|
||||
@ -3699,14 +3759,60 @@ def _list_formats(self, video_id, microformats, video_details, player_responses,
|
||||
|
||||
return live_broadcast_details, live_status, streaming_data, formats, subtitles
|
||||
|
||||
def _download_initial_data(self, video_id, webpage, webpage_client, webpage_ytcfg):
|
||||
initial_data = None
|
||||
if webpage and 'initial_data' not in self._configuration_arg('webpage_skip'):
|
||||
initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
|
||||
if not traverse_obj(initial_data, 'contents'):
|
||||
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
|
||||
initial_data = None
|
||||
if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
|
||||
query = {'videoId': video_id}
|
||||
query.update(self._get_checkok_params())
|
||||
initial_data = self._extract_response(
|
||||
item_id=video_id, ep='next', fatal=False,
|
||||
ytcfg=webpage_ytcfg, query=query, check_get_keys='contents',
|
||||
note='Downloading initial data API JSON', default_client=webpage_client)
|
||||
return initial_data
|
||||
|
||||
def _is_premium_subscriber(self, initial_data):
|
||||
if not self.is_authenticated or not initial_data:
|
||||
return False
|
||||
|
||||
tlr = traverse_obj(
|
||||
initial_data, ('topbar', 'desktopTopbarRenderer', 'logo', 'topbarLogoRenderer'))
|
||||
return (
|
||||
traverse_obj(tlr, ('iconImage', 'iconType')) == 'YOUTUBE_PREMIUM_LOGO'
|
||||
or 'premium' in (self._get_text(tlr, 'tooltipText') or '').lower()
|
||||
)
|
||||
|
||||
def _initial_extract(self, url, smuggled_data, webpage_url, webpage_client, video_id):
|
||||
# This function is also used by live-from-start refresh
|
||||
webpage = self._download_initial_webpage(webpage_url, webpage_client, video_id)
|
||||
webpage_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg(webpage_client)
|
||||
|
||||
initial_data = self._download_initial_data(video_id, webpage, webpage_client, webpage_ytcfg)
|
||||
|
||||
is_premium_subscriber = self._is_premium_subscriber(initial_data)
|
||||
if is_premium_subscriber:
|
||||
self.write_debug('Detected YouTube Premium subscription')
|
||||
|
||||
player_responses, player_url = self._extract_player_responses(
|
||||
self._get_requested_clients(url, smuggled_data, is_premium_subscriber),
|
||||
video_id, webpage, webpage_client, webpage_ytcfg, is_premium_subscriber)
|
||||
|
||||
return webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, {})
|
||||
video_id = self._match_id(url)
|
||||
|
||||
base_url = self.http_scheme() + '//www.youtube.com/'
|
||||
webpage_url = base_url + 'watch?v=' + video_id
|
||||
webpage_client = 'web'
|
||||
|
||||
webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
|
||||
webpage, webpage_ytcfg, initial_data, is_premium_subscriber, player_responses, player_url = self._initial_extract(
|
||||
url, smuggled_data, webpage_url, webpage_client, video_id)
|
||||
|
||||
playability_statuses = traverse_obj(
|
||||
player_responses, (..., 'playabilityStatus'), expected_type=dict)
|
||||
@ -3943,7 +4049,9 @@ def get_lang_code(track):
|
||||
def process_language(container, base_url, lang_code, sub_name, client_name, query):
|
||||
lang_subs = container.setdefault(lang_code, [])
|
||||
for fmt in self._SUBTITLE_FORMATS:
|
||||
query = {**query, 'fmt': fmt}
|
||||
# xosf=1 results in undesirable text position data for vtt, json3 & srv* subtitles
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/13654
|
||||
query = {**query, 'fmt': fmt, 'xosf': []}
|
||||
lang_subs.append({
|
||||
'ext': fmt,
|
||||
'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
|
||||
@ -3979,7 +4087,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
pctr = pr['captions']['playerCaptionsTracklistRenderer']
|
||||
client_name = pr['streamingData'][STREAMING_DATA_CLIENT_NAME]
|
||||
innertube_client_name = pr['streamingData'][STREAMING_DATA_INNERTUBE_CONTEXT]['client']['clientName']
|
||||
required_contexts = self._get_default_ytcfg(client_name)['PO_TOKEN_REQUIRED_CONTEXTS']
|
||||
pot_policy: GvsPoTokenPolicy = self._get_default_ytcfg(client_name)['SUBS_PO_TOKEN_POLICY']
|
||||
fetch_subs_po_token_func = pr['streamingData'][STREAMING_DATA_FETCH_SUBS_PO_TOKEN]
|
||||
|
||||
pot_params = {}
|
||||
@ -3992,11 +4100,11 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
requires_pot = (
|
||||
# We can detect the experiment for now
|
||||
any(e in traverse_obj(qs, ('exp', ...)) for e in ('xpe', 'xpv'))
|
||||
or _PoTokenContext.SUBS in required_contexts)
|
||||
or (pot_policy.required and not (pot_policy.not_required_for_premium and is_premium_subscriber)))
|
||||
|
||||
if not already_fetched_pot:
|
||||
already_fetched_pot = True
|
||||
if subs_po_token := fetch_subs_po_token_func(required=requires_pot):
|
||||
if subs_po_token := fetch_subs_po_token_func(required=requires_pot or pot_policy.recommended):
|
||||
pot_params.update({
|
||||
'pot': subs_po_token,
|
||||
'potc': '1',
|
||||
@ -4099,21 +4207,6 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
'release_year': int_or_none(release_year),
|
||||
})
|
||||
|
||||
initial_data = None
|
||||
if webpage:
|
||||
initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
|
||||
if not traverse_obj(initial_data, 'contents'):
|
||||
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
|
||||
initial_data = None
|
||||
if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
|
||||
query = {'videoId': video_id}
|
||||
query.update(self._get_checkok_params())
|
||||
initial_data = self._extract_response(
|
||||
item_id=video_id, ep='next', fatal=False,
|
||||
ytcfg=master_ytcfg, query=query, check_get_keys='contents',
|
||||
headers=self.generate_api_headers(ytcfg=master_ytcfg),
|
||||
note='Downloading initial data API JSON')
|
||||
|
||||
COMMENTS_SECTION_IDS = ('comment-item-section', 'engagement-panel-comments-section')
|
||||
info['comment_count'] = traverse_obj(initial_data, (
|
||||
'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
|
||||
@ -4280,6 +4373,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
|
||||
if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
|
||||
# Newly uploaded videos' HLS formats are potentially problematic and need to be checked
|
||||
# XXX: This is redundant for as long as we are already checking all IOS HLS formats
|
||||
upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
|
||||
if upload_datetime >= datetime_from_str('today-2days'):
|
||||
for fmt in info['formats']:
|
||||
@ -4311,7 +4405,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer
|
||||
self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
|
||||
or get_first(microformats, 'isUnlisted', expected_type=bool))))
|
||||
|
||||
info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
|
||||
info['__post_extractor'] = self.extract_comments(webpage_ytcfg, video_id, contents, webpage)
|
||||
|
||||
self.mark_watched(video_id, player_responses)
|
||||
|
||||
|
@ -222,6 +222,14 @@ def __setitem__(self, key, value):
|
||||
def __delitem__(self, key):
|
||||
raise NotImplementedError('Deleting is not supported')
|
||||
|
||||
def set_local(self, key, value):
|
||||
self.maps[0][key] = value
|
||||
|
||||
def get_local(self, key):
|
||||
if key in self.maps[0]:
|
||||
return self.maps[0][key]
|
||||
return JS_Undefined
|
||||
|
||||
|
||||
class Debugger:
|
||||
import sys
|
||||
@ -271,6 +279,7 @@ class JSInterpreter:
|
||||
def __init__(self, code, objects=None):
|
||||
self.code, self._functions = code, {}
|
||||
self._objects = {} if objects is None else objects
|
||||
self._undefined_varnames = set()
|
||||
|
||||
class Exception(ExtractorError): # noqa: A001
|
||||
def __init__(self, msg, expr=None, *args, **kwargs):
|
||||
@ -381,7 +390,7 @@ def _dump(self, obj, namespace):
|
||||
return self._named_object(namespace, obj)
|
||||
|
||||
@Debugger.wrap_interpreter
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||
def interpret_statement(self, stmt, local_vars, allow_recursion=100, _is_var_declaration=False):
|
||||
if allow_recursion < 0:
|
||||
raise self.Exception('Recursion limit reached')
|
||||
allow_recursion -= 1
|
||||
@ -401,6 +410,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
|
||||
if m.group('throw'):
|
||||
raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
|
||||
should_return = not m.group('var')
|
||||
_is_var_declaration = _is_var_declaration or bool(m.group('var'))
|
||||
if not expr:
|
||||
return None, should_return
|
||||
|
||||
@ -585,7 +595,8 @@ def dict_item(key, val):
|
||||
sub_expressions = list(self._separate(expr))
|
||||
if len(sub_expressions) > 1:
|
||||
for sub_expr in sub_expressions:
|
||||
ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
|
||||
ret, should_abort = self.interpret_statement(
|
||||
sub_expr, local_vars, allow_recursion, _is_var_declaration=_is_var_declaration)
|
||||
if should_abort:
|
||||
return ret, True
|
||||
return ret, False
|
||||
@ -599,8 +610,12 @@ def dict_item(key, val):
|
||||
left_val = local_vars.get(m.group('out'))
|
||||
|
||||
if not m.group('index'):
|
||||
local_vars[m.group('out')] = self._operator(
|
||||
eval_result = self._operator(
|
||||
m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
|
||||
if _is_var_declaration:
|
||||
local_vars.set_local(m.group('out'), eval_result)
|
||||
else:
|
||||
local_vars[m.group('out')] = eval_result
|
||||
return local_vars[m.group('out')], should_return
|
||||
elif left_val in (None, JS_Undefined):
|
||||
raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
|
||||
@ -654,7 +669,19 @@ def dict_item(key, val):
|
||||
return float('NaN'), should_return
|
||||
|
||||
elif m and m.group('return'):
|
||||
return local_vars.get(m.group('name'), JS_Undefined), should_return
|
||||
var = m.group('name')
|
||||
# Declared variables
|
||||
if _is_var_declaration:
|
||||
ret = local_vars.get_local(var)
|
||||
# Register varname in local namespace
|
||||
# Set value as JS_Undefined or its pre-existing value
|
||||
local_vars.set_local(var, ret)
|
||||
else:
|
||||
ret = local_vars.get(var, NO_DEFAULT)
|
||||
if ret is NO_DEFAULT:
|
||||
ret = JS_Undefined
|
||||
self._undefined_varnames.add(var)
|
||||
return ret, should_return
|
||||
|
||||
with contextlib.suppress(ValueError):
|
||||
return json.loads(js_to_json(expr, strict=True)), should_return
|
||||
@ -857,7 +884,7 @@ def extract_object(self, objname, *global_stack):
|
||||
obj = {}
|
||||
obj_m = re.search(
|
||||
r'''(?x)
|
||||
(?<!\.)%s\s*=\s*{\s*
|
||||
(?<![a-zA-Z$0-9.])%s\s*=\s*{\s*
|
||||
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
|
||||
}\s*;
|
||||
''' % (re.escape(objname), _FUNC_NAME_RE),
|
||||
|
@ -140,6 +140,12 @@ def __init__(self, res: requests.models.Response):
|
||||
|
||||
def read(self, amt: int | None = None):
|
||||
try:
|
||||
# Work around issue with `.read(amt)` then `.read()`
|
||||
# See: https://github.com/urllib3/urllib3/issues/3636
|
||||
if amt is None:
|
||||
# Python 3.9 preallocates the whole read buffer, read in chunks
|
||||
read_chunk = functools.partial(self.fp.read, 1 << 20, decode_content=True)
|
||||
return b''.join(iter(read_chunk, b''))
|
||||
# Interact with urllib3 response directly.
|
||||
return self.fp.read(amt, decode_content=True)
|
||||
|
||||
|
@ -529,14 +529,14 @@ def _preset_alias_callback(option, opt_str, value, parser):
|
||||
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
|
||||
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
|
||||
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
|
||||
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort',
|
||||
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', 'prefer-vp9-sort', 'mtime-by-default',
|
||||
}, 'aliases': {
|
||||
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
||||
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx', '-allow-unsafe-ext', '-prefer-vp9-sort'],
|
||||
'2021': ['2022', 'no-certifi', 'filename-sanitization'],
|
||||
'2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'],
|
||||
'2023': ['2024', 'prefer-vp9-sort'],
|
||||
'2024': [],
|
||||
'2024': ['mtime-by-default'],
|
||||
},
|
||||
}, help=(
|
||||
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
|
||||
@ -1466,12 +1466,12 @@ def _preset_alias_callback(option, opt_str, value, parser):
|
||||
help='Do not use .part files - write directly into output file')
|
||||
filesystem.add_option(
|
||||
'--mtime',
|
||||
action='store_true', dest='updatetime', default=True,
|
||||
help='Use the Last-modified header to set the file modification time (default)')
|
||||
action='store_true', dest='updatetime', default=None,
|
||||
help='Use the Last-modified header to set the file modification time')
|
||||
filesystem.add_option(
|
||||
'--no-mtime',
|
||||
action='store_false', dest='updatetime',
|
||||
help='Do not use the Last-modified header to set the file modification time')
|
||||
help='Do not use the Last-modified header to set the file modification time (default)')
|
||||
filesystem.add_option(
|
||||
'--write-description',
|
||||
action='store_true', dest='writedescription', default=False,
|
||||
|
@ -1,8 +1,8 @@
|
||||
# Autogenerated by devscripts/update-version.py
|
||||
|
||||
__version__ = '2025.06.09'
|
||||
__version__ = '2025.06.30'
|
||||
|
||||
RELEASE_GIT_HEAD = '339614a173c74b42d63e858c446a9cae262a13af'
|
||||
RELEASE_GIT_HEAD = 'b0187844988e557c7e1e6bb1aabd4c1176768d86'
|
||||
|
||||
VARIANT = None
|
||||
|
||||
@ -12,4 +12,4 @@
|
||||
|
||||
ORIGIN = 'yt-dlp/yt-dlp'
|
||||
|
||||
_pkg_version = '2025.06.09'
|
||||
_pkg_version = '2025.06.30'
|
||||
|
Loading…
Reference in New Issue
Block a user