diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 6aa52c595..ea391bc15 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -775,3 +775,7 @@ GeoffreyFrogeye
Pawka
v3DJG6GL
yozel
+brian6932
+iednod55
+maxbin123
+nullpos
diff --git a/Changelog.md b/Changelog.md
index 80b72da05..dd95abc86 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -4,6 +4,61 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
+### 2025.06.09
+
+#### Extractor changes
+- [Improve JSON LD thumbnails extraction](https://github.com/yt-dlp/yt-dlp/commit/85c8a405e3651dc041b758f4744d4fb3c4c55e01) ([#13368](https://github.com/yt-dlp/yt-dlp/issues/13368)) by [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080)
+- **10play**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6d265388c6e943419ac99e9151cf75a3265f980f) ([#13349](https://github.com/yt-dlp/yt-dlp/issues/13349)) by [bashonly](https://github.com/bashonly)
+- **adobepass**
+ - [Add Fubo MSO](https://github.com/yt-dlp/yt-dlp/commit/eee90acc47d7f8de24afaa8b0271ccaefdf6e88c) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
+ - [Always add newer user-agent when required](https://github.com/yt-dlp/yt-dlp/commit/0ee1102268cf31b07f8a8318a47424c66b2f7378) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
+ - [Fix Philo MSO authentication](https://github.com/yt-dlp/yt-dlp/commit/943083edcd3df45aaa597a6967bc6c95b720f54c) ([#13335](https://github.com/yt-dlp/yt-dlp/issues/13335)) by [Sipherdrakon](https://github.com/Sipherdrakon)
+ - [Rework to require software statement](https://github.com/yt-dlp/yt-dlp/commit/711c5d5d098fee2992a1a624b1c4b30364b91426) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123)
+ - [Validate login URL before sending credentials](https://github.com/yt-dlp/yt-dlp/commit/89c1b349ad81318d9d3bea76c01c891696e58d38) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
+- **aenetworks**
+ - [Fix playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/f37d599a697e82fe68b423865897d55bae34f373) ([#13408](https://github.com/yt-dlp/yt-dlp/issues/13408)) by [Sipherdrakon](https://github.com/Sipherdrakon)
+ - [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/6693d6603358ae6beca834dbd822a7917498b813) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
+- **bilibilibangumi**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/13e55162719528d42d2133e16b65ff59a667a6e4) ([#13416](https://github.com/yt-dlp/yt-dlp/issues/13416)) by [c-basalt](https://github.com/c-basalt)
+- **brightcove**: new: [Adapt to new AdobePass requirement](https://github.com/yt-dlp/yt-dlp/commit/98f8eec956e3b16cb66a3d49cc71af3807db795e) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
+- **cu.ntv.co.jp**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/aa863ddab9b1d104678e9cf39bb76f5b14fca660) ([#13302](https://github.com/yt-dlp/yt-dlp/issues/13302)) by [doe1080](https://github.com/doe1080), [nullpos](https://github.com/nullpos)
+- **go**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5bf002dad16f5ce35aa2023d392c9e518fcd8f) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123)
+- **nbc**: [Rework and adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/2d7949d5642bc37d1e71bf00c9a55260e5505d58) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
+- **nobelprize**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/97ddfefeb4faba6e61cd80996c16952b8eab16f3) ([#13205](https://github.com/yt-dlp/yt-dlp/issues/13205)) by [doe1080](https://github.com/doe1080)
+- **odnoklassniki**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/148a1eb4c59e127965396c7a6e6acf1979de459e) ([#13361](https://github.com/yt-dlp/yt-dlp/issues/13361)) by [bashonly](https://github.com/bashonly)
+- **patreon**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e0d6c0822930f6e63f574d46d946a58b73ecd10c) ([#13266](https://github.com/yt-dlp/yt-dlp/issues/13266)) by [bashonly](https://github.com/bashonly) (With fixes in [1a8a03e](https://github.com/yt-dlp/yt-dlp/commit/1a8a03ea8d827107319a18076ee3505090667c5a))
+- **podchaser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/538eb305673c26bff6a2b12f1c96375fe02ce41a) ([#13271](https://github.com/yt-dlp/yt-dlp/issues/13271)) by [bashonly](https://github.com/bashonly)
+- **sr**: mediathek: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e3c605a61f4cc2de9059f37434fa108c3c20f58e) ([#13294](https://github.com/yt-dlp/yt-dlp/issues/13294)) by [doe1080](https://github.com/doe1080)
+- **stacommu**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/5d96527be80dc1ed1702d9cd548ff86de570ad70) ([#13412](https://github.com/yt-dlp/yt-dlp/issues/13412)) by [bashonly](https://github.com/bashonly)
+- **startrek**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a8bf0011bde92b3f1324a98bfbd38932fd3ebe18) ([#13188](https://github.com/yt-dlp/yt-dlp/issues/13188)) by [doe1080](https://github.com/doe1080)
+- **svt**: play: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e1b6062f8c4a3fa33c65269d48d09ec78de765a2) ([#13329](https://github.com/yt-dlp/yt-dlp/issues/13329)) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly)
+- **telecinco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03dba2012d9bd3f402fa8c2f122afba89bbd22a4) ([#13379](https://github.com/yt-dlp/yt-dlp/issues/13379)) by [bashonly](https://github.com/bashonly)
+- **theplatform**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/ed108b3ea481c6a4b5215a9302ba92d74baa2425) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
+- **toutiao**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8051e3a61686c5db1de5f5746366ecfbc3ad20c) ([#13246](https://github.com/yt-dlp/yt-dlp/issues/13246)) by [doe1080](https://github.com/doe1080)
+- **turner**: [Adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/0daddc780d3ac5bebc3a3ec5b884d9243cbc0745) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
+- **twitcasting**: [Fix password-protected livestream support](https://github.com/yt-dlp/yt-dlp/commit/52f9729c9a92ad4656d746ff0b1acecb87b3e96d) ([#13097](https://github.com/yt-dlp/yt-dlp/issues/13097)) by [bashonly](https://github.com/bashonly)
+- **twitter**: broadcast: [Support events URLs](https://github.com/yt-dlp/yt-dlp/commit/7794374de8afb20499b023107e2abfd4e6b93ee4) ([#13248](https://github.com/yt-dlp/yt-dlp/issues/13248)) by [doe1080](https://github.com/doe1080)
+- **umg**: de: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/4e7c1ea346b510280218b47e8653dbbca3a69870) ([#13373](https://github.com/yt-dlp/yt-dlp/issues/13373)) by [doe1080](https://github.com/doe1080)
+- **vice**: [Mark extractors as broken](https://github.com/yt-dlp/yt-dlp/commit/6121559e027a04574690799c1776bc42bb51af31) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
+- **vimeo**: [Extract subtitles from player subdomain](https://github.com/yt-dlp/yt-dlp/commit/c723c4e5e78263df178dbe69844a3d05f3ef9e35) ([#13350](https://github.com/yt-dlp/yt-dlp/issues/13350)) by [bashonly](https://github.com/bashonly)
+- **watchespn**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/b094747e93cfb0a2c53007120e37d0d84d41f030) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
+- **weverse**: [Support login with oauth refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/3fe72e9eea38d9a58211cde42cfaa577ce020e2c) ([#13284](https://github.com/yt-dlp/yt-dlp/issues/13284)) by [bashonly](https://github.com/bashonly)
+- **youtube**
+ - [Add `tv_simply` player client](https://github.com/yt-dlp/yt-dlp/commit/1fd0e88b67db53ad163393d6965f68e908fa70e3) ([#13389](https://github.com/yt-dlp/yt-dlp/issues/13389)) by [gamer191](https://github.com/gamer191)
+ - [Extract srt subtitles](https://github.com/yt-dlp/yt-dlp/commit/231349786e8c42089c2e079ec94c0ea866c37999) ([#13411](https://github.com/yt-dlp/yt-dlp/issues/13411)) by [gamer191](https://github.com/gamer191)
+ - [Fix `--mark-watched` support](https://github.com/yt-dlp/yt-dlp/commit/b5be29fa58ec98226e11621fd9c58585bcff6879) ([#13222](https://github.com/yt-dlp/yt-dlp/issues/13222)) by [brian6932](https://github.com/brian6932), [iednod55](https://github.com/iednod55)
+ - [Fix automatic captions for some client combinations](https://github.com/yt-dlp/yt-dlp/commit/53ea743a9c158f8ca2d75a09ca44ba68606042d8) ([#13268](https://github.com/yt-dlp/yt-dlp/issues/13268)) by [bashonly](https://github.com/bashonly)
+ - [Improve signature extraction debug output](https://github.com/yt-dlp/yt-dlp/commit/d30a49742cfa22e61c47df4ac0e7334d648fb85d) ([#13327](https://github.com/yt-dlp/yt-dlp/issues/13327)) by [bashonly](https://github.com/bashonly)
+ - [Rework nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/9e38b273b7ac942e7e9fc05a651ed810ab7d30ba) ([#13403](https://github.com/yt-dlp/yt-dlp/issues/13403)) by [Grub4K](https://github.com/Grub4K)
+ - [nsig code improvements and cleanup](https://github.com/yt-dlp/yt-dlp/commit/f7bbf5a617f9ab54ef51eaef99be36e175b5e9c3) ([#13280](https://github.com/yt-dlp/yt-dlp/issues/13280)) by [bashonly](https://github.com/bashonly)
+- **zdf**: [Fix language extraction and format sorting](https://github.com/yt-dlp/yt-dlp/commit/db162b76f6bdece50babe2e0cacfe56888c2e125) ([#13313](https://github.com/yt-dlp/yt-dlp/issues/13313)) by [InvalidUsernameException](https://github.com/InvalidUsernameException)
+
+#### Misc. changes
+- **build**
+ - [Exclude `pkg_resources` from being collected](https://github.com/yt-dlp/yt-dlp/commit/cc749a8a3b8b6e5c05318868c72a403f376a1b38) ([#13320](https://github.com/yt-dlp/yt-dlp/issues/13320)) by [bashonly](https://github.com/bashonly)
+ - [Fix macOS requirements caching](https://github.com/yt-dlp/yt-dlp/commit/201812100f315c6727a4418698d5b4e8a79863d4) ([#13328](https://github.com/yt-dlp/yt-dlp/issues/13328)) by [bashonly](https://github.com/bashonly)
+- **cleanup**: Miscellaneous: [339614a](https://github.com/yt-dlp/yt-dlp/commit/339614a173c74b42d63e858c446a9cae262a13af) by [bashonly](https://github.com/bashonly)
+- **test**: postprocessors: [Remove binary thumbnail test data](https://github.com/yt-dlp/yt-dlp/commit/a9b370069838e84d44ac7ad095d657003665885a) ([#13341](https://github.com/yt-dlp/yt-dlp/issues/13341)) by [bashonly](https://github.com/bashonly)
+
### 2025.05.22
#### Core changes
diff --git a/README.md b/README.md
index 6e2dc6243..0f9a7d556 100644
--- a/README.md
+++ b/README.md
@@ -1795,9 +1795,9 @@ # EXTRACTOR ARGUMENTS
The following extractors use this feature:
#### youtube
-* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
+* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
+* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
diff --git a/supportedsites.md b/supportedsites.md
index c2d7b4555..1fe381603 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -5,6 +5,8 @@ # Supported sites
Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them.
The only reliable way to check if a site is supported is to try it.
+ - **10play**: [*10play*](## "netrc machine")
+ - **10play:season**
- **17live**
- **17live:clip**
- **17live:vod**
@@ -295,7 +297,7 @@ # Supported sites
- **CNNIndonesia**
- **ComedyCentral**
- **ComedyCentralTV**
- - **ConanClassic**
+ - **ConanClassic**: (**Currently broken**)
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
- **CONtv**
- **CookingChannel**
@@ -317,7 +319,7 @@ # Supported sites
- **CtsNews**: 華視新聞
- **CTV**
- **CTVNews**
- - **cu.ntv.co.jp**: Nippon Television Network
+ - **cu.ntv.co.jp**: 日テレ無料TADA!
- **CultureUnplugged**
- **curiositystream**: [*curiositystream*](## "netrc machine")
- **curiositystream:collections**: [*curiositystream*](## "netrc machine")
@@ -882,19 +884,19 @@ # Supported sites
- **Naver**
- **Naver:live**
- **navernow**
- - **nba**
- - **nba:channel**
- - **nba:embed**
- - **nba:watch**
- - **nba:watch:collection**
- - **nba:watch:embed**
+ - **nba**: (**Currently broken**)
+ - **nba:channel**: (**Currently broken**)
+ - **nba:embed**: (**Currently broken**)
+ - **nba:watch**: (**Currently broken**)
+ - **nba:watch:collection**: (**Currently broken**)
+ - **nba:watch:embed**: (**Currently broken**)
- **NBC**
- **NBCNews**
- **nbcolympics**
- - **nbcolympics:stream**
- - **NBCSports**
- - **NBCSportsStream**
- - **NBCSportsVPlayer**
+ - **nbcolympics:stream**: (**Currently broken**)
+ - **NBCSports**: (**Currently broken**)
+ - **NBCSportsStream**: (**Currently broken**)
+ - **NBCSportsVPlayer**: (**Currently broken**)
- **NBCStations**
- **ndr**: NDR.de - Norddeutscher Rundfunk
- **ndr:embed**
@@ -970,7 +972,7 @@ # Supported sites
- **Nitter**
- **njoy**: N-JOY
- **njoy:embed**
- - **NobelPrize**: (**Currently broken**)
+ - **NobelPrize**
- **NoicePodcast**
- **NonkTube**
- **NoodleMagazine**
@@ -1393,14 +1395,14 @@ # Supported sites
- **SpreakerShow**
- **SpringboardPlatform**
- **SproutVideo**
- - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
+ - **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- **StacommuLive**: [*stacommu*](## "netrc machine")
- **StacommuVOD**: [*stacommu*](## "netrc machine")
- **StagePlusVODConcert**: [*stageplus*](## "netrc machine")
- **stanfordoc**: Stanford Open ClassRoom
- - **StarTrek**: (**Currently broken**)
+ - **startrek**: STAR TREK
- **startv**
- **Steam**
- **SteamCommunityBroadcast**
@@ -1423,12 +1425,11 @@ # Supported sites
- **SunPorno**
- **sverigesradio:episode**
- **sverigesradio:publication**
- - **SVT**
- - **SVTPage**
- - **SVTPlay**: SVT Play and Öppet arkiv
- - **SVTSeries**
+ - **svt:page**
+ - **svt:play**: SVT Play and Öppet arkiv
+ - **svt:play:series**
- **SwearnetEpisode**
- - **Syfy**: (**Currently broken**)
+ - **Syfy**
- **SYVDK**
- **SztvHu**
- **t-online.de**: (**Currently broken**)
@@ -1472,8 +1473,6 @@ # Supported sites
- **Telewebion**: (**Currently broken**)
- **Tempo**
- **TennisTV**: [*tennistv*](## "netrc machine")
- - **TenPlay**: [*10play*](## "netrc machine")
- - **TenPlaySeason**
- **TF1**
- **TFO**
- **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine")
@@ -1511,6 +1510,7 @@ # Supported sites
- **tokfm:podcast**
- **ToonGoggles**
- **tou.tv**: [*toutv*](## "netrc machine")
+ - **toutiao**: 今日头条
- **Toypics**: Toypics video (**Currently broken**)
- **ToypicsUser**: Toypics user profile (**Currently broken**)
- **TrailerAddict**: (**Currently broken**)
@@ -1600,7 +1600,7 @@ # Supported sites
- **UKTVPlay**
- **UlizaPlayer**
- **UlizaPortal**: ulizaportal.jp
- - **umg:de**: Universal Music Deutschland (**Currently broken**)
+ - **umg:de**: Universal Music Deutschland
- **Unistra**
- **Unity**: (**Currently broken**)
- **uol.com.br**
@@ -1623,9 +1623,9 @@ # Supported sites
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **vhx:embed**: [*vimeo*](## "netrc machine")
- - **vice**
- - **vice:article**
- - **vice:show**
+ - **vice**: (**Currently broken**)
+ - **vice:article**: (**Currently broken**)
+ - **vice:show**: (**Currently broken**)
- **Viddler**
- **Videa**
- **video.arnes.si**: Arnes Video
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index bc89b2955..e6c8d574e 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1947,6 +1947,137 @@ def test_search_nextjs_data(self):
with self.assertWarns(DeprecationWarning):
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
+ def test_search_nuxt_json(self):
+ HTML_TMPL = '<script data-ssr="true" id="__NUXT_DATA__" type="application/json">[{}]</script>'
+ VALID_DATA = '''
+ ["ShallowReactive",1],
+ {"data":2,"state":21,"once":25,"_errors":28,"_server_errors":30},
+ ["ShallowReactive",3],
+ {"$abcdef123456":4},
+ {"podcast":5,"activeEpisodeData":7},
+ {"podcast":6,"seasons":14},
+ {"title":10,"id":11},
+ ["Reactive",8],
+ {"episode":9,"creators":18,"empty_list":20},
+ {"title":12,"id":13,"refs":34,"empty_refs":35},
+ "Series Title",
+ "podcast-id-01",
+ "Episode Title",
+ "episode-id-99",
+ [15,16,17],
+ 1,
+ 2,
+ 3,
+ [19],
+ "Podcast Creator",
+ [],
+ {"$ssite-config":22},
+ {"env":23,"name":24,"map":26,"numbers":14},
+ "production",
+ "podcast-website",
+ ["Set"],
+ ["Reactive",27],
+ ["Map"],
+ ["ShallowReactive",29],
+ {},
+ ["NuxtError",31],
+ {"status":32,"message":33},
+ 503,
+ "Service Unavailable",
+ [36,37],
+ [38,39],
+ ["Ref",40],
+ ["ShallowRef",41],
+ ["EmptyRef",42],
+ ["EmptyShallowRef",43],
+ "ref",
+ "shallow_ref",
+ "{\\"ref\\":1}",
+ "{\\"shallow_ref\\":2}"
+ '''
+ PAYLOAD = {
+ 'data': {
+ '$abcdef123456': {
+ 'podcast': {
+ 'podcast': {
+ 'title': 'Series Title',
+ 'id': 'podcast-id-01',
+ },
+ 'seasons': [1, 2, 3],
+ },
+ 'activeEpisodeData': {
+ 'episode': {
+ 'title': 'Episode Title',
+ 'id': 'episode-id-99',
+ 'refs': ['ref', 'shallow_ref'],
+ 'empty_refs': [{'ref': 1}, {'shallow_ref': 2}],
+ },
+ 'creators': ['Podcast Creator'],
+ 'empty_list': [],
+ },
+ },
+ },
+ 'state': {
+ '$ssite-config': {
+ 'env': 'production',
+ 'name': 'podcast-website',
+ 'map': [],
+ 'numbers': [1, 2, 3],
+ },
+ },
+ 'once': [],
+ '_errors': {},
+ '_server_errors': {
+ 'status': 503,
+ 'message': 'Service Unavailable',
+ },
+ }
+ PARTIALLY_INVALID = [(
+ '''
+ {"data":1},
+ {"invalid_raw_list":2},
+ [15,16,17]
+ ''',
+ {'data': {'invalid_raw_list': [None, None, None]}},
+ ), (
+ '''
+ {"data":1},
+ ["EmptyRef",2],
+ "not valid JSON"
+ ''',
+ {'data': None},
+ ), (
+ '''
+ {"data":1},
+ ["EmptyShallowRef",2],
+ "not valid JSON"
+ ''',
+ {'data': None},
+ )]
+ INVALID = [
+ '''
+ []
+ ''',
+ '''
+ ["unsupported",1],
+ {"data":2},
+ {}
+ ''',
+ ]
+ DEFAULT = object()
+
+ self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD)
+ self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
+ self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT)
+
+ for data, expected in PARTIALLY_INVALID:
+ self.assertEqual(
+ self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected)
+
+ for data in INVALID:
+ self.assertIs(
+ self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_devalue.py b/test/test_devalue.py
new file mode 100644
index 000000000..29eb89e87
--- /dev/null
+++ b/test/test_devalue.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import datetime as dt
+import json
+import math
+import re
+import unittest
+
+from yt_dlp.utils.jslib import devalue
+
+
+TEST_CASES_EQUALS = [{
+ 'name': 'int',
+ 'unparsed': [-42],
+ 'parsed': -42,
+}, {
+ 'name': 'str',
+ 'unparsed': ['woo!!!'],
+ 'parsed': 'woo!!!',
+}, {
+ 'name': 'Number',
+ 'unparsed': [['Object', 42]],
+ 'parsed': 42,
+}, {
+ 'name': 'String',
+ 'unparsed': [['Object', 'yar']],
+ 'parsed': 'yar',
+}, {
+ 'name': 'Infinity',
+ 'unparsed': -4,
+ 'parsed': math.inf,
+}, {
+ 'name': 'negative Infinity',
+ 'unparsed': -5,
+ 'parsed': -math.inf,
+}, {
+ 'name': 'negative zero',
+ 'unparsed': -6,
+ 'parsed': -0.0,
+}, {
+ 'name': 'RegExp',
+ 'unparsed': [['RegExp', 'regexp', 'gim']], # XXX: flags are ignored
+ 'parsed': re.compile('regexp'),
+}, {
+ 'name': 'Date',
+ 'unparsed': [['Date', '2001-09-09T01:46:40.000Z']],
+ 'parsed': dt.datetime.fromtimestamp(1e9, tz=dt.timezone.utc),
+}, {
+ 'name': 'Array',
+ 'unparsed': [[1, 2, 3], 'a', 'b', 'c'],
+ 'parsed': ['a', 'b', 'c'],
+}, {
+ 'name': 'Array (empty)',
+ 'unparsed': [[]],
+ 'parsed': [],
+}, {
+ 'name': 'Array (sparse)',
+ 'unparsed': [[-2, 1, -2], 'b'],
+ 'parsed': [None, 'b', None],
+}, {
+ 'name': 'Object',
+ 'unparsed': [{'foo': 1, 'x-y': 2}, 'bar', 'z'],
+ 'parsed': {'foo': 'bar', 'x-y': 'z'},
+}, {
+ 'name': 'Set',
+ 'unparsed': [['Set', 1, 2, 3], 1, 2, 3],
+ 'parsed': [1, 2, 3],
+}, {
+ 'name': 'Map',
+ 'unparsed': [['Map', 1, 2], 'a', 'b'],
+ 'parsed': [['a', 'b']],
+}, {
+ 'name': 'BigInt',
+ 'unparsed': [['BigInt', '1']],
+ 'parsed': 1,
+}, {
+ 'name': 'Uint8Array',
+ 'unparsed': [['Uint8Array', 'AQID']],
+ 'parsed': [1, 2, 3],
+}, {
+ 'name': 'ArrayBuffer',
+ 'unparsed': [['ArrayBuffer', 'AQID']],
+ 'parsed': [1, 2, 3],
+}, {
+ 'name': 'str (repetition)',
+ 'unparsed': [[1, 1], 'a string'],
+ 'parsed': ['a string', 'a string'],
+}, {
+ 'name': 'None (repetition)',
+ 'unparsed': [[1, 1], None],
+ 'parsed': [None, None],
+}, {
+ 'name': 'dict (repetition)',
+ 'unparsed': [[1, 1], {}],
+ 'parsed': [{}, {}],
+}, {
+ 'name': 'Object without prototype',
+ 'unparsed': [['null']],
+ 'parsed': {},
+}, {
+ 'name': 'cross-realm POJO',
+ 'unparsed': [{}],
+ 'parsed': {},
+}]
+
+TEST_CASES_IS = [{
+ 'name': 'bool',
+ 'unparsed': [True],
+ 'parsed': True,
+}, {
+ 'name': 'Boolean',
+ 'unparsed': [['Object', False]],
+ 'parsed': False,
+}, {
+ 'name': 'undefined',
+ 'unparsed': -1,
+ 'parsed': None,
+}, {
+ 'name': 'null',
+ 'unparsed': [None],
+ 'parsed': None,
+}, {
+ 'name': 'NaN',
+ 'unparsed': -3,
+ 'parsed': math.nan,
+}]
+
+TEST_CASES_INVALID = [{
+ 'name': 'empty string',
+ 'unparsed': '',
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'hole',
+ 'unparsed': -2,
+ 'error': ValueError,
+ 'pattern': r'invalid integer input',
+}, {
+ 'name': 'string',
+ 'unparsed': 'hello',
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'number',
+ 'unparsed': 42,
+ 'error': ValueError,
+ 'pattern': r'invalid integer input',
+}, {
+ 'name': 'boolean',
+ 'unparsed': True,
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'null',
+ 'unparsed': None,
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'object',
+ 'unparsed': {},
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'empty array',
+ 'unparsed': [],
+ 'error': ValueError,
+ 'pattern': r'expected a non-empty list as input',
+}, {
+ 'name': 'Python negative indexing',
+ 'unparsed': [[1, 2, 3, 4, 5, 6, 7, -7], 1, 2, 3, 4, 5, 6, 7],
+ 'error': IndexError,
+ 'pattern': r'invalid index: -7',
+}]
+
+
+class TestDevalue(unittest.TestCase):
+ def test_devalue_parse_equals(self):
+ for tc in TEST_CASES_EQUALS:
+ self.assertEqual(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])
+
+ def test_devalue_parse_is(self):
+ for tc in TEST_CASES_IS:
+ self.assertIs(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])
+
+ def test_devalue_parse_invalid(self):
+ for tc in TEST_CASES_INVALID:
+ with self.assertRaisesRegex(tc['error'], tc['pattern'], msg=tc['name']):
+ devalue.parse(tc['unparsed'])
+
+ def test_devalue_parse_cyclical(self):
+ name = 'Map (cyclical)'
+ result = devalue.parse([['Map', 1, 0], 'self'])
+ self.assertEqual(result[0][0], 'self', name)
+ self.assertIs(result, result[0][1], name)
+
+ name = 'Set (cyclical)'
+ result = devalue.parse([['Set', 0, 1], 42])
+ self.assertEqual(result[1], 42, name)
+ self.assertIs(result, result[0], name)
+
+ result = devalue.parse([[0]])
+ self.assertIs(result, result[0], 'Array (cyclical)')
+
+ name = 'Object (cyclical)'
+ result = devalue.parse([{'self': 0}])
+ self.assertIs(result, result['self'], name)
+
+ name = 'Object with null prototype (cyclical)'
+ result = devalue.parse([['null', 'self', 0]])
+ self.assertIs(result, result['self'], name)
+
+ name = 'Objects (cyclical)'
+ result = devalue.parse([[1, 2], {'second': 2}, {'first': 1}])
+ self.assertIs(result[0], result[1]['first'], name)
+ self.assertIs(result[1], result[0]['second'], name)
+
+ def test_devalue_parse_revivers(self):
+ self.assertEqual(
+ devalue.parse([['indirect', 1], {'a': 2}, 'b'], revivers={'indirect': lambda x: x}),
+ {'a': 'b'}, 'revivers (indirect)')
+
+ self.assertEqual(
+ devalue.parse([['parse', 1], '{"a":0}'], revivers={'parse': lambda x: json.loads(x)}),
+ {'a': 0}, 'revivers (parse)')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_pot/test_pot_builtin_utils.py b/test/test_pot/test_pot_builtin_utils.py
index a95fc4e15..7645ba601 100644
--- a/test/test_pot/test_pot_builtin_utils.py
+++ b/test/test_pot/test_pot_builtin_utils.py
@@ -11,7 +11,7 @@ class TestGetWebPoContentBinding:
@pytest.mark.parametrize('client_name, context, is_authenticated, expected', [
*[(client, context, is_authenticated, expected) for client in [
- 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
+ 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY']
for context, is_authenticated, expected in [
(PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
(PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
diff --git a/test/test_pot/test_pot_builtin_webpospec.py b/test/test_pot/test_pot_builtin_webpospec.py
index c5fb6f382..078008415 100644
--- a/test/test_pot/test_pot_builtin_webpospec.py
+++ b/test/test_pot/test_pot_builtin_webpospec.py
@@ -49,7 +49,7 @@ def test_not_supports(self, ie, logger, pot_request, client_name, context, is_au
@pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
- 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
+ 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY']
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
diff --git a/test/test_traversal.py b/test/test_traversal.py
index bc433029d..52215f5a7 100644
--- a/test/test_traversal.py
+++ b/test/test_traversal.py
@@ -416,18 +416,8 @@ def test_traversal_unbranching(self):
'`any` should allow further branching'
def test_traversal_morsel(self):
- values = {
- 'expires': 'a',
- 'path': 'b',
- 'comment': 'c',
- 'domain': 'd',
- 'max-age': 'e',
- 'secure': 'f',
- 'httponly': 'g',
- 'version': 'h',
- 'samesite': 'i',
- }
morsel = http.cookies.Morsel()
+ values = dict(zip(morsel, 'abcdefghijklmnop'))
morsel.set('item_key', 'item_value', 'coded_value')
morsel.update(values)
values['key'] = 'item_key'
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 3f777aed7..3336b6bff 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -320,6 +320,14 @@
'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js',
'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h',
),
+ (
+ 'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js',
+ 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
+ ),
+ (
+ 'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
+ 'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
+ ),
]
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index ea6264a0d..309489672 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -490,7 +490,7 @@ class YoutubeDL:
The template is mapped on a dictionary with keys 'progress' and 'info'
retry_sleep_functions: Dictionary of functions that takes the number of attempts
as argument and returns the time to sleep in seconds.
- Allowed keys are 'http', 'fragment', 'file_access'
+ Allowed keys are 'http', 'fragment', 'file_access', 'extractor'
download_ranges: A callback function that gets called for every video with
the signature (info_dict, ydl) -> Iterable[Section].
Only the returned sections will be downloaded.
diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py
index c6a1b1509..e5c922b41 100644
--- a/yt_dlp/extractor/aenetworks.py
+++ b/yt_dlp/extractor/aenetworks.py
@@ -1,3 +1,5 @@
+import json
+
from .theplatform import ThePlatformIE
from ..utils import (
ExtractorError,
@@ -6,7 +8,6 @@
remove_start,
traverse_obj,
update_url_query,
- urlencode_postdata,
)
@@ -204,18 +205,19 @@ def _real_extract(self, url):
class AENetworksListBaseIE(AENetworksBaseIE):
def _call_api(self, resource, slug, brand, fields):
return self._download_json(
- 'https://yoga.appsvcs.aetnd.com/graphql',
- slug, query={'brand': brand}, data=urlencode_postdata({
+ 'https://yoga.appsvcs.aetnd.com/graphql', slug,
+ query={'brand': brand}, headers={'Content-Type': 'application/json'},
+ data=json.dumps({
'query': '''{
%s(slug: "%s") {
%s
}
}''' % (resource, slug, fields), # noqa: UP031
- }))['data'][resource]
+ }).encode())['data'][resource]
def _real_extract(self, url):
domain, slug = self._match_valid_url(url).groups()
- _, brand = self._DOMAIN_MAP[domain]
+ _, brand, _ = self._DOMAIN_MAP[domain]
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
base_url = f'http://watch.{domain}'
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 6508942a4..43c9000ce 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -816,6 +816,26 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'upload_date': '20111104',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
+ }, {
+ 'note': 'new playurlSSRData scheme',
+ 'url': 'https://www.bilibili.com/bangumi/play/ep678060',
+ 'info_dict': {
+ 'id': '678060',
+ 'ext': 'mp4',
+ 'series': '去你家吃饭好吗',
+ 'series_id': '6198',
+ 'season': '第二季',
+ 'season_id': '42542',
+ 'season_number': 2,
+ 'episode': '吴老二:你家大公鸡养不熟,能煮熟吗…',
+ 'episode_id': '678060',
+ 'episode_number': 61,
+ 'title': '一只小九九丫 吴老二:你家大公鸡养不熟,能煮熟吗…',
+ 'duration': 266.123,
+ 'timestamp': 1663315904,
+ 'upload_date': '20220916',
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
}, {
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
'info_dict': {
@@ -879,12 +899,26 @@ def _real_extract(self, url):
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
headers=headers))
+ geo_blocked = traverse_obj(play_info, (
+ 'raw', 'data', 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
premium_only = play_info.get('code') == -10403
- play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
- formats = self.extract_formats(play_info)
- if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
- self.raise_login_required('This video is for premium members only')
+ video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {}
+ formats = self.extract_formats(video_info)
+
+ if not formats:
+ if geo_blocked:
+ self.raise_geo_restricted()
+ elif premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage:
+ self.raise_login_required('This video is for premium members only')
+
+ if traverse_obj(play_info, ((
+ ('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE'
+ ('raw', 'data', 'play_video_type'), # 'preview' vs 'whole'
+ ), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
+ self.report_warning(
+ 'Only preview format is available, '
+ f'you have to become a premium member to access full video. {self._login_hint()}')
bangumi_info = self._download_json(
'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
@@ -922,7 +956,7 @@ def _real_extract(self, url):
'season': str_or_none(season_title),
'season_id': str_or_none(season_id),
'season_number': season_number,
- 'duration': float_or_none(play_info.get('timelength'), scale=1000),
+ 'duration': float_or_none(video_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
'__post_extractor': self.extract_comments(aid),
'http_headers': {'Referer': url},
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index d4ac7a0c2..c0f2f8b57 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -495,8 +495,6 @@ def _real_extract(self, url):
class BrightcoveNewBaseIE(AdobePassIE):
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
- title = json_data['name'].strip()
-
formats, subtitles = [], {}
sources = json_data.get('sources') or []
for source in sources:
@@ -600,16 +598,18 @@ def build_format_id(kind):
return {
'id': video_id,
- 'title': title,
- 'description': clean_html(json_data.get('description')),
'thumbnails': thumbnails,
'duration': duration,
- 'timestamp': parse_iso8601(json_data.get('published_at')),
- 'uploader_id': json_data.get('account_id'),
'formats': formats,
'subtitles': subtitles,
- 'tags': json_data.get('tags', []),
'is_live': is_live,
+ **traverse_obj(json_data, {
+ 'title': ('name', {clean_html}),
+ 'description': ('description', {clean_html}),
+ 'tags': ('tags', ..., {str}, filter, all, filter),
+ 'timestamp': ('published_at', {parse_iso8601}),
+ 'uploader_id': ('account_id', {str}),
+ }),
}
@@ -645,10 +645,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
'uploader_id': '4036320279001',
'formats': 'mincount:39',
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
+ 'skip': '404 Not Found',
}, {
# playlist stream
'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
@@ -709,7 +706,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
'ext': 'mp4',
'title': 'TGD_01-032_5',
'thumbnail': r're:^https?://.*\.jpg$',
- 'tags': [],
'timestamp': 1646078943,
'uploader_id': '1569565978001',
'upload_date': '20220228',
@@ -721,7 +717,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
'ext': 'mp4',
'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5',
'thumbnail': r're:^https?://.*\.jpg$',
- 'tags': [],
'timestamp': 1651604591,
'uploader_id': '1569565978001',
'upload_date': '20220503',
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 1174bd4f5..6058f66ae 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -101,6 +101,7 @@
xpath_with_ns,
)
from ..utils._utils import _request_dump_filename
+from ..utils.jslib import devalue
class InfoExtractor:
@@ -1795,6 +1796,63 @@ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
return traverse_obj(ret, traverse) or {}
+ def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Resolves Nuxt rich JSON payload arrays"""
+ # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
+ # https://github.com/nuxt/nuxt/pull/19205
+ if default is not NO_DEFAULT:
+ fatal = False
+
+ if not isinstance(array, list) or not array:
+ error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
+ if fatal:
+ raise ExtractorError(error_msg, video_id=video_id)
+ elif default is NO_DEFAULT:
+ self.report_warning(error_msg, video_id=video_id)
+ return {} if default is NO_DEFAULT else default
+
+ def indirect_reviver(data):
+ return data
+
+ def json_reviver(data):
+ return json.loads(data)
+
+ gen = devalue.parse_iter(array, revivers={
+ 'NuxtError': indirect_reviver,
+ 'EmptyShallowRef': json_reviver,
+ 'EmptyRef': json_reviver,
+ 'ShallowRef': indirect_reviver,
+ 'ShallowReactive': indirect_reviver,
+ 'Ref': indirect_reviver,
+ 'Reactive': indirect_reviver,
+ })
+
+ while True:
+ try:
+ error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}'
+ if fatal:
+ raise ExtractorError(error_msg, video_id=video_id)
+ elif default is NO_DEFAULT:
+ self.report_warning(error_msg, video_id=video_id, only_once=True)
+ else:
+ self.write_debug(f'{video_id}: {error_msg}', only_once=True)
+ except StopIteration as error:
+ return error.value or ({} if default is NO_DEFAULT else default)
+
+ def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Parses metadata from Nuxt rich JSON payloads embedded in HTML"""
+ passed_default = default is not NO_DEFAULT
+
+ array = self._search_json(
+ r'