diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 5710f9a9e2..6aa52c5958 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -770,3 +770,8 @@ NeonMan
pj47x
troex
WouterGordts
+baierjan
+GeoffreyFrogeye
+Pawka
+v3DJG6GL
+yozel
diff --git a/Changelog.md b/Changelog.md
index 513724bf48..80b72da05a 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -4,6 +4,52 @@ # Changelog
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
+### 2025.05.22
+
+#### Core changes
+- **cookies**: [Fix Linux desktop environment detection](https://github.com/yt-dlp/yt-dlp/commit/e491fd4d090db3af52a82863fb0553dd5e17fb85) ([#13197](https://github.com/yt-dlp/yt-dlp/issues/13197)) by [mbway](https://github.com/mbway)
+- **jsinterp**: [Fix increment/decrement evaluation](https://github.com/yt-dlp/yt-dlp/commit/167d7a9f0ffd1b4fe600193441bdb7358db2740b) ([#13238](https://github.com/yt-dlp/yt-dlp/issues/13238)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
+
+#### Extractor changes
+- **1tv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/41c0a1fb89628696f8bb88e2b9f3a68f355b8c26) ([#13168](https://github.com/yt-dlp/yt-dlp/issues/13168)) by [bashonly](https://github.com/bashonly)
+- **amcnetworks**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/464c84fedf78eef822a431361155f108b5df96d7) ([#13147](https://github.com/yt-dlp/yt-dlp/issues/13147)) by [bashonly](https://github.com/bashonly)
+- **bitchute**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1d0f6539c47e5d5c68c3c47cdb7075339e2885ac) ([#13081](https://github.com/yt-dlp/yt-dlp/issues/13081)) by [bashonly](https://github.com/bashonly)
+- **cartoonnetwork**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/7dbb47f84f0ee1266a3a01f58c9bc4c76d76794a) ([#13148](https://github.com/yt-dlp/yt-dlp/issues/13148)) by [bashonly](https://github.com/bashonly)
+- **iprima**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/a7d9a5eb79ceeecb851389f3f2c88597871ca3f2) ([#12937](https://github.com/yt-dlp/yt-dlp/issues/12937)) by [baierjan](https://github.com/baierjan)
+- **jiosaavn**
+ - artist: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/586b557b124f954d3f625360ebe970989022ad97) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
+ - playlist, show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/317f4b8006c2c0f0f64f095b1485163ad97c9053) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
+ - show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6839276496d8814cf16f58b637e45663467928e6) ([#12803](https://github.com/yt-dlp/yt-dlp/issues/12803)) by [subrat-lima](https://github.com/subrat-lima)
+- **lrtradio**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/abf58dcd6a09e14eec4ea82ae12f79a0337cb383) ([#13200](https://github.com/yt-dlp/yt-dlp/issues/13200)) by [Pawka](https://github.com/Pawka)
+- **nebula**: [Support `--mark-watched`](https://github.com/yt-dlp/yt-dlp/commit/20f288bdc2173c7cc58d709d25ca193c1f6001e7) ([#13120](https://github.com/yt-dlp/yt-dlp/issues/13120)) by [GeoffreyFrogeye](https://github.com/GeoffreyFrogeye)
+- **niconico**
+ - [Fix error handling](https://github.com/yt-dlp/yt-dlp/commit/f569be4602c2a857087e495d5d7ed6060cd97abe) ([#13236](https://github.com/yt-dlp/yt-dlp/issues/13236)) by [bashonly](https://github.com/bashonly)
+ - live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7a7b85c9014d96421e18aa7ea5f4c1bee5ceece0) ([#13045](https://github.com/yt-dlp/yt-dlp/issues/13045)) by [doe1080](https://github.com/doe1080)
+- **nytimesarticle**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/b26bc32579c00ef579d75a835807ccc87d20ee0a) ([#13104](https://github.com/yt-dlp/yt-dlp/issues/13104)) by [bashonly](https://github.com/bashonly)
+- **once**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f475e8b529d18efdad603ffda02a56e707fe0e2c) ([#13164](https://github.com/yt-dlp/yt-dlp/issues/13164)) by [bashonly](https://github.com/bashonly)
+- **picarto**: vod: [Support `/profile/` video URLs](https://github.com/yt-dlp/yt-dlp/commit/31e090cb787f3504ec25485adff9a2a51d056734) ([#13227](https://github.com/yt-dlp/yt-dlp/issues/13227)) by [subrat-lima](https://github.com/subrat-lima)
+- **playsuisse**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/d880e060803ae8ed5a047e578cca01e1f0e630ce) ([#12466](https://github.com/yt-dlp/yt-dlp/issues/12466)) by [v3DJG6GL](https://github.com/v3DJG6GL)
+- **sprout**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/cbcfe6378dde33a650e3852ab17ad4503b8e008d) ([#13149](https://github.com/yt-dlp/yt-dlp/issues/13149)) by [bashonly](https://github.com/bashonly)
+- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ea8498ed534642dd7e925961b97b934987142fd3) ([#12957](https://github.com/yt-dlp/yt-dlp/issues/12957)) by [diman8](https://github.com/diman8)
+- **twitch**: [Support `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/00b1bec55249cf2ad6271d36492c51b34b6459d1) ([#13202](https://github.com/yt-dlp/yt-dlp/issues/13202)) by [bashonly](https://github.com/bashonly)
+- **vimeo**: event: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/545c1a5b6f2fe88722b41aef0e7485bf3be3f3f9) ([#13216](https://github.com/yt-dlp/yt-dlp/issues/13216)) by [bashonly](https://github.com/bashonly)
+- **wat.tv**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/f123cc83b3aea45053f5fa1d9141048b01fc2774) ([#13111](https://github.com/yt-dlp/yt-dlp/issues/13111)) by [bashonly](https://github.com/bashonly)
+- **weverse**: [Fix live extraction](https://github.com/yt-dlp/yt-dlp/commit/5328eda8820cc5f21dcf917684d23fbdca41831d) ([#13084](https://github.com/yt-dlp/yt-dlp/issues/13084)) by [bashonly](https://github.com/bashonly)
+- **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/83fabf352489d52843f67e6e9cc752db86d27e6e) ([#13245](https://github.com/yt-dlp/yt-dlp/issues/13245)) by [garret1317](https://github.com/garret1317)
+- **youtube**
+ - [Add PO token support for subtitles](https://github.com/yt-dlp/yt-dlp/commit/32ed5f107c6c641958d1cd2752e130de4db55a13) ([#13234](https://github.com/yt-dlp/yt-dlp/issues/13234)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
+ - [Add `web_embedded` client for age-restricted videos](https://github.com/yt-dlp/yt-dlp/commit/0feec6dc131f488428bf881519e7c69766fbb9ae) ([#13089](https://github.com/yt-dlp/yt-dlp/issues/13089)) by [bashonly](https://github.com/bashonly)
+ - [Add a PO Token Provider Framework](https://github.com/yt-dlp/yt-dlp/commit/2685654a37141cca63eda3a92da0e2706e23ccfd) ([#12840](https://github.com/yt-dlp/yt-dlp/issues/12840)) by [coletdjnz](https://github.com/coletdjnz)
+ - [Extract `media_type` for all videos](https://github.com/yt-dlp/yt-dlp/commit/ded11ebc9afba6ba33923375103e9be2d7c804e7) ([#13136](https://github.com/yt-dlp/yt-dlp/issues/13136)) by [bashonly](https://github.com/bashonly)
+ - [Fix `--live-from-start` support for premieres](https://github.com/yt-dlp/yt-dlp/commit/8f303afb43395be360cafd7ad4ce2b6e2eedfb8a) ([#13079](https://github.com/yt-dlp/yt-dlp/issues/13079)) by [arabcoders](https://github.com/arabcoders)
+ - [Fix geo-restriction error handling](https://github.com/yt-dlp/yt-dlp/commit/c7e575e31608c19c5b26c10a4229db89db5fc9a8) ([#13217](https://github.com/yt-dlp/yt-dlp/issues/13217)) by [yozel](https://github.com/yozel)
+
+#### Misc. changes
+- **build**
+ - [Bump PyInstaller to v6.13.0](https://github.com/yt-dlp/yt-dlp/commit/17cf9088d0d535e4a7feffbf02bd49cd9dae5ab9) ([#13082](https://github.com/yt-dlp/yt-dlp/issues/13082)) by [bashonly](https://github.com/bashonly)
+ - [Bump run-on-arch-action to v3](https://github.com/yt-dlp/yt-dlp/commit/9064d2482d1fe722bbb4a49731fe0711c410d1c8) ([#13088](https://github.com/yt-dlp/yt-dlp/issues/13088)) by [bashonly](https://github.com/bashonly)
+- **cleanup**: Miscellaneous: [7977b32](https://github.com/yt-dlp/yt-dlp/commit/7977b329ed97b216e37bd402f4935f28c00eac9e) by [bashonly](https://github.com/bashonly)
+
### 2025.04.30
#### Important changes
diff --git a/README.md b/README.md
index 0696884e83..fe013c56a0 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,7 @@
* [Post-processing Options](#post-processing-options)
* [SponsorBlock Options](#sponsorblock-options)
* [Extractor Options](#extractor-options)
+ * [Preset Aliases](#preset-aliases)
* [CONFIGURATION](#configuration)
* [Configuration file encoding](#configuration-file-encoding)
* [Authentication with netrc](#authentication-with-netrc)
@@ -348,8 +349,8 @@ ## General Options:
--no-flat-playlist Fully extract the videos of a playlist
(default)
--live-from-start Download livestreams from the start.
- Currently only supported for YouTube
- (Experimental)
+ Currently experimental and only supported
+ for YouTube and Twitch
--no-live-from-start Download livestreams from the current time
(default)
--wait-for-video MIN[-MAX] Wait for scheduled streams to become
@@ -375,12 +376,12 @@ ## General Options:
an alias starts with a dash "-", it is
prefixed with "--". Arguments are parsed
according to the Python string formatting
- mini-language. E.g. --alias get-audio,-X
- "-S=aext:{0},abr -x --audio-format {0}"
- creates options "--get-audio" and "-X" that
- takes an argument (ARG0) and expands to
- "-S=aext:ARG0,abr -x --audio-format ARG0".
- All defined aliases are listed in the --help
+ mini-language. E.g. --alias get-audio,-X "-S
+ aext:{0},abr -x --audio-format {0}" creates
+ options "--get-audio" and "-X" that takes an
+ argument (ARG0) and expands to "-S
+ aext:ARG0,abr -x --audio-format ARG0". All
+ defined aliases are listed in the --help
output. Alias options can trigger more
aliases; so be careful to avoid defining
recursive options. As a safety measure, each
@@ -1108,6 +1109,10 @@ ## Extractor Options:
arguments for different extractors
## Preset Aliases:
+Predefined aliases for convenience and ease of use. Note that future
+ versions of yt-dlp may add or adjust presets, but the existing preset
+ names will not be changed or removed
+
-t mp3 -f 'ba[acodec^=mp3]/ba/b' -x --audio-format
mp3
@@ -1798,6 +1803,7 @@ #### youtube
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
+* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
@@ -1807,8 +1813,12 @@ #### youtube
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
-* `po_token`: Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be either `gvs` (Google Video Server URLs) or `player` (Innertube player request)
-* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
+* `po_token`: Proof of Origin (PO) Token(s) to use. Comma-separated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
+* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
+* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try to fetch a PO Token regardless of whether the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
+
+#### youtubepot-webpo
+* `bind_to_visitor_id`: Whether to use the Visitor ID instead of Visitor Data for caching WebPO tokens. Either `true` (default) or `false`
#### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
diff --git a/supportedsites.md b/supportedsites.md
index 03bd8a7c39..c2d7b45556 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -246,7 +246,6 @@ # Supported sites
- **Canalplus**: mycanal.fr and piwiplus.fr
- **Canalsurmas**
- **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
- - **CartoonNetwork**
- **cbc.ca**
- **cbc.ca:player**
- **cbc.ca:player:playlist**
@@ -649,7 +648,10 @@ # Supported sites
- **jiocinema**: [*jiocinema*](## "netrc machine")
- **jiocinema:series**: [*jiocinema*](## "netrc machine")
- **jiosaavn:album**
+ - **jiosaavn:artist**
- **jiosaavn:playlist**
+ - **jiosaavn:show**
+ - **jiosaavn:show:playlist**
- **jiosaavn:song**
- **Joj**
- **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
@@ -1081,8 +1083,8 @@ # Supported sites
- **Photobucket**
- **PiaLive**
- **Piapro**: [*piapro*](## "netrc machine")
- - **Picarto**
- - **PicartoVod**
+ - **picarto**
+ - **picarto:vod**
- **Piksel**
- **Pinkbike**
- **Pinterest**
@@ -1390,7 +1392,6 @@ # Supported sites
- **Spreaker**
- **SpreakerShow**
- **SpringboardPlatform**
- - **Sprout**
- **SproutVideo**
- **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
- **SRGSSR**
@@ -1656,6 +1657,7 @@ # Supported sites
- **vimeo**: [*vimeo*](## "netrc machine")
- **vimeo:album**: [*vimeo*](## "netrc machine")
- **vimeo:channel**: [*vimeo*](## "netrc machine")
+ - **vimeo:event**: [*vimeo*](## "netrc machine")
- **vimeo:group**: [*vimeo*](## "netrc machine")
- **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes
- **vimeo:ondemand**: [*vimeo*](## "netrc machine")
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 708a04f92d..91312e4e5f 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -1435,6 +1435,27 @@ def test_load_plugins_compat(self):
FakeYDL().close()
assert all_plugins_loaded.value
+ def test_close_hooks(self):
+ # Should call all registered close hooks on close
+ close_hook_called = False
+ close_hook_two_called = False
+
+ def close_hook():
+ nonlocal close_hook_called
+ close_hook_called = True
+
+ def close_hook_two():
+ nonlocal close_hook_two_called
+ close_hook_two_called = True
+
+ ydl = FakeYDL()
+ ydl.add_close_hook(close_hook)
+ ydl.add_close_hook(close_hook_two)
+
+ ydl.close()
+ self.assertTrue(close_hook_called, 'Close hook was not called')
+ self.assertTrue(close_hook_two_called, 'Close hook two was not called')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 4b9b9b5a91..f956ab1876 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -58,6 +58,14 @@ def test_get_desktop_environment(self):
({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
+ ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME),
+ ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME),
+ ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
+ ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
+ ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
+
+ ({'XDG_CURRENT_DESKTOP': 'my_custom_de', 'DESKTOP_SESSION': 'my_custom_de', 'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
+
({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3),
({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 7ef0c48a70..ef3c681701 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -478,6 +478,14 @@ def test_extract_function_with_global_stack(self):
func = jsi.extract_function('c', {'e': 10}, {'f': 100, 'g': 1000})
self.assertEqual(func([1]), 1111)
+ def test_increment_decrement(self):
+ self._test('function f() { var x = 1; return ++x; }', 2)
+ self._test('function f() { var x = 1; return x++; }', 1)
+ self._test('function f() { var x = 1; x--; return x }', 0)
+ self._test('function f() { var y; var x = 1; x++, --x, x--, x--, y="z", "abc", x++; return --x }', -1)
+ self._test('function f() { var a = "test--"; return a; }', 'test--')
+ self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--')
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py
index 204fe87bda..a2feacba71 100644
--- a/test/test_networking_utils.py
+++ b/test/test_networking_utils.py
@@ -20,7 +20,6 @@
add_accept_encoding_header,
get_redirect_method,
make_socks_proxy_opts,
- select_proxy,
ssl_load_certs,
)
from yt_dlp.networking.exceptions import (
@@ -28,7 +27,7 @@
IncompleteRead,
)
from yt_dlp.socks import ProxyType
-from yt_dlp.utils.networking import HTTPHeaderDict
+from yt_dlp.utils.networking import HTTPHeaderDict, select_proxy
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/test/test_pot/conftest.py b/test/test_pot/conftest.py
new file mode 100644
index 0000000000..ff0667e928
--- /dev/null
+++ b/test/test_pot/conftest.py
@@ -0,0 +1,71 @@
+import collections
+
+import pytest
+
+from yt_dlp import YoutubeDL
+from yt_dlp.cookies import YoutubeDLCookieJar
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.extractor.youtube.pot._provider import IEContentProviderLogger
+from yt_dlp.extractor.youtube.pot.provider import PoTokenRequest, PoTokenContext
+from yt_dlp.utils.networking import HTTPHeaderDict
+
+
+class MockLogger(IEContentProviderLogger):
+
+ log_level = IEContentProviderLogger.LogLevel.TRACE
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.messages = collections.defaultdict(list)
+
+ def trace(self, message: str):
+ self.messages['trace'].append(message)
+
+ def debug(self, message: str):
+ self.messages['debug'].append(message)
+
+ def info(self, message: str):
+ self.messages['info'].append(message)
+
+ def warning(self, message: str, *, once=False):
+ self.messages['warning'].append(message)
+
+ def error(self, message: str):
+ self.messages['error'].append(message)
+
+
+@pytest.fixture
+def ie() -> InfoExtractor:
+ ydl = YoutubeDL()
+ return ydl.get_info_extractor('Youtube')
+
+
+@pytest.fixture
+def logger() -> MockLogger:
+ return MockLogger()
+
+
+@pytest.fixture()
+def pot_request() -> PoTokenRequest:
+ return PoTokenRequest(
+ context=PoTokenContext.GVS,
+ innertube_context={'client': {'clientName': 'WEB'}},
+ innertube_host='youtube.com',
+ session_index=None,
+ player_url=None,
+ is_authenticated=False,
+ video_webpage=None,
+
+ visitor_data='example-visitor-data',
+ data_sync_id='example-data-sync-id',
+ video_id='example-video-id',
+
+ request_cookiejar=YoutubeDLCookieJar(),
+ request_proxy=None,
+ request_headers=HTTPHeaderDict(),
+ request_timeout=None,
+ request_source_address=None,
+ request_verify_tls=True,
+
+ bypass_cache=False,
+ )
diff --git a/test/test_pot/test_pot_builtin_memorycache.py b/test/test_pot/test_pot_builtin_memorycache.py
new file mode 100644
index 0000000000..ea19fbe29f
--- /dev/null
+++ b/test/test_pot/test_pot_builtin_memorycache.py
@@ -0,0 +1,117 @@
+import threading
+import time
+from collections import OrderedDict
+import pytest
+from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, BuiltinIEContentProvider
+from yt_dlp.utils import bug_reports_message
+from yt_dlp.extractor.youtube.pot._builtin.memory_cache import MemoryLRUPCP, memorylru_preference, initialize_global_cache
+from yt_dlp.version import __version__
+from yt_dlp.extractor.youtube.pot._registry import _pot_cache_providers, _pot_memory_cache
+
+
+class TestMemoryLRUPCS:
+
+ def test_base_type(self):
+ assert issubclass(MemoryLRUPCP, IEContentProvider)
+ assert issubclass(MemoryLRUPCP, BuiltinIEContentProvider)
+
+ @pytest.fixture
+ def pcp(self, ie, logger) -> MemoryLRUPCP:
+ return MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), max_size))
+
+ def test_is_registered(self):
+ assert _pot_cache_providers.value.get('MemoryLRU') == MemoryLRUPCP
+
+ def test_initialization(self, pcp):
+ assert pcp.PROVIDER_NAME == 'memory'
+ assert pcp.PROVIDER_VERSION == __version__
+ assert pcp.BUG_REPORT_MESSAGE == bug_reports_message(before='')
+ assert pcp.is_available()
+
+ def test_store_and_get(self, pcp):
+ pcp.store('key1', 'value1', int(time.time()) + 60)
+ assert pcp.get('key1') == 'value1'
+ assert len(pcp.cache) == 1
+
+ def test_store_ignore_expired(self, pcp):
+ pcp.store('key1', 'value1', int(time.time()) - 1)
+ assert len(pcp.cache) == 0
+ assert pcp.get('key1') is None
+ assert len(pcp.cache) == 0
+
+ def test_store_override_existing_key(self, ie, logger):
+ MAX_SIZE = 2
+ pcp = MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), MAX_SIZE))
+ pcp.store('key1', 'value1', int(time.time()) + 60)
+ pcp.store('key2', 'value2', int(time.time()) + 60)
+ assert len(pcp.cache) == 2
+ pcp.store('key1', 'value2', int(time.time()) + 60)
+ # Ensure that the override key gets added to the end of the cache instead of in the same position
+ pcp.store('key3', 'value3', int(time.time()) + 60)
+ assert pcp.get('key1') == 'value2'
+
+ def test_store_ignore_expired_existing_key(self, pcp):
+ pcp.store('key1', 'value2', int(time.time()) + 60)
+ pcp.store('key1', 'value1', int(time.time()) - 1)
+ assert len(pcp.cache) == 1
+ assert pcp.get('key1') == 'value2'
+ assert len(pcp.cache) == 1
+
+ def test_get_key_expired(self, pcp):
+ pcp.store('key1', 'value1', int(time.time()) + 60)
+ assert pcp.get('key1') == 'value1'
+ assert len(pcp.cache) == 1
+ pcp.cache['key1'] = ('value1', int(time.time()) - 1)
+ assert pcp.get('key1') is None
+ assert len(pcp.cache) == 0
+
+ def test_lru_eviction(self, ie, logger):
+ MAX_SIZE = 2
+ provider = MemoryLRUPCP(ie, logger, {}, initialize_cache=lambda max_size: (OrderedDict(), threading.Lock(), MAX_SIZE))
+ provider.store('key1', 'value1', int(time.time()) + 5)
+ provider.store('key2', 'value2', int(time.time()) + 5)
+ assert len(provider.cache) == 2
+
+ assert provider.get('key1') == 'value1'
+
+ provider.store('key3', 'value3', int(time.time()) + 5)
+ assert len(provider.cache) == 2
+
+ assert provider.get('key2') is None
+
+ provider.store('key4', 'value4', int(time.time()) + 5)
+ assert len(provider.cache) == 2
+
+ assert provider.get('key1') is None
+ assert provider.get('key3') == 'value3'
+ assert provider.get('key4') == 'value4'
+
+ def test_delete(self, pcp):
+ pcp.store('key1', 'value1', int(time.time()) + 5)
+ assert len(pcp.cache) == 1
+ assert pcp.get('key1') == 'value1'
+ pcp.delete('key1')
+ assert len(pcp.cache) == 0
+ assert pcp.get('key1') is None
+
+ def test_use_global_cache_default(self, ie, logger):
+ pcp = MemoryLRUPCP(ie, logger, {})
+ assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25
+ assert pcp.cache is _pot_memory_cache.value['cache']
+ assert pcp.lock is _pot_memory_cache.value['lock']
+
+ pcp2 = MemoryLRUPCP(ie, logger, {})
+ assert pcp.max_size == pcp2.max_size == _pot_memory_cache.value['max_size'] == 25
+ assert pcp.cache is pcp2.cache is _pot_memory_cache.value['cache']
+ assert pcp.lock is pcp2.lock is _pot_memory_cache.value['lock']
+
+ def test_fail_max_size_change_global(self, ie, logger):
+ pcp = MemoryLRUPCP(ie, logger, {})
+ assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25
+ with pytest.raises(ValueError, match='Cannot change max_size of initialized global memory cache'):
+ initialize_global_cache(50)
+
+ assert pcp.max_size == _pot_memory_cache.value['max_size'] == 25
+
+ def test_memory_lru_preference(self, pcp, ie, pot_request):
+ assert memorylru_preference(pcp, pot_request) == 10000
diff --git a/test/test_pot/test_pot_builtin_utils.py b/test/test_pot/test_pot_builtin_utils.py
new file mode 100644
index 0000000000..a95fc4e159
--- /dev/null
+++ b/test/test_pot/test_pot_builtin_utils.py
@@ -0,0 +1,47 @@
+import pytest
+from yt_dlp.extractor.youtube.pot.provider import (
+ PoTokenContext,
+
+)
+
+from yt_dlp.extractor.youtube.pot.utils import get_webpo_content_binding, ContentBindingType
+
+
+class TestGetWebPoContentBinding:
+
+ @pytest.mark.parametrize('client_name, context, is_authenticated, expected', [
+ *[(client, context, is_authenticated, expected) for client in [
+ 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
+ for context, is_authenticated, expected in [
+ (PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
+ (PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
+ (PoTokenContext.SUBS, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
+ (PoTokenContext.GVS, True, ('example-data-sync-id', ContentBindingType.DATASYNC_ID)),
+ ]],
+ ('WEB_REMIX', PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
+ ('WEB_REMIX', PoTokenContext.PLAYER, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
+ ('ANDROID', PoTokenContext.GVS, False, (None, None)),
+ ('IOS', PoTokenContext.GVS, False, (None, None)),
+ ])
+ def test_get_webpo_content_binding(self, pot_request, client_name, context, is_authenticated, expected):
+ pot_request.innertube_context['client']['clientName'] = client_name
+ pot_request.context = context
+ pot_request.is_authenticated = is_authenticated
+ assert get_webpo_content_binding(pot_request) == expected
+
+ def test_extract_visitor_id(self, pot_request):
+ pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D'
+ assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == ('123abcXYZ_-', ContentBindingType.VISITOR_ID)
+
+ def test_invalid_visitor_id(self, pot_request):
+ # visitor id not alphanumeric (i.e. protobuf extraction failed)
+ pot_request.visitor_data = 'CggxMjM0NTY3OCiA4s-qBg%3D%3D'
+ assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA)
+
+ def test_no_visitor_id(self, pot_request):
+ pot_request.visitor_data = 'KIDiz6oG'
+ assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA)
+
+ def test_invalid_base64(self, pot_request):
+ pot_request.visitor_data = 'invalid-base64'
+ assert get_webpo_content_binding(pot_request, bind_to_visitor_id=True) == (pot_request.visitor_data, ContentBindingType.VISITOR_DATA)
diff --git a/test/test_pot/test_pot_builtin_webpospec.py b/test/test_pot/test_pot_builtin_webpospec.py
new file mode 100644
index 0000000000..c5fb6f3820
--- /dev/null
+++ b/test/test_pot/test_pot_builtin_webpospec.py
@@ -0,0 +1,92 @@
+import pytest
+
+from yt_dlp.extractor.youtube.pot._provider import IEContentProvider, BuiltinIEContentProvider
+from yt_dlp.extractor.youtube.pot.cache import CacheProviderWritePolicy
+from yt_dlp.utils import bug_reports_message
+from yt_dlp.extractor.youtube.pot.provider import (
+ PoTokenRequest,
+ PoTokenContext,
+
+)
+from yt_dlp.version import __version__
+
+from yt_dlp.extractor.youtube.pot._builtin.webpo_cachespec import WebPoPCSP
+from yt_dlp.extractor.youtube.pot._registry import _pot_pcs_providers
+
+
+@pytest.fixture()
+def pot_request(pot_request) -> PoTokenRequest:
+ pot_request.visitor_data = 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D' # visitor_id=123abcXYZ_-
+ return pot_request
+
+
+class TestWebPoPCSP:
+ def test_base_type(self):
+ assert issubclass(WebPoPCSP, IEContentProvider)
+ assert issubclass(WebPoPCSP, BuiltinIEContentProvider)
+
+ def test_init(self, ie, logger):
+ pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
+ assert pcs.PROVIDER_NAME == 'webpo'
+ assert pcs.PROVIDER_VERSION == __version__
+ assert pcs.BUG_REPORT_MESSAGE == bug_reports_message(before='')
+ assert pcs.is_available()
+
+ def test_is_registered(self):
+ assert _pot_pcs_providers.value.get('WebPo') == WebPoPCSP
+
+ @pytest.mark.parametrize('client_name, context, is_authenticated', [
+ ('ANDROID', PoTokenContext.GVS, False),
+ ('IOS', PoTokenContext.GVS, False),
+ ('IOS', PoTokenContext.PLAYER, False),
+ ])
+ def test_not_supports(self, ie, logger, pot_request, client_name, context, is_authenticated):
+ pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
+ pot_request.innertube_context['client']['clientName'] = client_name
+ pot_request.context = context
+ pot_request.is_authenticated = is_authenticated
+ assert pcs.generate_cache_spec(pot_request) is None
+
+ @pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [
+ *[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
+ 'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
+ for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
+ (PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
+ (PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
+ (PoTokenContext.GVS, True, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': 'example-data-sync-id', 'cbt': 'datasync_id'}),
+ ]],
+ ('WEB_REMIX', PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
+ ('WEB', PoTokenContext.GVS, False, None, None, None, {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id', 'ip': None, 'sa': None, 'px': None}),
+ ('TVHTML5', PoTokenContext.PLAYER, False, None, None, 'http://example.com', {'t': 'webpo', 'cb': '123abcXYZ_-', 'cbt': 'video_id', 'ip': None, 'sa': None, 'px': 'http://example.com'}),
+
+ ])
+ def test_generate_key_bindings(self, ie, logger, pot_request, client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected):
+ pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
+ pot_request.innertube_context['client']['clientName'] = client_name
+ pot_request.context = context
+ pot_request.is_authenticated = is_authenticated
+ pot_request.innertube_context['client']['remoteHost'] = remote_host
+ pot_request.request_source_address = source_address
+ pot_request.request_proxy = request_proxy
+ pot_request.video_id = '123abcXYZ_-' # same as visitor id to test type
+
+ assert pcs.generate_cache_spec(pot_request).key_bindings == expected
+
+ def test_no_bind_visitor_id(self, ie, logger, pot_request):
+ # Should not bind to visitor id if setting is set to False
+ pcs = WebPoPCSP(ie=ie, logger=logger, settings={'bind_to_visitor_id': ['false']})
+ pot_request.innertube_context['client']['clientName'] = 'WEB'
+ pot_request.context = PoTokenContext.GVS
+ pot_request.is_authenticated = False
+ assert pcs.generate_cache_spec(pot_request).key_bindings == {'t': 'webpo', 'ip': None, 'sa': None, 'px': None, 'cb': 'CgsxMjNhYmNYWVpfLSiA4s%2DqBg%3D%3D', 'cbt': 'visitor_data'}
+
+ def test_default_ttl(self, ie, logger, pot_request):
+ pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
+ assert pcs.generate_cache_spec(pot_request).default_ttl == 6 * 60 * 60 # should default to 6 hours
+
+ def test_write_policy(self, ie, logger, pot_request):
+ pcs = WebPoPCSP(ie=ie, logger=logger, settings={})
+ pot_request.context = PoTokenContext.GVS
+ assert pcs.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_ALL
+ pot_request.context = PoTokenContext.PLAYER
+ assert pcs.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_FIRST
diff --git a/test/test_pot/test_pot_director.py b/test/test_pot/test_pot_director.py
new file mode 100644
index 0000000000..bbfdd0e98e
--- /dev/null
+++ b/test/test_pot/test_pot_director.py
@@ -0,0 +1,1529 @@
+from __future__ import annotations
+import abc
+import base64
+import dataclasses
+import hashlib
+import json
+import time
+import pytest
+
+from yt_dlp.extractor.youtube.pot._provider import BuiltinIEContentProvider, IEContentProvider
+
+from yt_dlp.extractor.youtube.pot.provider import (
+ PoTokenRequest,
+ PoTokenContext,
+ PoTokenProviderError,
+ PoTokenProviderRejectedRequest,
+)
+from yt_dlp.extractor.youtube.pot._director import (
+ PoTokenCache,
+ validate_cache_spec,
+ clean_pot,
+ validate_response,
+ PoTokenRequestDirector,
+ provider_display_list,
+)
+
+from yt_dlp.extractor.youtube.pot.cache import (
+ PoTokenCacheSpec,
+ PoTokenCacheSpecProvider,
+ PoTokenCacheProvider,
+ CacheProviderWritePolicy,
+ PoTokenCacheProviderError,
+)
+
+
+from yt_dlp.extractor.youtube.pot.provider import (
+ PoTokenResponse,
+ PoTokenProvider,
+)
+
+
+class BaseMockPoTokenProvider(PoTokenProvider, abc.ABC):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.available_called_times = 0
+ self.request_called_times = 0
+ self.close_called = False
+
+ def is_available(self) -> bool:
+ self.available_called_times += 1
+ return True
+
+ def request_pot(self, *args, **kwargs):
+ self.request_called_times += 1
+ return super().request_pot(*args, **kwargs)
+
+ def close(self):
+ self.close_called = True
+ super().close()
+
+
+class ExamplePTP(BaseMockPoTokenProvider):
+ PROVIDER_NAME = 'example'
+ PROVIDER_VERSION = '0.0.1'
+ BUG_REPORT_LOCATION = 'https://example.com/issues'
+
+ _SUPPORTED_CLIENTS = ('WEB',)
+ _SUPPORTED_CONTEXTS = (PoTokenContext.GVS, )
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ if request.data_sync_id == 'example':
+ return PoTokenResponse(request.video_id)
+ return PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+
+def success_ptp(response: PoTokenResponse | None = None, key: str | None = None):
+ class SuccessPTP(BaseMockPoTokenProvider):
+ PROVIDER_NAME = 'success'
+ PROVIDER_VERSION = '0.0.1'
+ BUG_REPORT_LOCATION = 'https://success.example.com/issues'
+
+ _SUPPORTED_CLIENTS = ('WEB',)
+ _SUPPORTED_CONTEXTS = (PoTokenContext.GVS,)
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ return response or PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ if key:
+ SuccessPTP.PROVIDER_KEY = key
+ return SuccessPTP
+
+
+@pytest.fixture
+def pot_provider(ie, logger):
+ return success_ptp()(ie=ie, logger=logger, settings={})
+
+
+class UnavailablePTP(BaseMockPoTokenProvider):
+ PROVIDER_NAME = 'unavailable'
+ BUG_REPORT_LOCATION = 'https://unavailable.example.com/issues'
+ _SUPPORTED_CLIENTS = None
+ _SUPPORTED_CONTEXTS = None
+
+ def is_available(self) -> bool:
+ super().is_available()
+ return False
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ raise PoTokenProviderError('something went wrong')
+
+
+class UnsupportedPTP(BaseMockPoTokenProvider):
+ PROVIDER_NAME = 'unsupported'
+ BUG_REPORT_LOCATION = 'https://unsupported.example.com/issues'
+ _SUPPORTED_CLIENTS = None
+ _SUPPORTED_CONTEXTS = None
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ raise PoTokenProviderRejectedRequest('unsupported request')
+
+
+class ErrorPTP(BaseMockPoTokenProvider):
+ PROVIDER_NAME = 'error'
+ BUG_REPORT_LOCATION = 'https://error.example.com/issues'
+ _SUPPORTED_CLIENTS = None
+ _SUPPORTED_CONTEXTS = None
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ expected = request.video_id == 'expected'
+ raise PoTokenProviderError('an error occurred', expected=expected)
+
+
+class UnexpectedErrorPTP(BaseMockPoTokenProvider):
+ PROVIDER_NAME = 'unexpected_error'
+ BUG_REPORT_LOCATION = 'https://unexpected.example.com/issues'
+ _SUPPORTED_CLIENTS = None
+ _SUPPORTED_CONTEXTS = None
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ raise ValueError('an unexpected error occurred')
+
+
+class InvalidPTP(BaseMockPoTokenProvider):
+ PROVIDER_NAME = 'invalid'
+ BUG_REPORT_LOCATION = 'https://invalid.example.com/issues'
+ _SUPPORTED_CLIENTS = None
+ _SUPPORTED_CONTEXTS = None
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ if request.video_id == 'invalid_type':
+ return 'invalid-response'
+ else:
+ return PoTokenResponse('example-token?', expires_at='123')
+
+
+class BaseMockCacheSpecProvider(PoTokenCacheSpecProvider, abc.ABC):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.generate_called_times = 0
+ self.is_available_called_times = 0
+ self.close_called = False
+
+ def is_available(self) -> bool:
+ self.is_available_called_times += 1
+ return super().is_available()
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ self.generate_called_times += 1
+
+ def close(self):
+ self.close_called = True
+ super().close()
+
+
+class ExampleCacheSpecProviderPCSP(BaseMockCacheSpecProvider):
+
+ PROVIDER_NAME = 'example'
+ PROVIDER_VERSION = '0.0.1'
+ BUG_REPORT_LOCATION = 'https://example.com/issues'
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ super().generate_cache_spec(request)
+ return PoTokenCacheSpec(
+ key_bindings={'v': request.video_id, 'e': None},
+ default_ttl=60,
+ )
+
+
+class UnavailableCacheSpecProviderPCSP(BaseMockCacheSpecProvider):
+
+ PROVIDER_NAME = 'unavailable'
+ PROVIDER_VERSION = '0.0.1'
+
+ def is_available(self) -> bool:
+ super().is_available()
+ return False
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ super().generate_cache_spec(request)
+ return None
+
+
+class UnsupportedCacheSpecProviderPCSP(BaseMockCacheSpecProvider):
+
+ PROVIDER_NAME = 'unsupported'
+ PROVIDER_VERSION = '0.0.1'
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ super().generate_cache_spec(request)
+ return None
+
+
+class InvalidSpecCacheSpecProviderPCSP(BaseMockCacheSpecProvider):
+
+ PROVIDER_NAME = 'invalid'
+ PROVIDER_VERSION = '0.0.1'
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ super().generate_cache_spec(request)
+ return 'invalid-spec'
+
+
+class ErrorSpecCacheSpecProviderPCSP(BaseMockCacheSpecProvider):
+
+ PROVIDER_NAME = 'invalid'
+ PROVIDER_VERSION = '0.0.1'
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ super().generate_cache_spec(request)
+ raise ValueError('something went wrong')
+
+
+class BaseMockCacheProvider(PoTokenCacheProvider, abc.ABC):
+ BUG_REPORT_MESSAGE = 'example bug report message'
+
+ def __init__(self, *args, available=True, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.store_calls = 0
+ self.delete_calls = 0
+ self.get_calls = 0
+ self.available_called_times = 0
+ self.available = available
+
+ def is_available(self) -> bool:
+ self.available_called_times += 1
+ return self.available
+
+ def store(self, *args, **kwargs):
+ self.store_calls += 1
+
+ def delete(self, *args, **kwargs):
+ self.delete_calls += 1
+
+ def get(self, *args, **kwargs):
+ self.get_calls += 1
+
+ def close(self):
+ self.close_called = True
+ super().close()
+
+
+class ErrorPCP(BaseMockCacheProvider):
+ PROVIDER_NAME = 'error'
+
+ def store(self, *args, **kwargs):
+ super().store(*args, **kwargs)
+ raise PoTokenCacheProviderError('something went wrong')
+
+ def get(self, *args, **kwargs):
+ super().get(*args, **kwargs)
+ raise PoTokenCacheProviderError('something went wrong')
+
+
+class UnexpectedErrorPCP(BaseMockCacheProvider):
+ PROVIDER_NAME = 'unexpected_error'
+
+ def store(self, *args, **kwargs):
+ super().store(*args, **kwargs)
+ raise ValueError('something went wrong')
+
+ def get(self, *args, **kwargs):
+ super().get(*args, **kwargs)
+ raise ValueError('something went wrong')
+
+
+class MockMemoryPCP(BaseMockCacheProvider):
+ PROVIDER_NAME = 'memory'
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.cache = {}
+
+ def store(self, key, value, expires_at):
+ super().store(key, value, expires_at)
+ self.cache[key] = (value, expires_at)
+
+ def delete(self, key):
+ super().delete(key)
+ self.cache.pop(key, None)
+
+ def get(self, key):
+ super().get(key)
+ return self.cache.get(key, [None])[0]
+
+
+def create_memory_pcp(ie, logger, provider_key='memory', provider_name='memory', available=True):
+ cache = MockMemoryPCP(ie, logger, {}, available=available)
+ cache.PROVIDER_KEY = provider_key
+ cache.PROVIDER_NAME = provider_name
+ return cache
+
+
+@pytest.fixture
+def memorypcp(ie, logger) -> MockMemoryPCP:
+ return create_memory_pcp(ie, logger)
+
+
+@pytest.fixture
+def pot_cache(ie, logger):
+ class MockPoTokenCache(PoTokenCache):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.get_calls = 0
+ self.store_calls = 0
+ self.close_called = False
+
+ def get(self, *args, **kwargs):
+ self.get_calls += 1
+ return super().get(*args, **kwargs)
+
+ def store(self, *args, **kwargs):
+ self.store_calls += 1
+ return super().store(*args, **kwargs)
+
+ def close(self):
+ self.close_called = True
+ super().close()
+
+ return MockPoTokenCache(
+ cache_providers=[MockMemoryPCP(ie, logger, {})],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie, logger, settings={})],
+ logger=logger,
+ )
+
+
+EXAMPLE_PO_TOKEN = base64.urlsafe_b64encode(b'example-token').decode()
+
+
+class TestPoTokenCache:
+
+ def test_cache_success(self, memorypcp, pot_request, ie, logger):
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ assert cache.get(pot_request) is None
+ cache.store(pot_request, response)
+
+ cached_response = cache.get(pot_request)
+ assert cached_response is not None
+ assert cached_response.po_token == EXAMPLE_PO_TOKEN
+ assert cached_response.expires_at is not None
+
+ assert cache.get(dataclasses.replace(pot_request, video_id='another-video-id')) is None
+
+ def test_unsupported_cache_spec_no_fallback(self, memorypcp, pot_request, ie, logger):
+ unsupported_provider = UnsupportedCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[unsupported_provider],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ assert cache.get(pot_request) is None
+ assert unsupported_provider.generate_called_times == 1
+ cache.store(pot_request, response)
+ assert len(memorypcp.cache) == 0
+ assert unsupported_provider.generate_called_times == 2
+ assert cache.get(pot_request) is None
+ assert unsupported_provider.generate_called_times == 3
+ assert len(logger.messages.get('error', [])) == 0
+
+ def test_unsupported_cache_spec_fallback(self, memorypcp, pot_request, ie, logger):
+ unsupported_provider = UnsupportedCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ example_provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[unsupported_provider, example_provider],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ assert cache.get(pot_request) is None
+ assert unsupported_provider.generate_called_times == 1
+ assert example_provider.generate_called_times == 1
+
+ cache.store(pot_request, response)
+ assert unsupported_provider.generate_called_times == 2
+ assert example_provider.generate_called_times == 2
+
+ cached_response = cache.get(pot_request)
+ assert unsupported_provider.generate_called_times == 3
+ assert example_provider.generate_called_times == 3
+ assert cached_response is not None
+ assert cached_response.po_token == EXAMPLE_PO_TOKEN
+ assert cached_response.expires_at is not None
+
+ assert len(logger.messages.get('error', [])) == 0
+
+ def test_invalid_cache_spec_no_fallback(self, memorypcp, pot_request, ie, logger):
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[InvalidSpecCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ assert cache.get(pot_request) is None
+ cache.store(pot_request, response)
+
+ assert cache.get(pot_request) is None
+
+ assert 'PoTokenCacheSpecProvider "InvalidSpecCacheSpecProvider" generate_cache_spec() returned invalid spec invalid-spec; please report this issue to the provider developer at (developer has not provided a bug report location) .' in logger.messages['error']
+
+ def test_invalid_cache_spec_fallback(self, memorypcp, pot_request, ie, logger):
+
+ invalid_provider = InvalidSpecCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ example_provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[invalid_provider, example_provider],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ assert cache.get(pot_request) is None
+ assert invalid_provider.generate_called_times == example_provider.generate_called_times == 1
+
+ cache.store(pot_request, response)
+ assert invalid_provider.generate_called_times == example_provider.generate_called_times == 2
+
+ cached_response = cache.get(pot_request)
+ assert invalid_provider.generate_called_times == example_provider.generate_called_times == 3
+ assert cached_response is not None
+ assert cached_response.po_token == EXAMPLE_PO_TOKEN
+ assert cached_response.expires_at is not None
+
+ assert 'PoTokenCacheSpecProvider "InvalidSpecCacheSpecProvider" generate_cache_spec() returned invalid spec invalid-spec; please report this issue to the provider developer at (developer has not provided a bug report location) .' in logger.messages['error']
+
+ def test_unavailable_cache_spec_no_fallback(self, memorypcp, pot_request, ie, logger):
+ unavailable_provider = UnavailableCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[unavailable_provider],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ assert cache.get(pot_request) is None
+ cache.store(pot_request, response)
+ assert cache.get(pot_request) is None
+ assert unavailable_provider.generate_called_times == 0
+
+ def test_unavailable_cache_spec_fallback(self, memorypcp, pot_request, ie, logger):
+ unavailable_provider = UnavailableCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ example_provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[unavailable_provider, example_provider],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ assert cache.get(pot_request) is None
+ assert unavailable_provider.generate_called_times == 0
+ assert unavailable_provider.is_available_called_times == 1
+ assert example_provider.generate_called_times == 1
+
+ cache.store(pot_request, response)
+ assert unavailable_provider.generate_called_times == 0
+ assert unavailable_provider.is_available_called_times == 2
+ assert example_provider.generate_called_times == 2
+
+ cached_response = cache.get(pot_request)
+ assert unavailable_provider.generate_called_times == 0
+ assert unavailable_provider.is_available_called_times == 3
+ assert example_provider.generate_called_times == 3
+ assert example_provider.is_available_called_times == 3
+ assert cached_response is not None
+ assert cached_response.po_token == EXAMPLE_PO_TOKEN
+ assert cached_response.expires_at is not None
+
+ def test_unexpected_error_cache_spec(self, memorypcp, pot_request, ie, logger):
+ error_provider = ErrorSpecCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[error_provider],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ assert cache.get(pot_request) is None
+ cache.store(pot_request, response)
+ assert cache.get(pot_request) is None
+ assert error_provider.generate_called_times == 3
+ assert error_provider.is_available_called_times == 3
+
+ assert 'Error occurred with "invalid" PO Token cache spec provider: ValueError(\'something went wrong\'); please report this issue to the provider developer at (developer has not provided a bug report location) .' in logger.messages['error']
+
+ def test_unexpected_error_cache_spec_fallback(self, memorypcp, pot_request, ie, logger):
+ error_provider = ErrorSpecCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ example_provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[error_provider, example_provider],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ assert cache.get(pot_request) is None
+ assert error_provider.generate_called_times == 1
+ assert error_provider.is_available_called_times == 1
+ assert example_provider.generate_called_times == 1
+
+ cache.store(pot_request, response)
+ assert error_provider.generate_called_times == 2
+ assert error_provider.is_available_called_times == 2
+ assert example_provider.generate_called_times == 2
+
+ cached_response = cache.get(pot_request)
+ assert error_provider.generate_called_times == 3
+ assert error_provider.is_available_called_times == 3
+ assert example_provider.generate_called_times == 3
+ assert example_provider.is_available_called_times == 3
+ assert cached_response is not None
+ assert cached_response.po_token == EXAMPLE_PO_TOKEN
+ assert cached_response.expires_at is not None
+
+ assert 'Error occurred with "invalid" PO Token cache spec provider: ValueError(\'something went wrong\'); please report this issue to the provider developer at (developer has not provided a bug report location) .' in logger.messages['error']
+
+ def test_key_bindings_spec_provider(self, memorypcp, pot_request, ie, logger):
+
+ class ExampleProviderPCSP(PoTokenCacheSpecProvider):
+ PROVIDER_NAME = 'example'
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ return PoTokenCacheSpec(
+ key_bindings={'v': request.video_id},
+ default_ttl=60,
+ )
+
+ class ExampleProviderTwoPCSP(ExampleProviderPCSP):
+ pass
+
+ example_provider = ExampleProviderPCSP(ie=ie, logger=logger, settings={})
+ example_provider_two = ExampleProviderTwoPCSP(ie=ie, logger=logger, settings={})
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[example_provider],
+ logger=logger,
+ )
+
+ assert cache.get(pot_request) is None
+ cache.store(pot_request, response)
+ assert len(memorypcp.cache) == 1
+ assert hashlib.sha256(
+ f"{{'_dlp_cache': 'v1', '_p': 'ExampleProvider', 'v': '{pot_request.video_id}'}}".encode()).hexdigest() in memorypcp.cache
+
+ # The second spec provider returns the exact same key bindings as the first one,
+ # however the PoTokenCache should use the provider key to differentiate between them
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[example_provider_two],
+ logger=logger,
+ )
+
+ assert cache.get(pot_request) is None
+ cache.store(pot_request, response)
+ assert len(memorypcp.cache) == 2
+ assert hashlib.sha256(
+ f"{{'_dlp_cache': 'v1', '_p': 'ExampleProviderTwo', 'v': '{pot_request.video_id}'}}".encode()).hexdigest() in memorypcp.cache
+
+ def test_cache_provider_preferences(self, pot_request, ie, logger):
+ pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one')
+ pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two')
+
+ cache = PoTokenCache(
+ cache_providers=[pcp_one, pcp_two],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN), write_policy=CacheProviderWritePolicy.WRITE_FIRST)
+ assert len(pcp_one.cache) == 1
+ assert len(pcp_two.cache) == 0
+
+ assert cache.get(pot_request)
+ assert pcp_one.get_calls == 1
+ assert pcp_two.get_calls == 0
+
+ standard_preference_called = False
+ pcp_one_preference_claled = False
+
+ def standard_preference(provider, request, *_, **__):
+ nonlocal standard_preference_called
+ standard_preference_called = True
+ assert isinstance(provider, PoTokenCacheProvider)
+ assert isinstance(request, PoTokenRequest)
+ return 1
+
+ def pcp_one_preference(provider, request, *_, **__):
+ nonlocal pcp_one_preference_claled
+ pcp_one_preference_claled = True
+ assert isinstance(provider, PoTokenCacheProvider)
+ assert isinstance(request, PoTokenRequest)
+ if provider.PROVIDER_KEY == pcp_one.PROVIDER_KEY:
+ return -100
+ return 0
+
+        # test that it can handle multiple preferences
+ cache.cache_provider_preferences.append(standard_preference)
+ cache.cache_provider_preferences.append(pcp_one_preference)
+
+ cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN), write_policy=CacheProviderWritePolicy.WRITE_FIRST)
+ assert cache.get(pot_request)
+ assert len(pcp_one.cache) == len(pcp_two.cache) == 1
+ assert pcp_two.get_calls == pcp_one.get_calls == 1
+ assert pcp_one.store_calls == pcp_two.store_calls == 1
+ assert standard_preference_called
+ assert pcp_one_preference_claled
+
+ def test_secondary_cache_provider_hit(self, pot_request, ie, logger):
+ pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one')
+ pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two')
+
+ cache = PoTokenCache(
+ cache_providers=[pcp_two],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ # Given the lower priority provider has the cache hit, store the response in the higher priority provider
+ cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN))
+ assert cache.get(pot_request)
+
+ cache.cache_providers[pcp_one.PROVIDER_KEY] = pcp_one
+
+ def pcp_one_pref(provider, *_, **__):
+ if provider.PROVIDER_KEY == pcp_one.PROVIDER_KEY:
+ return 1
+ return -1
+
+ cache.cache_provider_preferences.append(pcp_one_pref)
+
+ assert cache.get(pot_request)
+ assert pcp_one.get_calls == 1
+ assert pcp_two.get_calls == 2
+ # Should write back to pcp_one (now the highest priority cache provider)
+ assert pcp_one.store_calls == pcp_two.store_calls == 1
+ assert 'Writing PO Token response to highest priority cache provider' in logger.messages['trace']
+
+ def test_cache_provider_no_hits(self, pot_request, ie, logger):
+ pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one')
+ pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two')
+
+ cache = PoTokenCache(
+ cache_providers=[pcp_one, pcp_two],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ assert cache.get(pot_request) is None
+ assert pcp_one.get_calls == pcp_two.get_calls == 1
+
+ def test_get_invalid_po_token_response(self, pot_request, ie, logger):
+ # Test various scenarios where the po token response stored in the cache provider is invalid
+ pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one')
+ pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two')
+
+ cache = PoTokenCache(
+ cache_providers=[pcp_one, pcp_two],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ valid_response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, valid_response)
+ assert len(pcp_one.cache) == len(pcp_two.cache) == 1
+ # Overwrite the valid response with an invalid one in the cache
+ pcp_one.store(next(iter(pcp_one.cache.keys())), json.dumps(dataclasses.asdict(PoTokenResponse(None))), int(time.time() + 1000))
+ assert cache.get(pot_request).po_token == valid_response.po_token
+ assert pcp_one.get_calls == pcp_two.get_calls == 1
+ assert pcp_one.delete_calls == 1 # Invalid response should be deleted from cache
+ assert pcp_one.store_calls == 3 # Since response was fetched from second cache provider, it should be stored in the first one
+ assert len(pcp_one.cache) == 1
+ assert 'Invalid PO Token response retrieved from cache provider "memory": {"po_token": null, "expires_at": null}; example bug report message' in logger.messages['error']
+
+ # Overwrite the valid response with an invalid json in the cache
+ pcp_one.store(next(iter(pcp_one.cache.keys())), 'invalid-json', int(time.time() + 1000))
+ assert cache.get(pot_request).po_token == valid_response.po_token
+ assert pcp_one.get_calls == pcp_two.get_calls == 2
+ assert pcp_one.delete_calls == 2
+ assert pcp_one.store_calls == 5 # 3 + 1 store we made in the test + 1 store from lower priority cache provider
+ assert len(pcp_one.cache) == 1
+
+ assert 'Invalid PO Token response retrieved from cache provider "memory": invalid-json; example bug report message' in logger.messages['error']
+
+ # Valid json, but missing required fields
+ pcp_one.store(next(iter(pcp_one.cache.keys())), '{"unknown_param": 0}', int(time.time() + 1000))
+ assert cache.get(pot_request).po_token == valid_response.po_token
+ assert pcp_one.get_calls == pcp_two.get_calls == 3
+ assert pcp_one.delete_calls == 3
+ assert pcp_one.store_calls == 7 # 5 + 1 store from test + 1 store from lower priority cache provider
+ assert len(pcp_one.cache) == 1
+
+ assert 'Invalid PO Token response retrieved from cache provider "memory": {"unknown_param": 0}; example bug report message' in logger.messages['error']
+
+ def test_store_invalid_po_token_response(self, pot_request, ie, logger):
+ # Should not store an invalid po token response
+ pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one')
+
+ cache = PoTokenCache(
+ cache_providers=[pcp_one],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ cache.store(pot_request, PoTokenResponse(po_token=EXAMPLE_PO_TOKEN, expires_at=80))
+ assert cache.get(pot_request) is None
+ assert pcp_one.store_calls == 0
+ assert 'Invalid PO Token response provided to PoTokenCache.store()' in logger.messages['error'][0]
+
+ def test_store_write_policy(self, pot_request, ie, logger):
+ pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one')
+ pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two')
+
+ cache = PoTokenCache(
+ cache_providers=[pcp_one, pcp_two],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN), write_policy=CacheProviderWritePolicy.WRITE_FIRST)
+ assert pcp_one.store_calls == 1
+ assert pcp_two.store_calls == 0
+
+ cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN), write_policy=CacheProviderWritePolicy.WRITE_ALL)
+ assert pcp_one.store_calls == 2
+ assert pcp_two.store_calls == 1
+
+ def test_store_write_first_policy_cache_spec(self, pot_request, ie, logger):
+ pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one')
+ pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two')
+
+ class WriteFirstPCSP(BaseMockCacheSpecProvider):
+ def generate_cache_spec(self, request: PoTokenRequest):
+ super().generate_cache_spec(request)
+ return PoTokenCacheSpec(
+ key_bindings={'v': request.video_id, 'e': None},
+ default_ttl=60,
+ write_policy=CacheProviderWritePolicy.WRITE_FIRST,
+ )
+
+ cache = PoTokenCache(
+ cache_providers=[pcp_one, pcp_two],
+ cache_spec_providers=[WriteFirstPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN))
+ assert pcp_one.store_calls == 1
+ assert pcp_two.store_calls == 0
+
+ def test_store_write_all_policy_cache_spec(self, pot_request, ie, logger):
+ pcp_one = create_memory_pcp(ie, logger, provider_key='memory_pcp_one')
+ pcp_two = create_memory_pcp(ie, logger, provider_key='memory_pcp_two')
+
+ class WriteAllPCSP(BaseMockCacheSpecProvider):
+ def generate_cache_spec(self, request: PoTokenRequest):
+ super().generate_cache_spec(request)
+ return PoTokenCacheSpec(
+ key_bindings={'v': request.video_id, 'e': None},
+ default_ttl=60,
+ write_policy=CacheProviderWritePolicy.WRITE_ALL,
+ )
+
+ cache = PoTokenCache(
+ cache_providers=[pcp_one, pcp_two],
+ cache_spec_providers=[WriteAllPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN))
+ assert pcp_one.store_calls == 1
+ assert pcp_two.store_calls == 1
+
+ def test_expires_at_pot_response(self, pot_request, memorypcp, ie, logger):
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=10000000000)
+ cache.store(pot_request, response)
+ assert next(iter(memorypcp.cache.values()))[1] == 10000000000
+
+ def test_expires_at_default_spec(self, pot_request, memorypcp, ie, logger):
+
+ class TtlPCSP(BaseMockCacheSpecProvider):
+ def generate_cache_spec(self, request: PoTokenRequest):
+ super().generate_cache_spec(request)
+ return PoTokenCacheSpec(
+ key_bindings={'v': request.video_id, 'e': None},
+ default_ttl=10000000000,
+ )
+
+ cache = PoTokenCache(
+ cache_providers=[memorypcp],
+ cache_spec_providers=[TtlPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response)
+ assert next(iter(memorypcp.cache.values()))[1] >= 10000000000
+
+ def test_cache_provider_error_no_fallback(self, pot_request, ie, logger):
+ error_pcp = ErrorPCP(ie, logger, {})
+ cache = PoTokenCache(
+ cache_providers=[error_pcp],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response)
+ assert cache.get(pot_request) is None
+ assert error_pcp.get_calls == 1
+ assert error_pcp.store_calls == 1
+
+ assert logger.messages['warning'].count("Error from \"error\" PO Token cache provider: PoTokenCacheProviderError('something went wrong'); example bug report message") == 2
+
+ def test_cache_provider_error_fallback(self, pot_request, ie, logger):
+ error_pcp = ErrorPCP(ie, logger, {})
+ memory_pcp = create_memory_pcp(ie, logger, provider_key='memory')
+
+ cache = PoTokenCache(
+ cache_providers=[error_pcp, memory_pcp],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response)
+
+ # 1. Store fails for error_pcp, stored in memory_pcp
+ # 2. Get fails for error_pcp, fetched from memory_pcp
+ # 3. Since fetched from lower priority, it should be stored in the highest priority cache provider
+ # 4. Store fails in error_pcp. Since write policy is WRITE_FIRST, it should not try to store in memory_pcp regardless of whether the store in error_pcp fails
+
+ assert cache.get(pot_request)
+ assert error_pcp.get_calls == 1
+ assert error_pcp.store_calls == 2 # since highest priority, when fetched from lower priority, it should be stored in the highest priority cache provider
+ assert memory_pcp.get_calls == 1
+ assert memory_pcp.store_calls == 1
+
+ assert logger.messages['warning'].count("Error from \"error\" PO Token cache provider: PoTokenCacheProviderError('something went wrong'); example bug report message") == 3
+
+ def test_cache_provider_unexpected_error_no_fallback(self, pot_request, ie, logger):
+ error_pcp = UnexpectedErrorPCP(ie, logger, {})
+ cache = PoTokenCache(
+ cache_providers=[error_pcp],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response)
+ assert cache.get(pot_request) is None
+ assert error_pcp.get_calls == 1
+ assert error_pcp.store_calls == 1
+
+ assert logger.messages['error'].count("Error occurred with \"unexpected_error\" PO Token cache provider: ValueError('something went wrong'); example bug report message") == 2
+
+ def test_cache_provider_unexpected_error_fallback(self, pot_request, ie, logger):
+ error_pcp = UnexpectedErrorPCP(ie, logger, {})
+ memory_pcp = create_memory_pcp(ie, logger, provider_key='memory')
+
+ cache = PoTokenCache(
+ cache_providers=[error_pcp, memory_pcp],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response)
+
+ # 1. Store fails for error_pcp, stored in memory_pcp
+ # 2. Get fails for error_pcp, fetched from memory_pcp
+ # 3. Since fetched from lower priority, it should be stored in the highest priority cache provider
+ # 4. Store fails in error_pcp. Since write policy is WRITE_FIRST, it should not try to store in memory_pcp regardless of whether the store in error_pcp fails
+
+ assert cache.get(pot_request)
+ assert error_pcp.get_calls == 1
+ assert error_pcp.store_calls == 2 # since highest priority, when fetched from lower priority, it should be stored in the highest priority cache provider
+ assert memory_pcp.get_calls == 1
+ assert memory_pcp.store_calls == 1
+
+ assert logger.messages['error'].count("Error occurred with \"unexpected_error\" PO Token cache provider: ValueError('something went wrong'); example bug report message") == 3
+
+ def test_cache_provider_unavailable_no_fallback(self, pot_request, ie, logger):
+ provider = create_memory_pcp(ie, logger, available=False)
+
+ cache = PoTokenCache(
+ cache_providers=[provider],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response)
+ assert cache.get(pot_request) is None
+ assert provider.get_calls == 0
+ assert provider.store_calls == 0
+ assert provider.available_called_times
+
+ def test_cache_provider_unavailable_fallback(self, pot_request, ie, logger):
+ provider_unavailable = create_memory_pcp(ie, logger, provider_key='unavailable', provider_name='unavailable', available=False)
+ provider_available = create_memory_pcp(ie, logger, provider_key='available', provider_name='available')
+
+ cache = PoTokenCache(
+ cache_providers=[provider_unavailable, provider_available],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response)
+ assert cache.get(pot_request) is not None
+ assert provider_unavailable.get_calls == 0
+ assert provider_unavailable.store_calls == 0
+ assert provider_available.get_calls == 1
+ assert provider_available.store_calls == 1
+ assert provider_unavailable.available_called_times
+ assert provider_available.available_called_times
+
+ # should not even try to use the provider for the request
+ assert 'Attempting to fetch a PO Token response from "unavailable" provider' not in logger.messages['trace']
+ assert 'Attempting to fetch a PO Token response from "available" provider' not in logger.messages['trace']
+
+ def test_available_not_called(self, ie, pot_request, logger):
+ # Test that the available method is not called when provider higher in the list is available
+ provider_unavailable = create_memory_pcp(
+ ie, logger, provider_key='unavailable', provider_name='unavailable', available=False)
+ provider_available = create_memory_pcp(ie, logger, provider_key='available', provider_name='available')
+
+ logger.log_level = logger.LogLevel.INFO
+
+ cache = PoTokenCache(
+ cache_providers=[provider_available, provider_unavailable],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response, write_policy=CacheProviderWritePolicy.WRITE_FIRST)
+ assert cache.get(pot_request) is not None
+ assert provider_unavailable.get_calls == 0
+ assert provider_unavailable.store_calls == 0
+ assert provider_available.get_calls == 1
+ assert provider_available.store_calls == 1
+ assert provider_unavailable.available_called_times == 0
+ assert provider_available.available_called_times
+ assert 'PO Token Cache Providers: available-0.0.0 (external), unavailable-0.0.0 (external, unavailable)' not in logger.messages.get('trace', [])
+
+ def test_available_called_trace(self, ie, pot_request, logger):
+ # But if logging level is trace should call available (as part of debug logging)
+ provider_unavailable = create_memory_pcp(
+ ie, logger, provider_key='unavailable', provider_name='unavailable', available=False)
+ provider_available = create_memory_pcp(ie, logger, provider_key='available', provider_name='available')
+
+ logger.log_level = logger.LogLevel.TRACE
+
+ cache = PoTokenCache(
+ cache_providers=[provider_available, provider_unavailable],
+ cache_spec_providers=[ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})],
+ logger=logger,
+ )
+
+ response = PoTokenResponse(EXAMPLE_PO_TOKEN)
+ cache.store(pot_request, response, write_policy=CacheProviderWritePolicy.WRITE_FIRST)
+ assert cache.get(pot_request) is not None
+ assert provider_unavailable.get_calls == 0
+ assert provider_unavailable.store_calls == 0
+ assert provider_available.get_calls == 1
+ assert provider_available.store_calls == 1
+ assert provider_unavailable.available_called_times
+ assert provider_available.available_called_times
+ assert 'PO Token Cache Providers: available-0.0.0 (external), unavailable-0.0.0 (external, unavailable)' in logger.messages.get('trace', [])
+
+ def test_close(self, ie, pot_request, logger):
+ # Should call close on the cache providers and cache specs
+ memory_pcp = create_memory_pcp(ie, logger, provider_key='memory')
+ memory2_pcp = create_memory_pcp(ie, logger, provider_key='memory2')
+
+ spec1 = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ spec2 = UnavailableCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+
+ cache = PoTokenCache(
+ cache_providers=[memory2_pcp, memory_pcp],
+ cache_spec_providers=[spec1, spec2],
+ logger=logger,
+ )
+
+ cache.close()
+ assert memory_pcp.close_called
+ assert memory2_pcp.close_called
+ assert spec1.close_called
+ assert spec2.close_called
+
+
+class TestPoTokenRequestDirector:
+
+ def test_request_pot_success(self, ie, pot_request, pot_cache, pot_provider, logger):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ director.register_provider(pot_provider)
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+
+ def test_request_and_cache(self, ie, pot_request, pot_cache, pot_provider, logger):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ director.register_provider(pot_provider)
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert pot_provider.request_called_times == 1
+ assert pot_cache.get_calls == 1
+ assert pot_cache.store_calls == 1
+
+ # Second request, should be cached
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert pot_cache.get_calls == 2
+ assert pot_cache.store_calls == 1
+ assert pot_provider.request_called_times == 1
+
+ def test_bypass_cache(self, ie, pot_request, pot_cache, logger, pot_provider):
+ pot_request.bypass_cache = True
+
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ director.register_provider(pot_provider)
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert pot_provider.request_called_times == 1
+ assert pot_cache.get_calls == 0
+ assert pot_cache.store_calls == 1
+
+ # Second request, should not get from cache
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert pot_provider.request_called_times == 2
+ assert pot_cache.get_calls == 0
+ assert pot_cache.store_calls == 2
+
+ # POT is still cached, should get from cache
+ pot_request.bypass_cache = False
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert pot_provider.request_called_times == 2
+ assert pot_cache.get_calls == 1
+ assert pot_cache.store_calls == 2
+
+ def test_clean_pot_generate(self, ie, pot_request, pot_cache, logger):
+ # Token should be cleaned before returning
+ base_token = base64.urlsafe_b64encode(b'token').decode()
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = success_ptp(PoTokenResponse(base_token + '?extra=params'))(ie, logger, settings={})
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == base_token
+ assert provider.request_called_times == 1
+
+ # Confirm the cleaned version was stored in the cache
+ cached_token = pot_cache.get(pot_request)
+ assert cached_token.po_token == base_token
+
+ def test_clean_pot_cache(self, ie, pot_request, pot_cache, logger, pot_provider):
+ # Token retrieved from cache should be cleaned before returning
+ base_token = base64.urlsafe_b64encode(b'token').decode()
+ pot_cache.store(pot_request, PoTokenResponse(base_token + '?extra=params'))
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ director.register_provider(pot_provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == base_token
+ assert pot_cache.get_calls == 1
+ assert pot_provider.request_called_times == 0
+
+ def test_cache_expires_at_none(self, ie, pot_request, pot_cache, logger, pot_provider):
+ # Should cache if expires_at=None in the response
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = success_ptp(PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=None))(ie, logger, settings={})
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert pot_cache.store_calls == 1
+ assert pot_cache.get(pot_request).po_token == EXAMPLE_PO_TOKEN
+
+ def test_cache_expires_at_positive(self, ie, pot_request, pot_cache, logger, pot_provider):
+ # Should cache if expires_at is a positive number in the response
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = success_ptp(PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=99999999999))(ie, logger, settings={})
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert pot_cache.store_calls == 1
+ assert pot_cache.get(pot_request).po_token == EXAMPLE_PO_TOKEN
+
+ @pytest.mark.parametrize('expires_at', [0, -1])
+ def test_not_cache_expires_at(self, ie, pot_request, pot_cache, logger, pot_provider, expires_at):
+ # Should not cache if expires_at <= 0 in the response
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = success_ptp(PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=expires_at))(ie, logger, settings={})
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert pot_cache.store_calls == 0
+ assert pot_cache.get(pot_request) is None
+
+ def test_no_providers(self, ie, pot_request, pot_cache, logger):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ response = director.get_po_token(pot_request)
+ assert response is None
+
+ def test_try_cache_no_providers(self, ie, pot_request, pot_cache, logger):
+ # Should still try the cache even if no providers are configured
+ pot_cache.store(pot_request, PoTokenResponse(EXAMPLE_PO_TOKEN))
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+
+ def test_close(self, ie, pot_request, pot_cache, pot_provider, logger):
+ # Should call close on the pot cache and any providers
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+
+ provider2 = UnavailablePTP(ie, logger, {})
+ director.register_provider(pot_provider)
+ director.register_provider(provider2)
+
+ director.close()
+ assert pot_provider.close_called
+ assert provider2.close_called
+ assert pot_cache.close_called
+
+ def test_pot_provider_preferences(self, pot_request, pot_cache, ie, logger):
+ pot_request.bypass_cache = True
+ provider_two_pot = base64.urlsafe_b64encode(b'token2').decode()
+
+ example_provider = success_ptp(response=PoTokenResponse(EXAMPLE_PO_TOKEN), key='exampleone')(ie, logger, settings={})
+ example_provider_two = success_ptp(response=PoTokenResponse(provider_two_pot), key='exampletwo')(ie, logger, settings={})
+
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ director.register_provider(example_provider)
+ director.register_provider(example_provider_two)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert example_provider.request_called_times == 1
+ assert example_provider_two.request_called_times == 0
+
+ standard_preference_called = False
+ example_preference_called = False
+
+ # Test that the provider preferences are respected
+ def standard_preference(provider, request, *_, **__):
+ nonlocal standard_preference_called
+ standard_preference_called = True
+ assert isinstance(provider, PoTokenProvider)
+ assert isinstance(request, PoTokenRequest)
+ return 1
+
+ def example_preference(provider, request, *_, **__):
+ nonlocal example_preference_called
+ example_preference_called = True
+ assert isinstance(provider, PoTokenProvider)
+ assert isinstance(request, PoTokenRequest)
+ if provider.PROVIDER_KEY == example_provider.PROVIDER_KEY:
+ return -100
+ return 0
+
+ # test that it can handle multiple preferences
+ director.register_preference(example_preference)
+ director.register_preference(standard_preference)
+
+ response = director.get_po_token(pot_request)
+ assert response == provider_two_pot
+ assert example_provider.request_called_times == 1
+ assert example_provider_two.request_called_times == 1
+ assert standard_preference_called
+ assert example_preference_called
+
+ def test_unsupported_request_no_fallback(self, ie, logger, pot_cache, pot_request):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = UnsupportedPTP(ie, logger, {})
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response is None
+ assert provider.request_called_times == 1
+
+ def test_unsupported_request_fallback(self, ie, logger, pot_cache, pot_request, pot_provider):
+ # Should fallback to the next provider if the first one does not support the request
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = UnsupportedPTP(ie, logger, {})
+ director.register_provider(provider)
+ director.register_provider(pot_provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert provider.request_called_times == 1
+ assert pot_provider.request_called_times == 1
+ assert 'PO Token Provider "unsupported" rejected this request, trying next available provider. Reason: unsupported request' in logger.messages['trace']
+
+ def test_unavailable_request_no_fallback(self, ie, logger, pot_cache, pot_request):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = UnavailablePTP(ie, logger, {})
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response is None
+ assert provider.request_called_times == 0
+ assert provider.available_called_times
+
+ def test_unavailable_request_fallback(self, ie, logger, pot_cache, pot_request, pot_provider):
+ # Should fallback to the next provider if the first one is unavailable
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = UnavailablePTP(ie, logger, {})
+ director.register_provider(provider)
+ director.register_provider(pot_provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert provider.request_called_times == 0
+ assert provider.available_called_times
+ assert pot_provider.request_called_times == 1
+ assert pot_provider.available_called_times
+ # should not even try to use the provider for the request
+ assert 'Attempting to fetch a PO Token from "unavailable" provider' not in logger.messages['trace']
+ assert 'Attempting to fetch a PO Token from "success" provider' in logger.messages['trace']
+
+ def test_available_not_called(self, ie, logger, pot_cache, pot_request, pot_provider):
+ # Test that the available method is not called when provider higher in the list is available
+ logger.log_level = logger.LogLevel.INFO
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = UnavailablePTP(ie, logger, {})
+ director.register_provider(pot_provider)
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert provider.request_called_times == 0
+ assert provider.available_called_times == 0
+ assert pot_provider.request_called_times == 1
+ assert pot_provider.available_called_times == 2
+ assert 'PO Token Providers: success-0.0.1 (external), unavailable-0.0.0 (external, unavailable)' not in logger.messages.get('trace', [])
+
+ def test_available_called_trace(self, ie, logger, pot_cache, pot_request, pot_provider):
+ # But if logging level is trace should call available (as part of debug logging)
+ logger.log_level = logger.LogLevel.TRACE
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = UnavailablePTP(ie, logger, {})
+ director.register_provider(pot_provider)
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert provider.request_called_times == 0
+ assert provider.available_called_times == 1
+ assert pot_provider.request_called_times == 1
+ assert pot_provider.available_called_times == 3
+ assert 'PO Token Providers: success-0.0.1 (external), unavailable-0.0.0 (external, unavailable)' in logger.messages['trace']
+
+ def test_provider_error_no_fallback_unexpected(self, ie, logger, pot_cache, pot_request):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = ErrorPTP(ie, logger, {})
+ director.register_provider(provider)
+ pot_request.video_id = 'unexpected'
+ response = director.get_po_token(pot_request)
+ assert response is None
+ assert provider.request_called_times == 1
+ assert "Error fetching PO Token from \"error\" provider: PoTokenProviderError('an error occurred'); please report this issue to the provider developer at https://error.example.com/issues ." in logger.messages['warning']
+
+ def test_provider_error_no_fallback_expected(self, ie, logger, pot_cache, pot_request):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = ErrorPTP(ie, logger, {})
+ director.register_provider(provider)
+ pot_request.video_id = 'expected'
+ response = director.get_po_token(pot_request)
+ assert response is None
+ assert provider.request_called_times == 1
+ assert "Error fetching PO Token from \"error\" provider: PoTokenProviderError('an error occurred')" in logger.messages['warning']
+
+ def test_provider_error_fallback(self, ie, logger, pot_cache, pot_request, pot_provider):
+ # Should fallback to the next provider if the first one raises an error
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = ErrorPTP(ie, logger, {})
+ director.register_provider(provider)
+ director.register_provider(pot_provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert provider.request_called_times == 1
+ assert pot_provider.request_called_times == 1
+ assert "Error fetching PO Token from \"error\" provider: PoTokenProviderError('an error occurred'); please report this issue to the provider developer at https://error.example.com/issues ." in logger.messages['warning']
+
+ def test_provider_unexpected_error_no_fallback(self, ie, logger, pot_cache, pot_request):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = UnexpectedErrorPTP(ie, logger, {})
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response is None
+ assert provider.request_called_times == 1
+ assert "Unexpected error when fetching PO Token from \"unexpected_error\" provider: ValueError('an unexpected error occurred'); please report this issue to the provider developer at https://unexpected.example.com/issues ." in logger.messages['error']
+
+ def test_provider_unexpected_error_fallback(self, ie, logger, pot_cache, pot_request, pot_provider):
+ # Should fallback to the next provider if the first one raises an unexpected error
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = UnexpectedErrorPTP(ie, logger, {})
+ director.register_provider(provider)
+ director.register_provider(pot_provider)
+
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert provider.request_called_times == 1
+ assert pot_provider.request_called_times == 1
+ assert "Unexpected error when fetching PO Token from \"unexpected_error\" provider: ValueError('an unexpected error occurred'); please report this issue to the provider developer at https://unexpected.example.com/issues ." in logger.messages['error']
+
+ def test_invalid_po_token_response_type(self, ie, logger, pot_cache, pot_request, pot_provider):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = InvalidPTP(ie, logger, {})
+ director.register_provider(provider)
+
+ pot_request.video_id = 'invalid_type'
+
+ response = director.get_po_token(pot_request)
+ assert response is None
+ assert provider.request_called_times == 1
+ assert 'Invalid PO Token response received from "invalid" provider: invalid-response; please report this issue to the provider developer at https://invalid.example.com/issues .' in logger.messages['error']
+
+ # Should fallback to next available provider
+ director.register_provider(pot_provider)
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert provider.request_called_times == 2
+ assert pot_provider.request_called_times == 1
+
+ def test_invalid_po_token_response(self, ie, logger, pot_cache, pot_request, pot_provider):
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+ provider = InvalidPTP(ie, logger, {})
+ director.register_provider(provider)
+
+ response = director.get_po_token(pot_request)
+ assert response is None
+ assert provider.request_called_times == 1
+ assert "Invalid PO Token response received from \"invalid\" provider: PoTokenResponse(po_token='example-token?', expires_at='123'); please report this issue to the provider developer at https://invalid.example.com/issues ." in logger.messages['error']
+
+ # Should fallback to next available provider
+ director.register_provider(pot_provider)
+ response = director.get_po_token(pot_request)
+ assert response == EXAMPLE_PO_TOKEN
+ assert provider.request_called_times == 2
+ assert pot_provider.request_called_times == 1
+
+ def test_copy_request_provider(self, ie, logger, pot_cache, pot_request):
+
+ class BadProviderPTP(BaseMockPoTokenProvider):
+ _SUPPORTED_CONTEXTS = None
+ _SUPPORTED_CLIENTS = None
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ # Providers should not modify the request object, but we should guard against it
+ request.video_id = 'bad'
+ raise PoTokenProviderRejectedRequest('bad request')
+
+ class GoodProviderPTP(BaseMockPoTokenProvider):
+ _SUPPORTED_CONTEXTS = None
+ _SUPPORTED_CLIENTS = None
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ return PoTokenResponse(base64.urlsafe_b64encode(request.video_id.encode()).decode())
+
+ director = PoTokenRequestDirector(logger=logger, cache=pot_cache)
+
+ bad_provider = BadProviderPTP(ie, logger, {})
+ good_provider = GoodProviderPTP(ie, logger, {})
+
+ director.register_provider(bad_provider)
+ director.register_provider(good_provider)
+
+ pot_request.video_id = 'good'
+ response = director.get_po_token(pot_request)
+ assert response == base64.urlsafe_b64encode(b'good').decode()
+ assert bad_provider.request_called_times == 1
+ assert good_provider.request_called_times == 1
+ assert pot_request.video_id == 'good'
+
+
+@pytest.mark.parametrize('spec, expected', [
+ (None, False),
+ (PoTokenCacheSpec(key_bindings={'v': 'video-id'}, default_ttl=60, write_policy=None), False), # type: ignore
+ (PoTokenCacheSpec(key_bindings={'v': 'video-id'}, default_ttl='invalid'), False), # type: ignore
+ (PoTokenCacheSpec(key_bindings='invalid', default_ttl=60), False), # type: ignore
+ (PoTokenCacheSpec(key_bindings={2: 'video-id'}, default_ttl=60), False), # type: ignore
+ (PoTokenCacheSpec(key_bindings={'v': 2}, default_ttl=60), False), # type: ignore
+ (PoTokenCacheSpec(key_bindings={'v': None}, default_ttl=60), False), # type: ignore
+
+ (PoTokenCacheSpec(key_bindings={'v': 'video_id', 'e': None}, default_ttl=60), True),
+ (PoTokenCacheSpec(key_bindings={'v': 'video_id'}, default_ttl=60, write_policy=CacheProviderWritePolicy.WRITE_FIRST), True),
+])
+def test_validate_cache_spec(spec, expected):
+ assert validate_cache_spec(spec) == expected
+
+
+@pytest.mark.parametrize('po_token', [
+ 'invalid-token?',
+ '123',
+])
+def test_clean_pot_fail(po_token):
+ with pytest.raises(ValueError, match='Invalid PO Token'):
+ clean_pot(po_token)
+
+
+@pytest.mark.parametrize('po_token,expected', [
+ ('TwAA/+8=', 'TwAA_-8='),
+ ('TwAA%5F%2D9VA6Q92v%5FvEQ4==?extra-param=2', 'TwAA_-9VA6Q92v_vEQ4='),
+])
+def test_clean_pot(po_token, expected):
+ assert clean_pot(po_token) == expected
+
+
+@pytest.mark.parametrize(
+ 'response, expected',
+ [
+ (None, False),
+ (PoTokenResponse(None), False),
+ (PoTokenResponse(1), False),
+ (PoTokenResponse('invalid-token?'), False),
+ (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at='abc'), False), # type: ignore
+ (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=100), False),
+ (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=time.time() + 10000.0), False), # type: ignore
+ (PoTokenResponse(EXAMPLE_PO_TOKEN), True),
+ (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=-1), True),
+ (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=0), True),
+ (PoTokenResponse(EXAMPLE_PO_TOKEN, expires_at=int(time.time()) + 10000), True),
+ ],
+)
+def test_validate_pot_response(response, expected):
+ assert validate_response(response) == expected
+
+
+def test_built_in_provider(ie, logger):
+ class BuiltinProviderDefaultT(BuiltinIEContentProvider, suffix='T'):
+ def is_available(self):
+ return True
+
+ class BuiltinProviderCustomNameT(BuiltinIEContentProvider, suffix='T'):
+ PROVIDER_NAME = 'CustomName'
+
+ def is_available(self):
+ return True
+
+ class ExternalProviderDefaultT(IEContentProvider, suffix='T'):
+ def is_available(self):
+ return True
+
+ class ExternalProviderCustomT(IEContentProvider, suffix='T'):
+ PROVIDER_NAME = 'custom'
+ PROVIDER_VERSION = '5.4b2'
+
+ def is_available(self):
+ return True
+
+ class ExternalProviderUnavailableT(IEContentProvider, suffix='T'):
+ def is_available(self) -> bool:
+ return False
+
+ class BuiltinProviderUnavailableT(IEContentProvider, suffix='T'):
+ def is_available(self) -> bool:
+ return False
+
+ built_in_default = BuiltinProviderDefaultT(ie=ie, logger=logger, settings={})
+ built_in_custom_name = BuiltinProviderCustomNameT(ie=ie, logger=logger, settings={})
+ built_in_unavailable = BuiltinProviderUnavailableT(ie=ie, logger=logger, settings={})
+ external_default = ExternalProviderDefaultT(ie=ie, logger=logger, settings={})
+ external_custom = ExternalProviderCustomT(ie=ie, logger=logger, settings={})
+ external_unavailable = ExternalProviderUnavailableT(ie=ie, logger=logger, settings={})
+
+ assert provider_display_list([]) == 'none'
+ assert provider_display_list([built_in_default]) == 'BuiltinProviderDefault'
+ assert provider_display_list([external_unavailable]) == 'ExternalProviderUnavailable-0.0.0 (external, unavailable)'
+ assert provider_display_list([
+ built_in_default,
+ built_in_custom_name,
+ external_default,
+ external_custom,
+ external_unavailable,
+ built_in_unavailable],
+ ) == 'BuiltinProviderDefault, CustomName, ExternalProviderDefault-0.0.0 (external), custom-5.4b2 (external), ExternalProviderUnavailable-0.0.0 (external, unavailable), BuiltinProviderUnavailable-0.0.0 (external, unavailable)'
diff --git a/test/test_pot/test_pot_framework.py b/test/test_pot/test_pot_framework.py
new file mode 100644
index 0000000000..bc94653f4a
--- /dev/null
+++ b/test/test_pot/test_pot_framework.py
@@ -0,0 +1,629 @@
+import pytest
+
+from yt_dlp.extractor.youtube.pot._provider import IEContentProvider
+from yt_dlp.cookies import YoutubeDLCookieJar
+from yt_dlp.utils.networking import HTTPHeaderDict
+from yt_dlp.extractor.youtube.pot.provider import (
+ PoTokenRequest,
+ PoTokenContext,
+ ExternalRequestFeature,
+
+)
+
+from yt_dlp.extractor.youtube.pot.cache import (
+ PoTokenCacheProvider,
+ PoTokenCacheSpec,
+ PoTokenCacheSpecProvider,
+ CacheProviderWritePolicy,
+)
+
+import yt_dlp.extractor.youtube.pot.cache as cache
+
+from yt_dlp.networking import Request
+from yt_dlp.extractor.youtube.pot.provider import (
+ PoTokenResponse,
+ PoTokenProvider,
+ PoTokenProviderRejectedRequest,
+ provider_bug_report_message,
+ register_provider,
+ register_preference,
+)
+
+from yt_dlp.extractor.youtube.pot._registry import _pot_providers, _ptp_preferences, _pot_pcs_providers, _pot_cache_providers, _pot_cache_provider_preferences
+
+
+class ExamplePTP(PoTokenProvider):
+ PROVIDER_NAME = 'example'
+ PROVIDER_VERSION = '0.0.1'
+ BUG_REPORT_LOCATION = 'https://example.com/issues'
+
+ _SUPPORTED_CLIENTS = ('WEB',)
+ _SUPPORTED_CONTEXTS = (PoTokenContext.GVS, )
+
+ _SUPPORTED_EXTERNAL_REQUEST_FEATURES = (
+ ExternalRequestFeature.PROXY_SCHEME_HTTP,
+ ExternalRequestFeature.PROXY_SCHEME_SOCKS5H,
+ )
+
+ def is_available(self) -> bool:
+ return True
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ return PoTokenResponse('example-token', expires_at=123)
+
+
+class ExampleCacheProviderPCP(PoTokenCacheProvider):
+
+ PROVIDER_NAME = 'example'
+ PROVIDER_VERSION = '0.0.1'
+ BUG_REPORT_LOCATION = 'https://example.com/issues'
+
+ def is_available(self) -> bool:
+ return True
+
+ def get(self, key: str):
+ return 'example-cache'
+
+ def store(self, key: str, value: str, expires_at: int):
+ pass
+
+ def delete(self, key: str):
+ pass
+
+
+class ExampleCacheSpecProviderPCSP(PoTokenCacheSpecProvider):
+
+ PROVIDER_NAME = 'example'
+ PROVIDER_VERSION = '0.0.1'
+ BUG_REPORT_LOCATION = 'https://example.com/issues'
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ return PoTokenCacheSpec(
+ key_bindings={'field': 'example-key'},
+ default_ttl=60,
+ write_policy=CacheProviderWritePolicy.WRITE_FIRST,
+ )
+
+
+class TestPoTokenProvider:
+
+ def test_base_type(self):
+ assert issubclass(PoTokenProvider, IEContentProvider)
+
+ def test_create_provider_missing_fetch_method(self, ie, logger):
+ class MissingMethodsPTP(PoTokenProvider):
+ def is_available(self) -> bool:
+ return True
+
+ with pytest.raises(TypeError):
+ MissingMethodsPTP(ie=ie, logger=logger, settings={})
+
+ def test_create_provider_missing_available_method(self, ie, logger):
+ class MissingMethodsPTP(PoTokenProvider):
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ raise PoTokenProviderRejectedRequest('Not implemented')
+
+ with pytest.raises(TypeError):
+ MissingMethodsPTP(ie=ie, logger=logger, settings={})
+
+ def test_barebones_provider(self, ie, logger):
+ class BarebonesProviderPTP(PoTokenProvider):
+ def is_available(self) -> bool:
+ return True
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ raise PoTokenProviderRejectedRequest('Not implemented')
+
+ provider = BarebonesProviderPTP(ie=ie, logger=logger, settings={})
+ assert provider.PROVIDER_NAME == 'BarebonesProvider'
+ assert provider.PROVIDER_KEY == 'BarebonesProvider'
+ assert provider.PROVIDER_VERSION == '0.0.0'
+ assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'
+
+ def test_example_provider_success(self, ie, logger, pot_request):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+ assert provider.PROVIDER_NAME == 'example'
+ assert provider.PROVIDER_KEY == 'Example'
+ assert provider.PROVIDER_VERSION == '0.0.1'
+ assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .'
+ assert provider.is_available()
+
+ response = provider.request_pot(pot_request)
+
+ assert response.po_token == 'example-token'
+ assert response.expires_at == 123
+
+ def test_provider_unsupported_context(self, ie, logger, pot_request):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+ pot_request.context = PoTokenContext.PLAYER
+
+ with pytest.raises(PoTokenProviderRejectedRequest):
+ provider.request_pot(pot_request)
+
+ def test_provider_unsupported_client(self, ie, logger, pot_request):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+ pot_request.innertube_context['client']['clientName'] = 'ANDROID'
+
+ with pytest.raises(PoTokenProviderRejectedRequest):
+ provider.request_pot(pot_request)
+
+ def test_provider_unsupported_proxy_scheme(self, ie, logger, pot_request):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+ pot_request.request_proxy = 'socks4://example.com'
+
+ with pytest.raises(
+ PoTokenProviderRejectedRequest,
+ match='External requests by "example" provider do not support proxy scheme "socks4". Supported proxy '
+ 'schemes: http, socks5h',
+ ):
+ provider.request_pot(pot_request)
+
+ pot_request.request_proxy = 'http://example.com'
+
+ assert provider.request_pot(pot_request)
+
+ def test_provider_ignore_external_request_features(self, ie, logger, pot_request):
+ class InternalPTP(ExamplePTP):
+ _SUPPORTED_EXTERNAL_REQUEST_FEATURES = None
+
+ provider = InternalPTP(ie=ie, logger=logger, settings={})
+
+ pot_request.request_proxy = 'socks5://example.com'
+ assert provider.request_pot(pot_request)
+ pot_request.request_source_address = '0.0.0.0'
+ assert provider.request_pot(pot_request)
+
+ def test_provider_unsupported_external_request_source_address(self, ie, logger, pot_request):
+ class InternalPTP(ExamplePTP):
+ _SUPPORTED_EXTERNAL_REQUEST_FEATURES = tuple()
+
+ provider = InternalPTP(ie=ie, logger=logger, settings={})
+
+ pot_request.request_source_address = None
+ assert provider.request_pot(pot_request)
+
+ pot_request.request_source_address = '0.0.0.0'
+ with pytest.raises(
+ PoTokenProviderRejectedRequest,
+ match='External requests by "example" provider do not support setting source address',
+ ):
+ provider.request_pot(pot_request)
+
+ def test_provider_supported_external_request_source_address(self, ie, logger, pot_request):
+ class InternalPTP(ExamplePTP):
+ _SUPPORTED_EXTERNAL_REQUEST_FEATURES = (
+ ExternalRequestFeature.SOURCE_ADDRESS,
+ )
+
+ provider = InternalPTP(ie=ie, logger=logger, settings={})
+
+ pot_request.request_source_address = None
+ assert provider.request_pot(pot_request)
+
+ pot_request.request_source_address = '0.0.0.0'
+ assert provider.request_pot(pot_request)
+
+ def test_provider_unsupported_external_request_tls_verification(self, ie, logger, pot_request):
+ class InternalPTP(ExamplePTP):
+ _SUPPORTED_EXTERNAL_REQUEST_FEATURES = tuple()
+
+ provider = InternalPTP(ie=ie, logger=logger, settings={})
+
+ pot_request.request_verify_tls = True
+ assert provider.request_pot(pot_request)
+
+ pot_request.request_verify_tls = False
+ with pytest.raises(
+ PoTokenProviderRejectedRequest,
+ match='External requests by "example" provider do not support ignoring TLS certificate failures',
+ ):
+ provider.request_pot(pot_request)
+
+ def test_provider_supported_external_request_tls_verification(self, ie, logger, pot_request):
+ class InternalPTP(ExamplePTP):
+ _SUPPORTED_EXTERNAL_REQUEST_FEATURES = (
+ ExternalRequestFeature.DISABLE_TLS_VERIFICATION,
+ )
+
+ provider = InternalPTP(ie=ie, logger=logger, settings={})
+
+ pot_request.request_verify_tls = True
+ assert provider.request_pot(pot_request)
+
+ pot_request.request_verify_tls = False
+ assert provider.request_pot(pot_request)
+
+ def test_provider_request_webpage(self, ie, logger, pot_request):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+
+ cookiejar = YoutubeDLCookieJar()
+ pot_request.request_headers = HTTPHeaderDict({'User-Agent': 'example-user-agent'})
+ pot_request.request_proxy = 'socks5://example-proxy.com'
+ pot_request.request_cookiejar = cookiejar
+
+ def mock_urlopen(request):
+ return request
+
+ ie._downloader.urlopen = mock_urlopen
+
+ sent_request = provider._request_webpage(Request(
+ 'https://example.com',
+ ), pot_request=pot_request)
+
+ assert sent_request.url == 'https://example.com'
+ assert sent_request.headers['User-Agent'] == 'example-user-agent'
+ assert sent_request.proxies == {'all': 'socks5://example-proxy.com'}
+ assert sent_request.extensions['cookiejar'] is cookiejar
+ assert 'Requesting webpage' in logger.messages['info']
+
+ def test_provider_request_webpage_override(self, ie, logger, pot_request):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+
+ cookiejar_request = YoutubeDLCookieJar()
+ pot_request.request_headers = HTTPHeaderDict({'User-Agent': 'example-user-agent'})
+ pot_request.request_proxy = 'socks5://example-proxy.com'
+ pot_request.request_cookiejar = cookiejar_request
+
+ def mock_urlopen(request):
+ return request
+
+ ie._downloader.urlopen = mock_urlopen
+
+ sent_request = provider._request_webpage(Request(
+ 'https://example.com',
+ headers={'User-Agent': 'override-user-agent-override'},
+ proxies={'http': 'http://example-proxy-override.com'},
+ extensions={'cookiejar': YoutubeDLCookieJar()},
+ ), pot_request=pot_request, note='Custom requesting webpage')
+
+ assert sent_request.url == 'https://example.com'
+ assert sent_request.headers['User-Agent'] == 'override-user-agent-override'
+ assert sent_request.proxies == {'http': 'http://example-proxy-override.com'}
+ assert sent_request.extensions['cookiejar'] is not cookiejar_request
+ assert 'Custom requesting webpage' in logger.messages['info']
+
+ def test_provider_request_webpage_no_log(self, ie, logger, pot_request):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+
+ def mock_urlopen(request):
+ return request
+
+ ie._downloader.urlopen = mock_urlopen
+
+ sent_request = provider._request_webpage(Request(
+ 'https://example.com',
+ ), note=False)
+
+ assert sent_request.url == 'https://example.com'
+ assert 'info' not in logger.messages
+
+ def test_provider_request_webpage_no_pot_request(self, ie, logger):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+
+ def mock_urlopen(request):
+ return request
+
+ ie._downloader.urlopen = mock_urlopen
+
+ sent_request = provider._request_webpage(Request(
+ 'https://example.com',
+ ), pot_request=None)
+
+ assert sent_request.url == 'https://example.com'
+
+ def test_get_config_arg(self, ie, logger):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})
+
+ assert provider._configuration_arg('abc') == ['123d']
+ assert provider._configuration_arg('abc', default=['default']) == ['123d']
+ assert provider._configuration_arg('ABC', default=['default']) == ['default']
+ assert provider._configuration_arg('abc', casesense=True) == ['123D']
+ assert provider._configuration_arg('xyz', casesense=False) == ['456a', '789b']
+
+ def test_require_class_end_with_suffix(self, ie, logger):
+ class InvalidSuffix(PoTokenProvider):
+ PROVIDER_NAME = 'invalid-suffix'
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ raise PoTokenProviderRejectedRequest('Not implemented')
+
+ def is_available(self) -> bool:
+ return True
+
+ provider = InvalidSuffix(ie=ie, logger=logger, settings={})
+
+ with pytest.raises(AssertionError):
+ provider.PROVIDER_KEY # noqa: B018
+
+
+class TestPoTokenCacheProvider:
+
+ def test_base_type(self):
+ assert issubclass(PoTokenCacheProvider, IEContentProvider)
+
+ def test_create_provider_missing_get_method(self, ie, logger):
+ class MissingMethodsPCP(PoTokenCacheProvider):
+ def store(self, key: str, value: str, expires_at: int):
+ pass
+
+ def delete(self, key: str):
+ pass
+
+ def is_available(self) -> bool:
+ return True
+
+ with pytest.raises(TypeError):
+ MissingMethodsPCP(ie=ie, logger=logger, settings={})
+
+ def test_create_provider_missing_store_method(self, ie, logger):
+ class MissingMethodsPCP(PoTokenCacheProvider):
+ def get(self, key: str):
+ pass
+
+ def delete(self, key: str):
+ pass
+
+ def is_available(self) -> bool:
+ return True
+
+ with pytest.raises(TypeError):
+ MissingMethodsPCP(ie=ie, logger=logger, settings={})
+
+ def test_create_provider_missing_delete_method(self, ie, logger):
+ class MissingMethodsPCP(PoTokenCacheProvider):
+ def get(self, key: str):
+ pass
+
+ def store(self, key: str, value: str, expires_at: int):
+ pass
+
+ def is_available(self) -> bool:
+ return True
+
+ with pytest.raises(TypeError):
+ MissingMethodsPCP(ie=ie, logger=logger, settings={})
+
+ def test_create_provider_missing_is_available_method(self, ie, logger):
+ class MissingMethodsPCP(PoTokenCacheProvider):
+ def get(self, key: str):
+ pass
+
+ def store(self, key: str, value: str, expires_at: int):
+ pass
+
+ def delete(self, key: str):
+ pass
+
+ with pytest.raises(TypeError):
+ MissingMethodsPCP(ie=ie, logger=logger, settings={})
+
+ def test_barebones_provider(self, ie, logger):
+ class BarebonesProviderPCP(PoTokenCacheProvider):
+
+ def is_available(self) -> bool:
+ return True
+
+ def get(self, key: str):
+ return 'example-cache'
+
+ def store(self, key: str, value: str, expires_at: int):
+ pass
+
+ def delete(self, key: str):
+ pass
+
+ provider = BarebonesProviderPCP(ie=ie, logger=logger, settings={})
+ assert provider.PROVIDER_NAME == 'BarebonesProvider'
+ assert provider.PROVIDER_KEY == 'BarebonesProvider'
+ assert provider.PROVIDER_VERSION == '0.0.0'
+ assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'
+
+ def test_create_provider_example(self, ie, logger):
+ provider = ExampleCacheProviderPCP(ie=ie, logger=logger, settings={})
+ assert provider.PROVIDER_NAME == 'example'
+ assert provider.PROVIDER_KEY == 'ExampleCacheProvider'
+ assert provider.PROVIDER_VERSION == '0.0.1'
+ assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .'
+ assert provider.is_available()
+
+ def test_get_config_arg(self, ie, logger):
+ provider = ExampleCacheProviderPCP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})
+ assert provider._configuration_arg('abc') == ['123d']
+ assert provider._configuration_arg('abc', default=['default']) == ['123d']
+ assert provider._configuration_arg('ABC', default=['default']) == ['default']
+ assert provider._configuration_arg('abc', casesense=True) == ['123D']
+ assert provider._configuration_arg('xyz', casesense=False) == ['456a', '789b']
+
+ def test_require_class_end_with_suffix(self, ie, logger):
+ class InvalidSuffix(PoTokenCacheProvider):
+ def get(self, key: str):
+ return 'example-cache'
+
+ def store(self, key: str, value: str, expires_at: int):
+ pass
+
+ def delete(self, key: str):
+ pass
+
+ def is_available(self) -> bool:
+ return True
+
+ provider = InvalidSuffix(ie=ie, logger=logger, settings={})
+
+ with pytest.raises(AssertionError):
+ provider.PROVIDER_KEY # noqa: B018
+
+
+class TestPoTokenCacheSpecProvider:
+
+ def test_base_type(self):
+ assert issubclass(PoTokenCacheSpecProvider, IEContentProvider)
+
+ def test_create_provider_missing_supports_method(self, ie, logger):
+ class MissingMethodsPCS(PoTokenCacheSpecProvider):
+ pass
+
+ with pytest.raises(TypeError):
+ MissingMethodsPCS(ie=ie, logger=logger, settings={})
+
+ def test_create_provider_barebones(self, ie, pot_request, logger):
+ class BarebonesProviderPCSP(PoTokenCacheSpecProvider):
+ def generate_cache_spec(self, request: PoTokenRequest):
+ return PoTokenCacheSpec(
+ default_ttl=100,
+ key_bindings={},
+ )
+
+ provider = BarebonesProviderPCSP(ie=ie, logger=logger, settings={})
+ assert provider.PROVIDER_NAME == 'BarebonesProvider'
+ assert provider.PROVIDER_KEY == 'BarebonesProvider'
+ assert provider.PROVIDER_VERSION == '0.0.0'
+ assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at (developer has not provided a bug report location) .'
+ assert provider.is_available()
+ assert provider.generate_cache_spec(request=pot_request).default_ttl == 100
+ assert provider.generate_cache_spec(request=pot_request).key_bindings == {}
+ assert provider.generate_cache_spec(request=pot_request).write_policy == CacheProviderWritePolicy.WRITE_ALL
+
+ def test_create_provider_example(self, ie, pot_request, logger):
+ provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={})
+ assert provider.PROVIDER_NAME == 'example'
+ assert provider.PROVIDER_KEY == 'ExampleCacheSpecProvider'
+ assert provider.PROVIDER_VERSION == '0.0.1'
+ assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .'
+ assert provider.is_available()
+ assert provider.generate_cache_spec(pot_request)
+ assert provider.generate_cache_spec(pot_request).key_bindings == {'field': 'example-key'}
+ assert provider.generate_cache_spec(pot_request).default_ttl == 60
+ assert provider.generate_cache_spec(pot_request).write_policy == CacheProviderWritePolicy.WRITE_FIRST
+
+ def test_get_config_arg(self, ie, logger):
+ provider = ExampleCacheSpecProviderPCSP(ie=ie, logger=logger, settings={'abc': ['123D'], 'xyz': ['456a', '789B']})
+
+ assert provider._configuration_arg('abc') == ['123d']
+ assert provider._configuration_arg('abc', default=['default']) == ['123d']
+ assert provider._configuration_arg('ABC', default=['default']) == ['default']
+ assert provider._configuration_arg('abc', casesense=True) == ['123D']
+ assert provider._configuration_arg('xyz', casesense=False) == ['456a', '789b']
+
+ def test_require_class_end_with_suffix(self, ie, logger):
+ class InvalidSuffix(PoTokenCacheSpecProvider):
+ def generate_cache_spec(self, request: PoTokenRequest):
+ return None
+
+ provider = InvalidSuffix(ie=ie, logger=logger, settings={})
+
+ with pytest.raises(AssertionError):
+ provider.PROVIDER_KEY # noqa: B018
+
+
+class TestPoTokenRequest:
+ def test_copy_request(self, pot_request):
+ copied_request = pot_request.copy()
+
+ assert copied_request is not pot_request
+ assert copied_request.context == pot_request.context
+ assert copied_request.innertube_context == pot_request.innertube_context
+ assert copied_request.innertube_context is not pot_request.innertube_context
+ copied_request.innertube_context['client']['clientName'] = 'ANDROID'
+ assert pot_request.innertube_context['client']['clientName'] != 'ANDROID'
+ assert copied_request.innertube_host == pot_request.innertube_host
+ assert copied_request.session_index == pot_request.session_index
+ assert copied_request.player_url == pot_request.player_url
+ assert copied_request.is_authenticated == pot_request.is_authenticated
+ assert copied_request.visitor_data == pot_request.visitor_data
+ assert copied_request.data_sync_id == pot_request.data_sync_id
+ assert copied_request.video_id == pot_request.video_id
+ assert copied_request.request_cookiejar is pot_request.request_cookiejar
+ assert copied_request.request_proxy == pot_request.request_proxy
+ assert copied_request.request_headers == pot_request.request_headers
+ assert copied_request.request_headers is not pot_request.request_headers
+ assert copied_request.request_timeout == pot_request.request_timeout
+ assert copied_request.request_source_address == pot_request.request_source_address
+ assert copied_request.request_verify_tls == pot_request.request_verify_tls
+ assert copied_request.bypass_cache == pot_request.bypass_cache
+
+
+def test_provider_bug_report_message(ie, logger):
+ provider = ExamplePTP(ie=ie, logger=logger, settings={})
+ assert provider.BUG_REPORT_MESSAGE == 'please report this issue to the provider developer at https://example.com/issues .'
+
+ message = provider_bug_report_message(provider)
+ assert message == '; please report this issue to the provider developer at https://example.com/issues .'
+
+ message_before = provider_bug_report_message(provider, before='custom message!')
+ assert message_before == 'custom message! Please report this issue to the provider developer at https://example.com/issues .'
+
+
+def test_register_provider(ie):
+
+ @register_provider
+ class UnavailableProviderPTP(PoTokenProvider):
+ def is_available(self) -> bool:
+ return False
+
+ def _real_request_pot(self, request: PoTokenRequest) -> PoTokenResponse:
+ raise PoTokenProviderRejectedRequest('Not implemented')
+
+ assert _pot_providers.value.get('UnavailableProvider') == UnavailableProviderPTP
+ _pot_providers.value.pop('UnavailableProvider')
+
+
+def test_register_pot_preference(ie):
+ before = len(_ptp_preferences.value)
+
+ @register_preference(ExamplePTP)
+ def unavailable_preference(provider: PoTokenProvider, request: PoTokenRequest):
+ return 1
+
+ assert len(_ptp_preferences.value) == before + 1
+
+
+def test_register_cache_provider(ie):
+
+ @cache.register_provider
+ class UnavailableCacheProviderPCP(PoTokenCacheProvider):
+ def is_available(self) -> bool:
+ return False
+
+ def get(self, key: str):
+ return 'example-cache'
+
+ def store(self, key: str, value: str, expires_at: int):
+ pass
+
+ def delete(self, key: str):
+ pass
+
+ assert _pot_cache_providers.value.get('UnavailableCacheProvider') == UnavailableCacheProviderPCP
+ _pot_cache_providers.value.pop('UnavailableCacheProvider')
+
+
+def test_register_cache_provider_spec(ie):
+
+ @cache.register_spec
+ class UnavailableCacheProviderPCSP(PoTokenCacheSpecProvider):
+ def is_available(self) -> bool:
+ return False
+
+ def generate_cache_spec(self, request: PoTokenRequest):
+ return None
+
+ assert _pot_pcs_providers.value.get('UnavailableCacheProvider') == UnavailableCacheProviderPCSP
+ _pot_pcs_providers.value.pop('UnavailableCacheProvider')
+
+
+def test_register_cache_provider_preference(ie):
+ before = len(_pot_cache_provider_preferences.value)
+
+ @cache.register_preference(ExampleCacheProviderPCP)
+ def unavailable_preference(provider: PoTokenCacheProvider, request: PoTokenRequest):
+ return 1
+
+ assert len(_pot_cache_provider_preferences.value) == before + 1
+
+
+def test_logger_log_level(logger):
+ assert logger.LogLevel('INFO') == logger.LogLevel.INFO
+ assert logger.LogLevel('debuG') == logger.LogLevel.DEBUG
+ assert logger.LogLevel(10) == logger.LogLevel.DEBUG
+ assert logger.LogLevel('UNKNOWN') == logger.LogLevel.INFO
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 0f0885366e..3f777aed7a 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -316,6 +316,10 @@
'https://www.youtube.com/s/player/8a8ac953/tv-player-es6.vflset/tv-player-es6.js',
'MiBYeXx_vRREbiCCmh', 'RtZYMVvmkE0JE',
),
+ (
+ 'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js',
+ 'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h',
+ ),
]
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 0ace126cf5..6f25e80aed 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -644,6 +644,7 @@ def __init__(self, params=None, auto_init=True):
self._printed_messages = set()
self._first_webpage_request = True
self._post_hooks = []
+ self._close_hooks = []
self._progress_hooks = []
self._postprocessor_hooks = []
self._download_retcode = 0
@@ -912,6 +913,11 @@ def add_post_hook(self, ph):
"""Add the post hook"""
self._post_hooks.append(ph)
+ def add_close_hook(self, ch):
+ """Add a close hook, called when YoutubeDL.close() is called"""
+ assert callable(ch), 'Close hook must be callable'
+ self._close_hooks.append(ch)
+
def add_progress_hook(self, ph):
"""Add the download progress hook"""
self._progress_hooks.append(ph)
@@ -1020,6 +1026,9 @@ def close(self):
self._request_director.close()
del self._request_director
+ for close_hook in self._close_hooks:
+ close_hook()
+
def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears.
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index fad323c901..5675445ace 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -764,11 +764,11 @@ def _get_linux_desktop_environment(env, logger):
GetDesktopEnvironment
"""
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
- desktop_session = env.get('DESKTOP_SESSION', None)
+ desktop_session = env.get('DESKTOP_SESSION', '')
if xdg_current_desktop is not None:
for part in map(str.strip, xdg_current_desktop.split(':')):
if part == 'Unity':
- if desktop_session is not None and 'gnome-fallback' in desktop_session:
+ if 'gnome-fallback' in desktop_session:
return _LinuxDesktopEnvironment.GNOME
else:
return _LinuxDesktopEnvironment.UNITY
@@ -797,35 +797,34 @@ def _get_linux_desktop_environment(env, logger):
return _LinuxDesktopEnvironment.UKUI
elif part == 'LXQt':
return _LinuxDesktopEnvironment.LXQT
- logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
+ logger.debug(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
- elif desktop_session is not None:
- if desktop_session == 'deepin':
- return _LinuxDesktopEnvironment.DEEPIN
- elif desktop_session in ('mate', 'gnome'):
- return _LinuxDesktopEnvironment.GNOME
- elif desktop_session in ('kde4', 'kde-plasma'):
+ if desktop_session == 'deepin':
+ return _LinuxDesktopEnvironment.DEEPIN
+ elif desktop_session in ('mate', 'gnome'):
+ return _LinuxDesktopEnvironment.GNOME
+ elif desktop_session in ('kde4', 'kde-plasma'):
+ return _LinuxDesktopEnvironment.KDE4
+ elif desktop_session == 'kde':
+ if 'KDE_SESSION_VERSION' in env:
return _LinuxDesktopEnvironment.KDE4
- elif desktop_session == 'kde':
- if 'KDE_SESSION_VERSION' in env:
- return _LinuxDesktopEnvironment.KDE4
- else:
- return _LinuxDesktopEnvironment.KDE3
- elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
- return _LinuxDesktopEnvironment.XFCE
- elif desktop_session == 'ukui':
- return _LinuxDesktopEnvironment.UKUI
else:
- logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
-
+ return _LinuxDesktopEnvironment.KDE3
+ elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
+ return _LinuxDesktopEnvironment.XFCE
+ elif desktop_session == 'ukui':
+ return _LinuxDesktopEnvironment.UKUI
else:
- if 'GNOME_DESKTOP_SESSION_ID' in env:
- return _LinuxDesktopEnvironment.GNOME
- elif 'KDE_FULL_SESSION' in env:
- if 'KDE_SESSION_VERSION' in env:
- return _LinuxDesktopEnvironment.KDE4
- else:
- return _LinuxDesktopEnvironment.KDE3
+ logger.debug(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
+
+ if 'GNOME_DESKTOP_SESSION_ID' in env:
+ return _LinuxDesktopEnvironment.GNOME
+ elif 'KDE_FULL_SESSION' in env:
+ if 'KDE_SESSION_VERSION' in env:
+ return _LinuxDesktopEnvironment.KDE4
+ else:
+ return _LinuxDesktopEnvironment.KDE3
+
return _LinuxDesktopEnvironment.OTHER
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index e7dcb9853e..c516c79ce5 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -2147,6 +2147,7 @@
from .toggo import ToggoIE
from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE
+from .toutiao import ToutiaoIE
from .toutv import TouTvIE
from .toypics import (
ToypicsIE,
@@ -2369,6 +2370,7 @@
VHXEmbedIE,
VimeoAlbumIE,
VimeoChannelIE,
+ VimeoEventIE,
VimeoGroupsIE,
VimeoIE,
VimeoLikesIE,
diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py
index e50194f88b..caff9125e0 100644
--- a/yt_dlp/extractor/lrt.py
+++ b/yt_dlp/extractor/lrt.py
@@ -2,7 +2,6 @@
from ..utils import (
clean_html,
merge_dicts,
- str_or_none,
traverse_obj,
unified_timestamp,
url_or_none,
@@ -138,13 +137,15 @@ def _real_extract(self, url):
'https://www.lrt.lt/radioteka/api/media', video_id,
query={'url': f'/mediateka/irasas/{video_id}/{path}'})
- return traverse_obj(media, {
- 'id': ('id', {int}, {str_or_none}),
- 'title': ('title', {str}),
- 'tags': ('tags', ..., 'name', {str}),
- 'categories': ('playlist_item', 'category', {str}, filter, all, filter),
- 'description': ('content', {clean_html}, {str}),
- 'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}),
- 'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
- 'formats': ('playlist_item', 'file', {lambda x: self._extract_m3u8_formats(x, video_id)}),
- })
+ return {
+ 'id': video_id,
+ 'formats': self._extract_m3u8_formats(media['playlist_item']['file'], video_id),
+ **traverse_obj(media, {
+ 'title': ('title', {str}),
+ 'tags': ('tags', ..., 'name', {str}),
+ 'categories': ('playlist_item', 'category', {str}, filter, all, filter),
+ 'description': ('content', {clean_html}, {str}),
+ 'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}),
+ 'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}),
+ }),
+ }
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index fc050c383b..0d0f7ceef0 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -32,7 +32,7 @@
urlencode_postdata,
urljoin,
)
-from ..utils.traversal import find_element, traverse_obj
+from ..utils.traversal import find_element, require, traverse_obj
class NiconicoBaseIE(InfoExtractor):
@@ -283,35 +283,54 @@ def _yield_dms_formats(self, api_data, video_id):
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
yield video_fmt
+ def _extract_server_response(self, webpage, video_id, fatal=True):
+ try:
+ return traverse_obj(
+ self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
+ ('data', 'response', {dict}, {require('server response')}))
+ except ExtractorError:
+ if not fatal:
+ return {}
+ raise
+
def _real_extract(self, url):
video_id = self._match_id(url)
try:
webpage, handle = self._download_webpage_handle(
- 'https://www.nicovideo.jp/watch/' + video_id, video_id)
+ f'https://www.nicovideo.jp/watch/{video_id}', video_id,
+ headers=self.geo_verification_headers())
if video_id.startswith('so'):
video_id = self._match_id(handle.url)
- api_data = traverse_obj(
- self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
- ('data', 'response', {dict}))
- if not api_data:
- raise ExtractorError('Server response data not found')
+ api_data = self._extract_server_response(webpage, video_id)
except ExtractorError as e:
try:
api_data = self._download_json(
- f'https://www.nicovideo.jp/api/watch/v3/{video_id}?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_{round(time.time() * 1000)}', video_id,
- note='Downloading API JSON', errnote='Unable to fetch data')['data']
+ f'https://www.nicovideo.jp/api/watch/v3/{video_id}', video_id,
+ 'Downloading API JSON', 'Unable to fetch data', query={
+ '_frontendId': '6',
+ '_frontendVersion': '0',
+ 'actionTrackId': f'AAAAAAAAAA_{round(time.time() * 1000)}',
+ }, headers=self.geo_verification_headers())['data']
except ExtractorError:
if not isinstance(e.cause, HTTPError):
+ # Raise if original exception was from _parse_json or utils.traversal.require
raise
+ # The webpage server response has more detailed error info than the API response
webpage = e.cause.response.read().decode('utf-8', 'replace')
- error_msg = self._html_search_regex(
- r'(?s)',
- webpage, 'error reason', default=None)
- if not error_msg:
+ reason_code = self._extract_server_response(
+ webpage, video_id, fatal=False).get('reasonCode')
+ if not reason_code:
raise
- raise ExtractorError(clean_html(error_msg), expected=True)
+ if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ elif reason_code == 'HIDDEN_VIDEO':
+ raise ExtractorError(
+ 'The viewing period of this video has expired', expected=True)
+ elif reason_code == 'DELETED_VIDEO':
+ raise ExtractorError('This video has been deleted', expected=True)
+ raise ExtractorError(f'Niconico says: {reason_code}')
availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', {
'needs_premium': ('isPremium', {bool}),
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index 7794cae6c0..2c1436cac1 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -340,8 +340,9 @@ def _real_extract(self, url):
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
}))
- # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
- headers = {'referer': 'https://patreon.com/'}
+ # Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
+ # patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
+ headers = {'referer': 'https://www.patreon.com/'}
# handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
@@ -352,7 +353,7 @@ def _real_extract(self, url):
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
entries.append(self.url_result(
- VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
+ VimeoIE._smuggle_referrer(v_url, headers['referer']),
VimeoIE, url_transparent=True))
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
@@ -379,11 +380,13 @@ def _real_extract(self, url):
'url': post_file['url'],
})
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ post_file['url'], video_id, headers=headers)
entries.append({
'id': video_id,
'formats': formats,
'subtitles': subtitles,
+ 'http_headers': headers,
})
can_view_post = traverse_obj(attributes, 'current_user_can_view')
diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py
index 72e89c31ed..92431fa241 100644
--- a/yt_dlp/extractor/picarto.py
+++ b/yt_dlp/extractor/picarto.py
@@ -10,7 +10,8 @@
class PicartoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P[a-zA-Z0-9]+)'
+ IE_NAME = 'picarto'
+ _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P[^/#?]+)/?(?:$|[?#])'
_TEST = {
'url': 'https://picarto.tv/Setz',
'info_dict': {
@@ -89,7 +90,8 @@ def _real_extract(self, url):
class PicartoVodIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P[^/?#&]+)'
+ IE_NAME = 'picarto:vod'
+ _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+(?:/profile)?/videos)/(?P[^/?#&]+)'
_TESTS = [{
'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
@@ -111,6 +113,18 @@ class PicartoVodIE(InfoExtractor):
'channel': 'ArtofZod',
'age_limit': 18,
},
+ }, {
+ 'url': 'https://picarto.tv/DrechuArt/profile/videos/400347',
+ 'md5': 'f9ea54868b1d9dec40eb554b484cc7bf',
+ 'info_dict': {
+ 'id': '400347',
+ 'ext': 'mp4',
+ 'title': 'Welcome to the Show',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'channel': 'DrechuArt',
+ 'age_limit': 0,
+ },
+
}, {
'url': 'https://picarto.tv/videopopout/Plague',
'only_matching': True,
diff --git a/yt_dlp/extractor/playsuisse.py b/yt_dlp/extractor/playsuisse.py
index 9bf5765fa7..46e3a5b8ff 100644
--- a/yt_dlp/extractor/playsuisse.py
+++ b/yt_dlp/extractor/playsuisse.py
@@ -9,11 +9,10 @@
int_or_none,
join_nonempty,
parse_qs,
- traverse_obj,
update_url_query,
urlencode_postdata,
)
-from ..utils.traversal import unpack
+from ..utils.traversal import traverse_obj, unpack
class PlaySuisseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/podchaser.py b/yt_dlp/extractor/podchaser.py
index 4570f0f175..6c125f9ba6 100644
--- a/yt_dlp/extractor/podchaser.py
+++ b/yt_dlp/extractor/podchaser.py
@@ -5,11 +5,13 @@
from ..utils import (
OnDemandPagedList,
float_or_none,
+ int_or_none,
+ orderedSet,
str_or_none,
- str_to_int,
- traverse_obj,
unified_timestamp,
+ url_or_none,
)
+from ..utils.traversal import require, traverse_obj
class PodchaserIE(InfoExtractor):
@@ -21,24 +23,25 @@ class PodchaserIE(InfoExtractor):
'id': '104365585',
'title': 'Ep. 285 – freeze me off',
'description': 'cam ahn',
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'thumbnail': r're:https?://.+/.+\.jpg',
'ext': 'mp3',
- 'categories': ['Comedy'],
+ 'categories': ['Comedy', 'News', 'Politics', 'Arts'],
'tags': ['comedy', 'dark humor'],
- 'series': 'Cum Town',
+ 'series': 'The Adam Friedland Show Podcast',
'duration': 3708,
'timestamp': 1636531259,
'upload_date': '20211110',
'average_rating': 4.0,
+ 'series_id': '36924',
},
}, {
'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
'info_dict': {
'id': '28853',
'title': 'The Bone Zone',
- 'description': 'Podcast by The Bone Zone',
+ 'description': r're:The official home of the Bone Zone podcast.+',
},
- 'playlist_count': 275,
+ 'playlist_mincount': 275,
}, {
'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
'info_dict': {
@@ -51,19 +54,33 @@ class PodchaserIE(InfoExtractor):
@staticmethod
def _parse_episode(episode, podcast):
- return {
- 'id': str(episode.get('id')),
- 'title': episode.get('title'),
- 'description': episode.get('description'),
- 'url': episode.get('audio_url'),
- 'thumbnail': episode.get('image_url'),
- 'duration': str_to_int(episode.get('length')),
- 'timestamp': unified_timestamp(episode.get('air_date')),
- 'average_rating': float_or_none(episode.get('rating')),
- 'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
- 'tags': traverse_obj(podcast, ('tags', ..., 'text')),
- 'series': podcast.get('title'),
- }
+ info = traverse_obj(episode, {
+ 'id': ('id', {int}, {str_or_none}, {require('episode ID')}),
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'url': ('audio_url', {url_or_none}),
+ 'thumbnail': ('image_url', {url_or_none}),
+ 'duration': ('length', {int_or_none}),
+ 'timestamp': ('air_date', {unified_timestamp}),
+ 'average_rating': ('rating', {float_or_none}),
+ })
+ info.update(traverse_obj(podcast, {
+ 'series': ('title', {str}),
+ 'series_id': ('id', {int}, {str_or_none}),
+ 'categories': (('summary', None), 'categories', ..., 'text', {str}, filter, all, {orderedSet}),
+ 'tags': ('tags', ..., 'text', {str}),
+ }))
+ info['vcodec'] = 'none'
+
+ if info.get('series_id'):
+ podcast_slug = traverse_obj(podcast, ('slug', {str})) or 'podcast'
+ episode_slug = traverse_obj(episode, ('slug', {str})) or 'episode'
+ info['webpage_url'] = '/'.join((
+ 'https://www.podchaser.com/podcasts',
+ '-'.join((podcast_slug[:30].rstrip('-'), info['series_id'])),
+ '-'.join((episode_slug[:30].rstrip('-'), info['id']))))
+
+ return info
def _call_api(self, path, *args, **kwargs):
return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
@@ -93,5 +110,5 @@ def _real_extract(self, url):
OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))
- episode = self._call_api(f'episodes/{episode_id}', episode_id)
+ episode = self._call_api(f'podcasts/{podcast_id}/episodes/{episode_id}/player_ids', episode_id)
return self._parse_episode(episode, podcast)
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index c70940a606..3496a08ef6 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -697,7 +697,7 @@ def _real_extract(self, url):
try:
return self._extract_info_dict(info, full_title, token)
except ExtractorError as e:
- if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
+ if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
raise
self.report_warning(
'You have reached the API rate limit, which is ~600 requests per '
diff --git a/yt_dlp/extractor/toutiao.py b/yt_dlp/extractor/toutiao.py
new file mode 100644
index 0000000000..b2a5aa2362
--- /dev/null
+++ b/yt_dlp/extractor/toutiao.py
@@ -0,0 +1,121 @@
+import json
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ try_call,
+ url_or_none,
+)
+from ..utils.traversal import find_element, traverse_obj
+
+
+class ToutiaoIE(InfoExtractor):
+ IE_NAME = 'toutiao'
+ IE_DESC = '今日头条'
+
+ _VALID_URL = r'https?://www\.toutiao\.com/video/(?P\d+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.toutiao.com/video/7505382061495176511/',
+ 'info_dict': {
+ 'id': '7505382061495176511',
+ 'ext': 'mp4',
+ 'title': '新疆多地现不明飞行物,目击者称和月亮一样亮,几秒内突然加速消失,气象部门回应',
+ 'comment_count': int,
+ 'duration': 9.753,
+ 'like_count': int,
+ 'release_date': '20250517',
+ 'release_timestamp': 1747483344,
+ 'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
+ 'uploader': '极目新闻',
+ 'uploader_id': 'MS4wLjABAAAAeateBb9Su8I3MJOZozmvyzWktmba5LMlliRDz1KffnM',
+ 'view_count': int,
+ },
+ }, {
+ 'url': 'https://www.toutiao.com/video/7479446610359878153/',
+ 'info_dict': {
+ 'id': '7479446610359878153',
+ 'ext': 'mp4',
+ 'title': '小伙竟然利用两块磁铁制作成磁力减震器,简直太有创意了!',
+ 'comment_count': int,
+ 'duration': 118.374,
+ 'like_count': int,
+ 'release_date': '20250308',
+ 'release_timestamp': 1741444368,
+ 'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
+ 'uploader': '小莉创意发明',
+ 'uploader_id': 'MS4wLjABAAAA4f7d4mwtApALtHIiq-QM20dwXqe32NUz0DeWF7wbHKw',
+ 'view_count': int,
+ },
+ }]
+
+ def _real_initialize(self):
+ if self._get_cookies('https://www.toutiao.com').get('ttwid'):
+ return
+
+ urlh = self._request_webpage(
+ 'https://ttwid.bytedance.com/ttwid/union/register/', None,
+ 'Fetching ttwid', 'Unable to fetch ttwid', headers={
+ 'Content-Type': 'application/json',
+ }, data=json.dumps({
+ 'aid': 24,
+ 'needFid': False,
+ 'region': 'cn',
+ 'service': 'www.toutiao.com',
+ 'union': True,
+ }).encode(),
+ )
+
+ if ttwid := try_call(lambda: self._get_cookies(urlh.url)['ttwid'].value):
+ self._set_cookie('.toutiao.com', 'ttwid', ttwid)
+ return
+
+ self.raise_login_required()
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ video_data = traverse_obj(webpage, (
+ {find_element(tag='script', id='RENDER_DATA')},
+ {urllib.parse.unquote}, {json.loads}, 'data', 'initialVideo',
+ ))
+
+ formats = []
+ for video in traverse_obj(video_data, (
+ 'videoPlayInfo', 'video_list', lambda _, v: v['main_url'],
+ )):
+ formats.append({
+ 'url': video['main_url'],
+ **traverse_obj(video, ('video_meta', {
+ 'acodec': ('audio_profile', {str}),
+ 'asr': ('audio_sample_rate', {int_or_none}),
+ 'audio_channels': ('audio_channels', {float_or_none}, {int_or_none}),
+ 'ext': ('vtype', {str}),
+ 'filesize': ('size', {int_or_none}),
+ 'format_id': ('definition', {str}),
+ 'fps': ('fps', {int_or_none}),
+ 'height': ('vheight', {int_or_none}),
+ 'tbr': ('real_bitrate', {float_or_none(scale=1000)}),
+ 'vcodec': ('codec_type', {str}),
+ 'width': ('vwidth', {int_or_none}),
+ })),
+ })
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(video_data, {
+ 'comment_count': ('commentCount', {int_or_none}),
+ 'duration': ('videoPlayInfo', 'video_duration', {float_or_none}),
+ 'like_count': ('repinCount', {int_or_none}),
+ 'release_timestamp': ('publishTime', {int_or_none}),
+ 'thumbnail': (('poster', 'coverUrl'), {url_or_none}, any),
+ 'title': ('title', {str}),
+ 'uploader': ('userInfo', 'name', {str}),
+ 'uploader_id': ('userInfo', 'userId', {str_or_none}),
+ 'view_count': ('playCount', {int_or_none}),
+ 'webpage_url': ('detailUrl', {url_or_none}),
+ }),
+ }
diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
index 0a7f95c21a..ebc2963b0f 100644
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -1,4 +1,5 @@
import base64
+import hashlib
import itertools
import re
@@ -16,6 +17,7 @@
str_to_int,
try_get,
unified_timestamp,
+ update_url_query,
url_or_none,
urlencode_postdata,
urljoin,
@@ -171,6 +173,10 @@ def find_dmu(x):
'player': 'pc_web',
})
+ password_params = {
+ 'word': hashlib.md5(video_password.encode()).hexdigest(),
+ } if video_password else None
+
formats = []
# low: 640x360, medium: 1280x720, high: 1920x1080
qq = qualities(['low', 'medium', 'high'])
@@ -178,7 +184,7 @@ def find_dmu(x):
'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)):
formats.append({
- 'url': m3u8_url,
+ 'url': update_url_query(m3u8_url, password_params),
'format_id': f'hls-{quality}',
'ext': 'mp4',
'quality': qq(quality),
@@ -192,7 +198,7 @@ def find_dmu(x):
'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
)):
formats.append({
- 'url': ws_url,
+ 'url': update_url_query(ws_url, password_params),
'format_id': f'ws-{mode}',
'ext': 'mp4',
'quality': qq(mode),
diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py
index 4f4c59627f..e4f2aec465 100644
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -187,7 +187,7 @@ def _get_thumbnails(self, thumbnail):
'url': thumbnail,
}] if thumbnail else None
- def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
+ def _extract_twitch_m3u8_formats(self, path, video_id, token, signature, live_from_start=False):
formats = self._extract_m3u8_formats(
f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
'allow_source': 'true',
@@ -204,7 +204,10 @@ def _extract_twitch_m3u8_formats(self, path, video_id, token, signature):
for fmt in formats:
if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
# mpegts does not yet have proper support for av1
- fmt['downloader_options'] = {'ffmpeg_args_out': ['-f', 'mp4']}
+ fmt.setdefault('downloader_options', {}).update({'ffmpeg_args_out': ['-f', 'mp4']})
+ if live_from_start:
+ fmt.setdefault('downloader_options', {}).update({'ffmpeg_args': ['-live_start_index', '0']})
+ fmt['is_from_start'] = True
return formats
@@ -550,7 +553,8 @@ def _real_extract(self, url):
access_token = self._download_access_token(vod_id, 'video', 'id')
formats = self._extract_twitch_m3u8_formats(
- 'vod', vod_id, access_token['value'], access_token['signature'])
+ 'vod', vod_id, access_token['value'], access_token['signature'],
+ live_from_start=self.get_param('live_from_start'))
formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))
self._prefer_source(formats)
@@ -633,6 +637,10 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
_PAGE_LIMIT = 100
def _entries(self, channel_name, *args):
+ """
+ Subclasses must define _make_variables() and _extract_entry(),
+ as well as set _OPERATION_NAME, _ENTRY_KIND, _EDGE_KIND, and _NODE_KIND
+ """
cursor = None
variables_common = self._make_variables(channel_name, *args)
entries_key = f'{self._ENTRY_KIND}s'
@@ -672,7 +680,22 @@ def _entries(self, channel_name, *args):
break
-class TwitchVideosIE(TwitchPlaylistBaseIE):
+class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
+ _OPERATION_NAME = 'FilterableVideoTower_Videos'
+ _ENTRY_KIND = 'video'
+ _EDGE_KIND = 'VideoEdge'
+ _NODE_KIND = 'Video'
+
+ @staticmethod
+ def _make_variables(channel_name, broadcast_type, sort):
+ return {
+ 'channelOwnerLogin': channel_name,
+ 'broadcastType': broadcast_type,
+ 'videoSort': sort.upper(),
+ }
+
+
+class TwitchVideosIE(TwitchVideosBaseIE):
_VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P[^/]+)/(?:videos|profile)'
_TESTS = [{
@@ -751,11 +774,6 @@ class TwitchVideosIE(TwitchPlaylistBaseIE):
'views': 'Popular',
}
- _OPERATION_NAME = 'FilterableVideoTower_Videos'
- _ENTRY_KIND = 'video'
- _EDGE_KIND = 'VideoEdge'
- _NODE_KIND = 'Video'
-
@classmethod
def suitable(cls, url):
return (False
@@ -764,14 +782,6 @@ def suitable(cls, url):
TwitchVideosCollectionsIE))
else super().suitable(url))
- @staticmethod
- def _make_variables(channel_name, broadcast_type, sort):
- return {
- 'channelOwnerLogin': channel_name,
- 'broadcastType': broadcast_type,
- 'videoSort': sort.upper(),
- }
-
@staticmethod
def _extract_entry(node):
return _make_video_result(node)
@@ -919,7 +929,7 @@ def _real_extract(self, url):
playlist_title=f'{channel_name} - Collections')
-class TwitchStreamIE(TwitchBaseIE):
+class TwitchStreamIE(TwitchVideosBaseIE):
IE_NAME = 'twitch:stream'
_VALID_URL = r'''(?x)
https?://
@@ -982,6 +992,7 @@ class TwitchStreamIE(TwitchBaseIE):
'skip_download': 'Livestream',
},
}]
+ _PAGE_LIMIT = 1
@classmethod
def suitable(cls, url):
@@ -995,6 +1006,20 @@ def suitable(cls, url):
TwitchClipsIE))
else super().suitable(url))
+ @staticmethod
+ def _extract_entry(node):
+ if not isinstance(node, dict) or not node.get('id'):
+ return None
+ video_id = node['id']
+ return {
+ '_type': 'url',
+ 'ie_key': TwitchVodIE.ie_key(),
+ 'id': 'v' + video_id,
+ 'url': f'https://www.twitch.tv/videos/{video_id}',
+ 'title': node.get('title'),
+ 'timestamp': unified_timestamp(node.get('publishedAt')) or 0,
+ }
+
def _real_extract(self, url):
channel_name = self._match_id(url).lower()
@@ -1029,6 +1054,16 @@ def _real_extract(self, url):
if not stream:
raise UserNotLive(video_id=channel_name)
+ timestamp = unified_timestamp(stream.get('createdAt'))
+
+ if self.get_param('live_from_start'):
+ self.to_screen(f'{channel_name}: Extracting VOD to download live from start')
+ entry = next(self._entries(channel_name, None, 'time'), None)
+ if entry and entry.pop('timestamp') >= (timestamp or float('inf')):
+ return entry
+ self.report_warning(
+ 'Unable to extract the VOD associated with this livestream', video_id=channel_name)
+
access_token = self._download_access_token(
channel_name, 'stream', 'channelName')
@@ -1038,7 +1073,6 @@ def _real_extract(self, url):
self._prefer_source(formats)
view_count = stream.get('viewers')
- timestamp = unified_timestamp(stream.get('createdAt'))
sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
uploader = sq_user.get('displayName')
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index 89c43b8ccd..9a683ae8fc 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -20,7 +20,6 @@
remove_end,
str_or_none,
strip_or_none,
- traverse_obj,
truncate_string,
try_call,
try_get,
@@ -29,6 +28,7 @@
url_or_none,
xpath_text,
)
+from ..utils.traversal import require, traverse_obj
class TwitterBaseIE(InfoExtractor):
@@ -1342,7 +1342,7 @@ def _extract_status(self, twid):
'tweet_mode': 'extended',
})
except ExtractorError as e:
- if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
+ if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
raise
self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
status = self._call_syndication_api(twid)
@@ -1596,8 +1596,8 @@ def _find_dimension(target):
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
IE_NAME = 'twitter:broadcast'
- _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P[0-9a-zA-Z]{13})'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?Pbroadcasts|events)/(?P\w+)'
_TESTS = [{
# untitled Periscope video
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
@@ -1605,6 +1605,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1yNGaQLWpejGj',
'ext': 'mp4',
'title': 'Andrea May Sahouri - Periscope Broadcast',
+ 'display_id': '1yNGaQLWpejGj',
'uploader': 'Andrea May Sahouri',
'uploader_id': 'andreamsahouri',
'uploader_url': 'https://twitter.com/andreamsahouri',
@@ -1612,6 +1613,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20200601',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
+ 'concurrent_view_count': int,
+ 'live_status': 'was_live',
},
}, {
'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
@@ -1619,6 +1622,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1ZkKzeyrPbaxv',
'ext': 'mp4',
'title': 'Starship | SN10 | High-Altitude Flight Test',
+ 'display_id': '1ZkKzeyrPbaxv',
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
@@ -1626,6 +1630,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20210303',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
+ 'concurrent_view_count': int,
+ 'live_status': 'was_live',
},
}, {
'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
@@ -1633,6 +1639,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'id': '1OyKAVQrgzwGb',
'ext': 'mp4',
'title': 'Starship Flight Test',
+ 'display_id': '1OyKAVQrgzwGb',
'uploader': 'SpaceX',
'uploader_id': 'SpaceX',
'uploader_url': 'https://twitter.com/SpaceX',
@@ -1640,21 +1647,58 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
'upload_date': '20230420',
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
'view_count': int,
+ 'concurrent_view_count': int,
+ 'live_status': 'was_live',
+ },
+ }, {
+ 'url': 'https://x.com/i/events/1910629646300762112',
+ 'info_dict': {
+ 'id': '1LyxBWDRNqyKN',
+ 'ext': 'mp4',
+ 'title': '#ガンニバル ウォッチパーティー',
+ 'concurrent_view_count': int,
+ 'display_id': '1910629646300762112',
+ 'live_status': 'was_live',
+ 'release_date': '20250423',
+ 'release_timestamp': 1745409000,
+ 'tags': ['ガンニバル'],
+ 'thumbnail': r're:https?://[^?#]+\.jpg\?token=',
+ 'timestamp': 1745403328,
+ 'upload_date': '20250423',
+ 'uploader': 'ディズニープラス公式',
+ 'uploader_id': 'DisneyPlusJP',
+ 'uploader_url': 'https://twitter.com/DisneyPlusJP',
+ 'view_count': int,
},
}]
def _real_extract(self, url):
- broadcast_id = self._match_id(url)
+ broadcast_type, display_id = self._match_valid_url(url).group('type', 'id')
+
+ if broadcast_type == 'events':
+ timeline = self._call_api(
+ f'live_event/1/{display_id}/timeline.json', display_id)
+ broadcast_id = traverse_obj(timeline, (
+ 'twitter_objects', 'broadcasts', ..., ('id', 'broadcast_id'),
+ {str}, any, {require('broadcast ID')}))
+ else:
+ broadcast_id = display_id
+
broadcast = self._call_api(
'broadcasts/show.json', broadcast_id,
{'ids': broadcast_id})['broadcasts'][broadcast_id]
if not broadcast:
raise ExtractorError('Broadcast no longer exists', expected=True)
info = self._parse_broadcast_data(broadcast, broadcast_id)
- info['title'] = broadcast.get('status') or info.get('title')
- info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
- info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
+ info.update({
+ 'display_id': display_id,
+ 'title': broadcast.get('status') or info.get('title'),
+ 'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
+ 'uploader_url': format_field(
+ broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
+ })
if info['live_status'] == 'is_upcoming':
+ self.raise_no_formats('This live broadcast has not yet started', expected=True)
return info
media_key = broadcast['media_key']
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index fb9af7acf1..09497b699d 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -3,6 +3,7 @@
import itertools
import json
import re
+import time
import urllib.parse
from .common import InfoExtractor
@@ -13,10 +14,12 @@
OnDemandPagedList,
clean_html,
determine_ext,
+ filter_dict,
get_element_by_class,
int_or_none,
join_nonempty,
js_to_json,
+ jwt_decode_hs256,
merge_dicts,
parse_filesize,
parse_iso8601,
@@ -39,6 +42,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_NETRC_MACHINE = 'vimeo'
_LOGIN_REQUIRED = False
_LOGIN_URL = 'https://vimeo.com/log_in'
+ _REFERER_HINT = (
+ 'Cannot download embed-only video without embedding URL. Please call yt-dlp '
+ 'with the URL of the page that embeds this video.')
_IOS_CLIENT_AUTH = 'MTMxNzViY2Y0NDE0YTQ5YzhjZTc0YmU0NjVjNDQxYzNkYWVjOWRlOTpHKzRvMmgzVUh4UkxjdU5FRW80cDNDbDhDWGR5dVJLNUJZZ055dHBHTTB4V1VzaG41bEx1a2hiN0NWYWNUcldSSW53dzRUdFRYZlJEZmFoTTArOTBUZkJHS3R4V2llYU04Qnl1bERSWWxUdXRidjNqR2J4SHFpVmtFSUcyRktuQw=='
_IOS_CLIENT_HEADERS = {
'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
@@ -47,6 +53,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
}
_IOS_OAUTH_CACHE_KEY = 'oauth-token-ios'
_ios_oauth_token = None
+ _viewer_info = None
@staticmethod
def _smuggle_referrer(url, referrer_url):
@@ -60,8 +67,21 @@ def _unsmuggle_headers(self, url):
headers['Referer'] = data['referer']
return url, data, headers
+ def _jwt_is_expired(self, token):
+ return jwt_decode_hs256(token)['exp'] - time.time() < 120
+
+ def _fetch_viewer_info(self, display_id=None, fatal=True):
+ if self._viewer_info and not self._jwt_is_expired(self._viewer_info['jwt']):
+ return self._viewer_info
+
+ self._viewer_info = self._download_json(
+ 'https://vimeo.com/_next/viewer', display_id, 'Downloading web token info',
+ 'Failed to download web token info', fatal=fatal, headers={'Accept': 'application/json'})
+
+ return self._viewer_info
+
def _perform_login(self, username, password):
- viewer = self._download_json('https://vimeo.com/_next/viewer', None, 'Downloading login token')
+ viewer = self._fetch_viewer_info()
data = {
'action': 'login',
'email': username,
@@ -96,11 +116,10 @@ def _get_video_password(self):
expected=True)
return password
- def _verify_video_password(self, video_id):
+ def _verify_video_password(self, video_id, path=None):
video_password = self._get_video_password()
- token = self._download_json(
- 'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info')['xsrft']
- url = f'https://vimeo.com/{video_id}'
+ token = self._fetch_viewer_info(video_id)['xsrft']
+ url = join_nonempty('https://vimeo.com', path, video_id, delim='/')
try:
self._request_webpage(
f'{url}/password', video_id,
@@ -117,6 +136,10 @@ def _verify_video_password(self, video_id):
raise ExtractorError('Wrong password', expected=True)
raise
+ def _extract_config_url(self, webpage, **kwargs):
+ return self._html_search_regex(
+ r'\bdata-config-url="([^"]+)"', webpage, 'config URL', **kwargs)
+
def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
vimeo_config = self._search_regex(
r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
@@ -164,6 +187,7 @@ def _parse_config(self, config, video_id):
sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
+ # TODO: Also extract 'avc_url'? Investigate if there are 'hevc_url', 'av1_url'?
manifest_url = cdn_data.get('url')
if not manifest_url:
continue
@@ -244,7 +268,10 @@ def _parse_config(self, config, video_id):
'formats': formats,
'subtitles': subtitles,
'live_status': live_status,
- 'release_timestamp': traverse_obj(live_event, ('ingest', 'scheduled_start_time', {parse_iso8601})),
+ 'release_timestamp': traverse_obj(live_event, ('ingest', (
+ ('scheduled_start_time', {parse_iso8601}),
+ ('start_time', {int_or_none}),
+ ), any)),
# Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
# at the same time without actual units specified.
'_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
@@ -353,7 +380,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
(?:
(?Puser)|
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
- (?:.*?/)??
+ (?:(?!event/).*?/)??
(?P
(?:
play_redirect_hls|
@@ -933,8 +960,7 @@ def _try_album_password(self, url):
r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
if not album_id:
return
- viewer = self._download_json(
- 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
+ viewer = self._fetch_viewer_info(album_id, fatal=False)
if not viewer:
webpage = self._download_webpage(url, album_id)
viewer = self._parse_json(self._search_regex(
@@ -992,9 +1018,7 @@ def _real_extract(self, url):
raise
errmsg = error.cause.response.read()
if b'Because of its privacy settings, this video cannot be played here' in errmsg:
- raise ExtractorError(
- 'Cannot download embed-only video without embedding URL. Please call yt-dlp '
- 'with the URL of the page that embeds this video.', expected=True)
+ raise ExtractorError(self._REFERER_HINT, expected=True)
# 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block
status = error.cause.status
dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked'
@@ -1039,8 +1063,7 @@ def _real_extract(self, url):
channel_id = self._search_regex(
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
if channel_id:
- config_url = self._html_search_regex(
- r'\bdata-config-url="([^"]+)"', webpage, 'config URL', default=None)
+ config_url = self._extract_config_url(webpage, default=None)
video_description = clean_html(get_element_by_class('description', webpage))
info_dict.update({
'channel_id': channel_id,
@@ -1333,8 +1356,7 @@ def _fetch_page(self, album_id, authorization, hashed_pass, page):
def _real_extract(self, url):
album_id = self._match_id(url)
- viewer = self._download_json(
- 'https://vimeo.com/_rv/viewer', album_id, fatal=False)
+ viewer = self._fetch_viewer_info(album_id, fatal=False)
if not viewer:
webpage = self._download_webpage(url, album_id)
viewer = self._parse_json(self._search_regex(
@@ -1626,3 +1648,377 @@ def _real_extract(self, url):
return self.url_result(vimeo_url, VimeoIE, video_id, url_transparent=True,
description=description)
+
+
+class VimeoEventIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:event'
+ _VALID_URL = r'''(?x)
+ https?://(?:www\.)?vimeo\.com/event/(?P\d+)(?:/
+ (?:
+ (?:embed/)?(?P[\da-f]{10})|
+ videos/(?P\d+)
+ )
+ )?'''
+ _EMBED_REGEX = [r'