From 6ff135c31914ea8b5545f8d187c60e852cfde9bc Mon Sep 17 00:00:00 2001 From: Iuri Campos Date: Sat, 2 Aug 2025 23:05:40 +0100 Subject: [PATCH 01/11] [ie/shiey] Add extractor (#13354) Closes #12129 Authored by: iribeirocampos --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/shiey.py | 34 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 yt_dlp/extractor/shiey.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3eea0cdf6b..bb595f924b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1866,6 +1866,7 @@ from .sharepoint import SharePointIE from .sharevideos import ShareVideosEmbedIE from .shemaroome import ShemarooMeIE +from .shiey import ShieyIE from .showroomlive import ShowRoomLiveIE from .sibnet import SibnetEmbedIE from .simplecast import ( diff --git a/yt_dlp/extractor/shiey.py b/yt_dlp/extractor/shiey.py new file mode 100644 index 0000000000..4e3a815fc6 --- /dev/null +++ b/yt_dlp/extractor/shiey.py @@ -0,0 +1,34 @@ +import json + +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..utils import extract_attributes +from ..utils.traversal import find_element, traverse_obj + + +class ShieyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?shiey\.com/videos/v/(?P[^/?#]+)' + + _TESTS = [{ + 'url': 'https://www.shiey.com/videos/v/train-journey-to-edge-of-serbia-ep-2', + 'info_dict': { + 'id': '1103409448', + 'ext': 'mp4', + 'title': 'Train Journey To Edge of Serbia (Ep. 2)', + 'uploader': 'shiey', + 'uploader_url': '', + 'duration': 1364, + 'thumbnail': r're:^https?://.+', + }, + 'params': {'skip_download': True}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + oembed_html = traverse_obj(webpage, ( + {find_element(attr='data-controller', value='VideoEmbed', html=True)}, + {extract_attributes}, 'data-config-embed-video', {json.loads}, 'oembedHtml', {str})) + + return self.url_result(VimeoIE._extract_url(url, oembed_html), VimeoIE) From 38c2bf40260f7788efb5a7f5e8eba8e5cb43f741 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 3 Aug 2025 13:07:06 +1200 Subject: [PATCH 02/11] [ie/youtube] Add player params to mweb client (#13914) Authored by: coletdjnz --- yt_dlp/extractor/youtube/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 0a9b510c7d..f7dadd013d 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -282,6 +282,7 @@ class SubsPoTokenPolicy(BasePoTokenPolicy): 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', }, }, + 'PLAYER_PARAMS': '8AEB', 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, 'GVS_PO_TOKEN_POLICY': { StreamingProtocol.HTTPS: GvsPoTokenPolicy( From f799a4b4728e54dbe0d35e604a15238c13648600 Mon Sep 17 00:00:00 2001 From: sepro Date: Tue, 5 Aug 2025 18:47:37 +0200 Subject: [PATCH 03/11] [ie/youtube] Update `tv` client config (#13934) Closes #13930 Authored by: seproDev --- yt_dlp/extractor/youtube/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index f7dadd013d..30a0fb61bb 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -309,12 +309,12 @@ class SubsPoTokenPolicy(BasePoTokenPolicy): 'client': { 'clientName': 'TVHTML5', 'clientVersion': '7.20250312.16.00', - 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', + # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506 + 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'SUPPORTS_COOKIES': True, - 'PLAYER_PARAMS': '8AEB', }, 'tv_simply': { 'INNERTUBE_CONTEXT': { From e651a53a2fd6330598c6973ebdc459a363c93cfa Mon Sep 17 00:00:00 2001 From: sepro Date: Tue, 5 Aug 2025 22:02:13 +0200 Subject: [PATCH 04/11] Revert f799a4b4728e54dbe0d35e604a15238c13648600 --- yt_dlp/extractor/youtube/_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 30a0fb61bb..f7dadd013d 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -309,12 +309,12 @@ class SubsPoTokenPolicy(BasePoTokenPolicy): 'client': { 'clientName': 'TVHTML5', 'clientVersion': '7.20250312.16.00', - # See: https://github.com/youtube/cobalt/blob/main/cobalt/browser/user_agent/user_agent_platform_info.cc#L506 - 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/25.lts.30.1034943-gold (unlike Gecko), Unknown_TV_Unknown_0/Unknown (Unknown, Unknown)', + 'userAgent': 'Mozilla/5.0 (ChromiumStylePlatform) Cobalt/Version', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, 'SUPPORTS_COOKIES': True, + 'PLAYER_PARAMS': '8AEB', }, 'tv_simply': { 'INNERTUBE_CONTEXT': { From 1e0c77ddcce335a1875ecc17d93ed6ff3fabd975 Mon Sep 17 00:00:00 2001 From: sepro Date: Wed, 6 Aug 2025 16:52:34 +0200 Subject: [PATCH 05/11] [pp/XAttrMetadata] Don't write "Where from" on Windows (#13944) Fix 3e918d825d7ff367812658957b281b8cda8f9ebb Closes #13942 Authored by: seproDev --- yt_dlp/postprocessor/xattrpp.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index fd83d783ba..f879989761 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -54,6 +54,9 @@ def run(self, info): if infoname == 'upload_date': value = hyphenate_date(value) elif xattrname == 'com.apple.metadata:kMDItemWhereFroms': + # NTFS ADS doesn't support colons in names + if os.name == 'nt': + continue value = self.APPLE_PLIST_TEMPLATE % value write_xattr(info['filepath'], xattrname, value.encode()) From 8175f3738fe4db3bc629d36bb72b927d4286d3f9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 6 Aug 2025 14:00:53 -0500 Subject: [PATCH 06/11] [rh:requests] Bump minimum required version of urllib3 to 2.0.2 (#13939) - urllib3 1.26.x gives unexpected results with partial reads: https://github.com/urllib3/urllib3/issues/2128 - urllib3 2.0.0 and 2.0.1 were yanked from PyPI: https://github.com/urllib3/urllib3/issues/3009 Closes #13927 Authored by: bashonly --- .github/workflows/build.yml | 2 +- pyproject.toml | 2 +- yt_dlp/networking/_requests.py | 26 ++++---------------------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b3db8fec1b..810490f735 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -208,7 +208,7 @@ jobs: python3.9 -m pip install -U pip wheel 'setuptools>=71.0.2' # XXX: Keep this in sync with pyproject.toml (it can't be accessed at this stage) and exclude secretstorage python3.9 -m pip install -U Pyinstaller mutagen pycryptodomex brotli certifi cffi \ - 'requests>=2.32.2,<3' 'urllib3>=1.26.17,<3' 'websockets>=13.0' + 'requests>=2.32.2,<3' 'urllib3>=2.0.2,<3' 'websockets>=13.0' run: | cd repo diff --git a/pyproject.toml b/pyproject.toml index 41d5ec3b0f..52ff5a895c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ default = [ "mutagen", "pycryptodomex", "requests>=2.32.2,<3", - "urllib3>=1.26.17,<3", + "urllib3>=2.0.2,<3", "websockets>=13.0", ] curl-cffi = [ diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 6582038fcb..1526d2a599 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -1,6 +1,5 @@ from __future__ import annotations -import contextlib import functools import http.client import logging @@ -20,9 +19,9 @@ urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.')) -if urllib3_version < (1, 26, 17): +if urllib3_version < (2, 0, 2): urllib3._yt_dlp__version = f'{urllib3.__version__} (unsupported)' - raise ImportError('Only urllib3 >= 1.26.17 is supported') + raise ImportError('Only urllib3 >= 2.0.2 is supported') if requests.__build__ < 0x023202: requests._yt_dlp__version = f'{requests.__version__} (unsupported)' @@ -101,27 +100,10 @@ def subn(self, repl, string, *args, **kwargs): # https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0 import urllib3.util.url -if hasattr(urllib3.util.url, 'PERCENT_RE'): - urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE) -elif hasattr(urllib3.util.url, '_PERCENT_RE'): # urllib3 >= 2.0.0 +if hasattr(urllib3.util.url, '_PERCENT_RE'): # was 'PERCENT_RE' in urllib3 < 2.0.0 urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE) else: - warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message()) - -''' -Workaround for issue in urllib.util.ssl_.py: ssl_wrap_context does not pass -server_hostname to SSLContext.wrap_socket if server_hostname is an IP, -however this is an issue because we set check_hostname to True in our SSLContext. - -Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_context to pass server_hostname regardless. - -This has been fixed in urllib3 2.0+. -See: https://github.com/urllib3/urllib3/issues/517 -''' - -if urllib3_version < (2, 0, 0): - with contextlib.suppress(Exception): - urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True + warnings.warn('Failed to patch _PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message()) # Requests will not automatically handle no_proxy by default From 662af5bb8307ec3ff8ab0857f1159922d64792f0 Mon Sep 17 00:00:00 2001 From: sepro Date: Wed, 6 Aug 2025 21:14:45 +0200 Subject: [PATCH 07/11] Warn when yt-dlp is severely outdated (#13937) Authored by: seproDev --- README.md | 3 +++ devscripts/cli_to_api.py | 1 + yt_dlp/YoutubeDL.py | 5 +++++ yt_dlp/__init__.py | 1 + yt_dlp/update.py | 17 +++++++++++++++++ 5 files changed, 27 insertions(+) diff --git a/README.md b/README.md index 12f68e98d8..5083341408 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,9 @@ # To install nightly with pip: python3 -m pip install -U --pre "yt-dlp[default]" ``` +When running a yt-dlp version that is older than 90 days, you will see a warning message suggesting to update to the latest version. +You can suppress this warning by adding `--no-update` to your command or configuration file. + ## DEPENDENCIES Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly. diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py index 9c2710e09f..cc86b413f4 100755 --- a/devscripts/cli_to_api.py +++ b/devscripts/cli_to_api.py @@ -20,6 +20,7 @@ def parse_patched_options(opts): 'fragment_retries': 0, 'extract_flat': False, 'concat_playlist': 'never', + 'update_self': False, }) yt_dlp.options.create_parser = lambda: patched_parser try: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a9f347bf4a..5985d2ec76 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -73,6 +73,7 @@ from .update import ( REPOSITORY, _get_system_deprecation, + _get_outdated_warning, _make_label, current_git_head, detect_variant, @@ -504,6 +505,7 @@ class YoutubeDL: force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts noprogress: Do not print the progress bar live_from_start: Whether to download livestreams videos from the start + warn_when_outdated: Emit a warning if the yt-dlp version is older than 90 days The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): @@ -703,6 +705,9 @@ def process_color_policy(stream): system_deprecation = _get_system_deprecation() if system_deprecation: self.deprecated_feature(system_deprecation.replace('\n', '\n ')) + elif self.params.get('warn_when_outdated'): + if outdated_warning := _get_outdated_warning(): + self.report_warning(outdated_warning) if self.params.get('allow_unplayable_formats'): self.report_warning( diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 2e7646b7ec..f705bed1bf 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -971,6 +971,7 @@ def parse_options(argv=None): 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, + 'warn_when_outdated': opts.update_self is None, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, 'compat_opts': opts.compat_opts, diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 30cbf538e9..045f7ec7f3 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -2,6 +2,7 @@ import atexit import contextlib +import datetime as dt import functools import hashlib import json @@ -171,6 +172,22 @@ def _get_system_deprecation(): return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}' +def _get_outdated_warning(): + # Only yt-dlp guarantees a stable release at least every 90 days + if not ORIGIN.startswith('yt-dlp/'): + return None + + with contextlib.suppress(Exception): + last_updated = dt.date(*version_tuple(__version__)[:3]) + if last_updated < dt.datetime.now(dt.timezone.utc).date() - dt.timedelta(days=90): + return ('\n '.join(( + f'Your yt-dlp version ({__version__}) is older than 90 days!', + 'It is strongly recommended to always use the latest version.', + f'{is_non_updateable() or """Run "yt-dlp --update" or "yt-dlp -U" to update"""}.', + 'To suppress this warning, add --no-update to your command/config.'))) + return None + + def _sha256_file(path): h = hashlib.sha256() mv = memoryview(bytearray(128 * 1024)) From fe53ebe5b66a03c664708a4d6fd87b8c13a1bc7b Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 6 Aug 2025 15:08:34 -0500 Subject: [PATCH 08/11] [fd/dash] Re-extract if using --load-info-json with --live-from-start (#13922) Closes #13906 Authored by: bashonly --- yt_dlp/downloader/dash.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index afc79b6caf..bf8652d8b0 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -3,7 +3,7 @@ from . import get_suitable_downloader from .fragment import FragmentFD -from ..utils import update_url_query, urljoin +from ..utils import ReExtractInfo, update_url_query, urljoin class DashSegmentsFD(FragmentFD): @@ -28,6 +28,11 @@ def real_download(self, filename, info_dict): requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] args = [] for fmt in requested_formats or [info_dict]: + # Re-extract if --load-info-json is used and 'fragments' was originally a generator + # See https://github.com/yt-dlp/yt-dlp/issues/13906 + if isinstance(fmt['fragments'], str): + raise ReExtractInfo('the stream needs to be re-extracted', expected=True) + try: fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) except TypeError: From e8d2807296ccc603e031f5982623a8311f2a5119 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 6 Aug 2025 19:03:44 -0500 Subject: [PATCH 09/11] [ie/digitalconcerthall] Fix formats extraction (#13948) Closes #13925 Authored by: bashonly --- yt_dlp/extractor/digitalconcerthall.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 4c4fe470da..be16f5e873 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -4,6 +4,7 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + determine_ext, jwt_decode_hs256, parse_codecs, try_get, @@ -222,11 +223,18 @@ def _entries(self, items, language, type_, **kwargs): raise formats = [] - for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): - formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) - for fmt in formats: - if fmt.get('format_note') and fmt.get('vcodec') == 'none': - fmt.update(parse_codecs(fmt['format_note'])) + for fmt_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): + ext = determine_ext(fmt_url) + if ext == 'm3u8': + fmts = self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + for fmt in fmts: + if fmt.get('format_note') and fmt.get('vcodec') == 'none': + fmt.update(parse_codecs(fmt['format_note'])) + formats.extend(fmts) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats(fmt_url, video_id, mpd_id='dash', fatal=False)) + else: + self.report_warning(f'Skipping unsupported format extension "{ext}"') yield { 'id': video_id, From a6df5e8a58d6743dd230011389c986495ec509da Mon Sep 17 00:00:00 2001 From: Sojiroh Date: Thu, 7 Aug 2025 17:16:55 -0400 Subject: [PATCH 10/11] [ie/YandexDisk] Support 360 URLs (#13935) Closes #13887 Authored by: Sojiroh --- yt_dlp/extractor/yandexdisk.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index 3214816701..654858fcca 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -16,7 +16,7 @@ class YandexDiskIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?P yadi\.sk| - disk\.yandex\. + disk\.(?:360\.)?yandex\. (?: az| by| @@ -51,6 +51,9 @@ class YandexDiskIE(InfoExtractor): }, { 'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D', 'only_matching': True, + }, { + 'url': 'https://disk.360.yandex.ru/i/TM2xsIVsgjY4uw', + 'only_matching': True, }] def _real_extract(self, url): From e8d49b1c7f11c7e282319395ca9c2a201304be41 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Fri, 8 Aug 2025 00:04:30 -0400 Subject: [PATCH 11/11] [ie/motherless] Fix extractor (#13960) Authored by: Grub4K --- yt_dlp/extractor/motherless.py | 46 ++++++++++++++-------------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py index 86551950b7..e236ec3db8 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -51,23 +51,7 @@ class MotherlessIE(InfoExtractor): 'skip': '404', }, { 'url': 'http://motherless.com/g/cosplay/633979F', - 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0', - 'info_dict': { - 'id': '633979F', - 'ext': 'mp4', - 'title': 'Turtlette', - 'categories': ['superheroine heroine superher'], - 'upload_date': '20140827', - 'uploader_id': 'shade0230', - 'thumbnail': r're:https?://.*\.jpg', - 'age_limit': 18, - 'like_count': int, - 'comment_count': int, - 'view_count': int, - }, - 'params': { - 'nocheckcertificate': True, - }, + 'expected_exception': 'ExtractorError', }, { 'url': 'http://motherless.com/8B4BBC1', 'info_dict': { @@ -113,8 +97,10 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) if any(p in webpage for p in ( - '404 - MOTHERLESS.COM<', - ">The page you're looking for cannot be found.<")): + '<title>404 - MOTHERLESS.COM<', + ">The page you're looking for cannot be found.<", + '<div class="error-page', + )): raise ExtractorError(f'Video {video_id} does not exist', expected=True) if '>The content you are trying to view is for friends only.' in webpage: @@ -183,6 +169,9 @@ class MotherlessPaginatedIE(InfoExtractor): def _correct_path(self, url, item_id): raise NotImplementedError('This method must be implemented by subclasses') + def _correct_title(self, title, /): + return title.partition(' - Videos')[0] if title else None + def _extract_entries(self, webpage, base): for mobj in re.finditer(r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)', webpage): @@ -205,7 +194,7 @@ def get_page(idx): return self.playlist_result( OnDemandPagedList(get_page, self._PAGE_SIZE), item_id, - remove_end(self._html_extract_title(webpage), ' | MOTHERLESS.COM ™')) + self._correct_title(self._html_extract_title(webpage))) class MotherlessGroupIE(MotherlessPaginatedIE): @@ -214,7 +203,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE): 'url': 'http://motherless.com/gv/movie_scenes', 'info_dict': { 'id': 'movie_scenes', - 'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully', + 'title': 'Movie Scenes', }, 'playlist_mincount': 540, }, { @@ -230,7 +219,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE): 'id': 'beautiful_cock', 'title': 'Beautiful Cock', }, - 'playlist_mincount': 2040, + 'playlist_mincount': 371, }] def _correct_path(self, url, item_id): @@ -245,14 +234,14 @@ class MotherlessGalleryIE(MotherlessPaginatedIE): 'id': '338999F', 'title': 'Random', }, - 'playlist_mincount': 171, + 'playlist_mincount': 100, }, { 'url': 'https://motherless.com/GVABD6213', 'info_dict': { 'id': 'ABD6213', 'title': 'Cuties', }, - 'playlist_mincount': 2, + 'playlist_mincount': 1, }, { 'url': 'https://motherless.com/GVBCF7622', 'info_dict': { @@ -266,9 +255,12 @@ class MotherlessGalleryIE(MotherlessPaginatedIE): 'id': '035DE2F', 'title': 'General', }, - 'playlist_mincount': 420, + 'playlist_mincount': 234, }] + def _correct_title(self, title, /): + return remove_end(title, ' | MOTHERLESS.COM ™') + def _correct_path(self, url, item_id): return urllib.parse.urljoin(url, f'/GV{item_id}') @@ -279,14 +271,14 @@ class MotherlessUploaderIE(MotherlessPaginatedIE): 'url': 'https://motherless.com/u/Mrgo4hrs2023', 'info_dict': { 'id': 'Mrgo4hrs2023', - 'title': "Mrgo4hrs2023's Uploads - Videos", + 'title': "Mrgo4hrs2023's Uploads", }, 'playlist_mincount': 32, }, { 'url': 'https://motherless.com/u/Happy_couple?t=v', 'info_dict': { 'id': 'Happy_couple', - 'title': "Happy_couple's Uploads - Videos", + 'title': "Happy_couple's Uploads", }, 'playlist_mincount': 8, }]