diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e523154c41..389f6d6025 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -208,7 +208,7 @@ jobs: python3.9 -m pip install -U pip wheel 'setuptools>=71.0.2' # XXX: Keep this in sync with pyproject.toml (it can't be accessed at this stage) and exclude secretstorage python3.9 -m pip install -U Pyinstaller mutagen pycryptodomex brotli certifi cffi \ - 'requests>=2.32.2,<3' 'urllib3>=1.26.17,<3' 'websockets>=13.0' 'protobug==0.3.0' + 'requests>=2.32.2,<3' 'urllib3>=2.0.2,<3' 'websockets>=13.0' 'protobug==0.3.0' run: | cd repo diff --git a/README.md b/README.md index 1f8c7936e4..b3b77caa6d 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,9 @@ # To install nightly with pip: python3 -m pip install -U --pre "yt-dlp[default]" ``` +When running a yt-dlp version that is older than 90 days, you will see a warning message suggesting to update to the latest version. +You can suppress this warning by adding `--no-update` to your command or configuration file. + ## DEPENDENCIES Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly. diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py index 9c2710e09f..cc86b413f4 100755 --- a/devscripts/cli_to_api.py +++ b/devscripts/cli_to_api.py @@ -20,6 +20,7 @@ def parse_patched_options(opts): 'fragment_retries': 0, 'extract_flat': False, 'concat_playlist': 'never', + 'update_self': False, }) yt_dlp.options.create_parser = lambda: patched_parser try: diff --git a/pyproject.toml b/pyproject.toml index e3e8baf996..51e4f5f0e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ default = [ "mutagen", "pycryptodomex", "requests>=2.32.2,<3", - "urllib3>=1.26.17,<3", + "urllib3>=2.0.2,<3", "websockets>=13.0", "protobug==0.3.0", ] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 37d6eae4c5..7049c70e08 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -73,6 +73,7 @@ from .update import ( REPOSITORY, _get_system_deprecation, + _get_outdated_warning, _make_label, current_git_head, detect_variant, @@ -504,6 +505,7 @@ class YoutubeDL: force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts noprogress: Do not print the progress bar live_from_start: Whether to download livestreams videos from the start + warn_when_outdated: Emit a warning if the yt-dlp version is older than 90 days The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): @@ -703,6 +705,9 @@ def process_color_policy(stream): system_deprecation = _get_system_deprecation() if system_deprecation: self.deprecated_feature(system_deprecation.replace('\n', '\n ')) + elif self.params.get('warn_when_outdated'): + if outdated_warning := _get_outdated_warning(): + self.report_warning(outdated_warning) if self.params.get('allow_unplayable_formats'): self.report_warning( diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 2e7646b7ec..f705bed1bf 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -971,6 +971,7 @@ def parse_options(argv=None): 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, + 'warn_when_outdated': opts.update_self is None, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, 'compat_opts': opts.compat_opts, diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index afc79b6caf..bf8652d8b0 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -3,7 +3,7 @@ from . import get_suitable_downloader from .fragment import FragmentFD -from ..utils import update_url_query, urljoin +from ..utils import ReExtractInfo, update_url_query, urljoin class DashSegmentsFD(FragmentFD): @@ -28,6 +28,11 @@ def real_download(self, filename, info_dict): requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] args = [] for fmt in requested_formats or [info_dict]: + # Re-extract if --load-info-json is used and 'fragments' was originally a generator + # See https://github.com/yt-dlp/yt-dlp/issues/13906 + if isinstance(fmt['fragments'], str): + raise ReExtractInfo('the stream needs to be re-extracted', expected=True) + try: fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) except TypeError: diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3eea0cdf6b..bb595f924b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1866,6 +1866,7 @@ from .sharepoint import SharePointIE from .sharevideos import ShareVideosEmbedIE from .shemaroome import ShemarooMeIE +from .shiey import ShieyIE from .showroomlive import ShowRoomLiveIE from .sibnet import SibnetEmbedIE from .simplecast import ( diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 4c4fe470da..be16f5e873 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -4,6 +4,7 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + determine_ext, jwt_decode_hs256, parse_codecs, try_get, @@ -222,11 +223,18 @@ def _entries(self, items, language, type_, **kwargs): raise formats = [] - for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): - formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) - for fmt in formats: - if fmt.get('format_note') and fmt.get('vcodec') == 'none': - fmt.update(parse_codecs(fmt['format_note'])) + for fmt_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): + ext = determine_ext(fmt_url) + if ext == 'm3u8': + fmts = self._extract_m3u8_formats(fmt_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + for fmt in fmts: + if fmt.get('format_note') and fmt.get('vcodec') == 'none': + fmt.update(parse_codecs(fmt['format_note'])) + formats.extend(fmts) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats(fmt_url, video_id, mpd_id='dash', fatal=False)) + else: + self.report_warning(f'Skipping unsupported format extension "{ext}"') yield { 'id': video_id, diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py index 86551950b7..e236ec3db8 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -51,23 +51,7 @@ class MotherlessIE(InfoExtractor): 'skip': '404', }, { 'url': 'http://motherless.com/g/cosplay/633979F', - 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0', - 'info_dict': { - 'id': '633979F', - 'ext': 'mp4', - 'title': 'Turtlette', - 'categories': ['superheroine heroine superher'], - 'upload_date': '20140827', - 'uploader_id': 'shade0230', - 'thumbnail': r're:https?://.*\.jpg', - 'age_limit': 18, - 'like_count': int, - 'comment_count': int, - 'view_count': int, - }, - 'params': { - 'nocheckcertificate': True, - }, + 'expected_exception': 'ExtractorError', }, { 'url': 'http://motherless.com/8B4BBC1', 'info_dict': { @@ -113,8 +97,10 @@ def _real_extract(self, url): webpage = self._download_webpage(url, video_id) if any(p in webpage for p in ( - '404 - MOTHERLESS.COM<', - ">The page you're looking for cannot be found.<")): + '<title>404 - MOTHERLESS.COM<', + ">The page you're looking for cannot be found.<", + '<div class="error-page', + )): raise ExtractorError(f'Video {video_id} does not exist', expected=True) if '>The content you are trying to view is for friends only.' in webpage: @@ -183,6 +169,9 @@ class MotherlessPaginatedIE(InfoExtractor): def _correct_path(self, url, item_id): raise NotImplementedError('This method must be implemented by subclasses') + def _correct_title(self, title, /): + return title.partition(' - Videos')[0] if title else None + def _extract_entries(self, webpage, base): for mobj in re.finditer(r'href="[^"]*(?P<href>/[A-F0-9]+)"\s+title="(?P<title>[^"]+)', webpage): @@ -205,7 +194,7 @@ def get_page(idx): return self.playlist_result( OnDemandPagedList(get_page, self._PAGE_SIZE), item_id, - remove_end(self._html_extract_title(webpage), ' | MOTHERLESS.COM ™')) + self._correct_title(self._html_extract_title(webpage))) class MotherlessGroupIE(MotherlessPaginatedIE): @@ -214,7 +203,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE): 'url': 'http://motherless.com/gv/movie_scenes', 'info_dict': { 'id': 'movie_scenes', - 'title': 'Movie Scenes - Videos - Hot and sexy scenes from "regular" movies... Beautiful actresses fully', + 'title': 'Movie Scenes', }, 'playlist_mincount': 540, }, { @@ -230,7 +219,7 @@ class MotherlessGroupIE(MotherlessPaginatedIE): 'id': 'beautiful_cock', 'title': 'Beautiful Cock', }, - 'playlist_mincount': 2040, + 'playlist_mincount': 371, }] def _correct_path(self, url, item_id): @@ -245,14 +234,14 @@ class MotherlessGalleryIE(MotherlessPaginatedIE): 'id': '338999F', 'title': 'Random', }, - 'playlist_mincount': 171, + 'playlist_mincount': 100, }, { 'url': 'https://motherless.com/GVABD6213', 'info_dict': { 'id': 'ABD6213', 'title': 'Cuties', }, - 'playlist_mincount': 2, + 'playlist_mincount': 1, }, { 'url': 'https://motherless.com/GVBCF7622', 'info_dict': { @@ -266,9 +255,12 @@ class MotherlessGalleryIE(MotherlessPaginatedIE): 'id': '035DE2F', 'title': 'General', }, - 'playlist_mincount': 420, + 'playlist_mincount': 234, }] + def _correct_title(self, title, /): + return remove_end(title, ' | MOTHERLESS.COM ™') + def _correct_path(self, url, item_id): return urllib.parse.urljoin(url, f'/GV{item_id}') @@ -279,14 +271,14 @@ class MotherlessUploaderIE(MotherlessPaginatedIE): 'url': 'https://motherless.com/u/Mrgo4hrs2023', 'info_dict': { 'id': 'Mrgo4hrs2023', - 'title': "Mrgo4hrs2023's Uploads - Videos", + 'title': "Mrgo4hrs2023's Uploads", }, 'playlist_mincount': 32, }, { 'url': 'https://motherless.com/u/Happy_couple?t=v', 'info_dict': { 'id': 'Happy_couple', - 'title': "Happy_couple's Uploads - Videos", + 'title': "Happy_couple's Uploads", }, 'playlist_mincount': 8, }] diff --git a/yt_dlp/extractor/shiey.py b/yt_dlp/extractor/shiey.py new file mode 100644 index 0000000000..4e3a815fc6 --- /dev/null +++ b/yt_dlp/extractor/shiey.py @@ -0,0 +1,34 @@ +import json + +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..utils import extract_attributes +from ..utils.traversal import find_element, traverse_obj + + +class ShieyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?shiey\.com/videos/v/(?P<id>[^/?#]+)' + + _TESTS = [{ + 'url': 'https://www.shiey.com/videos/v/train-journey-to-edge-of-serbia-ep-2', + 'info_dict': { + 'id': '1103409448', + 'ext': 'mp4', + 'title': 'Train Journey To Edge of Serbia (Ep. 2)', + 'uploader': 'shiey', + 'uploader_url': '', + 'duration': 1364, + 'thumbnail': r're:^https?://.+', + }, + 'params': {'skip_download': True}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + oembed_html = traverse_obj(webpage, ( + {find_element(attr='data-controller', value='VideoEmbed', html=True)}, + {extract_attributes}, 'data-config-embed-video', {json.loads}, 'oembedHtml', {str})) + + return self.url_result(VimeoIE._extract_url(url, oembed_html), VimeoIE) diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index 3214816701..654858fcca 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -16,7 +16,7 @@ class YandexDiskIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?P<domain> yadi\.sk| - disk\.yandex\. + disk\.(?:360\.)?yandex\. (?: az| by| @@ -51,6 +51,9 @@ class YandexDiskIE(InfoExtractor): }, { 'url': 'https://yadi.sk/public?hash=5DZ296JK9GWCLp02f6jrObjnctjRxMs8L6%2B%2FuhNqk38%3D', 'only_matching': True, + }, { + 'url': 'https://disk.360.yandex.ru/i/TM2xsIVsgjY4uw', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/youtube/_base.py b/yt_dlp/extractor/youtube/_base.py index 413f54b522..bfe4a6811a 100644 --- a/yt_dlp/extractor/youtube/_base.py +++ b/yt_dlp/extractor/youtube/_base.py @@ -311,6 +311,7 @@ class SubsPoTokenPolicy(BasePoTokenPolicy): 'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)', }, }, + 'PLAYER_PARAMS': '8AEB', 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, 'GVS_PO_TOKEN_POLICY': { StreamingProtocol.HTTPS: GvsPoTokenPolicy( diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 6582038fcb..1526d2a599 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -1,6 +1,5 @@ from __future__ import annotations -import contextlib import functools import http.client import logging @@ -20,9 +19,9 @@ urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.split('.')) -if urllib3_version < (1, 26, 17): +if urllib3_version < (2, 0, 2): urllib3._yt_dlp__version = f'{urllib3.__version__} (unsupported)' - raise ImportError('Only urllib3 >= 1.26.17 is supported') + raise ImportError('Only urllib3 >= 2.0.2 is supported') if requests.__build__ < 0x023202: requests._yt_dlp__version = f'{requests.__version__} (unsupported)' @@ -101,27 +100,10 @@ def subn(self, repl, string, *args, **kwargs): # https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0 import urllib3.util.url -if hasattr(urllib3.util.url, 'PERCENT_RE'): - urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE) -elif hasattr(urllib3.util.url, '_PERCENT_RE'): # urllib3 >= 2.0.0 +if hasattr(urllib3.util.url, '_PERCENT_RE'): # was 'PERCENT_RE' in urllib3 < 2.0.0 urllib3.util.url._PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url._PERCENT_RE) else: - warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message()) - -''' -Workaround for issue in urllib.util.ssl_.py: ssl_wrap_context does not pass -server_hostname to SSLContext.wrap_socket if server_hostname is an IP, -however this is an issue because we set check_hostname to True in our SSLContext. - -Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_context to pass server_hostname regardless. - -This has been fixed in urllib3 2.0+. -See: https://github.com/urllib3/urllib3/issues/517 -''' - -if urllib3_version < (2, 0, 0): - with contextlib.suppress(Exception): - urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True + warnings.warn('Failed to patch _PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message()) # Requests will not automatically handle no_proxy by default diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index fd83d783ba..f879989761 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -54,6 +54,9 @@ def run(self, info): if infoname == 'upload_date': value = hyphenate_date(value) elif xattrname == 'com.apple.metadata:kMDItemWhereFroms': + # NTFS ADS doesn't support colons in names + if os.name == 'nt': + continue value = self.APPLE_PLIST_TEMPLATE % value write_xattr(info['filepath'], xattrname, value.encode()) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 30cbf538e9..045f7ec7f3 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -2,6 +2,7 @@ import atexit import contextlib +import datetime as dt import functools import hashlib import json @@ -171,6 +172,22 @@ def _get_system_deprecation(): return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}' +def _get_outdated_warning(): + # Only yt-dlp guarantees a stable release at least every 90 days + if not ORIGIN.startswith('yt-dlp/'): + return None + + with contextlib.suppress(Exception): + last_updated = dt.date(*version_tuple(__version__)[:3]) + if last_updated < dt.datetime.now(dt.timezone.utc).date() - dt.timedelta(days=90): + return ('\n '.join(( + f'Your yt-dlp version ({__version__}) is older than 90 days!', + 'It is strongly recommended to always use the latest version.', + f'{is_non_updateable() or """Run "yt-dlp --update" or "yt-dlp -U" to update"""}.', + 'To suppress this warning, add --no-update to your command/config.'))) + return None + + def _sha256_file(path): h = hashlib.sha256() mv = memoryview(bytearray(128 * 1024))