From aa9f1f4d577e99897ac16cd19d4e217d688ea75d Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Wed, 9 Jul 2025 18:29:54 +1200 Subject: [PATCH 1/5] [ie/youtube] Log bad playability statuses of player responses (#13647) Authored by: coletdjnz --- yt_dlp/extractor/youtube/_video.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 8fa3b0a34..208abee93 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -3273,6 +3273,10 @@ def append_client(*client_names): # web_creator may work around age-verification for all videos but requires PO token append_client('tv_embedded', 'web_creator') + status = traverse_obj(pr, ('playabilityStatus', 'status', {str})) + if status not in ('OK', 'LIVE_STREAM_OFFLINE', 'AGE_CHECK_REQUIRED', 'AGE_VERIFICATION_REQUIRED'): + self.write_debug(f'{video_id}: {client} player response playability status: {status}') + prs.extend(deprioritized_prs) if skipped_clients: From 805519bfaa7cb5443912dfe45ac774834ba65a16 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:45:47 -0500 Subject: [PATCH 2/5] [jsinterp] Fix undefined variable name caching (#13677) Fix b342d27f3f82d913976509ddf5bff539ad8567ec Authored by: bashonly --- test/test_jsinterp.py | 5 +++++ test/test_youtube_signature.py | 4 ++++ yt_dlp/jsinterp.py | 5 +++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index a1088cea4..43b1d0fde 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -536,6 +536,11 @@ def test_nested_function_scoping(self): } ''', 31) + def test_undefined_varnames(self): + jsi = JSInterpreter('function f(){ var a; return [a, b]; }') + self._test(jsi, [JS_Undefined, JS_Undefined]) + self.assertEqual(jsi._undefined_varnames, {'b'}) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 98607df55..456246753 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -373,6 +373,10 @@ 'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js', 'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg', ), + ( + 'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js', + 'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA', + ), ] diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index f06d96832..460bc2c03 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -677,8 +677,9 @@ def dict_item(key, val): # Set value as JS_Undefined or its pre-existing value local_vars.set_local(var, ret) else: - ret = local_vars.get(var, JS_Undefined) - if ret is JS_Undefined: + ret = local_vars.get(var, NO_DEFAULT) + if ret is NO_DEFAULT: + ret = JS_Undefined self._undefined_varnames.add(var) return ret, should_return From 0b359b184dee0c7052be482857bf562de67e4928 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 9 Jul 2025 16:58:19 -0500 Subject: [PATCH 3/5] [ie/9gag] Support browser impersonation (#13678) Closes #10837 Authored by: bashonly --- yt_dlp/extractor/ninegag.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/ninegag.py b/yt_dlp/extractor/ninegag.py index 2979f3a50..1b88e9c54 100644 --- a/yt_dlp/extractor/ninegag.py +++ b/yt_dlp/extractor/ninegag.py @@ -1,6 +1,5 @@ from .common import InfoExtractor from ..utils import ( - ExtractorError, determine_ext, int_or_none, traverse_obj, @@ -61,10 +60,10 @@ def _real_extract(self, url): post = self._download_json( 'https://9gag.com/v1/post', post_id, query={ 'id': post_id, - })['data']['post'] + }, impersonate=True)['data']['post'] if post.get('type') != 'Animated': - raise ExtractorError( + self.raise_no_formats( 'The given url does not contain a video', expected=True) From 7b4c96e0898db048259ef5fdf12ed14e3605dce3 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Thu, 10 Jul 2025 01:16:33 +0300 Subject: [PATCH 4/5] [ie/mir24.tv] Add extractor (#13651) Closes #13365 Authored by: swayll --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/mir24tv.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 yt_dlp/extractor/mir24tv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ada12b3a8..84da570b0 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1147,6 +1147,7 @@ MindsIE, ) from .minoto import MinotoIE +from .mir24tv import Mir24TvIE from .mirrativ import ( MirrativIE, MirrativUserIE, diff --git a/yt_dlp/extractor/mir24tv.py b/yt_dlp/extractor/mir24tv.py new file mode 100644 index 000000000..5832901bf --- /dev/null +++ b/yt_dlp/extractor/mir24tv.py @@ -0,0 +1,37 @@ +from .common import InfoExtractor +from ..utils import parse_qs, url_or_none +from ..utils.traversal import require, traverse_obj + + +class Mir24TvIE(InfoExtractor): + IE_NAME = 'mir24.tv' + _VALID_URL = r'https?://(?:www\.)?mir24\.tv/news/(?P[0-9]+)/[^/?#]+' + _TESTS = [{ + 'url': 'https://mir24.tv/news/16635210/dni-kultury-rossii-otkrylis-v-uzbekistane.-na-prazdnichnom-koncerte-vystupili-zvezdy-rossijskoj-estrada', + 'info_dict': { + 'id': '16635210', + 'title': 'Дни культуры России открылись в Узбекистане. На праздничном концерте выступили звезды российской эстрады', + 'ext': 'mp4', + 'thumbnail': r're:https://images\.mir24\.tv/.+\.jpg', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id, impersonate=True) + + iframe_url = self._search_regex( + r']+\bsrc=["\'](https?://mir24\.tv/players/[^"\']+)', + webpage, 'iframe URL') + + m3u8_url = traverse_obj(iframe_url, ( + {parse_qs}, 'source', -1, {self._proto_relative_url}, {url_or_none}, {require('m3u8 URL')})) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') + + return { + 'id': video_id, + 'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'formats': formats, + 'subtitles': subtitles, + } From 2aaf1aa71d174700859c9ec1a81109b78e34961c Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Thu, 10 Jul 2025 07:21:47 +0900 Subject: [PATCH 5/5] [ie/newspicks] Fix extractor (#13612) Closes #10472 Authored by: doe1080 --- yt_dlp/extractor/newspicks.py | 93 +++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/yt_dlp/extractor/newspicks.py b/yt_dlp/extractor/newspicks.py index 4a1cb0a73..5f19eed98 100644 --- a/yt_dlp/extractor/newspicks.py +++ b/yt_dlp/extractor/newspicks.py @@ -1,53 +1,72 @@ -import re - from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + clean_html, + parse_iso8601, + parse_qs, + url_or_none, +) +from ..utils.traversal import require, traverse_obj class NewsPicksIE(InfoExtractor): - _VALID_URL = r'https?://newspicks\.com/movie-series/(?P\d+)\?movieId=(?P\d+)' - + _VALID_URL = r'https?://newspicks\.com/movie-series/(?P[^?/#]+)' _TESTS = [{ - 'url': 'https://newspicks.com/movie-series/11?movieId=1813', + 'url': 'https://newspicks.com/movie-series/11/?movieId=1813', 'info_dict': { 'id': '1813', - 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】', - 'description': 'md5:09397aad46d6ded6487ff13f138acadf', - 'channel': 'HORIE ONE', - 'channel_id': '11', - 'release_date': '20220117', - 'thumbnail': r're:https://.+jpg', 'ext': 'mp4', + 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】', + 'cast': 'count:4', + 'description': 'md5:09397aad46d6ded6487ff13f138acadf', + 'duration': 2940, + 'release_date': '20220117', + 'release_timestamp': 1642424400, + 'series': 'HORIE ONE', + 'series_id': '11', + 'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)', + 'timestamp': 1642424420, + 'upload_date': '20220117', + }, + }, { + 'url': 'https://newspicks.com/movie-series/158/?movieId=3932', + 'info_dict': { + 'id': '3932', + 'ext': 'mp4', + 'title': '【検証】専門家は、KADOKAWAをどう見るか', + 'cast': 'count:3', + 'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d', + 'duration': 1320, + 'release_date': '20240622', + 'release_timestamp': 1719088080, + 'series': 'NPレポート', + 'series_id': '158', + 'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)', + 'timestamp': 1719086400, + 'upload_date': '20240622', }, }] def _real_extract(self, url): - video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id') + series_id = self._match_id(url) + video_id = traverse_obj(parse_qs(url), ('movieId', -1, {str}, {require('movie ID')})) webpage = self._download_webpage(url, video_id) - entries = self._parse_html5_media_entries( - url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls') - if not entries: - raise ExtractorError('No HTML5 media elements found') - info = entries[0] - title = self._html_search_meta('og:title', webpage, fatal=False) - description = self._html_search_meta( - ('og:description', 'twitter:title'), webpage, fatal=False) - channel = self._html_search_regex( - r'value="11".+?(.+?)\s*(\d+)年(\d+)月(\d+)日\s*', - webpage, 'release date', fatal=False, group=(1, 2, 3)) - - info.update({ + return { 'id': video_id, - 'title': title, - 'description': description, - 'channel': channel, - 'channel_id': channel_id, - 'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None, - }) - return info + 'formats': formats, + 'series': traverse_obj(fragment, ('series', 'title', {str})), + 'series_id': series_id, + 'subtitles': subtitles, + **traverse_obj(fragment, ('movie', { + 'title': ('title', {str}), + 'cast': ('relatedUsers', ..., 'displayName', {str}, filter, all, filter), + 'description': ('explanation', {clean_html}), + 'release_timestamp': ('onAirStartDate', {parse_iso8601}), + 'thumbnail': (('image', 'coverImageUrl'), {url_or_none}, any), + 'timestamp': ('published', {parse_iso8601}), + })), + }