From ff6f94041aeee19c5559e1c1cd693960a1c1dd14 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 25 Jun 2025 12:10:00 -0500 Subject: [PATCH 01/11] [ie/youtube] Improve player context payloads (#13539) Closes #12563 Authored by: bashonly --- yt_dlp/extractor/youtube/_video.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 55ebdce1ba..081ed6ac81 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -2820,6 +2820,10 @@ def _generate_player_context(cls, sts=None): context['signatureTimestamp'] = sts return { 'playbackContext': { + 'adPlaybackContext': { + 'pyv': True, + 'adType': 'AD_TYPE_INSTREAM', + }, 'contentPlaybackContext': context, }, **cls._get_checkok_params(), From 8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 25 Jun 2025 13:32:57 -0500 Subject: [PATCH 02/11] [ie/youtube] Check any `ios` m3u8 formats prior to download (#13524) Closes #13511 Authored by: bashonly --- yt_dlp/YoutubeDL.py | 2 ++ yt_dlp/extractor/youtube/_video.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 309489672e..67ca90349f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2219,6 +2219,7 @@ def _check_formats(self, formats): self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: + f.pop('__needs_testing', None) yield f else: self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) @@ -3963,6 +3964,7 @@ def simplified_codec(f, field): self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None), + self._format_out('Untested', self.Styles.WARNING) if f.get('__needs_testing') else None, format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), delim=', '), delim=' '), diff --git a/yt_dlp/extractor/youtube/_video.py b/yt_dlp/extractor/youtube/_video.py index 081ed6ac81..5ccc33fa33 100644 --- a/yt_dlp/extractor/youtube/_video.py +++ b/yt_dlp/extractor/youtube/_video.py @@ -3556,6 +3556,11 @@ def process_manifest_format(f, proto, client_name, itag, po_token): f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ') f['source_preference'] -= 20 + # XXX: Check if IOS HLS formats are affected by player PO token enforcement; temporary + # See https://github.com/yt-dlp/yt-dlp/issues/13511 + if proto == 'hls' and client_name == 'ios': + f['__needs_testing'] = True + itags[itag].add(key) if itag and all_formats: @@ -4284,6 +4289,7 @@ def process_language(container, base_url, lang_code, sub_name, client_name, quer if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): # Newly uploaded videos' HLS formats are potentially problematic and need to be checked + # XXX: This is redundant for as long as we are already checking all IOS HLS formats upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc) if upload_datetime >= datetime_from_str('today-2days'): for fmt in info['formats']: From 5b559d0072b7164daf06bacdc41c6f11283452c8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 25 Jun 2025 14:02:37 -0500 Subject: [PATCH 03/11] [ie/sproutvideo] Fix extractor (#13544) Closes #13540 Authored by: bashonly --- yt_dlp/extractor/sproutvideo.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py index c0923594e5..764c78f1e5 100644 --- a/yt_dlp/extractor/sproutvideo.py +++ b/yt_dlp/extractor/sproutvideo.py @@ -41,6 +41,7 @@ class SproutVideoIE(InfoExtractor): 'duration': 703, 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', }, + 'skip': 'Account Disabled', }, { # http formats 'sd' and 'hd' are available 'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90', @@ -97,11 +98,21 @@ def _extract_embed_urls(cls, url, webpage): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) + webpage = self._download_webpage(url, video_id, headers={ + **traverse_obj(smuggled_data, {'Referer': 'referer'}), + # yt-dlp's default Chrome user-agents are too old + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:140.0) Gecko/20100101 Firefox/140.0', + }) data = self._search_json( - r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', - end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode()) + r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, + contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', + transform_source=lambda x: base64.b64decode(x).decode()) + + # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e] + # e.g. if the user-agent we used with the webpage request is too old + video_uid = data['videoUid'] + if video_id != video_uid: + raise ExtractorError(f'{self.IE_NAME} sent the wrong video data ({video_uid})') formats, subtitles = [], {} headers = { From c57412d1f9cf0124adc972a47858ac42b740c61d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A6sim?= Date: Wed, 25 Jun 2025 22:24:20 +0300 Subject: [PATCH 04/11] [ie/lsm] Fix extractors (#13126) Closes #12298 Authored by: Caesim404 --- yt_dlp/extractor/cloudycdn.py | 26 +++++++++++++++------ yt_dlp/extractor/lsm.py | 43 ++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 20 deletions(-) diff --git a/yt_dlp/extractor/cloudycdn.py b/yt_dlp/extractor/cloudycdn.py index 6e757d79ee..a9a5392744 100644 --- a/yt_dlp/extractor/cloudycdn.py +++ b/yt_dlp/extractor/cloudycdn.py @@ -11,7 +11,7 @@ class CloudyCDNIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//embed\.cloudycdn\.services/(?P[^/?#]+)/media/(?P[\w-]+)' + _VALID_URL = r'(?:https?:)?//embed\.(?Pcloudycdn\.services|backscreen\.com)/(?P[^/?#]+)/media/(?P[\w-]+)' _EMBED_REGEX = [rf']+\bsrc=[\'"](?P{_VALID_URL})'] _TESTS = [{ 'url': 'https://embed.cloudycdn.services/ltv/media/46k_d23-6000-105?', @@ -23,7 +23,7 @@ class CloudyCDNIE(InfoExtractor): 'duration': 1442, 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', - 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg', }, }, { 'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1', @@ -33,7 +33,7 @@ class CloudyCDNIE(InfoExtractor): 'ext': 'mp4', 'title': 'LV-8-5-1', 'timestamp': 1669767167, - 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/488306/placeholder1679423604.jpg', 'duration': 1205, 'upload_date': '20221130', }, @@ -48,9 +48,21 @@ class CloudyCDNIE(InfoExtractor): 'duration': 1673, 'title': 'D24-6000-074-cetstud', 'timestamp': 1718902233, - 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/788392/placeholder1718903938.jpg', }, 'params': {'format': 'bv'}, + }, { + 'url': 'https://embed.backscreen.com/ltv/media/32j_z25-0600-127?', + 'md5': '9b6fa09ac1a4de53d4f42b94affc3b42', + 'info_dict': { + 'id': '32j_z25-0600-127', + 'ext': 'mp4', + 'title': 'Z25-0600-127-DZ', + 'duration': 1906, + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/977427/placeholder1746633646.jpg', + 'timestamp': 1746632402, + 'upload_date': '20250507', + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/', @@ -60,17 +72,17 @@ class CloudyCDNIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20230223', 'duration': 629, - 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00120/assets/media/518407/placeholder1678748124.jpg', 'timestamp': 1677181513, 'title': 'LIB-2', }, }] def _real_extract(self, url): - site_id, video_id = self._match_valid_url(url).group('site_id', 'id') + domain, site_id, video_id = self._match_valid_url(url).group('domain', 'site_id', 'id') data = self._download_json( - f'https://player.cloudycdn.services/player/{site_id}/media/{video_id}/', + f'https://player.{domain}/player/{site_id}/media/{video_id}/', video_id, data=urlencode_postdata({ 'version': '6.4.0', 'referer': url, diff --git a/yt_dlp/extractor/lsm.py b/yt_dlp/extractor/lsm.py index 56c06d7458..93ea2e35da 100644 --- a/yt_dlp/extractor/lsm.py +++ b/yt_dlp/extractor/lsm.py @@ -167,11 +167,11 @@ class LSMLTVEmbedIE(InfoExtractor): 'duration': 1442, 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', - 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', + 'thumbnail': 'https://store.bstrm.net/tmsp00060/assets/media/660858/placeholder1700589200.jpg', }, }, { 'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=', - 'md5': 'a1711e190fe680fdb68fd8413b378e87', + 'md5': 'f236cef2fd5953612754e4e66be51e7a', 'info_dict': { 'id': 'wUnFArIPDSY', 'ext': 'mp4', @@ -198,6 +198,8 @@ class LSMLTVEmbedIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/@LTV16plus', 'like_count': int, 'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5', + 'media_type': 'livestream', + 'timestamp': 1652550741, }, }] @@ -208,7 +210,7 @@ def _real_extract(self, url): r'window\.ltvEmbedPayload\s*=', webpage, 'embed json', video_id) embed_type = traverse_obj(data, ('source', 'name', {str})) - if embed_type == 'telia': + if embed_type in ('backscreen', 'telia'): # 'telia' only for backwards compat ie_key = 'CloudyCDN' embed_url = traverse_obj(data, ('source', 'embed_url', {url_or_none})) elif embed_type == 'youtube': @@ -226,9 +228,9 @@ def _real_extract(self, url): class LSMReplayIE(InfoExtractor): - _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:ieraksts|statja)/[^/?#]+/(?P\d+)' + _VALID_URL = r'https?://replay\.lsm\.lv/[^/?#]+/(?:skaties/|klausies/)?(?:ieraksts|statja)/[^/?#]+/(?P\d+)' _TESTS = [{ - 'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', + 'url': 'https://replay.lsm.lv/lv/skaties/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', 'md5': '64f72a360ca530d5ed89c77646c9eee5', 'info_dict': { 'id': '46k_d23-6000-105', @@ -241,20 +243,23 @@ class LSMReplayIE(InfoExtractor): 'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg', }, }, { - 'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam', - 'md5': '719b33875cd1429846eeeaeec6df2830', + 'url': 'https://replay.lsm.lv/lv/klausies/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam', + 'md5': '84feb80fd7e6ec07744726a9f01cda4d', 'info_dict': { - 'id': 'a342781', - 'ext': 'mp3', + 'id': '183522', + 'ext': 'm4a', 'duration': 1823, 'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām', 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/large_fd4675ac.jpg', 'upload_date': '20231102', - 'timestamp': 1698921060, + 'timestamp': 1698913860, 'description': 'md5:7bac3b2dd41e44325032943251c357b1', }, }, { - 'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', + 'url': 'https://replay.lsm.lv/ru/skaties/statja/ltv/355067/v-kengaragse-nacalas-ukladka-relsov', + 'only_matching': True, + }, { + 'url': 'https://replay.lsm.lv/lv/ieraksts/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', 'only_matching': True, }] @@ -267,12 +272,24 @@ def _real_extract(self, url): data = self._search_nuxt_data( self._fix_nuxt_data(webpage), video_id, context_name='__REPLAY__') + playback_type = data['playback']['type'] + + if playback_type == 'playable_audio_lr': + playback_data = { + 'formats': self._extract_m3u8_formats(data['playback']['service']['hls_url'], video_id), + } + elif playback_type == 'embed': + playback_data = { + '_type': 'url_transparent', + 'url': data['playback']['service']['url'], + } + else: + raise ExtractorError(f'Unsupported playback type "{playback_type}"') return { - '_type': 'url_transparent', 'id': video_id, + **playback_data, **traverse_obj(data, { - 'url': ('playback', 'service', 'url', {url_or_none}), 'title': ('mediaItem', 'title'), 'description': ('mediaItem', ('lead', 'body')), 'duration': ('mediaItem', 'duration', {int_or_none}), From a4ce4327c9836691d3b6b00e44a90b6741601ed8 Mon Sep 17 00:00:00 2001 From: D Trombett Date: Thu, 26 Jun 2025 01:24:39 +0200 Subject: [PATCH 05/11] [ie/SportDeutschland] Fix extractor (#13519) Closes #13518 Authored by: DTrombett --- yt_dlp/extractor/sportdeutschland.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py index 2d6acb8768..8349d96045 100644 --- a/yt_dlp/extractor/sportdeutschland.py +++ b/yt_dlp/extractor/sportdeutschland.py @@ -25,6 +25,7 @@ class SportDeutschlandIE(InfoExtractor): 'upload_date': '20230114', 'timestamp': 1673733618, }, + 'skip': 'not found', }, { 'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0', 'info_dict': { @@ -41,6 +42,7 @@ class SportDeutschlandIE(InfoExtractor): 'upload_date': '20220309', 'timestamp': 1646860727.0, }, + 'skip': 'not found', }, { 'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023', 'info_dict': { @@ -68,6 +70,7 @@ class SportDeutschlandIE(InfoExtractor): 'live_status': 'was_live', }, }], + 'skip': 'not found', }, { 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1', 'info_dict': { @@ -82,13 +85,30 @@ class SportDeutschlandIE(InfoExtractor): 'live_status': 'is_live', }, 'skip': 'live', + }, { + 'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', + 'md5': '35c11a19395c938cdd076b93bda54cde', + 'info_dict': { + 'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a', + 'ext': 'mp4', + 'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates', + 'display_id': 'rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', + 'channel': 'Rostock Griffins', + 'channel_url': 'https://sportdeutschland.tv/rostock-griffins', + 'live_status': 'was_live', + 'description': 'md5:60cb00067e55dafa27b0933a43d72862', + 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', + 'timestamp': 1749913117, + 'upload_date': '20250614', + }, }] def _process_video(self, asset_id, video): is_live = video['type'] == 'mux_live' token = self._download_json( - f'https://api.sportdeutschland.tv/api/frontend/asset-token/{asset_id}', - video['id'], query={'type': video['type'], 'playback_id': video['src']})['token'] + f'https://api.sportdeutschland.tv/api/web/personal/asset-token/{asset_id}', + video['id'], query={'type': video['type'], 'playback_id': video['src']}, + headers={'Referer': 'https://sportdeutschland.tv/'})['token'] formats, subtitles = self._extract_m3u8_formats_and_subtitles( f'https://stream.mux.com/{video["src"]}.m3u8?token={token}', video['id'], live=is_live) From 3bd30291601c47fa4a257983473884103ecab0c7 Mon Sep 17 00:00:00 2001 From: D Trombett Date: Thu, 26 Jun 2025 01:26:23 +0200 Subject: [PATCH 06/11] [ie/tv8.it] Support slugless URLs (#13478) Authored by: DTrombett --- yt_dlp/extractor/skyit.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py index 0013d2621e..fe45be7740 100644 --- a/yt_dlp/extractor/skyit.py +++ b/yt_dlp/extractor/skyit.py @@ -213,7 +213,7 @@ class CieloTVItIE(SkyItIE): # XXX: Do not subclass from concrete IE class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE IE_NAME = 'tv8.it' - _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/[0-9a-z-]+-(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?:show)?video/(?:[0-9a-z-]+-)?(?P\d+)' _TESTS = [{ 'url': 'https://www.tv8.it/video/ogni-mattina-ucciso-asino-di-andrea-lo-cicero-630529', 'md5': '9ab906a3f75ea342ed928442f9dabd21', @@ -227,6 +227,19 @@ class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE 'thumbnail': 'https://videoplatform.sky.it/still/2020/11/18/1605717753954_ogni-mattina-ucciso-asino-di-andrea-lo-cicero_videostill_1.jpg', }, 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.tv8.it/video/964361', + 'md5': '1e58e807154658a16edc29e45be38107', + 'info_dict': { + 'id': '964361', + 'ext': 'mp4', + 'title': 'GialappaShow - S.4 Ep.2', + 'description': 'md5:60bb4ff5af18bbeeaedabc1de5f9e1e2', + 'duration': 8030, + 'thumbnail': 'https://videoplatform.sky.it/captures/494/2024/11/06/964361/964361_1730888412914_thumb_494.jpg', + 'timestamp': 1730821499, + 'upload_date': '20241105', + }, }] _DOMAIN = 'mtv8' From 2600849badb0d08c55b58dcc77a13af6ba423da6 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Thu, 26 Jun 2025 08:37:49 +0900 Subject: [PATCH 07/11] [ie/huya:live] Fix extractor (#13520) Authored by: doe1080 --- yt_dlp/extractor/huya.py | 86 +++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 27 deletions(-) diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index f79e032e4a..65099b0628 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -7,12 +7,13 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, + clean_html, int_or_none, parse_duration, str_or_none, try_get, unescapeHTML, - unified_strdate, + update_url, update_url_query, url_or_none, ) @@ -22,8 +23,8 @@ class HuyaLiveIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P[^/#?&]+)(?:\D|$)' IE_NAME = 'huya:live' - IE_DESC = 'huya.com' - TESTS = [{ + IE_DESC = '虎牙直播' + _TESTS = [{ 'url': 'https://www.huya.com/572329', 'info_dict': { 'id': '572329', @@ -149,63 +150,94 @@ class HuyaVideoIE(InfoExtractor): 'id': '1002412640', 'ext': 'mp4', 'title': '8月3日', - 'thumbnail': r're:https?://.*\.jpg', - 'duration': 14, + 'categories': ['主机游戏'], + 'duration': 14.0, 'uploader': '虎牙-ATS欧卡车队青木', 'uploader_id': '1564376151', 'upload_date': '20240803', 'view_count': int, 'comment_count': int, 'like_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1722675433, }, - }, - { + }, { 'url': 'https://www.huya.com/video/play/556054543.html', 'info_dict': { 'id': '556054543', 'ext': 'mp4', 'title': '我不挑事 也不怕事', - 'thumbnail': r're:https?://.*\.jpg', - 'duration': 1864, + 'categories': ['英雄联盟'], + 'description': 'md5:58184869687d18ce62dc7b4b2ad21201', + 'duration': 1864.0, 'uploader': '卡尔', 'uploader_id': '367138632', 'upload_date': '20210811', 'view_count': int, 'comment_count': int, 'like_count': int, + 'tags': 'count:4', + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1628675950, + }, + }, { + # Only m3u8 available + 'url': 'https://www.huya.com/video/play/1063345618.html', + 'info_dict': { + 'id': '1063345618', + 'ext': 'mp4', + 'title': '峡谷第一中!黑铁上钻石顶级教学对抗elo', + 'categories': ['英雄联盟'], + 'comment_count': int, + 'duration': 21603.0, + 'like_count': int, + 'thumbnail': r're:https?://.+\.jpg', + 'timestamp': 1749668803, + 'upload_date': '20250611', + 'uploader': '北枫CC', + 'uploader_id': '2183525275', + 'view_count': int, }, }] def _real_extract(self, url: str): video_id = self._match_id(url) - video_data = self._download_json( - 'https://liveapi.huya.com/moment/getMomentContent', video_id, - query={'videoId': video_id})['data']['moment']['videoInfo'] + moment = self._download_json( + 'https://liveapi.huya.com/moment/getMomentContent', + video_id, query={'videoId': video_id})['data']['moment'] formats = [] - for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))): - formats.append({ - 'url': definition['url'], - **traverse_obj(definition, { - 'format_id': ('defName', {str}), - 'width': ('width', {int_or_none}), - 'height': ('height', {int_or_none}), + for definition in traverse_obj(moment, ( + 'videoInfo', 'definitions', lambda _, v: url_or_none(v['m3u8']), + )): + fmts = self._extract_m3u8_formats(definition['m3u8'], video_id, 'mp4', fatal=False) + for fmt in fmts: + fmt.update(**traverse_obj(definition, { 'filesize': ('size', {int_or_none}), - }), - }) + 'format_id': ('defName', {str}), + 'height': ('height', {int_or_none}), + 'quality': ('definition', {int_or_none}), + 'width': ('width', {int_or_none}), + })) + formats.extend(fmts) return { 'id': video_id, 'formats': formats, - **traverse_obj(video_data, { + **traverse_obj(moment, { + 'comment_count': ('commentCount', {int_or_none}), + 'description': ('content', {clean_html}, filter), + 'like_count': ('favorCount', {int_or_none}), + 'timestamp': ('cTime', {int_or_none}), + }), + **traverse_obj(moment, ('videoInfo', { 'title': ('videoTitle', {str}), - 'thumbnail': ('videoCover', {url_or_none}), + 'categories': ('category', {str}, filter, all, filter), 'duration': ('videoDuration', {parse_duration}), + 'tags': ('tags', ..., {str}, filter, all, filter), + 'thumbnail': (('videoBigCover', 'videoCover'), {url_or_none}, {update_url(query=None)}, any), 'uploader': ('nickName', {str}), 'uploader_id': ('uid', {str_or_none}), - 'upload_date': ('videoUploadTime', {unified_strdate}), 'view_count': ('videoPlayNum', {int_or_none}), - 'comment_count': ('videoCommentNum', {int_or_none}), - 'like_count': ('favorCount', {int_or_none}), - }), + })), } From 1838a1ce5d4ade80770ba9162eaffc9a1607dc70 Mon Sep 17 00:00:00 2001 From: Anton Larionov <11796525+anlar@users.noreply.github.com> Date: Thu, 26 Jun 2025 01:51:20 +0200 Subject: [PATCH 08/11] [ie/mave] Add extractor (#13380) Authored by: anlar --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/mave.py | 107 ++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 yt_dlp/extractor/mave.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 34c98b537d..fbbd9571f7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1107,6 +1107,7 @@ from .massengeschmacktv import MassengeschmackTVIE from .masters import MastersIE from .matchtv import MatchTVIE +from .mave import MaveIE from .mbn import MBNIE from .mdr import MDRIE from .medaltv import MedalTVIE diff --git a/yt_dlp/extractor/mave.py b/yt_dlp/extractor/mave.py new file mode 100644 index 0000000000..86d8d8b7c4 --- /dev/null +++ b/yt_dlp/extractor/mave.py @@ -0,0 +1,107 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + parse_iso8601, + urljoin, +) +from ..utils.traversal import require, traverse_obj + + +class MaveIE(InfoExtractor): + _VALID_URL = r'https?://(?P[\w-]+)\.mave\.digital/(?Pep-\d+)' + _TESTS = [{ + 'url': 'https://ochenlichnoe.mave.digital/ep-25', + 'md5': 'aa3e513ef588b4366df1520657cbc10c', + 'info_dict': { + 'id': '4035f587-914b-44b6-aa5a-d76685ad9bc2', + 'ext': 'mp3', + 'display_id': 'ochenlichnoe-ep-25', + 'title': 'Между мной и миром: психология самооценки', + 'description': 'md5:4b7463baaccb6982f326bce5c700382a', + 'uploader': 'Самарский университет', + 'channel': 'Очень личное', + 'channel_id': 'ochenlichnoe', + 'channel_url': 'https://ochenlichnoe.mave.digital/', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'duration': 3744, + 'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg', + 'series': 'Очень личное', + 'series_id': '2e0c3749-6df2-4946-82f4-50691419c065', + 'season': 'Season 3', + 'season_number': 3, + 'episode': 'Episode 3', + 'episode_number': 3, + 'timestamp': 1747817300, + 'upload_date': '20250521', + }, + }, { + 'url': 'https://budem.mave.digital/ep-12', + 'md5': 'e1ce2780fcdb6f17821aa3ca3e8c919f', + 'info_dict': { + 'id': '41898bb5-ff57-4797-9236-37a8e537aa21', + 'ext': 'mp3', + 'display_id': 'budem-ep-12', + 'title': 'Екатерина Михайлова: "Горе от ума" не про женщин написана', + 'description': 'md5:fa3bdd59ee829dfaf16e3efcb13f1d19', + 'uploader': 'Полина Цветкова+Евгения Акопова', + 'channel': 'Все там будем', + 'channel_id': 'budem', + 'channel_url': 'https://budem.mave.digital/', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'age_limit': 18, + 'duration': 3664, + 'thumbnail': r're:https://.+/storage/podcasts/.+\.jpg', + 'series': 'Все там будем', + 'series_id': 'fe9347bf-c009-4ebd-87e8-b06f2f324746', + 'season': 'Season 2', + 'season_number': 2, + 'episode': 'Episode 5', + 'episode_number': 5, + 'timestamp': 1735538400, + 'upload_date': '20241230', + }, + }] + _API_BASE_URL = 'https://api.mave.digital/' + + def _real_extract(self, url): + channel_id, slug = self._match_valid_url(url).group('channel', 'id') + display_id = f'{channel_id}-{slug}' + webpage = self._download_webpage(url, display_id) + data = traverse_obj( + self._search_nuxt_json(webpage, display_id), + ('data', lambda _, v: v['activeEpisodeData'], any, {require('podcast data')})) + + return { + 'display_id': display_id, + 'channel_id': channel_id, + 'channel_url': f'https://{channel_id}.mave.digital/', + 'vcodec': 'none', + 'thumbnail': re.sub(r'_\d+(?=\.(?:jpg|png))', '', self._og_search_thumbnail(webpage, default='')) or None, + **traverse_obj(data, ('activeEpisodeData', { + 'url': ('audio', {urljoin(self._API_BASE_URL)}), + 'id': ('id', {str}), + 'title': ('title', {str}), + 'description': ('description', {clean_html}), + 'duration': ('duration', {int_or_none}), + 'season_number': ('season', {int_or_none}), + 'episode_number': ('number', {int_or_none}), + 'view_count': ('listenings', {int_or_none}), + 'like_count': ('reactions', lambda _, v: v['type'] == 'like', 'count', {int_or_none}, any), + 'dislike_count': ('reactions', lambda _, v: v['type'] == 'dislike', 'count', {int_or_none}, any), + 'age_limit': ('is_explicit', {bool}, {lambda x: 18 if x else None}), + 'timestamp': ('publish_date', {parse_iso8601}), + })), + **traverse_obj(data, ('podcast', 'podcast', { + 'series_id': ('id', {str}), + 'series': ('title', {str}), + 'channel': ('title', {str}), + 'uploader': ('author', {str}), + })), + } From eff0759705ffcb0c9b9bb59433e9b3acbbd36ddc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 25 Jun 2025 23:53:38 +0000 Subject: [PATCH 09/11] Release 2025.06.25 Created by: bashonly :ci skip all --- CONTRIBUTORS | 2 ++ Changelog.md | 19 +++++++++++++++++++ supportedsites.md | 5 +++-- yt_dlp/version.py | 6 +++--- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index ea391bc15a..00d4d15aab 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -779,3 +779,5 @@ brian6932 iednod55 maxbin123 nullpos +anlar +eason1478 diff --git a/Changelog.md b/Changelog.md index dd95abc864..d37852658f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,25 @@ # Changelog # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2025.06.25 + +#### Extractor changes +- [Add `_search_nuxt_json` helper](https://github.com/yt-dlp/yt-dlp/commit/51887484e46ab6015c041cb1ab626a55f25a03bd) ([#13386](https://github.com/yt-dlp/yt-dlp/issues/13386)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- **brightcove**: new: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e6bd4a3da295b760ab20b39c18ce8934d312c2bf) ([#13461](https://github.com/yt-dlp/yt-dlp/issues/13461)) by [doe1080](https://github.com/doe1080) +- **huya**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2600849badb0d08c55b58dcc77a13af6ba423da6) ([#13520](https://github.com/yt-dlp/yt-dlp/issues/13520)) by [doe1080](https://github.com/doe1080) +- **hypergryph**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1722c55400ff30bb5aee5dd7a262f0b7e9ce2f0e) ([#13415](https://github.com/yt-dlp/yt-dlp/issues/13415)) by [doe1080](https://github.com/doe1080), [eason1478](https://github.com/eason1478) +- **lsm**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c57412d1f9cf0124adc972a47858ac42b740c61d) ([#13126](https://github.com/yt-dlp/yt-dlp/issues/13126)) by [Caesim404](https://github.com/Caesim404) +- **mave**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1838a1ce5d4ade80770ba9162eaffc9a1607dc70) ([#13380](https://github.com/yt-dlp/yt-dlp/issues/13380)) by [anlar](https://github.com/anlar) +- **sportdeutschland**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ce4327c9836691d3b6b00e44a90b6741601ed8) ([#13519](https://github.com/yt-dlp/yt-dlp/issues/13519)) by [DTrombett](https://github.com/DTrombett) +- **sproutvideo**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5b559d0072b7164daf06bacdc41c6f11283452c8) ([#13544](https://github.com/yt-dlp/yt-dlp/issues/13544)) by [bashonly](https://github.com/bashonly) +- **tv8.it**: [Support slugless URLs](https://github.com/yt-dlp/yt-dlp/commit/3bd30291601c47fa4a257983473884103ecab0c7) ([#13478](https://github.com/yt-dlp/yt-dlp/issues/13478)) by [DTrombett](https://github.com/DTrombett) +- **youtube** + - [Check any `ios` m3u8 formats prior to download](https://github.com/yt-dlp/yt-dlp/commit/8f94b76cbf7bbd9dfd8762c63cdea04f90f1297f) ([#13524](https://github.com/yt-dlp/yt-dlp/issues/13524)) by [bashonly](https://github.com/bashonly) + - [Improve player context payloads](https://github.com/yt-dlp/yt-dlp/commit/ff6f94041aeee19c5559e1c1cd693960a1c1dd14) ([#13539](https://github.com/yt-dlp/yt-dlp/issues/13539)) by [bashonly](https://github.com/bashonly) + +#### Misc. changes +- **test**: `traversal`: [Fix morsel tests for Python 3.14](https://github.com/yt-dlp/yt-dlp/commit/73bf10211668e4a59ccafd790e06ee82d9fea9ea) ([#13471](https://github.com/yt-dlp/yt-dlp/issues/13471)) by [Grub4K](https://github.com/Grub4K) + ### 2025.06.09 #### Extractor changes diff --git a/supportedsites.md b/supportedsites.md index 1fe381603e..b3fe011739 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -590,7 +590,7 @@ # Supported sites - **Hungama** - **HungamaAlbumPlaylist** - **HungamaSong** - - **huya:live**: huya.com + - **huya:live**: 虎牙直播 - **huya:video**: 虎牙视频 - **Hypem** - **Hytale** @@ -776,6 +776,7 @@ # Supported sites - **massengeschmack.tv** - **Masters** - **MatchTV** + - **Mave** - **MBN**: mbn.co.kr (매일방송) - **MDR**: MDR.DE - **MedalTV** @@ -832,7 +833,7 @@ # Supported sites - **Mojevideo**: mojevideo.sk - **Mojvideo** - **Monstercat** - - **MonsterSirenHypergryphMusic** + - **monstersiren**: 塞壬唱片 - **Motherless** - **MotherlessGallery** - **MotherlessGroup** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index b97c014999..020a0299c0 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2025.06.09' +__version__ = '2025.06.25' -RELEASE_GIT_HEAD = '339614a173c74b42d63e858c446a9cae262a13af' +RELEASE_GIT_HEAD = '1838a1ce5d4ade80770ba9162eaffc9a1607dc70' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2025.06.09' +_pkg_version = '2025.06.25' From 99b85ac102047446e6adf5b62bfc3c8d80b53778 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Thu, 26 Jun 2025 13:42:41 -0400 Subject: [PATCH 10/11] [ie/BilibiliSpaceVideo] Extract hidden-mode collections as playlists (#13533) Closes #13435 Authored by: c-basalt --- yt_dlp/extractor/bilibili.py | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 43c9000ce8..0f5c2c97e4 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1226,6 +1226,26 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): 'id': '313580179', }, 'playlist_mincount': 92, + }, { + # Hidden-mode collection + 'url': 'https://space.bilibili.com/3669403/video', + 'info_dict': { + 'id': '3669403', + }, + 'playlist': [{ + 'info_dict': { + '_type': 'playlist', + 'id': '3669403_3958082', + 'title': '合集·直播回放', + 'description': '', + 'uploader': '月路Yuel', + 'uploader_id': '3669403', + 'timestamp': int, + 'upload_date': str, + 'thumbnail': str, + }, + }], + 'params': {'playlist_items': '7'}, }] def _real_extract(self, url): @@ -1282,8 +1302,14 @@ def get_metadata(page_data): } def get_entries(page_data): - for entry in traverse_obj(page_data, ('list', 'vlist')) or []: - yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid']) + for entry in traverse_obj(page_data, ('list', 'vlist', ..., {dict})): + if traverse_obj(entry, ('meta', 'attribute')) == 156: + # hidden-mode collection doesn't show its videos in uploads; extract as playlist instead + yield self.url_result( + f'https://space.bilibili.com/{entry["mid"]}/lists/{entry["meta"]["id"]}?type=season', + BilibiliCollectionListIE, f'{entry["mid"]}_{entry["meta"]["id"]}') + else: + yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid']) metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries) return self.playlist_result(paged_list, playlist_id) From 06c1a8cdffe14050206683253726875144192ef5 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Fri, 27 Jun 2025 02:45:03 +0900 Subject: [PATCH 11/11] [ie/niconico:live] Fix extractor and downloader (#13158) Authored by: doe1080 --- yt_dlp/downloader/niconico.py | 53 +++++++++++------------ yt_dlp/extractor/common.py | 3 ++ yt_dlp/extractor/niconico.py | 79 +++++++++++++++++------------------ 3 files changed, 66 insertions(+), 69 deletions(-) diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 33cf15df88..35a12b5554 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -5,47 +5,46 @@ from .common import FileDownloader from .external import FFmpegFD from ..networking import Request -from ..utils import DownloadError, str_or_none, try_get +from ..networking.websocket import WebSocketResponse +from ..utils import DownloadError, str_or_none, truncate_string +from ..utils.traversal import traverse_obj class NiconicoLiveFD(FileDownloader): """ Downloads niconico live without being stopped """ def real_download(self, filename, info_dict): - video_id = info_dict['video_id'] - ws_url = info_dict['url'] - ws_extractor = info_dict['ws'] - ws_origin_host = info_dict['origin'] - live_quality = info_dict.get('live_quality', 'high') - live_latency = info_dict.get('live_latency', 'high') + video_id = info_dict['id'] + opts = info_dict['downloader_options'] + quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url'] dl = FFmpegFD(self.ydl, self.params or {}) new_info_dict = info_dict.copy() - new_info_dict.update({ - 'protocol': 'm3u8', - }) + new_info_dict['protocol'] = 'm3u8' def communicate_ws(reconnect): - if reconnect: - ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'})) + # Support --load-info-json as if it is a reconnect attempt + if reconnect or not isinstance(ws_extractor, WebSocketResponse): + ws = self.ydl.urlopen(Request( + ws_url, headers={'Origin': 'https://live.nicovideo.jp'})) if self.ydl.params.get('verbose', False): - self.to_screen('[debug] Sending startWatching request') + self.write_debug('Sending startWatching request') ws.send(json.dumps({ - 'type': 'startWatching', 'data': { + 'reconnect': True, + 'room': { + 'commentable': True, + 'protocol': 'webSocket', + }, 'stream': { - 'quality': live_quality, - 'protocol': 'hls+fmp4', - 'latency': live_latency, 'accessRightMethod': 'single_cookie', 'chasePlay': False, + 'latency': 'high', + 'protocol': 'hls', + 'quality': quality, }, - 'room': { - 'protocol': 'webSocket', - 'commentable': True, - }, - 'reconnect': True, }, + 'type': 'startWatching', })) else: ws = ws_extractor @@ -58,7 +57,6 @@ def communicate_ws(reconnect): if not data or not isinstance(data, dict): continue if data.get('type') == 'ping': - # pong back ws.send(r'{"type":"pong"}') ws.send(r'{"type":"keepSeat"}') elif data.get('type') == 'disconnect': @@ -66,12 +64,10 @@ def communicate_ws(reconnect): return True elif data.get('type') == 'error': self.write_debug(data) - message = try_get(data, lambda x: x['body']['code'], str) or recv + message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv) return DownloadError(message) elif self.ydl.params.get('verbose', False): - if len(recv) > 100: - recv = recv[:100] + '...' - self.to_screen(f'[debug] Server said: {recv}') + self.write_debug(f'Server response: {truncate_string(recv, 100)}') def ws_main(): reconnect = False @@ -81,7 +77,8 @@ def ws_main(): if ret is True: return except BaseException as e: - self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) + self.to_screen( + f'[niconico:live] {video_id}: Connection error occured, reconnecting after 10 seconds: {e}') time.sleep(10) continue finally: diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 6058f66aea..32b4680b73 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -263,6 +263,9 @@ class InfoExtractor: * http_chunk_size Chunk size for HTTP downloads * ffmpeg_args Extra arguments for ffmpeg downloader (input) * ffmpeg_args_out Extra arguments for ffmpeg downloader (output) + * ws (NiconicoLiveFD only) WebSocketResponse + * ws_url (NiconicoLiveFD only) Websockets URL + * max_quality (NiconicoLiveFD only) Max stream quality string * is_dash_periods Whether the format is a result of merging multiple DASH periods. RTMP formats can also have the additional fields: page_url, diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 0d0f7ceef0..a20e570e64 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -4,16 +4,15 @@ import json import re import time -import urllib.parse from .common import InfoExtractor, SearchInfoExtractor -from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, OnDemandPagedList, clean_html, determine_ext, + extract_attributes, float_or_none, int_or_none, parse_bitrate, @@ -22,9 +21,8 @@ parse_qs, parse_resolution, qualities, - remove_start, str_or_none, - unescapeHTML, + truncate_string, unified_timestamp, update_url_query, url_basename, @@ -32,7 +30,11 @@ urlencode_postdata, urljoin, ) -from ..utils.traversal import find_element, require, traverse_obj +from ..utils.traversal import ( + find_element, + require, + traverse_obj, +) class NiconicoBaseIE(InfoExtractor): @@ -806,41 +808,39 @@ class NiconicoLiveIE(NiconicoBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id) + webpage = self._download_webpage(url, video_id, expected_status=404) + if err_msg := traverse_obj(webpage, ({find_element(cls='message')}, {clean_html})): + raise ExtractorError(err_msg, expected=True) - embedded_data = self._parse_json(unescapeHTML(self._search_regex( - r' 100: - recv = recv[:100] + '...' - self.write_debug(f'Server said: {recv}') + self.write_debug(f'Server response: {truncate_string(recv, 100)}') title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta( ('og:title', 'twitter:title'), webpage, 'live title', fatal=False) - raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {} + raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail', {dict})) or {} thumbnails = [] for name, value in raw_thumbs.items(): if not isinstance(value, dict): @@ -897,31 +895,30 @@ def _real_extract(self, url): cookie['domain'], cookie['name'], cookie['value'], expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure']) - fmt_common = { - 'live_latency': 'high', - 'origin': hostname, - 'protocol': 'niconico_live', - 'video_id': video_id, - 'ws': ws, - } q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr' a_map = {96: 'audio_low', 192: 'audio_high'} formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True) for fmt in formats: + fmt['protocol'] = 'niconico_live' if fmt.get('acodec') == 'none': fmt['format_id'] = next(q_iter, fmt['format_id']) elif fmt.get('vcodec') == 'none': abr = parse_bitrate(fmt['url'].lower()) fmt.update({ 'abr': abr, + 'acodec': 'mp4a.40.2', 'format_id': a_map.get(abr, fmt['format_id']), }) - fmt.update(fmt_common) return { 'id': video_id, 'title': title, + 'downloader_options': { + 'max_quality': traverse_obj(embedded_data, ('program', 'stream', 'maxQuality', {str})) or 'normal', + 'ws': ws, + 'ws_url': ws_url, + }, **traverse_obj(embedded_data, { 'view_count': ('program', 'statistics', 'watchCount'), 'comment_count': ('program', 'statistics', 'commentCount'),