From 60b582751d6753c0aa6e05e47fac3af5b5d94349 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:19:32 +0300 Subject: [PATCH 01/22] Update and fix smotrim.ru extractor (see description) - Added support for links with 'channel' type - Extract json info from api's json dictionary - Added thumbnail url - Fixed outdated broken links in _TESTS - Get many video formats from m3u8 playlist --- yt_dlp/extractor/smotrim.py | 163 ++++++++++++++++++++++++------------ 1 file changed, 108 insertions(+), 55 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index d3f1b695b..0ab6159c0 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -1,65 +1,118 @@ -from .common import InfoExtractor -from ..utils import ExtractorError +import re + +from yt_dlp.extractor.common import InfoExtractor +from yt_dlp.utils import ExtractorError class SmotrimIE(InfoExtractor): - _VALID_URL = r'https?://smotrim\.ru/(?Pbrand|video|article|live)/(?P[0-9]+)' - _TESTS = [{ # video - 'url': 'https://smotrim.ru/video/1539617', - 'md5': 'b1923a533c8cab09679789d720d0b1c5', - 'info_dict': { - 'id': '1539617', - 'ext': 'mp4', - 'title': 'Полиглот. Китайский с нуля за 16 часов! Урок №16', - 'description': '', + _VALID_URL = r"https?://smotrim\.ru/(?Pbrand|video|article|live|channel)/(?P[0-9]+)" + _TESTS = [ + { + "url": "https://smotrim.ru/video/3003613", + "info_dict": { + "id": "3003613", + "ext": "mp4", + "title": "Погода. на 2 августа 2025 года", + "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/635/545/7.jpg", + }, + "add_ie": ["RUTV"], }, - 'add_ie': ['RUTV'], - }, { # article (geo-restricted? plays fine from the US and JP) - 'url': 'https://smotrim.ru/article/2813445', - 'md5': 'e0ac453952afbc6a2742e850b4dc8e77', - 'info_dict': { - 'id': '2431846', - 'ext': 'mp4', - 'title': 'Новости культуры. 
Съёмки первой программы "Большие и маленькие"', - 'description': 'md5:94a4a22472da4252bf5587a4ee441b99', + { + "url": "https://smotrim.ru/article/4609632", + "info_dict": { + "id": "3000761", + "ext": "mp4", + "title": "Новости культуры. Ольга Любимова провела рабочую встречу в Еврейской автономной области", + "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/638/386/6.jpg", + }, + "add_ie": ["RUTV"], }, - 'add_ie': ['RUTV'], - }, { # brand, redirect - 'url': 'https://smotrim.ru/brand/64356', - 'md5': '740472999ccff81d7f6df79cecd91c18', - 'info_dict': { - 'id': '2354523', - 'ext': 'mp4', - 'title': 'Большие и маленькие. Лучшее. 4-й выпуск', - 'description': 'md5:84089e834429008371ea41ea3507b989', + { + "url": "https://smotrim.ru/brand/64356", + "info_dict": { + "id": "2885093", + "ext": "mp4", + "title": "Большие и маленькие. 6-й сезон 8-й выпуск", + "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/527/187/3.jpg", + }, + "add_ie": ["RUTV"], }, - 'add_ie': ['RUTV'], - }, { # live - 'url': 'https://smotrim.ru/live/19201', - 'info_dict': { - 'id': '19201', - 'ext': 'mp4', - # this looks like a TV channel name - 'title': 'Россия Культура. 
Прямой эфир', - 'description': '', + { # GEO RESTRICTED + "url": "https://smotrim.ru/live/19201", + "info_dict": { + "id": "381308c7-a066-4c4f-9656-83e2e792a7b4", + "ext": "mp4", + "title": "Россия К", + "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png", + }, + "add_ie": ["RUTV"], }, - 'add_ie': ['RUTV'], - }] + { # GEO RESTRICTED, REDIRECT FROM live, CANONICAL FOR live urls + "url": "https://smotrim.ru/channel/4", + "info_dict": { + "id": "4", + "ext": "mp4", + "title": "Россия К", + "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png", + }, + "add_ie": ["RUTV"], + }, + ] def _real_extract(self, url): - video_id, typ = self._match_valid_url(url).group('id', 'type') - rutv_type = 'video' - if typ not in ('video', 'live'): - webpage = self._download_webpage(url, video_id, f'Resolving {typ} link') - # there are two cases matching regex: - # 1. "embedUrl" in JSON LD (/brand/) - # 2. "src" attribute from iframe (/article/) - video_id = self._search_regex( - r'"https://player.smotrim.ru/iframe/video/id/(?P\d+)/', - webpage, 'video_id', default=None) + video_id, type = self._match_valid_url(url).group("id", "type") + webpage = self._download_webpage(url, video_id, f"Resolving {type} link") + iframe_url = self._search_regex( + r']+\bsrc=["\'](https?://player\.smotrim\.ru/iframe/[^"\']+)', + webpage, + "iframe URL", + ) + if type in {"live", "channel"}: + # iframe_url = "https://player.smotrim.ru/iframe/live/uid/381308c7-a066-4c4f-9656-83e2e792a7b4/showZoomBtn/false/isPlay/true/mute/true/sid/smotrim_rk/" + video_id = re.search( + r"(?P[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})", + iframe_url, + ) if not video_id: - raise ExtractorError('There are no video in this page.', expected=True) - elif typ == 'live': - rutv_type = 'live' - - return self.url_result(f'https://player.vgtrk.com/iframe/{rutv_type}/id/{video_id}') + raise ExtractorError( + "There are no player uuid in this page.", expected=True + ) + video_id = 
video_id.group("video_id") + jsondata_url = ( + f"https://player.smotrim.ru/iframe/datalive/uid/{video_id}/sid/smotrim" + ) + else: + # iframe_url = "https://player.smotrim.ru/iframe/video/id/3000761/sid/smotrim/isPlay/true/mute/true/?acc_video_id=3204061" + video_id = re.search( + r"^https?://player\.smotrim\.ru/iframe/video/id/(?P\d+)/sid/", + iframe_url, + ) + if not video_id: + raise ExtractorError( + "There are no player id in this page.", expected=True + ) + video_id = video_id.group("video_id") + jsondata_url = ( + f"https://player.smotrim.ru/iframe/datavideo/id/{video_id}/sid/smotrim" + ) + try: + json_info = self._download_json( + jsondata_url, video_id, "Downloading player config JSON metadata" + ) + except Exception as e: + raise ExtractorError(str(e), expected=True) + m3u8_url = json_info["data"]["playlist"]["medialist"][0]["sources"]["m3u8"][ + "auto" + ] + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + m3u8_url, video_id, "mp4", m3u8_id="hls" + ) + return { + "id": video_id, + "title": json_info["data"]["playlist"]["medialist"][0]["title"], + "thumbnail": json_info["data"]["playlist"]["medialist"][0]["pictures"][ + "16:9" + ], + "formats": formats, + "subtitles": subtitles, + } From 9ca66dec4d6023a8c08b17bf74cdabb976384231 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:20:17 +0300 Subject: [PATCH 02/22] Fix imports section --- yt_dlp/extractor/smotrim.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 0ab6159c0..03253f44b 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -1,7 +1,7 @@ import re -from yt_dlp.extractor.common import InfoExtractor -from yt_dlp.utils import ExtractorError +from .common import InfoExtractor +from ..utils import ExtractorError class SmotrimIE(InfoExtractor): From 70a45aa336081ee1d273d06cf2b8bb48f7aa4e80 Mon Sep 17 00:00:00 2001 
From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:35:28 +0300 Subject: [PATCH 03/22] Double quotes fixed --- yt_dlp/extractor/smotrim.py | 121 ++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 03253f44b..a2cfd245e 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -5,114 +5,115 @@ class SmotrimIE(InfoExtractor): - _VALID_URL = r"https?://smotrim\.ru/(?Pbrand|video|article|live|channel)/(?P[0-9]+)" + _VALID_URL = r'https?://smotrim\.ru/(?Pbrand|video|article|live|channel)/(?P[0-9]+)' _TESTS = [ { - "url": "https://smotrim.ru/video/3003613", - "info_dict": { - "id": "3003613", - "ext": "mp4", - "title": "Погода. на 2 августа 2025 года", - "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/635/545/7.jpg", + 'url': 'https://smotrim.ru/video/3003613', + 'info_dict': { + 'id': '3003613', + 'ext': 'mp4', + 'title': 'Погода. на 2 августа 2025 года', + 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/635/545/7.jpg', }, - "add_ie": ["RUTV"], + 'add_ie': ['RUTV'], }, { - "url": "https://smotrim.ru/article/4609632", - "info_dict": { - "id": "3000761", - "ext": "mp4", - "title": "Новости культуры. Ольга Любимова провела рабочую встречу в Еврейской автономной области", - "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/638/386/6.jpg", + 'url': 'https://smotrim.ru/article/4609632', + 'info_dict': { + 'id': '3000761', + 'ext': 'mp4', + 'title': 'Новости культуры. Ольга Любимова провела рабочую встречу в Еврейской автономной области', + 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/638/386/6.jpg', }, - "add_ie": ["RUTV"], + 'add_ie': ['RUTV'], }, { - "url": "https://smotrim.ru/brand/64356", - "info_dict": { - "id": "2885093", - "ext": "mp4", - "title": "Большие и маленькие. 
6-й сезон 8-й выпуск", - "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/527/187/3.jpg", + 'url': 'https://smotrim.ru/brand/64356', + 'info_dict': { + 'id': '2885093', + 'ext': 'mp4', + 'title': 'Большие и маленькие. 6-й сезон 8-й выпуск', + 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/527/187/3.jpg', }, - "add_ie": ["RUTV"], + 'add_ie': ['RUTV'], }, { # GEO RESTRICTED - "url": "https://smotrim.ru/live/19201", - "info_dict": { - "id": "381308c7-a066-4c4f-9656-83e2e792a7b4", - "ext": "mp4", - "title": "Россия К", - "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png", + 'url': 'https://smotrim.ru/live/19201', + 'info_dict': { + 'id': '381308c7-a066-4c4f-9656-83e2e792a7b4', + 'ext': 'mp4', + 'title': 'Россия К', + 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png', }, - "add_ie": ["RUTV"], + 'add_ie': ['RUTV'], }, { # GEO RESTRICTED, REDIRECT FROM live, CANONICAL FOR live urls - "url": "https://smotrim.ru/channel/4", - "info_dict": { - "id": "4", - "ext": "mp4", - "title": "Россия К", - "thumbnail": "https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png", + 'url': 'https://smotrim.ru/channel/4', + 'info_dict': { + 'id': '4', + 'ext': 'mp4', + 'title': 'Россия К', + 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png', }, - "add_ie": ["RUTV"], + 'add_ie': ['RUTV'], }, ] def _real_extract(self, url): - video_id, type = self._match_valid_url(url).group("id", "type") - webpage = self._download_webpage(url, video_id, f"Resolving {type} link") + video_id, type = self._match_valid_url(url).group('id', 'type') + webpage = self._download_webpage(url, video_id, f'Resolving {type} link') iframe_url = self._search_regex( r']+\bsrc=["\'](https?://player\.smotrim\.ru/iframe/[^"\']+)', webpage, - "iframe URL", + 'iframe URL', ) - if type in {"live", "channel"}: - # iframe_url = 
"https://player.smotrim.ru/iframe/live/uid/381308c7-a066-4c4f-9656-83e2e792a7b4/showZoomBtn/false/isPlay/true/mute/true/sid/smotrim_rk/" + if type in {'live', 'channel'}: + # iframe_url = 'https://player.smotrim.ru/iframe/live/uid/381308c7-a066-4c4f-9656-83e2e792a7b4/showZoomBtn/false/isPlay/true/mute/true/sid/smotrim_rk/' video_id = re.search( - r"(?P[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})", + r'(?P[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', iframe_url, ) if not video_id: raise ExtractorError( - "There are no player uuid in this page.", expected=True + 'There are no player uuid in this page.', expected=True ) - video_id = video_id.group("video_id") + video_id = video_id.group('video_id') jsondata_url = ( - f"https://player.smotrim.ru/iframe/datalive/uid/{video_id}/sid/smotrim" + f'https://player.smotrim.ru/iframe/datalive/uid/{video_id}/sid/smotrim' ) else: - # iframe_url = "https://player.smotrim.ru/iframe/video/id/3000761/sid/smotrim/isPlay/true/mute/true/?acc_video_id=3204061" + # iframe_url = 'https://player.smotrim.ru/iframe/video/id/3000761/sid/smotrim/isPlay/true/mute/true/?acc_video_id=3204061' video_id = re.search( - r"^https?://player\.smotrim\.ru/iframe/video/id/(?P\d+)/sid/", + r'^https?://player\.smotrim\.ru/iframe/video/id/(?P\d+)/sid/', iframe_url, ) if not video_id: raise ExtractorError( - "There are no player id in this page.", expected=True + 'There are no player id in this page.', expected=True ) - video_id = video_id.group("video_id") + video_id = video_id.group('video_id') jsondata_url = ( - f"https://player.smotrim.ru/iframe/datavideo/id/{video_id}/sid/smotrim" + f'https://player.smotrim.ru/iframe/datavideo/id/{video_id}/sid/smotrim' ) try: json_info = self._download_json( - jsondata_url, video_id, "Downloading player config JSON metadata" + jsondata_url, video_id, 'Downloading player config JSON metadata' ) except Exception as e: raise ExtractorError(str(e), expected=True) - m3u8_url = 
json_info["data"]["playlist"]["medialist"][0]["sources"]["m3u8"][ - "auto" + m3u8_url = json_info['data']['playlist']['medialist'][0]['sources']['m3u8'][ + 'auto' ] formats, subtitles = self._extract_m3u8_formats_and_subtitles( - m3u8_url, video_id, "mp4", m3u8_id="hls" + m3u8_url, video_id, 'mp4', m3u8_id='hls' ) return { - "id": video_id, - "title": json_info["data"]["playlist"]["medialist"][0]["title"], - "thumbnail": json_info["data"]["playlist"]["medialist"][0]["pictures"][ - "16:9" + 'id': video_id, + 'title': json_info['data']['playlist']['medialist'][0]['title'], + 'thumbnail': json_info['data']['playlist']['medialist'][0]['pictures'][ + '16:9' ], - "formats": formats, - "subtitles": subtitles, + 'formats': formats, + 'subtitles': subtitles, } + From ed049655698448c5ba22a60b7cb161a48a3d9fd7 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:38:27 +0300 Subject: [PATCH 04/22] Fixed the 'type' variable, which argues with the built-in python function --- yt_dlp/extractor/smotrim.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index a2cfd245e..4e425d9d1 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -60,14 +60,14 @@ class SmotrimIE(InfoExtractor): ] def _real_extract(self, url): - video_id, type = self._match_valid_url(url).group('id', 'type') - webpage = self._download_webpage(url, video_id, f'Resolving {type} link') + video_id, typ = self._match_valid_url(url).group('id', 'type') + webpage = self._download_webpage(url, video_id, f'Resolving {typ} link') iframe_url = self._search_regex( r']+\bsrc=["\'](https?://player\.smotrim\.ru/iframe/[^"\']+)', webpage, 'iframe URL', ) - if type in {'live', 'channel'}: + if typ in {'live', 'channel'}: # iframe_url = 
'https://player.smotrim.ru/iframe/live/uid/381308c7-a066-4c4f-9656-83e2e792a7b4/showZoomBtn/false/isPlay/true/mute/true/sid/smotrim_rk/' video_id = re.search( r'(?P[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', From c4e22096adbbaa391522e3874d2ff19bde814393 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:42:07 +0300 Subject: [PATCH 05/22] Fixed 'COM812 Trailing comma missing' --- yt_dlp/extractor/smotrim.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 4e425d9d1..38ba5a22a 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -75,7 +75,7 @@ def _real_extract(self, url): ) if not video_id: raise ExtractorError( - 'There are no player uuid in this page.', expected=True + 'There are no player uuid in this page.', expected=True, ) video_id = video_id.group('video_id') jsondata_url = ( @@ -89,7 +89,7 @@ def _real_extract(self, url): ) if not video_id: raise ExtractorError( - 'There are no player id in this page.', expected=True + 'There are no player id in this page.', expected=True, ) video_id = video_id.group('video_id') jsondata_url = ( @@ -97,7 +97,7 @@ def _real_extract(self, url): ) try: json_info = self._download_json( - jsondata_url, video_id, 'Downloading player config JSON metadata' + jsondata_url, video_id, 'Downloading player config JSON metadata', ) except Exception as e: raise ExtractorError(str(e), expected=True) @@ -105,7 +105,7 @@ def _real_extract(self, url): 'auto' ] formats, subtitles = self._extract_m3u8_formats_and_subtitles( - m3u8_url, video_id, 'mp4', m3u8_id='hls' + m3u8_url, video_id, 'mp4', m3u8_id='hls', ) return { 'id': video_id, From 2b25a20695a5197de8d53f9dc9639f53e8b8154e Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:47:44 +0300 Subject: [PATCH 06/22] Add empty string before 
return --- yt_dlp/extractor/smotrim.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 38ba5a22a..7e8771395 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -107,6 +107,7 @@ def _real_extract(self, url): formats, subtitles = self._extract_m3u8_formats_and_subtitles( m3u8_url, video_id, 'mp4', m3u8_id='hls', ) + return { 'id': video_id, 'title': json_info['data']['playlist']['medialist'][0]['title'], From 4df15c0f742ccfbc12f712a4aeda05e637901233 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 22:50:15 +0300 Subject: [PATCH 07/22] Fixed 'W293 Blank line contains whitespace' --- yt_dlp/extractor/smotrim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 7e8771395..46718f2fb 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -107,7 +107,7 @@ def _real_extract(self, url): formats, subtitles = self._extract_m3u8_formats_and_subtitles( m3u8_url, video_id, 'mp4', m3u8_id='hls', ) - + return { 'id': video_id, 'title': json_info['data']['playlist']['medialist'][0]['title'], From a8c799a13c21b9374df02ee45f25028b7883b468 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:09:09 +0300 Subject: [PATCH 08/22] Added check that json download status code is 200. --- yt_dlp/extractor/smotrim.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 46718f2fb..63bc8dbf9 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -101,6 +101,8 @@ def _real_extract(self, url): ) except Exception as e: raise ExtractorError(str(e), expected=True) + if json_info.get("status") != 200: + raise ExtractorError("Json download error. 
Status code: %s" % str(json_info.get("status")), expected=True) m3u8_url = json_info['data']['playlist']['medialist'][0]['sources']['m3u8'][ 'auto' ] @@ -111,9 +113,7 @@ def _real_extract(self, url): return { 'id': video_id, 'title': json_info['data']['playlist']['medialist'][0]['title'], - 'thumbnail': json_info['data']['playlist']['medialist'][0]['pictures'][ - '16:9' - ], + 'thumbnail': json_info['data']['playlist']['medialist'][0]['pictures']['16:9'], 'formats': formats, 'subtitles': subtitles, } From fd8503cb53a8a4b4e93a3375a278e5655391dc99 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:09:55 +0300 Subject: [PATCH 09/22] Fix double quotes --- yt_dlp/extractor/smotrim.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 63bc8dbf9..d08f676a4 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -101,8 +101,8 @@ def _real_extract(self, url): ) except Exception as e: raise ExtractorError(str(e), expected=True) - if json_info.get("status") != 200: - raise ExtractorError("Json download error. Status code: %s" % str(json_info.get("status")), expected=True) + if json_info.get('status') != 200: + raise ExtractorError('Json download error. 
Status code: %s' % str(json_info.get('status')), expected=True) m3u8_url = json_info['data']['playlist']['medialist'][0]['sources']['m3u8'][ 'auto' ] From 6dd3fb854824325d0ad976eb98f5c8dcdcbbeedc Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Fri, 1 Aug 2025 23:15:14 +0300 Subject: [PATCH 10/22] Fix autopep8 empty line end error --- yt_dlp/extractor/smotrim.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index d08f676a4..a89c2529b 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -117,4 +117,3 @@ def _real_extract(self, url): 'formats': formats, 'subtitles': subtitles, } - From 691784cb72c6175991431eded3b55f2e275ed295 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Sat, 2 Aug 2025 00:39:18 +0300 Subject: [PATCH 11/22] Added "is_live" status --- yt_dlp/extractor/smotrim.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index a89c2529b..e2f3b5bb9 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -116,4 +116,5 @@ def _real_extract(self, url): 'thumbnail': json_info['data']['playlist']['medialist'][0]['pictures']['16:9'], 'formats': formats, 'subtitles': subtitles, + "is_live": json_info['data']['playlist']['type'] == 'live', } From 72003004bf56d3497e8af6f56432cd1a95dd9580 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Sat, 2 Aug 2025 00:41:13 +0300 Subject: [PATCH 12/22] Fix double quotes --- yt_dlp/extractor/smotrim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index e2f3b5bb9..76262c55d 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -116,5 +116,5 @@ def _real_extract(self, url): 'thumbnail': 
json_info['data']['playlist']['medialist'][0]['pictures']['16:9'], 'formats': formats, 'subtitles': subtitles, - "is_live": json_info['data']['playlist']['type'] == 'live', + 'is_live': json_info['data']['playlist']['type'] == 'live', } From 6d01405d9fd2ff097fa38b3b8ddf8fc21c9f2873 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Sat, 2 Aug 2025 00:52:02 +0300 Subject: [PATCH 13/22] Some code optimizations and add 'duration' key to output dict --- yt_dlp/extractor/smotrim.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 76262c55d..bc24a6d10 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -103,18 +103,19 @@ def _real_extract(self, url): raise ExtractorError(str(e), expected=True) if json_info.get('status') != 200: raise ExtractorError('Json download error. Status code: %s' % str(json_info.get('status')), expected=True) - m3u8_url = json_info['data']['playlist']['medialist'][0]['sources']['m3u8'][ - 'auto' - ] + media_info = json_info['data']['playlist']['medialist'][0] formats, subtitles = self._extract_m3u8_formats_and_subtitles( - m3u8_url, video_id, 'mp4', m3u8_id='hls', + media_info['sources']['m3u8']['auto'], video_id, 'mp4', m3u8_id='hls', ) - - return { + res = { 'id': video_id, - 'title': json_info['data']['playlist']['medialist'][0]['title'], - 'thumbnail': json_info['data']['playlist']['medialist'][0]['pictures']['16:9'], + 'title': media_info['title'], + 'thumbnail': media_info['pictures']['16:9'], 'formats': formats, 'subtitles': subtitles, 'is_live': json_info['data']['playlist']['type'] == 'live', } + if not res['is_live'] and 'duration' in media_info: + res['duration'] = media_info['duration'] + + return res From 292616f6b6bd3213cc3aa1464afad0ca382bb435 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Sat, 2 Aug 2025 01:08:26 
+0300 Subject: [PATCH 14/22] Added extra keys 'is_live', 'duration', 'subtitles' to info_dicts in _TESTS --- yt_dlp/extractor/smotrim.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index bc24a6d10..685896c3c 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -14,6 +14,9 @@ class SmotrimIE(InfoExtractor): 'ext': 'mp4', 'title': 'Погода. на 2 августа 2025 года', 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/635/545/7.jpg', + 'subtitles': {}, + 'is_live': False, + 'duration': 105, }, 'add_ie': ['RUTV'], }, @@ -24,6 +27,9 @@ class SmotrimIE(InfoExtractor): 'ext': 'mp4', 'title': 'Новости культуры. Ольга Любимова провела рабочую встречу в Еврейской автономной области', 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/638/386/6.jpg', + 'subtitles': {}, + 'is_live': False, + 'duration': 69, }, 'add_ie': ['RUTV'], }, @@ -34,6 +40,9 @@ class SmotrimIE(InfoExtractor): 'ext': 'mp4', 'title': 'Большие и маленькие. 
6-й сезон 8-й выпуск', 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/527/187/3.jpg', + 'subtitles': {}, + 'is_live': False, + 'duration': 6622, }, 'add_ie': ['RUTV'], }, @@ -44,6 +53,12 @@ class SmotrimIE(InfoExtractor): 'ext': 'mp4', 'title': 'Россия К', 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png', + 'subtitles': { + 'ru': [ + {'url': 'https://vgtrkregion-reg.cdnvideo.ru/vgtrk/0/kultura-hd/track_2001_319a299f/chunklist.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native',}, + ] + }, + 'is_live': True, }, 'add_ie': ['RUTV'], }, @@ -54,6 +69,12 @@ class SmotrimIE(InfoExtractor): 'ext': 'mp4', 'title': 'Россия К', 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png', + 'subtitles': { + 'ru': [ + {'url': 'https://vgtrkregion-reg.cdnvideo.ru/vgtrk/0/kultura-hd/track_2001_319a299f/chunklist.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native',}, + ] + }, + 'is_live': True, }, 'add_ie': ['RUTV'], }, From c01b70eef847855d85103ff168f92b7558e0ad99 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Sat, 2 Aug 2025 01:12:12 +0300 Subject: [PATCH 15/22] Fix 'COM819 Trailing comma prohibited' and 'COM812 Trailing comma missing' --- yt_dlp/extractor/smotrim.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 685896c3c..ae1e304c7 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -55,8 +55,8 @@ class SmotrimIE(InfoExtractor): 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png', 'subtitles': { 'ru': [ - {'url': 'https://vgtrkregion-reg.cdnvideo.ru/vgtrk/0/kultura-hd/track_2001_319a299f/chunklist.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native',}, - ] + {'url': 'https://vgtrkregion-reg.cdnvideo.ru/vgtrk/0/kultura-hd/track_2001_319a299f/chunklist.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native'}, + ], }, 'is_live': True, }, @@ -71,8 +71,8 @@ class 
SmotrimIE(InfoExtractor): 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png', 'subtitles': { 'ru': [ - {'url': 'https://vgtrkregion-reg.cdnvideo.ru/vgtrk/0/kultura-hd/track_2001_319a299f/chunklist.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native',}, - ] + {'url': 'https://vgtrkregion-reg.cdnvideo.ru/vgtrk/0/kultura-hd/track_2001_319a299f/chunklist.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native'}, + ], }, 'is_live': True, }, From 2a384a14b0e51532130c0716096e7fe0bfc7f416 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Sun, 3 Aug 2025 00:40:06 +0300 Subject: [PATCH 16/22] Added int_or_none() from utils for 'duration' --- yt_dlp/extractor/smotrim.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index ae1e304c7..e03c7eaab 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ExtractorError, int_or_none class SmotrimIE(InfoExtractor): @@ -128,15 +128,13 @@ def _real_extract(self, url): formats, subtitles = self._extract_m3u8_formats_and_subtitles( media_info['sources']['m3u8']['auto'], video_id, 'mp4', m3u8_id='hls', ) - res = { + + return { 'id': video_id, - 'title': media_info['title'], + 'title': media_info.get('title'), 'thumbnail': media_info['pictures']['16:9'], 'formats': formats, 'subtitles': subtitles, 'is_live': json_info['data']['playlist']['type'] == 'live', + 'duration': int_or_none(media_info.get('duration')) } - if not res['is_live'] and 'duration' in media_info: - res['duration'] = media_info['duration'] - - return res From ee1be9ca52a353f98e235d05da560733994eece0 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Sun, 3 Aug 2025 00:41:35 +0300 Subject: [PATCH 17/22] Fix trailing comma --- yt_dlp/extractor/smotrim.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index e03c7eaab..12a740e83 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -136,5 +136,5 @@ def _real_extract(self, url): 'formats': formats, 'subtitles': subtitles, 'is_live': json_info['data']['playlist']['type'] == 'live', - 'duration': int_or_none(media_info.get('duration')) + 'duration': int_or_none(media_info.get('duration')), } From bfdfb57fd67d8382ea6e7c6b0cbd671e3ffd1d03 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Sun, 3 Aug 2025 01:10:42 +0300 Subject: [PATCH 18/22] Added key 'duration' = None to info_dict in TESTS of live streams. --- yt_dlp/extractor/smotrim.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 12a740e83..a0aee5bdd 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -59,6 +59,7 @@ class SmotrimIE(InfoExtractor): ], }, 'is_live': True, + 'duration': None, }, 'add_ie': ['RUTV'], }, @@ -75,6 +76,7 @@ class SmotrimIE(InfoExtractor): ], }, 'is_live': True, + 'duration': None, }, 'add_ie': ['RUTV'], }, From b73250f275ca5bcf96129726c71ef2d02c23c359 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Tue, 5 Aug 2025 00:34:46 +0300 Subject: [PATCH 19/22] Prevent '"NoneType" object has no attribute "get"' exception and missing dict keys. --- yt_dlp/extractor/smotrim.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index a0aee5bdd..146f9073c 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -126,17 +126,24 @@ def _real_extract(self, url): raise ExtractorError(str(e), expected=True) if json_info.get('status') != 200: raise ExtractorError('Json download error. 
Status code: %s' % str(json_info.get('status')), expected=True) - media_info = json_info['data']['playlist']['medialist'][0] - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - media_info['sources']['m3u8']['auto'], video_id, 'mp4', m3u8_id='hls', - ) + try: + media_info = json_info.get('data').get('playlist').get('medialist')[0] + except (KeyError, AttributeError, TypeError) as e: + raise ExtractorError("media_info get error: %s " % str(e), expected=True) + try: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + media_info.get('sources').get('m3u8').get('auto'), video_id, 'mp4', m3u8_id='hls', + ) + res = { + 'id': video_id, + 'title': media_info.get('title'), + 'thumbnail': media_info.get('pictures').get('16:9'), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': json_info.get('data').get('playlist').get('type') == 'live', + 'duration': int_or_none(media_info.get('duration')), + } + except (KeyError, AttributeError, TypeError) as e: + raise ExtractorError("Result error: %s" % str(e), expected=True) - return { - 'id': video_id, - 'title': media_info.get('title'), - 'thumbnail': media_info['pictures']['16:9'], - 'formats': formats, - 'subtitles': subtitles, - 'is_live': json_info['data']['playlist']['type'] == 'live', - 'duration': int_or_none(media_info.get('duration')), - } + return res From 4f21363359e10b9ab60d1416ccfd6b2262fcdacd Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Tue, 5 Aug 2025 00:36:36 +0300 Subject: [PATCH 20/22] Fix double quotes --- yt_dlp/extractor/smotrim.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 146f9073c..b396f9a6f 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -129,7 +129,7 @@ def _real_extract(self, url): try: media_info = json_info.get('data').get('playlist').get('medialist')[0] except (KeyError, AttributeError, TypeError) 
as e: - raise ExtractorError("media_info get error: %s " % str(e), expected=True) + raise ExtractorError('media_info get error: %s' % str(e), expected=True) try: formats, subtitles = self._extract_m3u8_formats_and_subtitles( media_info.get('sources').get('m3u8').get('auto'), video_id, 'mp4', m3u8_id='hls', @@ -144,6 +144,6 @@ def _real_extract(self, url): 'duration': int_or_none(media_info.get('duration')), } except (KeyError, AttributeError, TypeError) as e: - raise ExtractorError("Result error: %s" % str(e), expected=True) + raise ExtractorError('Result error: %s' % str(e), expected=True) return res From 89eac10fa841029e9da654f95c7be0b836360020 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Wed, 6 Aug 2025 22:25:54 +0300 Subject: [PATCH 21/22] Use traverse_obj to get JSON info data --- yt_dlp/extractor/smotrim.py | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index b396f9a6f..77796eb46 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -2,6 +2,7 @@ from .common import InfoExtractor from ..utils import ExtractorError, int_or_none +from ..utils.traversal import traverse_obj class SmotrimIE(InfoExtractor): @@ -126,24 +127,19 @@ def _real_extract(self, url): raise ExtractorError(str(e), expected=True) if json_info.get('status') != 200: raise ExtractorError('Json download error. 
Status code: %s' % str(json_info.get('status')), expected=True) - try: - media_info = json_info.get('data').get('playlist').get('medialist')[0] - except (KeyError, AttributeError, TypeError) as e: - raise ExtractorError('media_info get error: %s' % str(e), expected=True) - try: - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - media_info.get('sources').get('m3u8').get('auto'), video_id, 'mp4', m3u8_id='hls', - ) - res = { - 'id': video_id, - 'title': media_info.get('title'), - 'thumbnail': media_info.get('pictures').get('16:9'), - 'formats': formats, - 'subtitles': subtitles, - 'is_live': json_info.get('data').get('playlist').get('type') == 'live', - 'duration': int_or_none(media_info.get('duration')), - } - except (KeyError, AttributeError, TypeError) as e: - raise ExtractorError('Result error: %s' % str(e), expected=True) + media_info = traverse_obj(json_info, ('data', 'playlist', 'medialist', 0)) + if not media_info: + raise ExtractorError('Unable to get media_info', expected=True) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + traverse_obj(media_info, ('sources', 'm3u8', 'auto')), video_id, 'mp4', m3u8_id='hls', + ) - return res + return { + 'id': video_id, + 'title': traverse_obj(media_info, ('title')), + 'thumbnail': traverse_obj(media_info, ('pictures', '16:9')), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': traverse_obj(json_info, ('data', 'playlist', 'type')) == 'live', + 'duration': int_or_none(traverse_obj(media_info, ('duration'))), + } From de283383a8d2471c5686026ada85707a5c5f9ce0 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Thu, 7 Aug 2025 00:09:33 +0300 Subject: [PATCH 22/22] Using regular expressions as values for thumbnails in _TESTS --- yt_dlp/extractor/smotrim.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index 77796eb46..2d31170f9 100644 --- 
a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -14,7 +14,7 @@ class SmotrimIE(InfoExtractor): 'id': '3003613', 'ext': 'mp4', 'title': 'Погода. на 2 августа 2025 года', - 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/635/545/7.jpg', + 'thumbnail': r're:https?://cdn-st\d\.smotrim\.ru/vh/pictures/[a-z]{2}/\d+/\d+/\d+\.(?:jpg|png)', 'subtitles': {}, 'is_live': False, 'duration': 105, @@ -27,7 +27,7 @@ class SmotrimIE(InfoExtractor): 'id': '3000761', 'ext': 'mp4', 'title': 'Новости культуры. Ольга Любимова провела рабочую встречу в Еврейской автономной области', - 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/638/386/6.jpg', + 'thumbnail': r're:https?://cdn-st\d\.smotrim\.ru/vh/pictures/[a-z]{2}/\d+/\d+/\d+\.(?:jpg|png)', 'subtitles': {}, 'is_live': False, 'duration': 69, @@ -40,7 +40,7 @@ class SmotrimIE(InfoExtractor): 'id': '2885093', 'ext': 'mp4', 'title': 'Большие и маленькие. 6-й сезон 8-й выпуск', - 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/527/187/3.jpg', + 'thumbnail': r're:https?://cdn-st\d\.smotrim\.ru/vh/pictures/[a-z]{2}/\d+/\d+/\d+\.(?:jpg|png)', 'subtitles': {}, 'is_live': False, 'duration': 6622, @@ -53,7 +53,7 @@ class SmotrimIE(InfoExtractor): 'id': '381308c7-a066-4c4f-9656-83e2e792a7b4', 'ext': 'mp4', 'title': 'Россия К', - 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png', + 'thumbnail': r're:https?://cdn-st\d\.smotrim\.ru/vh/pictures/[a-z]{2}/\d+/\d+/\d+\.(?:jpg|png)', 'subtitles': { 'ru': [ {'url': 'https://vgtrkregion-reg.cdnvideo.ru/vgtrk/0/kultura-hd/track_2001_319a299f/chunklist.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native'}, @@ -70,7 +70,7 @@ class SmotrimIE(InfoExtractor): 'id': '4', 'ext': 'mp4', 'title': 'Россия К', - 'thumbnail': 'https://cdn-st2.smotrim.ru/vh/pictures/xw/441/085/7.png', + 'thumbnail': r're:https?://cdn-st\d\.smotrim\.ru/vh/pictures/[a-z]{2}/\d+/\d+/\d+\.(?:jpg|png)', 'subtitles': { 'ru': [ {'url': 
'https://vgtrkregion-reg.cdnvideo.ru/vgtrk/0/kultura-hd/track_2001_319a299f/chunklist.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native'},