From 7b4c96e0898db048259ef5fdf12ed14e3605dce3 Mon Sep 17 00:00:00 2001 From: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> Date: Thu, 10 Jul 2025 01:16:33 +0300 Subject: [PATCH] [ie/mir24.tv] Add extractor (#13651) Closes #13365 Authored by: swayll --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/mir24tv.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 yt_dlp/extractor/mir24tv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ada12b3a8a..84da570b0a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1147,6 +1147,7 @@ MindsIE, ) from .minoto import MinotoIE +from .mir24tv import Mir24TvIE from .mirrativ import ( MirrativIE, MirrativUserIE, diff --git a/yt_dlp/extractor/mir24tv.py b/yt_dlp/extractor/mir24tv.py new file mode 100644 index 0000000000..5832901bf1 --- /dev/null +++ b/yt_dlp/extractor/mir24tv.py @@ -0,0 +1,37 @@ +from .common import InfoExtractor +from ..utils import parse_qs, url_or_none +from ..utils.traversal import require, traverse_obj + + +class Mir24TvIE(InfoExtractor): + IE_NAME = 'mir24.tv' + _VALID_URL = r'https?://(?:www\.)?mir24\.tv/news/(?P[0-9]+)/[^/?#]+' + _TESTS = [{ + 'url': 'https://mir24.tv/news/16635210/dni-kultury-rossii-otkrylis-v-uzbekistane.-na-prazdnichnom-koncerte-vystupili-zvezdy-rossijskoj-estrada', + 'info_dict': { + 'id': '16635210', + 'title': 'Дни культуры России открылись в Узбекистане. На праздничном концерте выступили звезды российской эстрады', + 'ext': 'mp4', + 'thumbnail': r're:https://images\.mir24\.tv/.+\.jpg', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id, impersonate=True) + + iframe_url = self._search_regex( + r']+\bsrc=["\'](https?://mir24\.tv/players/[^"\']+)', + webpage, 'iframe URL') + + m3u8_url = traverse_obj(iframe_url, ( + {parse_qs}, 'source', -1, {self._proto_relative_url}, {url_or_none}, {require('m3u8 URL')})) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') + + return { + 'id': video_id, + 'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'formats': formats, + 'subtitles': subtitles, + }