From cc831cbf8507a6262b04896bb01866484903c51c Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Mon, 2 Jun 2025 02:17:51 +0900 Subject: [PATCH] [ie/tvnoe] Rework Extractor --- yt_dlp/extractor/tvnoe.py | 90 +++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/tvnoe.py b/yt_dlp/extractor/tvnoe.py index 24a82623f..94323d732 100644 --- a/yt_dlp/extractor/tvnoe.py +++ b/yt_dlp/extractor/tvnoe.py @@ -1,46 +1,80 @@ +import re + from .common import InfoExtractor from ..utils import ( clean_html, - get_element_by_class, + extract_attributes, js_to_json, + mimetype2ext, + unified_strdate, + urljoin, ) +from ..utils.traversal import find_element, traverse_obj class TVNoeIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P[0-9]+)' - _TEST = { - 'url': 'http://www.tvnoe.cz/video/10362', - 'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', + IE_NAME = 'tvnoe' + IE_DESC = 'Televize Noe' + + _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/porad/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://www.tvnoe.cz/porad/43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas', 'info_dict': { - 'id': '10362', + 'id': '43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas', 'ext': 'mp4', - 'series': 'Noční univerzita', - 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací', - 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', + 'title': 'Pomáhat potřebným nejen u nás', + 'description': 'md5:78b538ee32f7e881ec23b9c278a0ff3a', + 'release_date': '20250531', + 'series': 'Outdoor Films s MUDr. Tomášem Kempným', + 'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg', }, - } + }, { + 'url': 'https://www.tvnoe.cz/porad/43205-zamysleni-tomase-halika-7-nedele-velikonocni', + 'info_dict': { + 'id': '43205-zamysleni-tomase-halika-7-nedele-velikonocni', + 'ext': 'mp4', + 'title': '7. neděle velikonoční', + 'description': 'md5:6bb9908efc59abe60e1c8c7c0e9bb6cd', + 'release_date': '20250531', + 'series': 'Zamyšlení Tomáše Halíka', + 'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg', + }, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + player = self._search_json( + r'var\s+INIT_PLAYER\s*=\s*', webpage, 'init player', + video_id, transform_source=js_to_json, fatal=True) - iframe_url = self._search_regex( - r']+src="([^"]+)"', webpage, 'iframe URL') + formats = [] + for source in traverse_obj(player, ('tracks', ..., lambda _, v: v['src'])): + src_url = source['src'] + ext = mimetype2ext(source.get('type')) + if ext == 'm3u8': + fmts = self._extract_m3u8_formats( + src_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + elif ext == 'mpd': + fmts = self._extract_mpd_formats( + src_url, video_id, mpd_id='dash', fatal=False) + else: + self.report_warning(f'Unsupported stream type: {ext}') + continue + formats.extend(fmts) - ifs_page = self._download_webpage(iframe_url, video_id) - jwplayer_data = self._find_jwplayer_data( - ifs_page, video_id, transform_source=js_to_json) - info_dict = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False, base_url=iframe_url) - - info_dict.update({ + return { 'id': video_id, - 'title': clean_html(get_element_by_class( - 'field-name-field-podnazev', webpage)), - 'description': clean_html(get_element_by_class( - 'field-name-body', webpage)), - 'series': clean_html(get_element_by_class('title', webpage)), - }) - - return info_dict + 'description': clean_html(self._search_regex( + r'(.+)

', webpage, 'description', default='')), + 'formats': formats, + **traverse_obj(webpage, { + 'title': ({find_element(tag='h2')}, {clean_html}), + 'release_date': ( + {re.compile(r'\d{1,2}\.\d{1,2}\.\d{4}').findall}, filter, ..., {unified_strdate}, any), + 'series': ({find_element(tag='h1')}, {clean_html}), + 'thumbnail': ( + {find_element(id='player-live', html=True)}, {extract_attributes}, + 'poster', {urljoin('https://www.tvnoe.cz/')}), + }), + }