1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-28 01:18:30 +00:00

[ie/tvnoe] Rework Extractor

This commit is contained in:
doe1080 2025-06-02 02:17:51 +09:00
parent 3fe72e9eea
commit cc831cbf85

View File

@ -1,46 +1,80 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
get_element_by_class, extract_attributes,
js_to_json, js_to_json,
mimetype2ext,
unified_strdate,
urljoin,
) )
from ..utils.traversal import find_element, traverse_obj
class TVNoeIE(InfoExtractor): class TVNoeIE(InfoExtractor):
_WORKING = False IE_NAME = 'tvnoe'
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)' IE_DESC = 'Televize Noe'
_TEST = {
'url': 'http://www.tvnoe.cz/video/10362', _VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/porad/(?P<id>[\w-]+)'
'md5': 'aee983f279aab96ec45ab6e2abb3c2ca', _TESTS = [{
'url': 'https://www.tvnoe.cz/porad/43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
'info_dict': { 'info_dict': {
'id': '10362', 'id': '43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
'ext': 'mp4', 'ext': 'mp4',
'series': 'Noční univerzita', 'title': 'Pomáhat potřebným nejen u nás',
'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací', 'description': 'md5:78b538ee32f7e881ec23b9c278a0ff3a',
'description': 'md5:f337bae384e1a531a52c55ebc50fff41', 'release_date': '20250531',
'series': 'Outdoor Films s MUDr. Tomášem Kempným',
'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
}, },
} }, {
'url': 'https://www.tvnoe.cz/porad/43205-zamysleni-tomase-halika-7-nedele-velikonocni',
'info_dict': {
'id': '43205-zamysleni-tomase-halika-7-nedele-velikonocni',
'ext': 'mp4',
'title': '7. neděle velikonoční',
'description': 'md5:6bb9908efc59abe60e1c8c7c0e9bb6cd',
'release_date': '20250531',
'series': 'Zamyšlení Tomáše Halíka',
'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player = self._search_json(
r'var\s+INIT_PLAYER\s*=\s*', webpage, 'init player',
video_id, transform_source=js_to_json, fatal=True)
iframe_url = self._search_regex( formats = []
r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL') for source in traverse_obj(player, ('tracks', ..., lambda _, v: v['src'])):
src_url = source['src']
ext = mimetype2ext(source.get('type'))
if ext == 'm3u8':
fmts = self._extract_m3u8_formats(
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
elif ext == 'mpd':
fmts = self._extract_mpd_formats(
src_url, video_id, mpd_id='dash', fatal=False)
else:
self.report_warning(f'Unsupported stream type: {ext}')
continue
formats.extend(fmts)
ifs_page = self._download_webpage(iframe_url, video_id) return {
jwplayer_data = self._find_jwplayer_data(
ifs_page, video_id, transform_source=js_to_json)
info_dict = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=iframe_url)
info_dict.update({
'id': video_id, 'id': video_id,
'title': clean_html(get_element_by_class( 'description': clean_html(self._search_regex(
'field-name-field-podnazev', webpage)), r'<p\s+class="">(.+)</p>', webpage, 'description', default='')),
'description': clean_html(get_element_by_class( 'formats': formats,
'field-name-body', webpage)), **traverse_obj(webpage, {
'series': clean_html(get_element_by_class('title', webpage)), 'title': ({find_element(tag='h2')}, {clean_html}),
}) 'release_date': (
{re.compile(r'\d{1,2}\.\d{1,2}\.\d{4}').findall}, filter, ..., {unified_strdate}, any),
return info_dict 'series': ({find_element(tag='h1')}, {clean_html}),
'thumbnail': (
{find_element(id='player-live', html=True)}, {extract_attributes},
'poster', {urljoin('https://www.tvnoe.cz/')}),
}),
}