1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00

[ie/tvnoe] Rework Extractor

This commit is contained in:
doe1080 2025-06-02 02:17:51 +09:00
parent 3fe72e9eea
commit cc831cbf85

View File

@@ -1,46 +1,80 @@
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
get_element_by_class,
extract_attributes,
js_to_json,
mimetype2ext,
unified_strdate,
urljoin,
)
from ..utils.traversal import find_element, traverse_obj
# NOTE(review): this span looks like a rendered commit diff whose +/- markers
# and indentation were lost, so lines from two versions of TVNoeIE are
# interleaved (two _VALID_URL assignments, both _TEST and _TESTS, duplicate
# 'id' keys, two return paths). It is not runnable as-is - confirm against the
# real file before relying on any single line.
#
# Extractor for tvnoe.cz pages (IE_DESC below: 'Televize Noe').
class TVNoeIE(InfoExtractor):
# Presumably flags the extractor as broken - TODO confirm which version of the
# class this line belongs to.
_WORKING = False
# URL pattern for /video/<id> pages, where <id> is purely numeric.
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
# Single test case keyed on a /video/ URL and an md5 checksum.
_TEST = {
'url': 'http://www.tvnoe.cz/video/10362',
'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
IE_NAME = 'tvnoe'
IE_DESC = 'Televize Noe'
# URL pattern for /porad/<id> pages; <id> may contain word characters and
# hyphens, so the whole slug is captured as the id.
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/porad/(?P<id>[\w-]+)'
# List of test cases keyed on /porad/ URLs; each expects the full slug as 'id'.
_TESTS = [{
'url': 'https://www.tvnoe.cz/porad/43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
'info_dict': {
'id': '10362',
'id': '43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
'ext': 'mp4',
'series': 'Noční univerzita',
'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
'title': 'Pomáhat potřebným nejen u nás',
'description': 'md5:78b538ee32f7e881ec23b9c278a0ff3a',
'release_date': '20250531',
'series': 'Outdoor Films s MUDr. Tomášem Kempným',
'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
},
}
}, {
'url': 'https://www.tvnoe.cz/porad/43205-zamysleni-tomase-halika-7-nedele-velikonocni',
'info_dict': {
'id': '43205-zamysleni-tomase-halika-7-nedele-velikonocni',
'ext': 'mp4',
'title': '7. neděle velikonoční',
'description': 'md5:6bb9908efc59abe60e1c8c7c0e9bb6cd',
'release_date': '20250531',
'series': 'Zamyšlení Tomáše Halíka',
'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
},
}]
# Extract the video id from the URL, download the page, and build the result
# dict from data found in the page.
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
# Path A: parse the JSON assigned to the page's `var INIT_PLAYER = ...`
# script variable; js_to_json converts the JS literal first and fatal=True
# is passed explicitly.
player = self._search_json(
r'var\s+INIT_PLAYER\s*=\s*', webpage, 'init player',
video_id, transform_source=js_to_json, fatal=True)
# Path B: take the src of the first <iframe> with a double-quoted src attribute.
iframe_url = self._search_regex(
r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
formats = []
# Walk the nested 'tracks' structure of the player config; the lambda keeps
# only entries whose 'src' is truthy (assumes traverse_obj skips entries that
# lack the key - TODO confirm).
for source in traverse_obj(player, ('tracks', ..., lambda _, v: v['src'])):
src_url = source['src']
# Pick the manifest parser from the entry's MIME type.
ext = mimetype2ext(source.get('type'))
if ext == 'm3u8':
# fatal=False: a failing manifest should not abort the whole extraction
# (presumably yields no formats instead - verify against the helper).
fmts = self._extract_m3u8_formats(
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
elif ext == 'mpd':
fmts = self._extract_mpd_formats(
src_url, video_id, mpd_id='dash', fatal=False)
else:
# Anything that is neither HLS nor DASH is reported and skipped.
self.report_warning(f'Unsupported stream type: {ext}')
continue
formats.extend(fmts)
# Path B continued: download the iframe page, locate its JW Player setup
# data, and let the generic JW Player parser build the info dict.
ifs_page = self._download_webpage(iframe_url, video_id)
jwplayer_data = self._find_jwplayer_data(
ifs_page, video_id, transform_source=js_to_json)
info_dict = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=iframe_url)
# NOTE(review): `info_dict.update({` and `return {` below open two different
# result dicts - one per interleaved version of the method.
info_dict.update({
return {
'id': video_id,
# Path B metadata: text of elements selected by CSS class name.
'title': clean_html(get_element_by_class(
'field-name-field-podnazev', webpage)),
'description': clean_html(get_element_by_class(
'field-name-body', webpage)),
'series': clean_html(get_element_by_class('title', webpage)),
})
return info_dict
# Path A metadata: description is the content of a <p class=""> element,
# falling back to '' when the pattern does not match.
'description': clean_html(self._search_regex(
r'<p\s+class="">(.+)</p>', webpage, 'description', default='')),
'formats': formats,
# Remaining fields are pulled straight from the page HTML via traverse_obj.
**traverse_obj(webpage, {
'title': ({find_element(tag='h2')}, {clean_html}),
# Collect d.m.yyyy-looking strings from the page and pass them through
# unified_strdate; presumably `any` keeps the first usable result - TODO confirm.
'release_date': (
{re.compile(r'\d{1,2}\.\d{1,2}\.\d{4}').findall}, filter, ..., {unified_strdate}, any),
'series': ({find_element(tag='h1')}, {clean_html}),
# Thumbnail is the 'poster' attribute of the element with id "player-live",
# resolved against the site root.
'thumbnail': (
{find_element(id='player-live', html=True)}, {extract_attributes},
'poster', {urljoin('https://www.tvnoe.cz/')}),
}),
}