mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
[ie/tvnoe] Rework Extractor
This commit is contained in:
parent
3fe72e9eea
commit
cc831cbf85
@ -1,46 +1,80 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
get_element_by_class,
|
||||
extract_attributes,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class TVNoeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/video/(?P<id>[0-9]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.tvnoe.cz/video/10362',
|
||||
'md5': 'aee983f279aab96ec45ab6e2abb3c2ca',
|
||||
IE_NAME = 'tvnoe'
|
||||
IE_DESC = 'Televize Noe'
|
||||
|
||||
_VALID_URL = r'https?://(?:www\.)?tvnoe\.cz/porad/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvnoe.cz/porad/43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
|
||||
'info_dict': {
|
||||
'id': '10362',
|
||||
'id': '43216-outdoor-films-s-mudr-tomasem-kempnym-pomahat-potrebnym-nejen-u-nas',
|
||||
'ext': 'mp4',
|
||||
'series': 'Noční univerzita',
|
||||
'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací',
|
||||
'description': 'md5:f337bae384e1a531a52c55ebc50fff41',
|
||||
'title': 'Pomáhat potřebným nejen u nás',
|
||||
'description': 'md5:78b538ee32f7e881ec23b9c278a0ff3a',
|
||||
'release_date': '20250531',
|
||||
'series': 'Outdoor Films s MUDr. Tomášem Kempným',
|
||||
'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.tvnoe.cz/porad/43205-zamysleni-tomase-halika-7-nedele-velikonocni',
|
||||
'info_dict': {
|
||||
'id': '43205-zamysleni-tomase-halika-7-nedele-velikonocni',
|
||||
'ext': 'mp4',
|
||||
'title': '7. neděle velikonoční',
|
||||
'description': 'md5:6bb9908efc59abe60e1c8c7c0e9bb6cd',
|
||||
'release_date': '20250531',
|
||||
'series': 'Zamyšlení Tomáše Halíka',
|
||||
'thumbnail': r're:https?://www\.tvnoe\.cz/.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player = self._search_json(
|
||||
r'var\s+INIT_PLAYER\s*=\s*', webpage, 'init player',
|
||||
video_id, transform_source=js_to_json, fatal=True)
|
||||
|
||||
iframe_url = self._search_regex(
|
||||
r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe URL')
|
||||
formats = []
|
||||
for source in traverse_obj(player, ('tracks', ..., lambda _, v: v['src'])):
|
||||
src_url = source['src']
|
||||
ext = mimetype2ext(source.get('type'))
|
||||
if ext == 'm3u8':
|
||||
fmts = self._extract_m3u8_formats(
|
||||
src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts = self._extract_mpd_formats(
|
||||
src_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
self.report_warning(f'Unsupported stream type: {ext}')
|
||||
continue
|
||||
formats.extend(fmts)
|
||||
|
||||
ifs_page = self._download_webpage(iframe_url, video_id)
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
ifs_page, video_id, transform_source=js_to_json)
|
||||
info_dict = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=iframe_url)
|
||||
|
||||
info_dict.update({
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': clean_html(get_element_by_class(
|
||||
'field-name-field-podnazev', webpage)),
|
||||
'description': clean_html(get_element_by_class(
|
||||
'field-name-body', webpage)),
|
||||
'series': clean_html(get_element_by_class('title', webpage)),
|
||||
})
|
||||
|
||||
return info_dict
|
||||
'description': clean_html(self._search_regex(
|
||||
r'<p\s+class="">(.+)</p>', webpage, 'description', default='')),
|
||||
'formats': formats,
|
||||
**traverse_obj(webpage, {
|
||||
'title': ({find_element(tag='h2')}, {clean_html}),
|
||||
'release_date': (
|
||||
{re.compile(r'\d{1,2}\.\d{1,2}\.\d{4}').findall}, filter, ..., {unified_strdate}, any),
|
||||
'series': ({find_element(tag='h1')}, {clean_html}),
|
||||
'thumbnail': (
|
||||
{find_element(id='player-live', html=True)}, {extract_attributes},
|
||||
'poster', {urljoin('https://www.tvnoe.cz/')}),
|
||||
}),
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user