mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-07-10 07:18:33 +00:00
[ie/Tagesschau] Update extractor for current website
This commit is contained in:
parent
e67d786c7c
commit
613852e4a8
@ -1,19 +1,19 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
get_elements_html_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TagesschauIE(InfoExtractor):
|
class TagesschauIE(InfoExtractor):
|
||||||
_WORKING = False
|
_VALID_URL = [
|
||||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
|
r'https?://(?:www\.)?tagesschau\.de(?:/[^/#?]+)*/(?P<id>[^/#?\.]+)',
|
||||||
|
r'https?://(?:www\.)?(?P<id>tagesschau\.de)/?',
|
||||||
|
]
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
|
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
|
||||||
@ -106,44 +106,40 @@ class TagesschauIE(InfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
webpage_id = self._match_id(url)
|
||||||
video_id = mobj.group('id') or mobj.group('path')
|
webpage = self._download_webpage(url, webpage_id)
|
||||||
display_id = video_id.lstrip('-')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
|
r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
|
||||||
webpage, 'title', default=None) or self._og_search_title(webpage, fatal=False)
|
webpage, 'title', default=None) or self._og_search_title(webpage, fatal=False)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
videos = re.findall(r'<div[^>]+>', webpage)
|
media_players = get_elements_html_by_attribute(
|
||||||
num = 0
|
'data-v-type', 'MediaPlayer(?:InlinePlay)?', webpage, escape_value=False)
|
||||||
for video in videos:
|
|
||||||
video = extract_attributes(video).get('data-config')
|
for player in media_players:
|
||||||
if not video:
|
data = self._parse_json(extract_attributes(player)['data-v'], webpage_id)
|
||||||
continue
|
media_id = data['mc']['pluginData']['trackingSAND@all']['av_content_id']
|
||||||
video = self._parse_json(video, video_id, transform_source=js_to_json, fatal=False)
|
video_formats = try_get(data, lambda x: x['mc']['streams'][0]['media'])
|
||||||
video_formats = try_get(video, lambda x: x['mc']['_mediaArray'][0]['_mediaStreamArray'])
|
|
||||||
if not video_formats:
|
if not video_formats:
|
||||||
continue
|
continue
|
||||||
num += 1
|
|
||||||
for video_format in video_formats:
|
|
||||||
media_url = video_format.get('_stream') or ''
|
|
||||||
formats = []
|
formats = []
|
||||||
|
for video_format in video_formats:
|
||||||
|
media_url = video_format.get('url') or ''
|
||||||
if media_url.endswith('master.m3u8'):
|
if media_url.endswith('master.m3u8'):
|
||||||
formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls')
|
formats += self._extract_m3u8_formats(media_url, media_id, 'mp4', m3u8_id='hls')
|
||||||
elif media_url.endswith('.mp3'):
|
elif media_url.endswith('.mp3'):
|
||||||
formats = [{
|
formats.append({
|
||||||
'url': media_url,
|
'url': media_url,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}]
|
'format_note': video_format.get('forcedLabel'),
|
||||||
|
})
|
||||||
if not formats:
|
if not formats:
|
||||||
continue
|
continue
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': f'{display_id}-{num}',
|
'id': media_id,
|
||||||
'title': try_get(video, lambda x: x['mc']['_title']),
|
'title': try_get(data, lambda x: x['mc']['meta']['title']),
|
||||||
'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
|
'duration': int_or_none(try_get(data, lambda x: x['mc']['meta']['durationSeconds'])),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -151,10 +147,10 @@ def _real_extract(self, url):
|
|||||||
raise UnsupportedError(url)
|
raise UnsupportedError(url)
|
||||||
|
|
||||||
if len(entries) > 1:
|
if len(entries) > 1:
|
||||||
return self.playlist_result(entries, display_id, title)
|
return self.playlist_result(entries, webpage_id, title)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': display_id,
|
'id': entries[0]['id'],
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'formats': entries[0]['formats'],
|
'formats': entries[0]['formats'],
|
||||||
|
Loading…
Reference in New Issue
Block a user