mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-28 09:28:33 +00:00
Merge 5d0fa22359
into 73bf102116
This commit is contained in:
commit
0aa3e23a1c
@ -1,80 +1,155 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
|
get_elements_html_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class TagesschauIE(InfoExtractor):
|
class TagesschauIE(InfoExtractor):
|
||||||
_WORKING = False
|
_VALID_URL = [
|
||||||
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
|
r'https?://(?:www\.)?tagesschau\.de(?:/[^/#?]+)*/(?P<id>[^/#?\.]+)',
|
||||||
|
r'https?://(?:www\.)?(?P<id>tagesschau\.de)/?',
|
||||||
|
]
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# Single video without recommendations
|
||||||
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
|
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
|
||||||
'md5': 'ccb9359bf8c4795836e43759f3408a93',
|
'md5': 'ccb9359bf8c4795836e43759f3408a93',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'video-102143-1',
|
'id': 'video-102143',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
|
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
|
||||||
'duration': 138,
|
'duration': 138,
|
||||||
|
'thumbnail': 'https://images.tagesschau.de/image/eb0b0d74-03ac-45ec-9300-0851fd6823d3/AAABj-POS-g/AAABkZLhkrw/16x9-1280/sendungslogo-tagesschau-100.webp',
|
||||||
|
'timestamp': 1437250200,
|
||||||
|
'upload_date': '20150718',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# Single video embedded
|
||||||
|
'url': 'https://www.tagesschau.de/multimedia/sendung/tagesschau_20_uhr/video-102143~player.html',
|
||||||
|
'md5': 'ccb9359bf8c4795836e43759f3408a93',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'video-102143',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
|
||||||
|
'duration': 138,
|
||||||
|
'thumbnail': 'https://images.tagesschau.de/image/eb0b0d74-03ac-45ec-9300-0851fd6823d3/AAABj-POS-g/AAABkZLhkrw/16x9-1280/sendungslogo-tagesschau-100.webp',
|
||||||
|
'timestamp': 1437250200,
|
||||||
|
'upload_date': '20150718',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Single video with recommendations, `--no-playlist`
|
||||||
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
|
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
|
||||||
'md5': '5c15e8f3da049e48829ec9786d835536',
|
'md5': '5c15e8f3da049e48829ec9786d835536',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ts-5727-1',
|
'id': 'video-45741',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ganze Sendung',
|
'title': 'tagesschau 20:00 Uhr',
|
||||||
'duration': 932,
|
'duration': 932,
|
||||||
|
'thumbnail': 'https://images.tagesschau.de/image/eb0b0d74-03ac-45ec-9300-0851fd6823d3/AAABj-POS-g/AAABkZLhkrw/16x9-1280/sendungslogo-tagesschau-100.webp',
|
||||||
|
'timestamp': 1417723200,
|
||||||
|
'upload_date': '20141204',
|
||||||
},
|
},
|
||||||
|
'params': {'noplaylist': True},
|
||||||
}, {
|
}, {
|
||||||
# exclusive audio
|
# Single audio embedded
|
||||||
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
|
'url': 'https://www.tagesschau.de/multimedia/audio/audio-157831~player.html',
|
||||||
'md5': '4bff8f23504df56a0d86ed312d654182',
|
'md5': '4bff8f23504df56a0d86ed312d654182',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'audio-29417-1',
|
'id': 'audio-157831',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
|
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
|
||||||
|
'duration': 200,
|
||||||
|
'thumbnail': 'https://images.tagesschau.de/image/197a5977-3f5f-4c21-8c08-fad6ecb4b493/AAABj864C3w/AAABkZLhkrw/16x9-1280/default-audioplayer-100.webp',
|
||||||
|
'timestamp': 1679687280,
|
||||||
|
'upload_date': '20230324',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
|
# Single audio with recommendations, `--no-playlist`
|
||||||
'md5': 'f049fa1698d7564e9ca4c3325108f034',
|
'url': 'https://www.tagesschau.de/multimedia/audio/audio-157831.html',
|
||||||
|
'md5': '4bff8f23504df56a0d86ed312d654182',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bnd-303-1',
|
'id': 'audio-157831',
|
||||||
'ext': 'mp3',
|
|
||||||
'title': 'Das Siegel des Bundesnachrichtendienstes | dpa',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'afd-parteitag-135',
|
|
||||||
'title': 'AfD',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 15,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'audio-29417-1',
|
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
|
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
|
||||||
|
'duration': 200,
|
||||||
|
'thumbnail': 'https://images.tagesschau.de/image/197a5977-3f5f-4c21-8c08-fad6ecb4b493/AAABj864C3w/AAABkZLhkrw/16x9-1280/default-audioplayer-100.webp',
|
||||||
|
'timestamp': 1679687280,
|
||||||
|
'upload_date': '20230324',
|
||||||
},
|
},
|
||||||
|
'params': {'noplaylist': True},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-327.html',
|
# Article with multimedia content, `--no-playlist`
|
||||||
|
'url': 'https://www.tagesschau.de/inland/bundestagswahl/bundestagswahl-ergebnisse-104.html',
|
||||||
|
'md5': 'f72b42f213f632dbbe76551fabebcaef',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'podcast-11km-327',
|
'id': 'video-1437570',
|
||||||
'ext': 'mp3',
|
'ext': 'mp4',
|
||||||
'title': 'Gewalt in der Kita – Wenn Erzieher:innen schweigen',
|
'title': 'Union mit Kanzlerkandidat Merz gewinnt Bundestagswahl: Parteienlandschaft im Umbruch',
|
||||||
'upload_date': '20230322',
|
'duration': 181,
|
||||||
'timestamp': 1679482808,
|
'thumbnail': 'https://images.tagesschau.de/image/ab8aa6ce-4fd5-4c1e-921d-807b07848a80/AAABlTZ3CAQ/AAABkZLpihI/20x9-1280/union-wahl-siegesfeier-100.webp',
|
||||||
'thumbnail': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-329~_v-original.jpg',
|
'timestamp': 1740401379,
|
||||||
'description': 'md5:dad059931fe4b3693e3656e93a249848',
|
'upload_date': '20250224',
|
||||||
},
|
},
|
||||||
|
'params': {'noplaylist': True},
|
||||||
|
}, {
|
||||||
|
# Topic page with multimedia content, `--no-playlist`
|
||||||
|
'url': 'https://www.tagesschau.de/thema/em_2024',
|
||||||
|
'md5': '07be4d381753e8411b527c8f0a36229f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'audio-195242',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Optimistische Verbraucherstimmung kommt an der Börse nicht an ',
|
||||||
|
'thumbnail': 'https://images.tagesschau.de/image/490fbbe9-1718-4fe0-8f51-538a3182d28e/AAABkOOc5Z0/AAABkZLpihI/20x9-1280/em-2024-fans-100.webp',
|
||||||
|
'timestamp': 1721825201,
|
||||||
|
'upload_date': '20240724',
|
||||||
|
},
|
||||||
|
'params': {'noplaylist': True},
|
||||||
|
}, {
|
||||||
|
# Playlist, single video with recommendations
|
||||||
|
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ts-5727',
|
||||||
|
'title': 'tagesschau',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 8,
|
||||||
|
}, {
|
||||||
|
# Playlist, single audio with recommendations
|
||||||
|
'url': 'https://www.tagesschau.de/multimedia/audio/audio-157831.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'audio-157831',
|
||||||
|
'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
}, {
|
||||||
|
# Playlist, article with multimedia content
|
||||||
|
'url': 'https://www.tagesschau.de/inland/bundestagswahl/bundestagswahl-ergebnisse-104.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'bundestagswahl-ergebnisse-104',
|
||||||
|
'title': 'Vorläufiges Ergebnis der Bundestagswahl: Union stärkste Kraft, FDP und BSW draußen',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 20,
|
||||||
|
}, {
|
||||||
|
# Playlist, topic page with multimedia content
|
||||||
|
'url': 'https://www.tagesschau.de/thema/em_2024',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'em_2024',
|
||||||
|
'title': 'EM 2024',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 10,
|
||||||
|
}, {
|
||||||
|
# Podcast feed
|
||||||
|
'url': 'https://www.tagesschau.de/multimedia/podcast/11km/11km-feed-100.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '11km-feed-100',
|
||||||
|
'title': '11KM: der tagesschau-Podcast',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 250,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
|
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -100,65 +175,76 @@ class TagesschauIE(InfoExtractor):
|
|||||||
'url': 'http://www.tagesschau.de/100sekunden/index.html',
|
'url': 'http://www.tagesschau.de/100sekunden/index.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# playlist article with collapsing sections
|
|
||||||
'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
|
'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.tagesschau.de',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.tagesschau.de/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
webpage_id = self._match_id(url)
|
||||||
video_id = mobj.group('id') or mobj.group('path')
|
webpage = self._download_webpage(url, webpage_id)
|
||||||
display_id = video_id.lstrip('-')
|
media_players = get_elements_html_by_attribute(
|
||||||
|
'data-v-type', 'MediaPlayer(?:InlinePlay)?', webpage, escape_value=False)
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
|
|
||||||
webpage, 'title', default=None) or self._og_search_title(webpage, fatal=False)
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
videos = re.findall(r'<div[^>]+>', webpage)
|
for player in media_players:
|
||||||
num = 0
|
data = self._parse_json(extract_attributes(player)['data-v'], webpage_id)
|
||||||
for video in videos:
|
media_id = traverse_obj(data, ('mc', 'pluginData', (
|
||||||
video = extract_attributes(video).get('data-config')
|
('trackingSAND@all', 'av_content_id'),
|
||||||
if not video:
|
('trackingPiano@all', 'avContent', 'av_content_id'),
|
||||||
|
('trackingAgf@all', 'playerID')), any))
|
||||||
|
if not media_id:
|
||||||
|
self.report_warning('Skipping unrecognized media file')
|
||||||
continue
|
continue
|
||||||
video = self._parse_json(video, video_id, transform_source=js_to_json, fatal=False)
|
|
||||||
video_formats = try_get(video, lambda x: x['mc']['_mediaArray'][0]['_mediaStreamArray'])
|
entry = {
|
||||||
if not video_formats:
|
'id': media_id,
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'title': ('mc', (
|
||||||
|
('pluginData', 'trackingPiano@all', 'avContent', 'av_content'),
|
||||||
|
('meta', 'title')), any),
|
||||||
|
'duration': ('mc', 'meta', 'durationSeconds', {int_or_none}),
|
||||||
|
'thumbnail': (
|
||||||
|
'pc', 'generic', 'imageTemplateConfig', 'size', -1,
|
||||||
|
'value', {lambda v: (v + '.webp') if v else None}),
|
||||||
|
'timestamp': (
|
||||||
|
'mc', 'pluginData', 'trackingPiano@all', 'avContent',
|
||||||
|
'd:av_publication_date', {parse_iso8601}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
input_formats = traverse_obj(data, (
|
||||||
|
'mc', 'streams', 0, 'media', lambda _, v: url_or_none(v.get('url'))), default=[])
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for input_format in input_formats:
|
||||||
|
file_url = input_format['url']
|
||||||
|
if file_url.endswith('master.m3u8'):
|
||||||
|
formats = self._extract_m3u8_formats(file_url, media_id, 'mp4', m3u8_id='hls')
|
||||||
|
break
|
||||||
|
if file_url.endswith('.mp3'):
|
||||||
|
formats.append(traverse_obj(input_format, {
|
||||||
|
'url': 'url',
|
||||||
|
'vcodec': {lambda _: 'none'},
|
||||||
|
'format_note': ('forcedLabel', {str}),
|
||||||
|
}))
|
||||||
|
if not formats:
|
||||||
|
self.report_warning(f'Skipping file {media_id} because it has no formats')
|
||||||
continue
|
continue
|
||||||
num += 1
|
entry['formats'] = formats
|
||||||
for video_format in video_formats:
|
entries.append(entry)
|
||||||
media_url = video_format.get('_stream') or ''
|
|
||||||
formats = []
|
|
||||||
if media_url.endswith('master.m3u8'):
|
|
||||||
formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls')
|
|
||||||
elif media_url.endswith('.mp3'):
|
|
||||||
formats = [{
|
|
||||||
'url': media_url,
|
|
||||||
'vcodec': 'none',
|
|
||||||
}]
|
|
||||||
if not formats:
|
|
||||||
continue
|
|
||||||
entries.append({
|
|
||||||
'id': f'{display_id}-{num}',
|
|
||||||
'title': try_get(video, lambda x: x['mc']['_title']),
|
|
||||||
'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
|
|
||||||
'formats': formats,
|
|
||||||
})
|
|
||||||
|
|
||||||
if not entries:
|
if not entries:
|
||||||
raise UnsupportedError(url)
|
raise UnsupportedError(url)
|
||||||
|
|
||||||
if len(entries) > 1:
|
if len(entries) > 1 and self._yes_playlist(
|
||||||
return self.playlist_result(entries, display_id, title)
|
webpage_id, entries[0]['id'], playlist_label='all media on', video_label='file'):
|
||||||
|
title = self._html_search_meta(
|
||||||
|
['og:title', 'title', 'twitter:title'], webpage, 'title', fatal=False)
|
||||||
|
return self.playlist_result(entries, webpage_id, title)
|
||||||
|
|
||||||
return {
|
return entries[0]
|
||||||
'id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'formats': entries[0]['formats'],
|
|
||||||
'timestamp': parse_iso8601(self._html_search_meta('date', webpage)),
|
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'duration': entries[0]['duration'],
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user