diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py index 2f62c1c7cb..b9db80294e 100644 --- a/yt_dlp/extractor/telemb.py +++ b/yt_dlp/extractor/telemb.py @@ -1,5 +1,8 @@ +import itertools + from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, extract_attributes, float_or_none, @@ -24,15 +27,13 @@ class TeleMBIE(InfoExtractor): 'ext': 'mp4', 'title': 'Frameries - Un concours pour conducteurs d\'engins de chantier', 'creators': ['Sabine Dupont'], - 'description': 'md5:bfb8fdff559b64684bb005ce4901af12', + 'description': 'md5:1dc04a3aa56c5228503071baa8b4cc97', 'display_id': 'frameries-un-concours-pour-conducteurs-dengins-de-chantier', 'duration': 144.6, 'location': 'Frameries', 'release_date': '20250515', 'release_timestamp': 1747319520, 'thumbnail': r're:https?://www\.telemb\.be/cdn.+\.(?:jpe?g|png)', - 'timestamp': 1747319229, - 'upload_date': '20250515', }, }, { 'url': 'https://www.telemb.be/sports/karate-cinq-karatekas-du-bushikai-wasmuel-la-coupe-du-monde-tokyo/37849', @@ -41,15 +42,13 @@ class TeleMBIE(InfoExtractor): 'ext': 'mp4', 'title': 'Karaté : Cinq karatékas du Bushikai Wasmuel à la Coupe du Monde à Tokyo', 'creators': ['Jacob Hemptinne'], - 'description': 'md5:82ebfa7a4ddd359c9e05b5a8c8ab04c5', + 'description': 'md5:17f2d55a1533a69079cc21eadd14725f', 'display_id': 'karate-cinq-karatekas-du-bushikai-wasmuel-la-coupe-du-monde-tokyo', 'duration': 211.6, 'location': 'Quaregnon', 'release_date': '20250512', 'release_timestamp': 1747066800, 'thumbnail': r're:https?://www\.telemb\.be/cdn/.+\.(?:jpe?g|png)', - 'timestamp': 1746987989, - 'upload_date': '20250511', }, }, { 'url': 'https://www.telemb.be/replay/emission/les-infos/les-infos-16052025/36502', @@ -57,14 +56,13 @@ class TeleMBIE(InfoExtractor): 'id': '36502', 'ext': 'mp4', 'title': 'Les Infos - 16/05/2025', - 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'creators': ['Télé MB'], + 'description': 'md5:dff75a3a51c769696c23454e932ff720', 'display_id': 'les-infos-16052025', 'duration': 1144.32, 'release_date': '20250516', 'release_timestamp': 1747412520, 'thumbnail': r're:https?://www\.telemb\.be/cdn.+\.(?:jpe?g|png)', - 'timestamp': 1747408485, - 'upload_date': '20250516', }, }, { 'url': 'https://www.telemb.be/actu/linvite-des-infos/le-cma-de-jemappes-fetera-ses-20-ans-ce-week-end/36711', @@ -73,15 +71,13 @@ class TeleMBIE(InfoExtractor): 'ext': 'mp4', 'title': 'Le CMA de Jemappes fêtera ses 20 ans ce week-end', 'creators': ['Loélia Chais'], - 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'description': 'md5:f244efcce22f217df044d755e116ddef', 'display_id': 'le-cma-de-jemappes-fetera-ses-20-ans-ce-week-end', 'duration': 316.08, 'location': 'Mons', 'release_date': '20241128', 'release_timestamp': 1732787226, 'thumbnail': r're:https?://www\.telemb\.be/cdn.+\.(?:jpe?g|png)', - 'timestamp': 1732727988, - 'upload_date': '20241127', }, }] @@ -89,16 +85,15 @@ def _real_extract(self, url): video_id = self._match_id(url) display_id = self._match_valid_url(url).group('alt_id').split('/')[-1] webpage = self._download_webpage(url, video_id) + json_ld = next(itertools.islice(self._yield_json_ld(webpage, video_id), 1, 2), {}) - player_info = traverse_obj(webpage, ( - {find_element(cls='freecaster-player', html=True)}, {extract_attributes}, { - 'id': ('data-video-id', {str_or_none}), - 'thumbnail': ('data-poster', {lambda x: self._proto_relative_url(x)}, {url_or_none}), - }, - )) - video_info = self._download_json( - f'https://tvlocales-player-v12.freecaster.com/embed/{player_info.pop("id")}.json', video_id, - ).get('video') + data_video_id = traverse_obj(webpage, ( + {find_element(cls='freecaster-player', html=True)}, + {extract_attributes}, 'data-video-id', {str_or_none})) + if not (video_info := traverse_obj(self._download_json( + f'https://tvlocales-player-v12.freecaster.com/embed/{data_video_id}.json', video_id, + ), ('video', {dict}), default={})): + raise ExtractorError('Failed to fetch video information') qualities = { '3': (640, 360), @@ -108,12 +103,13 @@ def _real_extract(self, url): } formats = [] for src in traverse_obj(video_info, ('src', lambda _, v: v['src'])): - src_url = src['src'] + if not (src_url := url_or_none(src['src'])): + continue ext = mimetype2ext(src.get('type')) if ext == 'mp4': quality = src_url.rpartition('_')[2].removesuffix('.mp4') - width, height = qualities.get(quality) + width, height = qualities.get(quality, (None, None)) formats.append({ 'acodec': 'mp4a.40.2', 'ext': ext, @@ -146,19 +142,15 @@ def _real_extract(self, url): return { 'id': video_id, 'display_id': display_id, + 'duration': traverse_obj(video_info, ('duration', {float_or_none})), 'formats': formats, - **player_info, - **traverse_obj(webpage, { - 'creator': ({find_element(cls='content-author')}, {clean_html}), - 'location': ({find_element(cls='content-location')}, {clean_html}), - }), - **traverse_obj(self._search_json_ld(webpage, video_id), { - 'title': ('title', {clean_html}), + 'location': traverse_obj(webpage, ( + {find_element(cls='content-location')}, {clean_html})), + **traverse_obj(json_ld, { + 'title': ('headline', {clean_html}), + 'creator': ('author', 'name', {clean_html}), 'description': ('description', {clean_html}), - 'release_timestamp': ('timestamp', {int_or_none}), - }), - **traverse_obj(video_info, { - 'duration': ('duration', {float_or_none}), - 'timestamp': ('published_at', {parse_iso8601}), + 'release_timestamp': ('datePublished', {parse_iso8601}), + 'thumbnail': ('image', ..., {lambda x: self._proto_relative_url(x)}, {url_or_none}, any), }), }