import json
import time
import urllib.parse

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    clean_html,
    extract_attributes,
    filter_dict,
    float_or_none,
    get_element_by_class,
    get_element_html_by_class,
    int_or_none,
    jwt_decode_hs256,
    jwt_encode_hs256,
    make_archive_id,
    merge_dicts,
    parse_age_limit,
    parse_duration,
    parse_iso8601,
    str_or_none,
    strip_or_none,
    traverse_obj,
    try_call,
    url_or_none,
)


class VRTBaseIE(InfoExtractor):
    """Shared helpers for the VRT extractors: player-token API access and format extraction."""

    _GEO_BYPASS = False

    # Payload that is JWT-signed and sent as `playerInfo` when requesting a player token
    _PLAYER_INFO = {
        'platform': 'desktop',
        'app': {
            'type': 'browser',
            'name': 'Chrome',
        },
        'device': 'undefined (undefined)',
        'os': {
            'name': 'Windows',
            'version': '10',
        },
        'player': {
            'name': 'VRT web player',
            'version': '5.1.1-prod-2025-02-14T08:44:16"',
        },
    }
    # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
    _JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
    _JWT_SIGNING_KEY = 'b5f500d55cb44715107249ccd8a5c0136cfb2788dbb71b90a4f142423bacaf38'  # -dev
    # player-stag.vrt.be key: d23987504521ae6fbf2716caca6700a24bb1579477b43c84e146b279de5ca595
    # player.vrt.be key: 2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae

    def _extract_formats_and_subtitles(self, data, video_id):
        """Build the formats list and subtitles dict from an API response.

        Reports DRM if `data['drm']` is truthy, dispatches every entry of
        `data['targetUrls']` on its upper-cased `type`, and adds every
        `subtitleUrls` entry of type 'CLOSED' under the 'nl' language key.

        Returns a `(formats, subtitles)` tuple.
        """
        if traverse_obj(data, 'drm'):
            self.report_drm(video_id)

        formats, subtitles = [], {}
        for target in traverse_obj(data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
            format_type = target['type'].upper()
            format_url = target['url']
            if format_type in ('HLS', 'HLS_AES'):
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            elif format_type == 'HDS':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_type, fatal=False))
            elif format_type == 'MPEG_DASH':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id=format_type, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            elif format_type == 'HSS':
                fmts, subs = self._extract_ism_formats_and_subtitles(
                    format_url, video_id, ism_id='mss', fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                # Unknown type: expose the URL as-is
                formats.append({
                    'format_id': format_type,
                    'url': format_url,
                })

        for sub in traverse_obj(data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED')):
            subtitles.setdefault('nl', []).append({'url': sub['url']})

        return formats, subtitles

    def _call_api(self, video_id, client='null', id_token=None, version='v2'):
        """Request a player token, then download the media JSON for `video_id`.

        `id_token` is sent as `identityToken` (empty string when not given).
        A 400 response to the media request is accepted (`expected_status=400`)
        so that callers can inspect the returned JSON.
        """
        # The signed player info expires 900 seconds from now
        player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO}
        player_token = self._download_json(
            f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/tokens',
            video_id, 'Downloading player token', 'Failed to download player token',
            headers={
                **self.geo_verification_headers(),
                'Content-Type': 'application/json',
            }, data=json.dumps({
                'identityToken': id_token or '',
                'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
                    'kid': self._JWT_KEY_ID,
                }).decode(),
            }, separators=(',', ':')).encode())['vrtPlayerToken']

        return self._download_json(
            # The URL below redirects to https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}
            f'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/{version}/videos/{video_id}',
            video_id, 'Downloading API JSON', 'Failed to download API JSON',
            query={
                'vrtPlayerToken': player_token,
                'client': client,
            }, expected_status=400)


class VRTIE(VRTBaseIE):
    IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
    # The `site` and `id` groups are both required: _real_extract unpacks .groups() into (site, display_id)
    _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
        'info_dict': {
            'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
            'ext': 'mp4',
            'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
            'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff',
            'duration': 31.2,
            'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
        'info_dict': {
            'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
            'ext': 'mp4',
            'title': 'De Belgian Cats zijn klaar voor het EK',
            'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal',
            'duration': 115.17,
            'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }]
    # Fallback API client name per matched site, used when the page does not provide one
    _CLIENT_MAP = {
        'vrt.be/vrtnws': 'vrtnieuws',
        'sporza.be': 'sporza',
    }

    def _real_extract(self, url):
        """Extract a single video from the `vrtvideo` element of an article page."""
        site, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, display_id)
        attrs = extract_attributes(get_element_html_by_class('vrtvideo', webpage) or '')

        asset_id = attrs.get('data-video-id') or attrs['data-videoid']
        publication_id = traverse_obj(attrs, 'data-publication-id', 'data-publicationid')
        if publication_id:
            asset_id = f'{publication_id}${asset_id}'
        client = traverse_obj(attrs, 'data-client-code', 'data-client') or self._CLIENT_MAP[site]

        data = self._call_api(asset_id, client)
        formats, subtitles = self._extract_formats_and_subtitles(data, asset_id)

        description = self._html_search_meta(
            ['og:description', 'twitter:description', 'description'], webpage)
        if description == '…':
            description = None

        return {
            'id': asset_id,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': url_or_none(attrs.get('data-posterimage')),
            'duration': float_or_none(attrs.get('data-duration'), 1000),
            '_old_archive_ids': [make_archive_id('Canvas', asset_id)],
            # Values found in the API response take precedence over the page attributes above
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('shortDescription', {str}),
                'duration': ('duration', {float_or_none(scale=1000)}),
                'thumbnail': ('posterImageUrl', {url_or_none}),
            }),
        }


class VrtNUIEBase(VRTBaseIE):
    """Login/token handling, GraphQL access and stream lookup shared by the VRT MAX extractors."""

    _NETRC_MACHINE = 'vrtnu'

    _TOKEN_COOKIE_DOMAIN = '.www.vrt.be'
    _ACCESS_TOKEN_COOKIE_NAME = 'vrtnu-site_profile_at'
    _REFRESH_TOKEN_COOKIE_NAME = 'vrtnu-site_profile_rt'
    _MEDIA_TOKEN_COOKIE_NAME = 'vrtnu-site_profile_vt'

    # API `code` values returned when no formats are available
    _GEO_RESTRICTED_CODES = (
        'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS',
        'CONTENT_AVAILABLE_ONLY_IN_BE',
        'CONTENT_UNAVAILABLE_VIA_PROXY',
    )
    _LOGIN_REQUIRED_CODES = (
        'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS',
        'CONTENT_IS_AGE_RESTRICTED',
        'CONTENT_REQUIRES_AUTHENTICATION',
    )

    def _are_tokens_valid(self, *tokens):
        """Return True if every given token is set and not (nearly) expired."""
        return all(token and not self._is_jwt_token_expired(token) for token in tokens)

    def _fetch_tokens(self):
        """Return an `(access_token, video_token)` pair, or `(None, None)` if unavailable.

        Sources are tried in order: current cookies, the token cache (only when
        credentials are configured), a refresh via the SSO endpoint, and finally
        a fresh login when the refresh fails and credentials are configured.
        """
        has_credentials = self._get_login_info()[0]
        access_token = self._get_vrt_cookie(self._ACCESS_TOKEN_COOKIE_NAME)
        video_token = self._get_vrt_cookie(self._MEDIA_TOKEN_COOKIE_NAME)

        if self._are_tokens_valid(access_token, video_token):
            return access_token, video_token

        if has_credentials:
            access_token, video_token = self.cache.load(
                self._NETRC_MACHINE, 'token_data', default=(None, None))

            if self._are_tokens_valid(access_token, video_token):
                self.write_debug('Restored tokens from cache')
                self._set_cookie(self._TOKEN_COOKIE_DOMAIN, self._ACCESS_TOKEN_COOKIE_NAME, access_token)
                self._set_cookie(self._TOKEN_COOKIE_DOMAIN, self._MEDIA_TOKEN_COOKIE_NAME, video_token)
                return access_token, video_token

        if not self._get_vrt_cookie(self._REFRESH_TOKEN_COOKIE_NAME):
            return None, None

        self._request_webpage(
            'https://www.vrt.be/vrtmax/sso/refresh', None,
            note='Refreshing tokens', errnote='Failed to refresh tokens', fatal=False)

        access_token = self._get_vrt_cookie(self._ACCESS_TOKEN_COOKIE_NAME)
        video_token = self._get_vrt_cookie(self._MEDIA_TOKEN_COOKIE_NAME)

        if not access_token or not video_token:
            # Drop the refresh token from both cache and cookies so that the
            # re-login below does not short-circuit on it
            self.cache.store(self._NETRC_MACHINE, 'refresh_token', None)
            self.cookiejar.clear(self._TOKEN_COOKIE_DOMAIN, '/vrtmax/sso', self._REFRESH_TOKEN_COOKIE_NAME)
            msg = 'Refreshing of tokens failed'
            if not has_credentials:
                self.report_warning(msg)
                return None, None
            self.report_warning(f'{msg}. Re-logging in')
            return self._perform_login(*self._get_login_info())

        if has_credentials:
            self.cache.store(self._NETRC_MACHINE, 'token_data', (access_token, video_token))

        return access_token, video_token

    def _get_vrt_cookie(self, cookie_name):
        """Return the value of the named cookie, or None if it is not set."""
        # Refresh token cookie is scoped to /vrtmax/sso, others are scoped to /
        return try_call(lambda: self._get_cookies('https://www.vrt.be/vrtmax/sso')[cookie_name].value)

    @staticmethod
    def _is_jwt_token_expired(token):
        """Return True if the token's `exp` claim is less than 300 seconds away (or already past)."""
        return jwt_decode_hs256(token)['exp'] - time.time() < 300

    def _perform_login(self, username, password):
        """Log in with credentials unless a usable refresh token is already available.

        Returns None when a refresh token from cookies or cache is reused, and
        an `(access_token, video_token)` tuple after a successful login.
        Raises ExtractorError if the login is rejected or the token cookies are missing.
        """
        refresh_token = self._get_vrt_cookie(self._REFRESH_TOKEN_COOKIE_NAME)
        if refresh_token and not self._is_jwt_token_expired(refresh_token):
            self.write_debug('Using refresh token from logged-in cookies; skipping login with credentials')
            return

        refresh_token = self.cache.load(self._NETRC_MACHINE, 'refresh_token', default=None)
        if refresh_token and not self._is_jwt_token_expired(refresh_token):
            self.write_debug('Restored refresh token from cache')
            self._set_cookie(
                self._TOKEN_COOKIE_DOMAIN, self._REFRESH_TOKEN_COOKIE_NAME,
                refresh_token, path='/vrtmax/sso')
            return

        self._request_webpage(
            'https://www.vrt.be/vrtmax/sso/login', None,
            note='Getting session cookies', errnote='Failed to get session cookies')

        login_data = self._download_json(
            'https://login.vrt.be/perform_login', None, data=json.dumps({
                'clientId': 'vrtnu-site',
                'loginID': username,
                'password': password,
            }).encode(), headers={
                'Content-Type': 'application/json',
                'Oidcxsrf': self._get_cookies('https://login.vrt.be')['OIDCXSRF'].value,
            }, note='Logging in', errnote='Login failed', expected_status=403)
        if login_data.get('errorCode'):
            raise ExtractorError(f'Login failed: {login_data.get("errorMessage")}', expected=True)

        self._request_webpage(
            login_data['redirectUrl'], None,
            note='Getting access token', errnote='Failed to get access token')

        access_token = self._get_vrt_cookie(self._ACCESS_TOKEN_COOKIE_NAME)
        video_token = self._get_vrt_cookie(self._MEDIA_TOKEN_COOKIE_NAME)
        refresh_token = self._get_vrt_cookie(self._REFRESH_TOKEN_COOKIE_NAME)

        if not all((access_token, video_token, refresh_token)):
            raise ExtractorError('Unable to extract token cookie values')

        self.cache.store(self._NETRC_MACHINE, 'token_data', (access_token, video_token))
        self.cache.store(self._NETRC_MACHINE, 'refresh_token', refresh_token)

        return access_token, video_token

    def fetch_query(self, url, access_token, display_id, query_name, query):
        """POST a GraphQL query for the page at `url` and return its `data.page` object.

        The public endpoint is used when no access token is available.
        """
        return self._download_json(
            f'https://www.vrt.be/vrtnu-api/graphql{"" if access_token else "/public"}/v1',
            display_id, 'Downloading asset JSON', 'Unable to download asset JSON',
            data=json.dumps({
                'operationName': query_name,
                'query': query,
                'variables': {'pageId': urllib.parse.urlparse(url).path},
            }).encode(),
            headers=filter_dict({
                'Authorization': f'Bearer {access_token}' if access_token else None,
                'Content-Type': 'application/json',
                'x-vrt-client-name': 'WEB',
                'x-vrt-client-version': '1.5.9',
                'x-vrt-zone': 'default',
            }))['data']['page']

    def fetch_metadata(self, url, access_token, display_id):
        """Run the subclass's media page query (`_MEDIA_PAGE_QUERY`) for `url`."""
        return self.fetch_query(
            url, access_token, display_id,
            self._MEDIA_PAGE_QUERY_OPERATION_NAME, self._MEDIA_PAGE_QUERY)

    def _extract_stream(self, video_id, video_token):
        """Fetch streaming info for `video_id` and extract its formats.

        Returns `(streaming_info, formats, subtitles)`. When no formats are
        found and the API returned a `code`, raises geo-restricted,
        login-required or no-formats accordingly.
        """
        try:
            streaming_info = self._call_api(video_id, 'vrtnu-web@PROD', id_token=video_token)
        except ExtractorError as e:
            # A 404 without a video token is treated as content that needs a login
            if not video_token and isinstance(e.cause, HTTPError) and e.cause.status == 404:
                self.raise_login_required()
            raise

        formats, subtitles = self._extract_formats_and_subtitles(streaming_info, video_id)

        code = traverse_obj(streaming_info, ('code', {str}))
        if not formats and code:
            if code in self._GEO_RESTRICTED_CODES:
                self.raise_geo_restricted(countries=['BE'])
            elif code in self._LOGIN_REQUIRED_CODES:
                self.raise_login_required()
            else:
                self.raise_no_formats(f'Unable to extract formats: {code}')

        return streaming_info, formats, subtitles


class VrtNUIE(VrtNUIEBase):
    IE_NAME = 'vrtmax'
    IE_DESC = 'VRT MAX (formerly VRT NU)'
    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?:vrtnu|vrtmax)/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.vrt.be/vrtmax/a-z/ket---doc/trailer/ket---doc-trailer-s6/',
        'info_dict': {
            'id': 'pbs-pub-c8a78645-5d3e-468a-89ec-6f3ed5534bd5$vid-242ddfe9-18f5-4e16-ab45-09b122a19251',
            'ext': 'mp4',
            'channel': 'ketnet',
            'description': 'Neem een kijkje in de bijzondere wereld van deze Ketnetters.',
            'display_id': 'ket---doc-trailer-s6',
            'duration': 30.0,
            'episode': 'Reeks 6 volledig vanaf 3 maart',
            'episode_id': '1739450401467',
            'season': 'Trailer',
            'season_id': '1739450401467',
            'series': 'Ket & Doc',
            'thumbnail': 'https://images.vrt.be/orig/2025/02/21/63f07122-5bbd-4ca1-b42e-8565c6cd95df.jpg',
            'timestamp': 1740373200,
            'title': 'Reeks 6 volledig vanaf 3 maart',
            'upload_date': '20250224',
            '_old_archive_ids': [
                'canvas pbs-pub-c8a78645-5d3e-468a-89ec-6f3ed5534bd5$vid-242ddfe9-18f5-4e16-ab45-09b122a19251',
                'ketnet pbs-pub-c8a78645-5d3e-468a-89ec-6f3ed5534bd5$vid-242ddfe9-18f5-4e16-ab45-09b122a19251',
            ],
        },
    }, {
        'url': 'https://www.vrt.be/vrtmax/a-z/meisjes/6/meisjes-s6a5/',
        'info_dict': {
            'id': 'pbs-pub-97b541ab-e05c-43b9-9a40-445702ef7189$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
            'ext': 'mp4',
            'channel': 'ketnet',
            'description': 'md5:713793f15cbf677f66200b36b7b1ec5a',
            'display_id': 'meisjes-s6a5',
            'duration': 1336.02,
            'episode': 'Week 5',
            'episode_id': '1684157692901',
            'episode_number': 5,
            'season': '6',
            'season_id': '1684157692901',
            'season_number': 6,
            'series': 'Meisjes',
            'thumbnail': 'https://images.vrt.be/orig/2023/05/14/bf526ae0-f1d9-11ed-91d7-02b7b76bf47f.jpg',
            'timestamp': 1685251800,
            'title': 'Week 5',
            'upload_date': '20230528',
            '_old_archive_ids': [
                'canvas pbs-pub-97b541ab-e05c-43b9-9a40-445702ef7189$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
                'ketnet pbs-pub-97b541ab-e05c-43b9-9a40-445702ef7189$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
            ],
        },
    }, {
        'url': 'https://www.vrt.be/vrtnu/a-z/taboe/3/taboe-s3a4/',
        'info_dict': {
            'id': 'pbs-pub-f50faa3a-1778-46b6-9117-4ba85f197703$vid-547507fe-1c8b-4394-b361-21e627cbd0fd',
            'ext': 'mp4',
            'channel': 'een',
            'description': 'md5:bf61345a95eca9393a95de4a7a54b5c6',
            'display_id': 'taboe-s3a4',
            'duration': 2882.02,
            'episode': 'Mensen met het syndroom van Gilles de la Tourette',
            'episode_id': '1739055911734',
            'episode_number': 4,
            'season': '3',
            'season_id': '1739055911734',
            'season_number': 3,
            'series': 'Taboe',
            'thumbnail': 'https://images.vrt.be/orig/2025/02/19/8198496c-d1ae-4bca-9a48-761cf3ea3ff2.jpg',
            'timestamp': 1740286800,
            'title': 'Mensen met het syndroom van Gilles de la Tourette',
            'upload_date': '20250223',
            '_old_archive_ids': [
                'canvas pbs-pub-f50faa3a-1778-46b6-9117-4ba85f197703$vid-547507fe-1c8b-4394-b361-21e627cbd0fd',
                'ketnet pbs-pub-f50faa3a-1778-46b6-9117-4ba85f197703$vid-547507fe-1c8b-4394-b361-21e627cbd0fd',
            ],
        },
    }]

    _MEDIA_PAGE_QUERY = '''
        query VideoPage($pageId: ID!) {
            page(id: $pageId) {
                ... on EpisodePage {
                    episode {
                        ageRaw
                        description
                        durationRaw
                        episodeNumberRaw
                        id
                        name
                        onTimeRaw
                        program {
                            title
                        }
                        season {
                            id
                            titleRaw
                        }
                        title
                        brand
                    }
                    ldjson
                    player {
                        image {
                            templateUrl
                        }
                        modes {
                            streamId
                        }
                    }
                }
            }
        }
    '''
    _MEDIA_PAGE_QUERY_OPERATION_NAME = 'VideoPage'

    def _real_extract(self, url):
        """Extract a VRT MAX episode: page metadata via GraphQL, formats via the player API."""
        display_id = self._match_id(url)
        access_token, video_token = self._fetch_tokens()

        metadata = self.fetch_metadata(url, access_token, display_id)
        video_id = metadata['player']['modes'][0]['streamId']

        streaming_info, formats, subtitles = self._extract_stream(video_id, video_token)

        # Later entries override earlier ones: API values < JSON-LD < episode metadata
        return {
            'duration': float_or_none(streaming_info.get('duration'), 1000),
            'thumbnail': url_or_none(streaming_info.get('posterImageUrl')),
            **self._json_ld(traverse_obj(metadata, ('ldjson', ..., {json.loads})), video_id, fatal=False),
            **traverse_obj(metadata, ('episode', {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'timestamp': ('onTimeRaw', {parse_iso8601}),
                'series': ('program', 'title', {str}),
                'season': ('season', 'titleRaw', {str}),
                'season_number': ('season', 'titleRaw', {int_or_none}),
                'season_id': ('id', {str_or_none}),
                'episode': ('title', {str}),
                'episode_number': ('episodeNumberRaw', {int_or_none}),
                'episode_id': ('id', {str_or_none}),
                'age_limit': ('ageRaw', {parse_age_limit}),
                'channel': ('brand', {str}),
                'duration': ('durationRaw', {parse_duration}),
            })),
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            '_old_archive_ids': [make_archive_id('Canvas', video_id),
                                 make_archive_id('Ketnet', video_id)],
        }


class VrtNURadioIE(VrtNUIEBase):
    IE_NAME = 'vrtmax_radio'
    IE_DESC = 'VRT MAX Radio (formerly VRT NU)'
    _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtmax/luister/radio/[^/]+/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.vrt.be/vrtmax/luister/radio/k/klara-live-op-jazz-middelheim~31-225/klara-live-op-jazz-middelheim~31-28457-0/',
        'info_dict': {
            'duration': 9000.043,
            'thumbnail': 'https://images.vrt.be/orig/2024/07/04/be709dea-39e5-11ef-92ff-02b7b76bf47f.png',
            'channel': 'klara',
            'channel_url': 'https://www.vrt.be/vrtmax/kanalen/klara/',
            'ext': 'mp4',
            'timestamp': 1749319200,
            'upload_date': '20250607',
            'title': 'Klara Live op Jazz Middelheim - 2025-06-07 20_00',
            'id': 'pbs-pub-57aa55b1-da11-4749-bdd6-a2cdc5bba25c$aud-4f672d08-c7ff-48d0-980e-5f43db55e381',
            'description': 'Dee Dee Bridgewater laat met een kanjer van een stem zien waarom ze het label van jazzicoon meer dan verdient!\nBeleef Jazz Middelheim vanop de eerste rij! Bart Vanhoudt, Guy Peters en Lies Steppe laten u ook thuis en onderweg meegenieten met interviews, reportages en concerten.',
            'display_id': 'klara-live-op-jazz-middelheim~31-28457-0',
        },
    }, {
        'url': 'https://www.vrt.be/vrtmax/luister/radio/n/nieuwe-feiten~11-9/nieuwe-feiten~11-33278-0/',
        'info_dict': {
            'id': 'pbs-pub-d6b2929a-60b5-43fd-88ed-fb8cc6ae2bea$aud-10e4e771-bf76-409e-bddb-1b4c1d5c0f7c',
            'ext': 'mp4',
            'display_id': 'nieuwe-feiten~11-33278-0',
            'title': 'Nieuwe Feiten - 2025-06-23 12_00',
            'description': 'md5:2189eb55d66cd4bd4ba24a68d562dc45',
            'channel': 'radio1',
            'channel_url': 'https://www.vrt.be/vrtmax/kanalen/radio-1/',
            'duration': 3119.424,
            'thumbnail': 'https://images.vrt.be/orig/2025/02/07/d8cc34bf-9f61-41aa-81fc-906042ba0847.png',
            'timestamp': 1750672800,
            'upload_date': '20250623',
        },
    }]

    _MEDIA_PAGE_QUERY = '''
query RadioEpisodePage($pageId: ID!) { page(id: $pageId) { ... on RadioEpisodePage { radioEpisode { objectId startDate presenters { name category title icon __typename } __typename } __typename } ... on PlaybackPage { ...playbackPageFragment header { title announcementValue brandsLogos { brandTitle logos { type mono width height __typename } __typename } __typename } __typename } ...errorFragment __typename } }
fragment playbackPageFragment on PlaybackPage { __typename objectId title brand brandLogos { ...brandLogosFragment __typename } permalink seo { ...seoFragment __typename } socialSharing { ...socialSharingFragment __typename } trackingData { ...trackingDataFragment __typename } ldjson player { ...playerFragment __typename } menu { ...menuFragment __typename } nudge { ...nudgeFragment __typename } components { ...bannerFragment ...contactInfoFragment ...mediaInfoFragment __typename } }
fragment menuFragment on ContainerNavigation { __typename objectId items { __typename objectId componentId title active action { ... on SwitchTabAction { __typename referencedTabId mediaType link } __typename } } }
fragment seoFragment on SeoProperties { __typename title description }
fragment socialSharingFragment on SocialSharingProperties { __typename title description image { __typename objectId templateUrl } }
fragment playerFragment on MediaPlayer { __typename objectId classification { iconName __typename } maxAge image { ...imageFragment __typename } modes { __typename active adsUrl cimMediaTrackingData { channel ct programDuration programId programName se st tv __typename } mediaTrackingData { ...trackingDataFragment __typename } token { placeholder value __typename } resumePointTemplate { mediaId mediaName __typename } streamId ... on VideoPlayerMode { aspectRatio __typename } } progress { __typename completed durationInSeconds progressInSeconds } secondaryMeta { ...metaFragment __typename } sportBuffStreamId subtitle title }
fragment imageFragment on Image { __typename objectId alt focusPoint { x y __typename } templateUrl }
fragment metaFragment on MetaDataItem { __typename type value shortValue longValue }
fragment trackingDataFragment on PageTrackingData { data perTrigger { trigger data template { id __typename } __typename } __typename }
fragment bannerFragment on Banner { __typename objectId accessibilityTitle brand countdown { date __typename } richDescription { __typename text } image { objectId templateUrl alt focusPoint { x y __typename } __typename } title compactLayout textTheme backgroundColor style action { ...actionFragment __typename } actionItems { ...actionItemFragment __typename } titleArt { objectId templateUrl __typename } labelMeta { __typename type value } ... on IComponent { ...componentTrackingDataFragment __typename } }
fragment actionFragment on Action { __typename ... on FavoriteAction { id favorite title __typename } ... on ListDeleteAction { listName id listId title __typename } ... on ListTileDeletedAction { listName id listId __typename } ... on LinkAction { internalTarget linkId link internalTarget externalTarget passUserIdentity zone { preferredZone isExclusive __typename } linkTokens { __typename placeholder value } __typename } ... on ClientDrivenAction { __typename clientDrivenActionType } ... on ShareAction { title url __typename } ... on SwitchTabAction { referencedTabId mediaType link __typename } ... on FinishAction { id __typename } }
fragment actionItemFragment on ActionItem { __typename objectId accessibilityLabel active mode title themeOverride action { ...actionFragment __typename } icons { ...iconFragment __typename } }
fragment iconFragment on Icon { __typename accessibilityLabel position ... on DesignSystemIcon { value { name __typename } activeValue { name __typename } __typename } ... on ImageIcon { value { srcSet { src format __typename } __typename } activeValue { srcSet { src format __typename } __typename } __typename } }
fragment componentTrackingDataFragment on IComponent { trackingData { data perTrigger { trigger data template { id __typename } __typename } __typename } __typename }
fragment brandLogosFragment on Logo { colorOnColor height mono primary type width __typename }
fragment contactInfoFragment on ContactInfo { __typename title items { title description options { objectId title icons { ...iconFragment __typename } action { ... on LinkAction { link externalTarget __typename } __typename } __typename } __typename } }
fragment mediaInfoFragment on MediaInfo { __typename objectId title maxAge description accessibilityTitle actionItems { ...actionItemFragment __typename } trackingData { ...trackingDataFragment __typename } image { ...imageFragment __typename } primaryMeta { ...metaFragment __typename } secondaryMeta { ...metaFragment __typename } tertiaryMeta { ...metaFragment __typename } }
fragment nudgeFragment on PopUp { __typename buttons { ...actionItemFragment __typename } description image { ...imageFragment __typename } objectId title trackingData { ...trackingDataFragment __typename } }
fragment errorFragment on ErrorPage { errorComponents: components { ...noContentFragment __typename } __typename }
fragment noContentFragment on NoContent { __typename objectId title text backgroundImage { ...imageFragment __typename } mainImage { ...imageFragment __typename } noContentType actionItems { ...actionItemFragment __typename } }'''
    _MEDIA_PAGE_QUERY_OPERATION_NAME = 'RadioEpisodePage'

    def _format_program_name(self, program_name):
        """Rewrite 'ProgramName - 30/01/2025 22:00' as 'ProgramName - 2025-01-30 22_00'.

        Returns `program_name` unchanged (possibly None) when it does not match
        that pattern.
        """
        match = self._search_regex(
            r'^(.+?)\s*-\s*(\d{2})/(\d{2})/(\d{4})\s+(\d{2}):(\d{2})$',
            program_name, 'program_name', fatal=False, group=list(range(7)))
        # With fatal=False, None is returned when the name is missing or does not match
        if not match or len(match) != 7:
            return program_name
        show_name, day, month, year, hour, minute = match[1:]
        return f'{show_name} - {year}-{month}-{day} {hour}_{minute}'

    def _real_extract(self, url):
        """Extract a VRT MAX radio episode: page metadata via GraphQL, formats via the player API."""
        display_id = self._match_id(url)
        access_token, video_token = self._fetch_tokens()

        metadata = self.fetch_metadata(url, access_token, display_id)
        video_id = metadata['player']['modes'][0]['streamId']

        streaming_info, formats, subtitles = self._extract_stream(video_id, video_token)

        program_name = self._format_program_name(traverse_obj(
            metadata, ('player', 'modes', 0, 'cimMediaTrackingData', 'programName', {str})))

        # Links in the metadata are resolved against the scheme and host of the page URL
        parsed_url = urllib.parse.urlparse(url)
        base_url = parsed_url.scheme + '://' + parsed_url.netloc

        return {
            'duration': float_or_none(streaming_info.get('duration'), 1000),
            **self._json_ld(traverse_obj(metadata, ('ldjson', ..., {json.loads})), video_id, fatal=False),
            **traverse_obj(metadata, {
                'timestamp': ('radioEpisode', 'startDate', {parse_iso8601}),
                'title': ('title', {str}),
                'channel': ('brand', {str}),
                'channel_url': ('components', 0, 'actionItems', 1, 'action', 'link',
                                {lambda x: urllib.parse.urljoin(base_url, str(x))}),
                'description': ('components', 0, 'description', {str}),
                'thumbnail': ('components', 0, 'image', 'templateUrl', {str}),
            }),
            'ext': 'm4a',
            # Only override the title found above when a program name is available
            **({'title': program_name} if program_name else {}),
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
        }


class DagelijkseKostIE(VRTBaseIE):
    IE_DESC = 'dagelijksekost.een.be'
    _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
        'info_dict': {
            'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
            'ext': 'mp4',
            'title': 'Hachis parmentier met witloof',
            'description': 'md5:9960478392d87f63567b5b117688cdc5',
            'display_id': 'hachis-parmentier-met-witloof',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Extract the video referenced by the `data-url` attribute of a recipe page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        video_id = self._html_search_regex(
            r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')

        data = self._call_api(video_id, 'dako@prod', version='v1')
        formats, subtitles = self._extract_formats_and_subtitles(data, video_id)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'display_id': display_id,
            'title': strip_or_none(get_element_by_class(
                'dish-metadata__title', webpage) or self._html_search_meta('twitter:title', webpage)),
            'description': clean_html(get_element_by_class(
                'dish-description', webpage)) or self._html_search_meta(
                ['description', 'twitter:description', 'og:description'], webpage),
            '_old_archive_ids': [make_archive_id('Canvas', video_id)],
        }


class Radio1BeIE(VRTBaseIE):
    _VALID_URL = r'https?://radio1\.be/(?:lees|luister/select)/(?P<id>[\w/-]+)'
    _TESTS = [{
        'url': 'https://radio1.be/luister/select/de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
        'info_dict': {
            'id': 'eb6c22e9-544f-44f4-af39-cf8cccd29e22',
            'title': 'Komt N-VA volgend jaar op in Wallonië?',
            'display_id': 'de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie',
            'description': 'md5:b374ea1c9302f38362df9dea1931468e',
            'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://radio1.be/lees/europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza?view=web',
        'info_dict': {
            'id': '5d47f102-dbdb-4fa0-832b-26c1870311f2',
            'title': 'Europese Unie wil "onmiddellijke humanitaire pauze" en "duurzaam staakt-het-vuren" in Gaza',
            'description': 'md5:1aad1fae7d39edeffde5d3e67d276b64',
            'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+',
            'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza',
        },
        'playlist_mincount': 1,
    }]

    def _extract_video_entries(self, next_js_data, display_id):
        """Yield one entry per item (the page item itself or one of its paragraphs) with a `mediaReference`."""
        video_data = traverse_obj(
            next_js_data, ((None, ('paragraphs', ...)), {lambda x: x if x['mediaReference'] else None}))
        for data in video_data:
            media_reference = data['mediaReference']
            formats, subtitles = self._extract_formats_and_subtitles(
                self._call_api(media_reference), display_id)

            yield {
                'id': media_reference,
                'formats': formats,
                'subtitles': subtitles,
                **traverse_obj(data, {
                    'title': ('title', {str}),
                    'description': ('body', {clean_html}),
                }),
            }

    def _real_extract(self, url):
        """Return a playlist of all media referenced by a Radio 1 article or episode page."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        next_js_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['item']

        return self.playlist_result(
            self._extract_video_entries(next_js_data, display_id),
            **merge_dicts(traverse_obj(
                next_js_data, ({
                    'id': ('id', {str}),
                    'title': ('title', {str}),
                    'description': (('description', 'content'), {clean_html}),
                }), get_all=False), {
                'display_id': display_id,
                'title': self._html_search_meta(['name', 'og:title', 'twitter:title'], webpage),
                'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
                'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
            }))