diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3eea0cdf6b..6585104c13 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -636,7 +636,10 @@ FancodeVodIE, ) from .fathom import FathomIE -from .faulio import FaulioLiveIE +from .faulio import ( + FaulioIE, + FaulioLiveIE, +) from .faz import FazIE from .fc2 import ( FC2IE, diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index a5d5c750b4..d97cd2e002 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -5,15 +5,124 @@ from ..utils import js_to_json, url_or_none from ..utils.traversal import traverse_obj +_DOMAINS = ( + 'aloula.sba.sa', + 'bahry.com', + 'maraya.sba.net.ae', + 'sat7plus.org', +) -class FaulioLiveIE(InfoExtractor): - _DOMAINS = ( - 'aloula.sba.sa', - 'bahry.com', - 'maraya.sba.net.ae', - 'sat7plus.org', - ) - _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P[a-zA-Z0-9-]+)' +_LANGUAGES = ( + 'ar', + 'en', + 'fa', +) + + +class FaulioBase(InfoExtractor): + def _get_headers(self, url): + origin = f'{urllib.parse.urlparse(url).scheme}://{urllib.parse.urlparse(url).netloc}' + return { + 'Referer': origin, + 'Origin': origin, + } + + def _get_api_base(self, url, video_id): + webpage = self._download_webpage(url, video_id) + config_data = self._search_json( + r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json) + return config_data['public']['TRANSLATIONS_API_URL'] + + +class FaulioIE(FaulioBase): + _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:{"|".join(map(re.escape, _LANGUAGES))})/)?(?:episode|media)/(?P[a-zA-Z0-9-]+)' + _TESTS = [ + { + 'url': 'https://bahry.com/en/media/1191', + 'info_dict': { + 'id': 'bahry.faulio.com_1191', + 'ext': 'mp4', + 'display_id': 'Episode-4-1191', + 'title': 'Episode 4', + 'episode': 'Episode 4', + 'description': '', + 'series': 'Wild Water', + 'season': 'Season 1', + 'season_number': 1, + 'episode_number': 4, + 'thumbnail': str, + 'duration': 1653, + 'age_limit': 0, + }, + }, + { + 'url': 'https://maraya.sba.net.ae/en/episode/127735', + 'info_dict': { + 'id': 'maraya.faulio.com_127735', + 'ext': 'mp4', + 'display_id': 'عبدالله-الهاجري---عبدالرحمن-المطروشي-127735', + 'title': 'عبدالله الهاجري - عبدالرحمن المطروشي', + 'episode': 'عبدالله الهاجري - عبدالرحمن المطروشي', + 'description': 'تابعوا رحلة الطلبة الإماراتيين المبتعثين إلى أرقى الجامعات العالمية. يستعرض البرنامج كيف تُعدّ هذه البعثات الطلاب بالمهارات والمعرفة اللازمة لمواجهة تحديات المستقبل وقيادة مسيرة الوطن.', + 'series': 'أبناؤنا في الخارج', + 'season': 'Season 3', + 'season_number': 3, + 'episode_number': 7, + 'thumbnail': str, + 'duration': 1316, + 'age_limit': 0, + }, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + api_base = self._get_api_base(url, video_id) + + video_info = self._download_json(f'{api_base}/video/{video_id}', video_id) + player_info = self._download_json(f'{api_base}/video/{video_id}/player', video_id) + + headers = self._get_headers(url) + formats = [] + subtitles = {} + if hls_url := traverse_obj(player_info, ('settings', 'protocols', 'hls', {url_or_none})): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False, headers=headers) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + if mpd_url := traverse_obj(player_info, ('settings', 'protocols', 'dash', {url_or_none})): + fmts, subs = self._extract_mpd_formats_and_subtitles( + mpd_url, video_id, mpd_id='dash', fatal=False, headers=headers) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + for f in formats: + f['http_headers'] = headers + + return { + 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', + **traverse_obj(traverse_obj(video_info, ('blocks', 0)), { + 'display_id': ('slug',), + 'title': ('title',), + 'episode': ('title',), + 'description': ('description',), + 'series': ('program_title',), + 'season_number': ('season_number',), + 'episode_number': ('episode',), + 'thumbnail': ('image',), + 'duration': ('duration', 'total'), + 'age_limit': ('age_rating',), + }), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': False, + } + + +class FaulioLiveIE(FaulioBase): + _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:{"|".join(map(re.escape, _LANGUAGES))})/)?live/(?P[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'https://aloula.sba.sa/live/saudiatv', 'info_dict': { @@ -69,30 +178,30 @@ class FaulioLiveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - config_data = self._search_json( - r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json) - api_base = config_data['public']['TRANSLATIONS_API_URL'] + api_base = self._get_api_base(url, video_id) channel = traverse_obj( self._download_json(f'{api_base}/channels', video_id), (lambda k, v: v['url'] == video_id, any)) + headers = self._get_headers(url) formats = [] subtitles = {} if hls_url := traverse_obj(channel, ('streams', 'hls', {url_or_none})): fmts, subs = self._extract_m3u8_formats_and_subtitles( - hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False) + hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False, headers=headers) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) if mpd_url := traverse_obj(channel, ('streams', 'mpd', {url_or_none})): fmts, subs = self._extract_mpd_formats_and_subtitles( - mpd_url, video_id, mpd_id='dash', fatal=False) + mpd_url, video_id, mpd_id='dash', fatal=False, headers=headers) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) + for f in formats: + f['http_headers'] = headers + return { 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', **traverse_obj(channel, {