From 25bd4b42604a4fddb4c53f80ee3f9a5cc5619f8c Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Sat, 2 Aug 2025 09:56:30 +0000 Subject: [PATCH 1/6] Add FaulioIE --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/faulio.py | 139 ++++++++++++++++++++++++++++---- 2 files changed, 128 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3eea0cdf6b..6585104c13 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -636,7 +636,10 @@ FancodeVodIE, ) from .fathom import FathomIE -from .faulio import FaulioLiveIE +from .faulio import ( + FaulioIE, + FaulioLiveIE, +) from .faz import FazIE from .fc2 import ( FC2IE, diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index a5d5c750b4..d97cd2e002 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -5,15 +5,124 @@ from ..utils import js_to_json, url_or_none from ..utils.traversal import traverse_obj +_DOMAINS = ( + 'aloula.sba.sa', + 'bahry.com', + 'maraya.sba.net.ae', + 'sat7plus.org', +) -class FaulioLiveIE(InfoExtractor): - _DOMAINS = ( - 'aloula.sba.sa', - 'bahry.com', - 'maraya.sba.net.ae', - 'sat7plus.org', - ) - _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P[a-zA-Z0-9-]+)' +_LANGUAGES = ( + 'ar', + 'en', + 'fa', +) + + +class FaulioBase(InfoExtractor): + def _get_headers(self, url): + origin = f'{urllib.parse.urlparse(url).scheme}://{urllib.parse.urlparse(url).netloc}' + return { + 'Referer': origin, + 'Origin': origin, + } + + def _get_api_base(self, url, video_id): + webpage = self._download_webpage(url, video_id) + config_data = self._search_json( + r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json) + return config_data['public']['TRANSLATIONS_API_URL'] + + +class FaulioIE(FaulioBase): + _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:{"|".join(map(re.escape, _LANGUAGES))})/)?(?:episode|media)/(?P[a-zA-Z0-9-]+)' + _TESTS = [ + { + 'url': 'https://bahry.com/en/media/1191', + 'info_dict': { + 'id': 'bahry.faulio.com_1191', + 'ext': 'mp4', + 'display_id': 'Episode-4-1191', + 'title': 'Episode 4', + 'episode': 'Episode 4', + 'description': '', + 'series': 'Wild Water', + 'season': 'Season 1', + 'season_number': 1, + 'episode_number': 4, + 'thumbnail': str, + 'duration': 1653, + 'age_limit': 0, + }, + }, + { + 'url': 'https://maraya.sba.net.ae/en/episode/127735', + 'info_dict': { + 'id': 'maraya.faulio.com_127735', + 'ext': 'mp4', + 'display_id': 'عبدالله-الهاجري---عبدالرحمن-المطروشي-127735', + 'title': 'عبدالله الهاجري - عبدالرحمن المطروشي', + 'episode': 'عبدالله الهاجري - عبدالرحمن المطروشي', + 'description': 'تابعوا رحلة الطلبة الإماراتيين المبتعثين إلى أرقى الجامعات العالمية. يستعرض البرنامج كيف تُعدّ هذه البعثات الطلاب بالمهارات والمعرفة اللازمة لمواجهة تحديات المستقبل وقيادة مسيرة الوطن.', + 'series': 'أبناؤنا في الخارج', + 'season': 'Season 3', + 'season_number': 3, + 'episode_number': 7, + 'thumbnail': str, + 'duration': 1316, + 'age_limit': 0, + }, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + api_base = self._get_api_base(url, video_id) + + video_info = self._download_json(f'{api_base}/video/{video_id}', video_id) + player_info = self._download_json(f'{api_base}/video/{video_id}/player', video_id) + + headers = self._get_headers(url) + formats = [] + subtitles = {} + if hls_url := traverse_obj(player_info, ('settings', 'protocols', 'hls', {url_or_none})): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, 'mp4', m3u8_id='hls', fatal=False, headers=headers) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + if mpd_url := traverse_obj(player_info, ('settings', 'protocols', 'dash', {url_or_none})): + fmts, subs = self._extract_mpd_formats_and_subtitles( + mpd_url, video_id, mpd_id='dash', fatal=False, headers=headers) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + for f in formats: + f['http_headers'] = headers + + return { + 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', + **traverse_obj(traverse_obj(video_info, ('blocks', 0)), { + 'display_id': ('slug',), + 'title': ('title',), + 'episode': ('title',), + 'description': ('description',), + 'series': ('program_title',), + 'season_number': ('season_number',), + 'episode_number': ('episode',), + 'thumbnail': ('image',), + 'duration': ('duration', 'total'), + 'age_limit': ('age_rating',), + }), + 'formats': formats, + 'subtitles': subtitles, + 'is_live': False, + } + + +class FaulioLiveIE(FaulioBase): + _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:{"|".join(map(re.escape, _LANGUAGES))})/)?live/(?P[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'https://aloula.sba.sa/live/saudiatv', 'info_dict': { @@ -69,30 +178,30 @@ class FaulioLiveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - config_data = self._search_json( - r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json) - api_base = config_data['public']['TRANSLATIONS_API_URL'] + api_base = self._get_api_base(url, video_id) channel = traverse_obj( self._download_json(f'{api_base}/channels', video_id), (lambda k, v: v['url'] == video_id, any)) + headers = self._get_headers(url) formats = [] subtitles = {} if hls_url := traverse_obj(channel, ('streams', 'hls', {url_or_none})): fmts, subs = self._extract_m3u8_formats_and_subtitles( - hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False) + hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False, headers=headers) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) if mpd_url := traverse_obj(channel, ('streams', 'mpd', {url_or_none})): fmts, subs = self._extract_mpd_formats_and_subtitles( - mpd_url, video_id, mpd_id='dash', fatal=False) + mpd_url, video_id, mpd_id='dash', fatal=False, headers=headers) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) + for f in formats: + f['http_headers'] = headers + return { 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', **traverse_obj(channel, { From 599459fdd30c8859292726cb30ee397c2515eae0 Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Sat, 2 Aug 2025 11:54:42 +0000 Subject: [PATCH 2/6] Apply suggestions from code review Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/faulio.py | 69 +++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 39 deletions(-) diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index d97cd2e002..afa73851dc 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -5,26 +5,21 @@ from ..utils import js_to_json, url_or_none from ..utils.traversal import traverse_obj -_DOMAINS = ( - 'aloula.sba.sa', - 'bahry.com', - 'maraya.sba.net.ae', - 'sat7plus.org', -) +class FaulioBaseIE(InfoExtractor): + _DOMAINS = ( + 'aloula.sba.sa', + 'bahry.com', + 'maraya.sba.net.ae', + 'sat7plus.org', + ) + _LANGUAGES = ('ar', 'en', 'fa') + _BASE_URL_RE = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:{"|".join(_LANGUAGES)})/)?' -_LANGUAGES = ( - 'ar', - 'en', - 'fa', -) - - -class FaulioBase(InfoExtractor): def _get_headers(self, url): - origin = f'{urllib.parse.urlparse(url).scheme}://{urllib.parse.urlparse(url).netloc}' + parsed_url = urllib.parse.urlparse(url) return { - 'Referer': origin, - 'Origin': origin, + 'Referer': url, + 'Origin': f'{parsed_url.scheme}://{parsed_url.hostname}' } def _get_api_base(self, url, video_id): @@ -34,10 +29,9 @@ def _get_api_base(self, url, video_id): return config_data['public']['TRANSLATIONS_API_URL'] -class FaulioIE(FaulioBase): - _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:{"|".join(map(re.escape, _LANGUAGES))})/)?(?:episode|media)/(?P[a-zA-Z0-9-]+)' - _TESTS = [ - { +class FaulioIE(FaulioBaseIE): + _VALID_URL = fr'{FaulioBaseIE._BASE_URL_RE}(?:episode|media)/(?P[a-zA-Z0-9-]+)' + _TESTS = [{ 'url': 'https://bahry.com/en/media/1191', 'info_dict': { 'id': 'bahry.faulio.com_1191', @@ -54,8 +48,7 @@ class FaulioIE(FaulioBase): 'duration': 1653, 'age_limit': 0, }, - }, - { + }, { 'url': 'https://maraya.sba.net.ae/en/episode/127735', 'info_dict': { 'id': 'maraya.faulio.com_127735', @@ -71,16 +64,15 @@ class FaulioIE(FaulioBase): 'thumbnail': str, 'duration': 1316, 'age_limit': 0, - }, }, - ] + }] def _real_extract(self, url): video_id = self._match_id(url) api_base = self._get_api_base(url, video_id) - video_info = self._download_json(f'{api_base}/video/{video_id}', video_id) + video_info = self._download_json(f'{api_base}/video/{video_id}', video_id, fatal=False) player_info = self._download_json(f'{api_base}/video/{video_id}/player', video_id) headers = self._get_headers(url) @@ -104,25 +96,24 @@ def _real_extract(self, url): return { 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', **traverse_obj(traverse_obj(video_info, ('blocks', 0)), { - 'display_id': ('slug',), - 'title': ('title',), - 'episode': ('title',), - 'description': ('description',), - 'series': ('program_title',), - 'season_number': ('season_number',), - 'episode_number': ('episode',), - 'thumbnail': ('image',), - 'duration': ('duration', 'total'), - 'age_limit': ('age_rating',), + 'display_id': ('slug', {str}), + 'title': ('title', {str}), + 'episode': ('title', {str}), + 'description': ('description', {str}), + 'series': ('program_title', {str}), + 'season_number': ('season_number', {int_or_none}), + 'episode_number': ('episode', {int_or_none}), + 'thumbnail': ('image', {url_or_none}), + 'duration': ('duration', 'total', {int_or_none}), + 'age_limit': ('age_rating', {int_or_none}), }), 'formats': formats, 'subtitles': subtitles, - 'is_live': False, } -class FaulioLiveIE(FaulioBase): - _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:{"|".join(map(re.escape, _LANGUAGES))})/)?live/(?P[a-zA-Z0-9-]+)' +class FaulioLiveIE(FaulioBaseIE): + _VALID_URL = fr'{FaulioBaseIE._BASE_URL_RE}live/(?P[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'https://aloula.sba.sa/live/saudiatv', 'info_dict': { From 182417a6251d861529eb6c955a3531cffab71260 Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Sat, 2 Aug 2025 12:13:22 +0000 Subject: [PATCH 3/6] Fix build and improve tests --- yt_dlp/extractor/faulio.py | 83 ++++++++++++++++++++++++++++---------- 1 file changed, 61 insertions(+), 22 deletions(-) diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index afa73851dc..6a7844f0c3 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -2,9 +2,10 @@ import urllib.parse from .common import InfoExtractor -from ..utils import js_to_json, url_or_none +from ..utils import int_or_none, js_to_json, url_or_none from ..utils.traversal import traverse_obj + class FaulioBaseIE(InfoExtractor): _DOMAINS = ( 'aloula.sba.sa', @@ -19,7 +20,7 @@ def _get_headers(self, url): parsed_url = urllib.parse.urlparse(url) return { 'Referer': url, - 'Origin': f'{parsed_url.scheme}://{parsed_url.hostname}' + 'Origin': f'{parsed_url.scheme}://{parsed_url.hostname}', } def _get_api_base(self, url, video_id): @@ -32,39 +33,77 @@ def _get_api_base(self, url, video_id): class FaulioIE(FaulioBaseIE): _VALID_URL = fr'{FaulioBaseIE._BASE_URL_RE}(?:episode|media)/(?P[a-zA-Z0-9-]+)' _TESTS = [{ - 'url': 'https://bahry.com/en/media/1191', - 'info_dict': { - 'id': 'bahry.faulio.com_1191', - 'ext': 'mp4', - 'display_id': 'Episode-4-1191', - 'title': 'Episode 4', - 'episode': 'Episode 4', - 'description': '', - 'series': 'Wild Water', - 'season': 'Season 1', - 'season_number': 1, - 'episode_number': 4, - 'thumbnail': str, - 'duration': 1653, - 'age_limit': 0, - }, + 'url': 'https://aloula.sba.sa/en/episode/29102', + 'info_dict': { + 'id': 'aloula.faulio.com_29102', + 'ext': 'mp4', + 'display_id': 'هذا-مكانك-03-004-v-29102', + 'title': 'الحلقة 4', + 'episode': 'الحلقة 4', + 'description': '', + 'series': 'هذا مكانك', + 'season': 'Season 3', + 'season_number': 3, + 'episode_number': 4, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 4855, + 'age_limit': 3, + }, }, { - 'url': 'https://maraya.sba.net.ae/en/episode/127735', + 'url': 'https://bahry.com/en/media/1191', + 'info_dict': { + 'id': 'bahry.faulio.com_1191', + 'ext': 'mp4', + 'display_id': 'Episode-4-1191', + 'title': 'Episode 4', + 'episode': 'Episode 4', + 'description': '', + 'series': 'Wild Water', + 'season': 'Season 1', + 'season_number': 1, + 'episode_number': 4, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1653, + 'age_limit': 0, + }, + }, + { + 'url': 'https://maraya.sba.net.ae/episode/127735', 'info_dict': { 'id': 'maraya.faulio.com_127735', 'ext': 'mp4', 'display_id': 'عبدالله-الهاجري---عبدالرحمن-المطروشي-127735', 'title': 'عبدالله الهاجري - عبدالرحمن المطروشي', 'episode': 'عبدالله الهاجري - عبدالرحمن المطروشي', - 'description': 'تابعوا رحلة الطلبة الإماراتيين المبتعثين إلى أرقى الجامعات العالمية. يستعرض البرنامج كيف تُعدّ هذه البعثات الطلاب بالمهارات والمعرفة اللازمة لمواجهة تحديات المستقبل وقيادة مسيرة الوطن.', + 'description': 'md5:53de01face66d3d6303221e5a49388a0', 'series': 'أبناؤنا في الخارج', 'season': 'Season 3', 'season_number': 3, 'episode_number': 7, - 'thumbnail': str, + 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 1316, 'age_limit': 0, - }, + }, + }, { + 'url': 'https://sat7plus.org/episode/18165', + 'info_dict': { + 'id': 'sat7.faulio.com_18165', + 'ext': 'mp4', + 'display_id': 'ep-13-ADHD-18165', + 'title': 'ADHD and creativity', + 'episode': 'ADHD and creativity', + 'description': '', + 'series': 'ADHD Podcast', + 'season': 'Season 1', + 'season_number': 1, + 'episode_number': 13, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2492, + 'age_limit': 0, + }, + }, { + 'url': 'https://aloula.sba.sa/en/episode/0', + 'only_matching': True, }] def _real_extract(self, url): From f0b7ec707aa294236b958d30394bb282e58f1a49 Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Sat, 2 Aug 2025 12:14:15 +0000 Subject: [PATCH 4/6] hatch fmt --- yt_dlp/extractor/faulio.py | 71 +++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index 6a7844f0c3..4f0e910984 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -66,44 +66,43 @@ class FaulioIE(FaulioBaseIE): 'duration': 1653, 'age_limit': 0, }, - }, - { - 'url': 'https://maraya.sba.net.ae/episode/127735', - 'info_dict': { - 'id': 'maraya.faulio.com_127735', - 'ext': 'mp4', - 'display_id': 'عبدالله-الهاجري---عبدالرحمن-المطروشي-127735', - 'title': 'عبدالله الهاجري - عبدالرحمن المطروشي', - 'episode': 'عبدالله الهاجري - عبدالرحمن المطروشي', - 'description': 'md5:53de01face66d3d6303221e5a49388a0', - 'series': 'أبناؤنا في الخارج', - 'season': 'Season 3', - 'season_number': 3, - 'episode_number': 7, - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1316, - 'age_limit': 0, - }, }, { - 'url': 'https://sat7plus.org/episode/18165', - 'info_dict': { - 'id': 'sat7.faulio.com_18165', - 'ext': 'mp4', - 'display_id': 'ep-13-ADHD-18165', - 'title': 'ADHD and creativity', - 'episode': 'ADHD and creativity', - 'description': '', - 'series': 'ADHD Podcast', - 'season': 'Season 1', - 'season_number': 1, - 'episode_number': 13, - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 2492, - 'age_limit': 0, - }, + 'url': 'https://maraya.sba.net.ae/episode/127735', + 'info_dict': { + 'id': 'maraya.faulio.com_127735', + 'ext': 'mp4', + 'display_id': 'عبدالله-الهاجري---عبدالرحمن-المطروشي-127735', + 'title': 'عبدالله الهاجري - عبدالرحمن المطروشي', + 'episode': 'عبدالله الهاجري - عبدالرحمن المطروشي', + 'description': 'md5:53de01face66d3d6303221e5a49388a0', + 'series': 'أبناؤنا في الخارج', + 'season': 'Season 3', + 'season_number': 3, + 'episode_number': 7, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1316, + 'age_limit': 0, + }, }, { - 'url': 'https://aloula.sba.sa/en/episode/0', - 'only_matching': True, + 'url': 'https://sat7plus.org/episode/18165', + 'info_dict': { + 'id': 'sat7.faulio.com_18165', + 'ext': 'mp4', + 'display_id': 'ep-13-ADHD-18165', + 'title': 'ADHD and creativity', + 'episode': 'ADHD and creativity', + 'description': '', + 'series': 'ADHD Podcast', + 'season': 'Season 1', + 'season_number': 1, + 'episode_number': 13, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2492, + 'age_limit': 0, + }, + }, { + 'url': 'https://aloula.sba.sa/en/episode/0', + 'only_matching': True, }] def _real_extract(self, url): From f1be5d6967d19330a851c981cbf9c47795c5dd45 Mon Sep 17 00:00:00 2001 From: CasperMcFadden95 <145611964+CasperMcFadden95@users.noreply.github.com> Date: Sat, 2 Aug 2025 20:57:48 +0000 Subject: [PATCH 5/6] Set http_headers --- yt_dlp/extractor/faulio.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index 4f0e910984..e5e171c0e9 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -128,9 +128,6 @@ def _real_extract(self, url): formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) - for f in formats: - f['http_headers'] = headers - return { 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', **traverse_obj(traverse_obj(video_info, ('blocks', 0)), { @@ -147,6 +144,7 @@ def _real_extract(self, url): }), 'formats': formats, 'subtitles': subtitles, + 'http_headers': headers, } @@ -228,9 +226,6 @@ def _real_extract(self, url): formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) - for f in formats: - f['http_headers'] = headers - return { 'id': f'{urllib.parse.urlparse(api_base).hostname}_{video_id}', **traverse_obj(channel, { @@ -239,5 +234,6 @@ def _real_extract(self, url): }), 'formats': formats, 'subtitles': subtitles, + 'http_headers': headers, 'is_live': True, } From 2e3b3633d2763f6e3010d692679207b558fe0011 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 2 Aug 2025 21:45:03 +0000 Subject: [PATCH 6/6] reduce blank lines --- yt_dlp/extractor/faulio.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/yt_dlp/extractor/faulio.py b/yt_dlp/extractor/faulio.py index e5e171c0e9..9c6c13e0e0 100644 --- a/yt_dlp/extractor/faulio.py +++ b/yt_dlp/extractor/faulio.py @@ -107,9 +107,7 @@ class FaulioIE(FaulioBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - api_base = self._get_api_base(url, video_id) - video_info = self._download_json(f'{api_base}/video/{video_id}', video_id, fatal=False) player_info = self._download_json(f'{api_base}/video/{video_id}/player', video_id)