mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[extractor/bibeltv] Fix extraction, support live streams and series (#6505)
Authored by: flashdagger
This commit is contained in:
		| @@ -1,27 +1,197 @@ | ||||
| from functools import partial | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
|     determine_ext, | ||||
|     format_field, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     orderedSet, | ||||
|     parse_iso8601, | ||||
|     traverse_obj, | ||||
|     url_or_none, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class BibelTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch', | ||||
|         'md5': '252f908192d611de038b8504b08bf97f', | ||||
|         'info_dict': { | ||||
|             'id': 'ref:329703', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Sprachkurs in Malaiisch', | ||||
|             'description': 'md5:3e9f197d29ee164714e67351cf737dfe', | ||||
|             'timestamp': 1608316701, | ||||
|             'uploader_id': '5840105145001', | ||||
|             'upload_date': '20201218', | ||||
| class BibelTVBaseIE(InfoExtractor): | ||||
|     _GEO_COUNTRIES = ['AT', 'CH', 'DE'] | ||||
|     _GEO_BYPASS = False | ||||
| 
 | ||||
|     API_URL = 'https://www.bibeltv.de/mediathek/api' | ||||
|     AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm' | ||||
| 
 | ||||
|     def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False): | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|         for media_url in traverse_obj(data, (..., 'src', {url_or_none})): | ||||
|             media_ext = determine_ext(media_url) | ||||
|             if media_ext == 'm3u8': | ||||
|                 m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles( | ||||
|                     media_url, crn_id, live=is_live) | ||||
|                 formats.extend(m3u8_formats) | ||||
|                 subtitles.update(m3u8_subs) | ||||
|             elif media_ext == 'mpd': | ||||
|                 mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id) | ||||
|                 formats.extend(mpd_formats) | ||||
|                 subtitles.update(mpd_subs) | ||||
|             elif media_ext == 'mp4': | ||||
|                 formats.append({'url': media_url}) | ||||
|             else: | ||||
|                 self.report_warning(f'Unknown format {media_ext!r}') | ||||
| 
 | ||||
|         return formats, subtitles | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _extract_base_info(data): | ||||
|         return { | ||||
|             'id': data['crn'], | ||||
|             **traverse_obj(data, { | ||||
|                 'title': 'title', | ||||
|                 'description': 'description', | ||||
|                 'duration': ('duration', {partial(int_or_none, scale=1000)}), | ||||
|                 'timestamp': ('schedulingStart', {parse_iso8601}), | ||||
|                 'season_number': 'seasonNumber', | ||||
|                 'episode_number': 'episodeNumber', | ||||
|                 'view_count': 'viewCount', | ||||
|                 'like_count': 'likeCount', | ||||
|             }), | ||||
|             'thumbnails': orderedSet(traverse_obj(data, ('images', ..., { | ||||
|                 'url': ('url', {url_or_none}), | ||||
|             }))), | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374', | ||||
|         'only_matching': True, | ||||
| 
 | ||||
|     def _extract_url_info(self, data): | ||||
|         return { | ||||
|             '_type': 'url', | ||||
|             'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'), | ||||
|             **self._extract_base_info(data), | ||||
|         } | ||||
| 
 | ||||
|     def _extract_video_info(self, data): | ||||
|         crn_id = data['crn'] | ||||
| 
 | ||||
|         if data.get('drm'): | ||||
|             self.report_drm(crn_id) | ||||
| 
 | ||||
|         json_data = self._download_json( | ||||
|             format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id, | ||||
|             headers={'Authorization': self.AUTH_TOKEN}, fatal=False, | ||||
|             errnote='No formats available') or {} | ||||
| 
 | ||||
|         formats, subtitles = self._extract_formats_and_subtitles( | ||||
|             traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id) | ||||
| 
 | ||||
|         return { | ||||
|             '_type': 'video', | ||||
|             **self._extract_base_info(data), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| class BibelTVVideoIE(BibelTVBaseIE): | ||||
|     IE_DESC = 'BibelTV single video' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+' | ||||
|     IE_NAME = 'bibeltv:video' | ||||
| 
 | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege', | ||||
|         'md5': 'ec1c07efe54353780512e8a4103b612e', | ||||
|         'info_dict': { | ||||
|             'id': '344436', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Alte Wege', | ||||
|             'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9', | ||||
|             'timestamp': 1677877071, | ||||
|             'duration': 150.0, | ||||
|             'upload_date': '20230303', | ||||
|             'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg', | ||||
|             'episode': 'Episode 1', | ||||
|             'episode_number': 1, | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|         }, | ||||
|         'params': { | ||||
|             'format': '6', | ||||
|         }, | ||||
|     }] | ||||
|     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s' | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         crn_id = self._match_id(url) | ||||
|         return self.url_result( | ||||
|             self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew') | ||||
|         video_data = traverse_obj( | ||||
|             self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id), | ||||
|             ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict})) | ||||
|         if not video_data: | ||||
|             raise ExtractorError('Missing video data.') | ||||
| 
 | ||||
|         return self._extract_video_info(video_data) | ||||
| 
 | ||||
| 
 | ||||
| class BibelTVSeriesIE(BibelTVBaseIE): | ||||
|     IE_DESC = 'BibelTV series playlist' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+' | ||||
|     IE_NAME = 'bibeltv:series' | ||||
| 
 | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag', | ||||
|         'playlist_mincount': 400, | ||||
|         'info_dict': { | ||||
|             'id': '333485', | ||||
|             'title': 'Ein Wunder für jeden Tag', | ||||
|             'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.', | ||||
|         }, | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         crn_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, crn_id) | ||||
|         nextjs_data = self._search_nextjs_data(webpage, crn_id) | ||||
|         series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict})) | ||||
|         if not series_data: | ||||
|             raise ExtractorError('Missing series data.') | ||||
| 
 | ||||
|         return self.playlist_result( | ||||
|             traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})), | ||||
|             crn_id, series_data.get('title'), clean_html(series_data.get('description'))) | ||||
| 
 | ||||
| 
 | ||||
| class BibelTVLiveIE(BibelTVBaseIE): | ||||
|     IE_DESC = 'BibelTV live program' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)' | ||||
|     IE_NAME = 'bibeltv:live' | ||||
| 
 | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bibeltv.de/livestreams/bibeltv/', | ||||
|         'info_dict': { | ||||
|             'id': 'bibeltv', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Bibel TV', | ||||
|             'live_status': 'is_live', | ||||
|             'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }, { | ||||
|         'url': 'https://www.bibeltv.de/livestreams/impuls/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         stream_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, stream_id) | ||||
|         stream_data = self._search_json( | ||||
|             r'\\"video\\":', webpage, 'bibeltvData', stream_id, | ||||
|             transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"'))) | ||||
| 
 | ||||
|         formats, subtitles = self._extract_formats_and_subtitles( | ||||
|             traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True) | ||||
| 
 | ||||
|         return { | ||||
|             'id': stream_id, | ||||
|             'title': stream_data.get('title'), | ||||
|             'thumbnail': stream_data.get('poster'), | ||||
|             'is_live': True, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 MMM
					MMM