mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[wdr] Add WDRBaseIE, for Sportschau (#9799)
This commit is contained in:
		| @@ -15,7 +15,87 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WDRIE(InfoExtractor): | ||||
class WDRBaseIE(InfoExtractor):
    """Base extractor holding the media extraction shared by WDR-style pages."""

    def _extract_wdr_video(self, webpage, display_id):
        """Build an info dict from the media link embedded in ``webpage``.

        The page is searched for a ``data-extension`` attribute whose JSON
        payload points (via ``mediaObj.url``) to a JSONP metadata document.
        That document's ``mediaResource`` entry supplies the format URLs and
        an optional caption URL; its ``trackerData`` entry supplies the id,
        title, alternative title and air time.

        :param webpage: HTML of the already downloaded page.
        :param display_id: identifier used for progress messages and as the
            fallback ``id`` when the metadata carries no ``trackerClipId``.
        :return: an info dict, or ``None`` when the page has no media link.

        The ``mediaObj``/``url``, ``trackerData``, ``mediaResource`` and
        ``trackerClipTitle`` keys are accessed by subscript, so a metadata
        document lacking any of them raises instead of returning ``None``.
        """
        # for wdr.de the data-extension is in a tag with the class "mediaLink"
        # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn"
        # for wdrmaus it's in a link to the page in a multiline "videoLink"-tag
        json_metadata = self._html_search_regex(
            r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"',
            webpage, 'media link', default=None, flags=re.MULTILINE)

        if not json_metadata:
            # No player on this page; the caller decides what to do next.
            return None

        media_link_obj = self._parse_json(
            json_metadata, display_id, transform_source=js_to_json)
        jsonp_url = media_link_obj['mediaObj']['url']

        # Use display_id (not a fixed literal) so progress and error messages
        # are attributed to the item actually being extracted.
        metadata = self._download_json(
            jsonp_url, display_id, transform_source=strip_jsonp)

        metadata_tracker_data = metadata['trackerData']
        metadata_media_resource = metadata['mediaResource']

        formats = []

        # Only the default and alternative resources carry media URLs; other
        # keys of mediaResource (e.g. the caption URL) are handled separately.
        for kind, media_resource in metadata_media_resource.items():
            if kind not in ('dflt', 'alt'):
                continue

            for tag_name, medium_url in media_resource.items():
                if tag_name not in ('videoURL', 'audioURL'):
                    continue
                if not medium_url:
                    # An empty entry cannot be probed or downloaded; skip it
                    # rather than issuing a request for a missing URL.
                    continue

                ext = determine_ext(medium_url)
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        medium_url, display_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls'))
                elif ext == 'f4m':
                    manifest_url = update_url_query(
                        medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'})
                    formats.extend(self._extract_f4m_formats(
                        manifest_url, display_id, f4m_id='hds', fatal=False))
                elif ext == 'smil':
                    formats.extend(self._extract_smil_formats(
                        medium_url, display_id, fatal=False))
                else:
                    # Direct link to a media file.
                    a_format = {
                        'url': medium_url,
                    }
                    if ext == 'unknown_video':
                        # The URL has no usable extension; ask the server.
                        urlh = self._request_webpage(
                            medium_url, display_id, note='Determining extension')
                        ext = urlhandle_detect_ext(urlh)
                        a_format['ext'] = ext
                    formats.append(a_format)

        self._sort_formats(formats)

        subtitles = {}
        caption_url = metadata_media_resource.get('captionURL')
        if caption_url:
            # Captions are registered under the 'de' language code as TTML.
            subtitles['de'] = [{
                'url': caption_url,
                'ext': 'ttml',
            }]

        title = metadata_tracker_data['trackerClipTitle']

        return {
            'id': metadata_tracker_data.get('trackerClipId', display_id),
            'display_id': display_id,
            'title': title,
            'alt_title': metadata_tracker_data.get('trackerClipSubcategory'),
            'formats': formats,
            'subtitles': subtitles,
            'upload_date': unified_strdate(metadata_tracker_data.get('trackerClipAirTime')),
        }
|  | ||||
|  | ||||
| class WDRIE(WDRBaseIE): | ||||
|     _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' | ||||
|     _PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html' | ||||
|     _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL | ||||
| @@ -91,10 +171,10 @@ class WDRIE(InfoExtractor): | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', | ||||
|             # HDS download, MD5 is unstable | ||||
|             'md5': '803138901f6368ee497b4d195bb164f2', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-186083', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20130919', | ||||
|                 'title': 'Sachgeschichte - Achterbahn ', | ||||
|                 'description': '- Die Sendung mit der Maus -', | ||||
| @@ -120,14 +200,9 @@ class WDRIE(InfoExtractor): | ||||
|         display_id = mobj.group('display_id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         # for wdr.de the data-extension is in a tag with the class "mediaLink" | ||||
|         # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" | ||||
|         # for wdrmaus its in a link to the page in a multiline "videoLink"-tag | ||||
|         json_metadata = self._html_search_regex( | ||||
|             r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', | ||||
|             webpage, 'media link', default=None, flags=re.MULTILINE) | ||||
|         info_dict = self._extract_wdr_video(webpage, display_id) | ||||
|  | ||||
|         if not json_metadata: | ||||
|         if not info_dict: | ||||
|             entries = [ | ||||
|                 self.url_result(page_url + href[0], 'WDR') | ||||
|                 for href in re.findall( | ||||
| @@ -140,86 +215,22 @@ class WDRIE(InfoExtractor): | ||||
|  | ||||
|             raise ExtractorError('No downloadable streams found', expected=True) | ||||
|  | ||||
|         media_link_obj = self._parse_json(json_metadata, display_id, | ||||
|                                           transform_source=js_to_json) | ||||
|         jsonp_url = media_link_obj['mediaObj']['url'] | ||||
|  | ||||
|         metadata = self._download_json( | ||||
|             jsonp_url, 'metadata', transform_source=strip_jsonp) | ||||
|  | ||||
|         metadata_tracker_data = metadata['trackerData'] | ||||
|         metadata_media_resource = metadata['mediaResource'] | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         # check if the metadata contains a direct URL to a file | ||||
|         for kind, media_resource in metadata_media_resource.items(): | ||||
|             if kind not in ('dflt', 'alt'): | ||||
|                 continue | ||||
|  | ||||
|             for tag_name, medium_url in media_resource.items(): | ||||
|                 if tag_name not in ('videoURL', 'audioURL'): | ||||
|                     continue | ||||
|  | ||||
|                 ext = determine_ext(medium_url) | ||||
|                 if ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         medium_url, display_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id='hls')) | ||||
|                 elif ext == 'f4m': | ||||
|                     manifest_url = update_url_query( | ||||
|                         medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         manifest_url, display_id, f4m_id='hds', fatal=False)) | ||||
|                 elif ext == 'smil': | ||||
|                     formats.extend(self._extract_smil_formats( | ||||
|                         medium_url, 'stream', fatal=False)) | ||||
|                 else: | ||||
|                     a_format = { | ||||
|                         'url': medium_url | ||||
|                     } | ||||
|                     if ext == 'unknown_video': | ||||
|                         urlh = self._request_webpage( | ||||
|                             medium_url, display_id, note='Determining extension') | ||||
|                         ext = urlhandle_detect_ext(urlh) | ||||
|                         a_format['ext'] = ext | ||||
|                     formats.append(a_format) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         caption_url = metadata_media_resource.get('captionURL') | ||||
|         if caption_url: | ||||
|             subtitles['de'] = [{ | ||||
|                 'url': caption_url, | ||||
|                 'ext': 'ttml', | ||||
|             }] | ||||
|  | ||||
|         title = metadata_tracker_data.get('trackerClipTitle') | ||||
|         is_live = url_type == 'live' | ||||
|  | ||||
|         if is_live: | ||||
|             title = self._live_title(title) | ||||
|             upload_date = None | ||||
|         elif 'trackerClipAirTime' in metadata_tracker_data: | ||||
|             upload_date = metadata_tracker_data['trackerClipAirTime'] | ||||
|         else: | ||||
|             upload_date = self._html_search_meta('DC.Date', webpage, 'upload date') | ||||
|             info_dict.update({ | ||||
|                 'title': self._live_title(info_dict['title']), | ||||
|                 'upload_date': None, | ||||
|             }) | ||||
|         elif 'upload_date' not in info_dict: | ||||
|             info_dict['upload_date'] = unified_strdate(self._html_search_meta('DC.Date', webpage, 'upload date')) | ||||
|  | ||||
|         if upload_date: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         return { | ||||
|             'id': metadata_tracker_data.get('trackerClipId', display_id), | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), | ||||
|             'formats': formats, | ||||
|             'upload_date': upload_date, | ||||
|         info_dict.update({ | ||||
|             'description': self._html_search_meta('Description', webpage), | ||||
|             'is_live': is_live, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
|  | ||||
|  | ||||
| class WDRMobileIE(InfoExtractor): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Yen Chi Hsuan
					Yen Chi Hsuan