mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[WDR] extract jsonp-url by parsing data-extension of mediaLink
This commit is contained in:
		| @@ -10,6 +10,7 @@ from ..compat import ( | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     js_to_json, | ||||
|     strip_jsonp, | ||||
|     unified_strdate, | ||||
|     ExtractorError, | ||||
| @@ -21,8 +22,6 @@ class WDRIE(InfoExtractor): | ||||
|     _PAGE_REGEX = r'/mediathek/(?P<media_type>[^/]+)/(?P<type>[^/]+)/(?P<display_id>.+)\.html' | ||||
|     _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL | ||||
|  | ||||
|     _JS_URL_REGEX = r'(https?://deviceids-medp.wdr.de/ondemand/\d+/\d+\.js)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www1.wdr.de/mediathek/video/sendungen/doku-am-freitag/video-geheimnis-aachener-dom-100.html', | ||||
| @@ -102,9 +101,13 @@ class WDRIE(InfoExtractor): | ||||
|         display_id = mobj.group('display_id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         js_url = self._search_regex(self._JS_URL_REGEX, webpage, 'js_url', default=None) | ||||
|         # for wdr.de the data-extension is in a tag with the class "mediaLink" | ||||
|         # for wdrmaus it's in a link to the page in a multiline "videoLink"-tag | ||||
|         json_metadata = self._html_search_regex( | ||||
|             r'class=(?:"mediaLink\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', | ||||
|             webpage, 'media link', default=None, flags=re.MULTILINE) | ||||
|  | ||||
|         if not js_url: | ||||
|         if not json_metadata: | ||||
|             entries = [ | ||||
|                 self.url_result(page_url + href[0], 'WDR') | ||||
|                 for href in re.findall( | ||||
| @@ -117,8 +120,12 @@ class WDRIE(InfoExtractor): | ||||
|  | ||||
|             raise ExtractorError('No downloadable streams found', expected=True) | ||||
|  | ||||
|         media_link_obj = self._parse_json(json_metadata, display_id, | ||||
|                                           transform_source=js_to_json) | ||||
|         jsonp_url = media_link_obj['mediaObj']['url'] | ||||
|  | ||||
|         metadata = self._download_json( | ||||
|             js_url, 'metadata', transform_source=strip_jsonp) | ||||
|             jsonp_url, 'metadata', transform_source=strip_jsonp) | ||||
|  | ||||
|         metadata_tracker_data = metadata['trackerData'] | ||||
|         metadata_media_resource = metadata['mediaResource'] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Boris Wachtmeister
					Boris Wachtmeister