mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[extractor/mediastream] Improve WinSports and embed extraction (#6426)
				
					
				
			Closes #6419, Closes #6527
			Authored by: bashonly
This commit is contained in:
		| @@ -2,16 +2,44 @@ import re | ||||
| 
 | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     remove_end, | ||||
|     str_or_none, | ||||
|     strip_or_none, | ||||
|     traverse_obj, | ||||
|     urljoin, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class MediaStreamIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://mdstrm.com/(?:embed|live-stream)/(?P<id>\w+)' | ||||
| class MediaStreamBaseIE(InfoExtractor): | ||||
|     _EMBED_BASE_URL = 'https://mdstrm.com/embed' | ||||
|     _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)' | ||||
| 
 | ||||
|     def _extract_mediastream_urls(self, webpage): | ||||
|         yield from traverse_obj(list(self._yield_json_ld(webpage, None)), ( | ||||
|             lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'), | ||||
|             {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None})) | ||||
| 
 | ||||
|         for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage): | ||||
|             yield f'{self._EMBED_BASE_URL}/{mobj.group("video_id")}' | ||||
| 
 | ||||
|         yield from re.findall( | ||||
|             rf'<iframe[^>]+\bsrc="({self._BASE_URL_RE}/\w+)', webpage) | ||||
| 
 | ||||
|         for mobj in re.finditer( | ||||
|             r'''(?x) | ||||
|                 <(?:div|ps-mediastream)[^>]+ | ||||
|                 (class="[^"]*MediaStreamVideoPlayer)[^"]*"[^>]+ | ||||
|                 data-video-id="(?P<video_id>\w+)" | ||||
|                 (?:\s*data-video-type="(?P<video_type>[^"]+))? | ||||
|                 (?:[^>]*>\s*<div[^>]+\1[^"]*"[^>]+data-mediastream=["\'][^>]+ | ||||
|                     https://mdstrm\.com/(?P<live>live-stream))? | ||||
|                 ''', webpage): | ||||
| 
 | ||||
|             video_type = 'live-stream' if mobj.group('video_type') == 'live' or mobj.group('live') else 'embed' | ||||
|             yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}' | ||||
| 
 | ||||
| 
 | ||||
| class MediaStreamIE(MediaStreamBaseIE): | ||||
|     _VALID_URL = MediaStreamBaseIE._BASE_URL_RE + r'/(?P<id>\w+)' | ||||
| 
 | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831', | ||||
| @@ -23,6 +51,7 @@ class MediaStreamIE(InfoExtractor): | ||||
|             'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831', | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }] | ||||
| 
 | ||||
|     _WEBPAGE_TESTS = [{ | ||||
| @@ -35,9 +64,7 @@ class MediaStreamIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|             'live_status': 'is_live', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'Livestream' | ||||
|         }, | ||||
|         'params': {'skip_download': 'Livestream'}, | ||||
|     }, { | ||||
|         'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas', | ||||
|         'md5': 'de31f0b1ecc321fb35bf22d58734ea40', | ||||
| @@ -48,6 +75,7 @@ class MediaStreamIE(InfoExtractor): | ||||
|             'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28', | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }, { | ||||
|         'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120', | ||||
|         'info_dict': { | ||||
| @@ -57,6 +85,7 @@ class MediaStreamIE(InfoExtractor): | ||||
|             'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec', | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }, { | ||||
|         'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083', | ||||
|         'info_dict': { | ||||
| @@ -66,26 +95,12 @@ class MediaStreamIE(InfoExtractor): | ||||
|             'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e', | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }] | ||||
| 
 | ||||
|     @classmethod | ||||
|     def _extract_embed_urls(cls, url, webpage): | ||||
|         for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage): | ||||
|             yield f'https://mdstrm.com/embed/{mobj.group("video_id")}' | ||||
| 
 | ||||
|         yield from re.findall( | ||||
|             r'<iframe[^>]src\s*=\s*"(https://mdstrm.com/[\w-]+/\w+)', webpage) | ||||
| 
 | ||||
|         for mobj in re.finditer( | ||||
|             r'''(?x) | ||||
|                 <(?:div|ps-mediastream)[^>]+ | ||||
|                 class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+ | ||||
|                 data-video-id\s*=\s*"(?P<video_id>\w+)\s*" | ||||
|                 (?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))? | ||||
|                 ''', webpage): | ||||
| 
 | ||||
|             video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed' | ||||
|             yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}' | ||||
|     def _extract_from_webpage(self, url, webpage): | ||||
|         for embed_url in self._extract_mediastream_urls(webpage): | ||||
|             yield self.url_result(embed_url, MediaStreamIE, None) | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -94,7 +109,7 @@ class MediaStreamIE(InfoExtractor): | ||||
|         if 'Debido a tu ubicación no puedes ver el contenido' in webpage: | ||||
|             self.raise_geo_restricted() | ||||
| 
 | ||||
|         player_config = self._search_json(r'window.MDSTRM.OPTIONS\s*=', webpage, 'metadata', video_id) | ||||
|         player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id) | ||||
| 
 | ||||
|         formats, subtitles = [], {} | ||||
|         for video_format in player_config['src']: | ||||
| @@ -122,7 +137,7 @@ class MediaStreamIE(InfoExtractor): | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| class WinSportsVideoIE(InfoExtractor): | ||||
| class WinSportsVideoIE(MediaStreamBaseIE): | ||||
|     _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)' | ||||
| 
 | ||||
|     _TESTS = [{ | ||||
| @@ -158,21 +173,36 @@ class WinSportsVideoIE(InfoExtractor): | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }, { | ||||
|         'url': 'https://www.winsports.co/videos/bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta', | ||||
|         'info_dict': { | ||||
|             'id': '6402adb62bbf3b18d454e1b0', | ||||
|             'display_id': 'bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta', | ||||
|             'title': '⚽Bucaramanga se quedó con el grito de gol en la garganta', | ||||
|             'description': 'Gol anulado Bucaramanga', | ||||
|             'thumbnail': r're:^https?://[^?#]+6402adb62bbf3b18d454e1b0', | ||||
|             'ext': 'mp4', | ||||
|         }, | ||||
|         'params': {'skip_download': 'm3u8'}, | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}) | ||||
|         media_setting_json = self._search_json( | ||||
|             r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id) | ||||
|         data = self._search_json( | ||||
|             r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'data', display_id) | ||||
| 
 | ||||
|         mediastream_id = traverse_obj( | ||||
|             media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}), | ||||
|             get_all=False) or json_ld.get('url') | ||||
|         if not mediastream_id: | ||||
|         mediastream_url = urljoin(f'{self._EMBED_BASE_URL}/', ( | ||||
|             traverse_obj(data, ( | ||||
|                 (('settings', 'mediastream_formatter', ..., 'mediastream_id'), 'url'), {str}), get_all=False) | ||||
|             or next(self._extract_mediastream_urls(webpage), None))) | ||||
| 
 | ||||
|         if not mediastream_url: | ||||
|             self.raise_no_formats('No MediaStream embed found in webpage') | ||||
| 
 | ||||
|         title = clean_html(remove_end( | ||||
|             self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}).get('title') | ||||
|             or self._og_search_title(webpage), '| Win Sports')) | ||||
| 
 | ||||
|         return self.url_result( | ||||
|             urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True, | ||||
|             display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports'))) | ||||
|             mediastream_url, MediaStreamIE, display_id, url_transparent=True, display_id=display_id, video_title=title) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bashonly
					bashonly