mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-03 16:15:14 +00:00 
			
		
		
		
	Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
		
			
				
	
	
		
			210 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			210 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    js_to_json,
    parse_duration,
    traverse_obj,
    try_get,
    urljoin,
)
 | 
						|
 | 
						|
 | 
						|
class MainStreamingIE(InfoExtractor):
    """Extractor for MainStreaming player pages hosted on ``*.msvdn.net``."""

    _VALID_URL = r'https?://(?:webtools-?)?(?P<host>[A-Za-z0-9-]*\.msvdn\.net)/(?:embed|amp_embed|content)/(?P<id>\w+)'
    _EMBED_REGEX = [rf'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
    IE_DESC = 'MainStreaming Player'

    _TESTS = [
        {
            # Live stream offline, has alternative content id
            'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/53EN6GxbWaJC',
            'info_dict': {
                'id': '53EN6GxbWaJC',
                'title': 'Diretta homepage 2021-12-31 12:00',
                'description': '',
                'live_status': 'was_live',
                'ext': 'mp4',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
            },
            'expected_warnings': [
                'Ignoring alternative content ID: WDAF1KOWUpH3',
                'MainStreaming said: Live event is OFFLINE',
            ],
            'skip': 'live stream offline',
        }, {
            # playlist
            'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3',
            'info_dict': {
                'id': 'WDAF1KOWUpH3',
                'title': 'Playlist homepage',
            },
            'playlist_mincount': 2,
        }, {
            # livestream
            'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw',
            'info_dict': {
                'id': 'tDoFkZD3T1Lw',
                'title': r're:Class CNBC Live \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'live_status': 'is_live',
                'ext': 'mp4',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
            },
            'skip': 'live stream',
        }, {
            'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false',
            'info_dict': {
                'id': 'EUlZfGWkGpOd',
                'title': 'La Settimana ',
                'description': '03 Ottobre ore 02:00',
                'ext': 'mp4',
                'live_status': 'not_live',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
                'duration': 1512,
            },
        }, {
            # video without webtools- prefix
            'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445',
            'info_dict': {
                'id': 'MfuWmzL2lGkA',
                'title': 'TG Mattina',
                'description': '06 Ottobre ore 08:00',
                'ext': 'mp4',
                'live_status': 'not_live',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
                'duration': 789.04,
            },
        }, {
            # always-on livestream with DVR
            'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy',
            'info_dict': {
                'id': 'HVvPMzy',
                'title': r're:^Diretta LaC News24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'description': 'canale all news',
                'live_status': 'is_live',
                'ext': 'mp4',
                'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            # no host
            'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA',
            'only_matching': True,
        }, {
            'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw',
            'only_matching': True,
        }, {
            'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#',
            'only_matching': True,
        },
    ]

    def _playlist_entries(self, host, playlist_content):
        """Yield one ``url``-type result per playlist item.

        ``playlist_content`` is the ``playlistContents`` value of the content
        info and may be ``None`` when the key is absent; in that case nothing
        is yielded. Items without a ``contentID`` are skipped, since no embed
        URL can be built for them.
        """
        for entry in playlist_content or []:
            content_id = entry.get('contentID')
            if not content_id:
                continue
            yield {
                '_type': 'url',
                'ie_key': MainStreamingIE.ie_key(),
                'id': content_id,
                'duration': int_or_none(traverse_obj(entry, ('duration', 'totalSeconds'))),
                'title': entry.get('title'),
                'url': f'https://{host}/embed/{content_id}',
            }

    @staticmethod
    def _get_webtools_host(host):
        """Return ``host`` with the ``webtools`` prefix applied if it is missing.

        ``_VALID_URL`` strips an optional ``webtools-``/``webtools`` prefix from
        the captured host, so the host may be ``<id>.msvdn.net`` or, for the
        bare ``webtools.msvdn.net`` URL, ``.msvdn.net``. A ``-`` separator is
        only inserted in the former case.
        """
        if not host.startswith('webtools'):
            separator = '' if host.startswith('.') else '-'
            host = f'webtools{separator}{host}'
        return host

    def _get_webtools_base_url(self, host):
        """Return the scheme-qualified base URL of the webtools host for ``host``."""
        return f'{self.http_scheme()}//{self._get_webtools_host(host)}'

    def _call_api(self, host: str, path: str, item_id: str, query=None, note='Downloading API JSON', fatal=False):
        """Request ``/api/v2/<path>`` on the webtools host and return the parsed JSON."""
        # JSON API, does not appear to be documented
        return self._call_webtools_api(host, f'/api/v2/{path}', item_id, query, note, fatal)

    def _call_webtools_api(self, host: str, path: str, item_id: str, query=None, note='Downloading webtools API JSON', fatal=False):
        """Request ``path`` on the webtools host and return the parsed JSON.

        Non-fatal by default; ``fatal`` is forwarded to ``_download_json``.
        """
        # webtools docs: https://webtools.msvdn.net/
        return self._download_json(
            urljoin(self._get_webtools_base_url(host), path), item_id, query=query, note=note, fatal=fatal)

    def _extract_content_info(self, url, host, video_id):
        """Return the content info dict, from the API or else the embed page.

        Raises ExtractorError when neither source yields any content info.
        """
        content_info = try_get(
            self._call_api(
                host, f'content/{video_id}', video_id, note='Downloading content info API JSON'),
            lambda x: x['playerContentInfo'])
        if content_info:
            return content_info

        # Fallback: parse the player config embedded in the page itself
        webpage = self._download_webpage(url, video_id)
        player_config = self._parse_json(
            self._search_regex(
                r'config\s*=\s*({.+?})\s*;', webpage, 'mainstreaming player config',
                default='{}', flags=re.DOTALL),
            video_id, transform_source=js_to_json, fatal=False) or {}
        content_info = player_config.get('contentInfo')
        if not content_info:
            # The config defaults to {} above, so a plain subscript here would
            # surface as an unhelpful KeyError instead of an extractor error
            raise ExtractorError('Unable to extract content info', video_id=video_id)
        return content_info

    def _real_extract(self, url):
        """Extract a video, livestream or playlist from a MainStreaming URL."""
        host, video_id = self._match_valid_url(url).groups()
        content_info = self._extract_content_info(url, host, video_id)

        # Values reported in the content info take precedence over the URL
        host = content_info.get('host') or host
        video_id = content_info.get('contentID') or video_id
        title = content_info.get('title')
        description = traverse_obj(content_info, 'longDescription', 'shortDescription', expected_type=str)
        live_status = 'not_live'
        if content_info.get('drmEnabled'):
            self.report_drm(video_id)

        alternative_content_id = content_info.get('alternativeContentID')
        if alternative_content_id:
            self.report_warning(f'Ignoring alternative content ID: {alternative_content_id}')

        content_type = int_or_none(content_info.get('contentType'))
        format_base_url = None
        formats = []
        subtitles = {}
        # Live content
        if content_type == 20:
            dvr_enabled = traverse_obj(content_info, ('playerSettings', 'dvrEnabled'), expected_type=bool)
            # liveSourceID is required to build the stream URL
            format_base_url = f"https://{host}/live/{content_info['liveSourceID']}/{video_id}/%s{'?DVR' if dvr_enabled else ''}"
            live_status = 'is_live'
            heartbeat = self._call_api(host, f'heartbeat/{video_id}', video_id, note='Checking stream status') or {}
            if heartbeat.get('heartBeatUp') is False:
                self.raise_no_formats(f'MainStreaming said: {heartbeat.get("responseMessage")}', expected=True)
                # Only reached if raise_no_formats() returns rather than raising
                # NOTE(review): presumably when missing formats are ignored - confirm
                live_status = 'was_live'

        # Playlist
        elif content_type == 31:
            return self.playlist_result(
                self._playlist_entries(host, content_info.get('playlistContents')), video_id, title, description)
        # Normal video content?
        elif content_type == 10:
            format_base_url = f'https://{host}/vod/{video_id}/%s'
            # Progressive format
            # Note: in https://webtools.msvdn.net/loader/playerV2.js there is mention of original.mp3 format,
            # however it seems to be the same as original.mp4?
            formats.append({'url': format_base_url % 'original.mp4', 'format_note': 'original', 'quality': 1})
        else:
            self.raise_no_formats(f'Unknown content type {content_type}')

        if format_base_url:
            # Both manifests are optional (fatal=False); whatever is available gets merged
            m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                format_base_url % 'playlist.m3u8', video_id=video_id, fatal=False)
            mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
                format_base_url % 'manifest.mpd', video_id=video_id, fatal=False)

            subtitles = self._merge_subtitles(m3u8_subs, mpd_subs)
            formats.extend(m3u8_formats + mpd_formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'live_status': live_status,
            'duration': parse_duration(content_info.get('duration')),
            'tags': content_info.get('tags'),
            'subtitles': subtitles,
            'thumbnail': urljoin(self._get_webtools_base_url(host), f'image/{video_id}/poster'),
        }
 |