mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 08:35:12 +00:00 
			
		
		
		
	[mtv] fix mtv.com and more(?)
This commit is contained in:
		@@ -7,6 +7,7 @@ from .common import InfoExtractor
 | 
				
			|||||||
from ..compat import (
 | 
					from ..compat import (
 | 
				
			||||||
    compat_str,
 | 
					    compat_str,
 | 
				
			||||||
    compat_xpath,
 | 
					    compat_xpath,
 | 
				
			||||||
 | 
					    compat_urlparse,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
from ..utils import (
 | 
					from ..utils import (
 | 
				
			||||||
    ExtractorError,
 | 
					    ExtractorError,
 | 
				
			||||||
@@ -22,6 +23,7 @@ from ..utils import (
 | 
				
			|||||||
    unescapeHTML,
 | 
					    unescapeHTML,
 | 
				
			||||||
    update_url_query,
 | 
					    update_url_query,
 | 
				
			||||||
    url_basename,
 | 
					    url_basename,
 | 
				
			||||||
 | 
					    get_domain,
 | 
				
			||||||
    xpath_text,
 | 
					    xpath_text,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -253,7 +255,39 @@ class MTVServicesInfoExtractor(InfoExtractor):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
 | 
					        return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _extract_mgid(self, webpage):
 | 
					    def _extract_new_triforce_mgid(self, webpage, url='', data_zone=None, video_id=None):
					        """Derive an mgid for *url* from the triforce v8 manifest feed.

					        The manifest is requested from the domain of *url*; if it announces
					        a redirect, the manifest of the new location is requested instead.

					        webpage and data_zone are accepted but not used by this method.
					        video_id is only forwarded to the JSON download calls.

					        Returns the mgid string, or None when no item id could be read from
					        the manifest.
					        Raises ExtractorError when no domain can be derived from *url*.
					        """
					        domain = get_domain(url)
					        if domain is None:
					            raise ExtractorError(
					                '[%s] could not get domain' % self.IE_NAME,
					                expected=True)

					        _TRIFORCE_V8_TEMPLATE = 'https://%s/feeds/triforce/manifest/v8?url=%s'

					        def download_manifest(page_url):
					            # The feed is queried with the http:// form of the page URL,
					            # fully percent-encoded (no characters are left unquoted).
					            page_url = page_url.replace("https://", "http://")
					            # NOTE(review): compat_urlparse.quote presumably only exists where
					            # compat_urlparse maps to urllib.parse - confirm for Python 2.
					            enc_url = compat_urlparse.quote(page_url, safe='')
					            return self._download_json(
					                _TRIFORCE_V8_TEMPLATE % (domain, enc_url), video_id, fatal=False)

					        manifest = download_manifest(url)

					        # The download is non-fatal, so the result is not guaranteed to be a
					        # dict with a 'manifest' entry; go through try_get (as the item id
					        # lookup below does) instead of chaining .get() calls on it.
					        if try_get(manifest, lambda x: x['manifest']['type']) == 'redirect':
					            new_loc = try_get(manifest, lambda x: x['manifest']['newLocation'], compat_str)
					            # A redirect without a target cannot be followed; keep the
					            # manifest we already have in that case.
					            if new_loc:
					                self.to_screen('Found a redirect. Downloading manifest from new location')
					                manifest = download_manifest(new_loc)

					        item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
					        if not item_id:
					            self.to_screen('Found no id!')
					            return

					        # 'episode' can be anything. 'content' is used often as well
					        _MGID_TEMPLATE = 'mgid:arc:episode:%s:%s'
					        return _MGID_TEMPLATE % (domain, item_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _extract_mgid(self, webpage, url):
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
 | 
					            # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
 | 
				
			||||||
            # or http://media.mtvnservices.com/{mgid}
 | 
					            # or http://media.mtvnservices.com/{mgid}
 | 
				
			||||||
@@ -275,6 +309,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
 | 
				
			|||||||
            mgid = self._search_regex(
 | 
					            mgid = self._search_regex(
 | 
				
			||||||
                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
 | 
					                r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if not mgid:
 | 
				
			||||||
 | 
					            mgid = self._extract_new_triforce_mgid(webpage, url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if not mgid:
 | 
					        if not mgid:
 | 
				
			||||||
            mgid = self._extract_triforce_mgid(webpage)
 | 
					            mgid = self._extract_triforce_mgid(webpage)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -283,7 +320,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
 | 
				
			|||||||
    def _real_extract(self, url):
        """Download the page at *url*, resolve its mgid and return the videos info."""
        # The last path component of the URL is passed along with the page
        # download as its identifier argument.
        page = self._download_webpage(url, url_basename(url))
        # The URL is handed to the mgid lookup together with the page content.
        return self._get_videos_info(self._extract_mgid(page, url))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1984,6 +1984,7 @@ def get_elements_by_attribute(attribute, value, html, escape_value=True):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class HTMLAttributeParser(compat_HTMLParser):
 | 
					class HTMLAttributeParser(compat_HTMLParser):
 | 
				
			||||||
    """Trivial HTML parser to gather the attributes for a single element"""
 | 
					    """Trivial HTML parser to gather the attributes for a single element"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self):
 | 
					    def __init__(self):
 | 
				
			||||||
        self.attrs = {}
 | 
					        self.attrs = {}
 | 
				
			||||||
        compat_HTMLParser.__init__(self)
 | 
					        compat_HTMLParser.__init__(self)
 | 
				
			||||||
@@ -2378,6 +2379,7 @@ class GeoRestrictedError(ExtractorError):
 | 
				
			|||||||
    This exception may be thrown when a video is not available from your
 | 
					    This exception may be thrown when a video is not available from your
 | 
				
			||||||
    geographic location due to geographic restrictions imposed by a website.
 | 
					    geographic location due to geographic restrictions imposed by a website.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, msg, countries=None):
 | 
					    def __init__(self, msg, countries=None):
 | 
				
			||||||
        super(GeoRestrictedError, self).__init__(msg, expected=True)
 | 
					        super(GeoRestrictedError, self).__init__(msg, expected=True)
 | 
				
			||||||
        self.msg = msg
 | 
					        self.msg = msg
 | 
				
			||||||
@@ -3558,6 +3560,11 @@ def remove_quotes(s):
 | 
				
			|||||||
    return s
 | 
					    return s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_domain(url):
					    """Return the host portion of *url*, or None if none is found.

					    An optional leading ``http://`` / ``https://`` and an optional
					    ``www.`` prefix are skipped. The result is everything after that up
					    to the first ``/`` and must contain at least one dot.
					    """
					    matched = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
					    if not matched:
					        return None
					    return matched.group('domain')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def url_basename(url):
    """Return the last path segment of *url*, ignoring leading/trailing slashes."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    # Everything after the final '/' (the whole string when there is none).
    return trimmed_path.rpartition('/')[2]
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user