mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[mtv,cc,cmt,spike] Improve and refactor
- Eliminate _transform_rtmp_url
* Generalize triforce mgid extraction
+ [cmt] Add support for full-episodes (closes #11623)
This commit is contained in:
		| @@ -1,13 +1,11 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| from .mtv import MTVIE | from .mtv import MTVIE | ||||||
| from ..utils import ExtractorError |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class CMTIE(MTVIE): | class CMTIE(MTVIE): | ||||||
|     IE_NAME = 'cmt.com' |     IE_NAME = 'cmt.com' | ||||||
|     _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P<videoid>\d+)' |     _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|full-episodes)/(?P<id>[^/]+)' | ||||||
|     _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' |  | ||||||
|  |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', |         'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', | ||||||
| @@ -35,15 +33,16 @@ class CMTIE(MTVIE): | |||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     @classmethod |  | ||||||
|     def _transform_rtmp_url(cls, rtmp_video_url): |  | ||||||
|         if 'error_not_available.swf' in rtmp_video_url: |  | ||||||
|             raise ExtractorError( |  | ||||||
|                 '%s said: video is not available' % cls.IE_NAME, expected=True) |  | ||||||
|  |  | ||||||
|         return super(CMTIE, cls)._transform_rtmp_url(rtmp_video_url) |  | ||||||
|  |  | ||||||
|     def _extract_mgid(self, webpage): |     def _extract_mgid(self, webpage): | ||||||
|         return self._search_regex( |         mgid = self._search_regex( | ||||||
|             r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1', |             r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1', | ||||||
|             webpage, 'mgid', group='mgid') |             webpage, 'mgid', group='mgid', default=None) | ||||||
|  |         if not mgid: | ||||||
|  |             mgid = self._extract_triforce_mgid(webpage) | ||||||
|  |         return mgid | ||||||
|  |  | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         video_id = self._match_id(url) | ||||||
|  |         webpage = self._download_webpage(url, video_id) | ||||||
|  |         mgid = self._extract_mgid(webpage) | ||||||
|  |         return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) | ||||||
|   | |||||||
| @@ -48,17 +48,8 @@ class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         playlist_id = self._match_id(url) |         playlist_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, playlist_id) |         webpage = self._download_webpage(url, playlist_id) | ||||||
|  |         mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1') | ||||||
|         feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') |  | ||||||
|         feed = self._parse_json(feed_json, playlist_id) |  | ||||||
|         zones = feed['manifest']['zones'] |  | ||||||
|  |  | ||||||
|         video_zone = zones['t2_lc_promo1'] |  | ||||||
|         feed = self._download_json(video_zone['feed'], playlist_id) |  | ||||||
|         mgid = feed['result']['data']['id'] |  | ||||||
|  |  | ||||||
|         videos_info = self._get_videos_info(mgid) |         videos_info = self._get_videos_info(mgid) | ||||||
|  |  | ||||||
|         return videos_info |         return videos_info | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -94,12 +85,6 @@ class ToshIE(MTVServicesInfoExtractor): | |||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     @classmethod |  | ||||||
|     def _transform_rtmp_url(cls, rtmp_video_url): |  | ||||||
|         new_urls = super(ToshIE, cls)._transform_rtmp_url(rtmp_video_url) |  | ||||||
|         new_urls['rtmp'] = rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm') |  | ||||||
|         return new_urls |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class ComedyCentralTVIE(MTVServicesInfoExtractor): | class ComedyCentralTVIE(MTVServicesInfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)' |     _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)' | ||||||
|   | |||||||
| @@ -13,11 +13,11 @@ from ..utils import ( | |||||||
|     fix_xml_ampersands, |     fix_xml_ampersands, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     HEADRequest, |     HEADRequest, | ||||||
|     NO_DEFAULT, |  | ||||||
|     RegexNotFoundError, |     RegexNotFoundError, | ||||||
|     sanitized_Request, |     sanitized_Request, | ||||||
|     strip_or_none, |     strip_or_none, | ||||||
|     timeconvert, |     timeconvert, | ||||||
|  |     try_get, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     update_url_query, |     update_url_query, | ||||||
|     url_basename, |     url_basename, | ||||||
| @@ -42,15 +42,6 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|         # Remove the templates, like &device={device} |         # Remove the templates, like &device={device} | ||||||
|         return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) |         return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) | ||||||
|  |  | ||||||
|     # This was originally implemented for ComedyCentral, but it also works here |  | ||||||
|     @classmethod |  | ||||||
|     def _transform_rtmp_url(cls, rtmp_video_url): |  | ||||||
|         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url) |  | ||||||
|         if not m: |  | ||||||
|             return {'rtmp': rtmp_video_url} |  | ||||||
|         base = 'http://viacommtvstrmfs.fplive.net/' |  | ||||||
|         return {'http': base + m.group('finalid')} |  | ||||||
|  |  | ||||||
|     def _get_feed_url(self, uri): |     def _get_feed_url(self, uri): | ||||||
|         return self._FEED_URL |         return self._FEED_URL | ||||||
|  |  | ||||||
| @@ -91,22 +82,28 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|             if rendition.get('method') == 'hls': |             if rendition.get('method') == 'hls': | ||||||
|                 hls_url = rendition.find('./src').text |                 hls_url = rendition.find('./src').text | ||||||
|                 formats.extend(self._extract_m3u8_formats( |                 formats.extend(self._extract_m3u8_formats( | ||||||
|                     hls_url, video_id, ext='mp4', entry_protocol='m3u8_native')) |                     hls_url, video_id, ext='mp4', entry_protocol='m3u8_native', | ||||||
|  |                     m3u8_id='hls')) | ||||||
|             else: |             else: | ||||||
|                 # fms |                 # fms | ||||||
|                 try: |                 try: | ||||||
|                     _, _, ext = rendition.attrib['type'].partition('/') |                     _, _, ext = rendition.attrib['type'].partition('/') | ||||||
|                     rtmp_video_url = rendition.find('./src').text |                     rtmp_video_url = rendition.find('./src').text | ||||||
|  |                     if 'error_not_available.swf' in rtmp_video_url: | ||||||
|  |                         raise ExtractorError( | ||||||
|  |                             '%s said: video is not available' % self.IE_NAME, | ||||||
|  |                             expected=True) | ||||||
|                     if rtmp_video_url.endswith('siteunavail.png'): |                     if rtmp_video_url.endswith('siteunavail.png'): | ||||||
|                         continue |                         continue | ||||||
|                     new_urls = self._transform_rtmp_url(rtmp_video_url) |  | ||||||
|                     formats.extend([{ |                     formats.extend([{ | ||||||
|                         'ext': 'flv' if new_url.startswith('rtmp') else ext, |                         'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext, | ||||||
|                         'url': new_url, |                         'url': rtmp_video_url, | ||||||
|                         'format_id': '-'.join(filter(None, [kind, rendition.get('bitrate')])), |                         'format_id': '-'.join(filter(None, [ | ||||||
|  |                             'rtmp' if rtmp_video_url.startswith('rtmp') else None, | ||||||
|  |                             rendition.get('bitrate')])), | ||||||
|                         'width': int(rendition.get('width')), |                         'width': int(rendition.get('width')), | ||||||
|                         'height': int(rendition.get('height')), |                         'height': int(rendition.get('height')), | ||||||
|                     } for kind, new_url in new_urls.items()]) |                     }]) | ||||||
|                 except (KeyError, TypeError): |                 except (KeyError, TypeError): | ||||||
|                     raise ExtractorError('Invalid rendition field.') |                     raise ExtractorError('Invalid rendition field.') | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
| @@ -212,7 +209,28 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|             [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], |             [self._get_video_info(item, use_hls) for item in idoc.findall('.//item')], | ||||||
|             playlist_title=title, playlist_description=description) |             playlist_title=title, playlist_description=description) | ||||||
|  |  | ||||||
|     def _extract_mgid(self, webpage, default=NO_DEFAULT): |     def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None): | ||||||
|  |         triforce_feed = self._parse_json(self._search_regex( | ||||||
|  |             r'triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, | ||||||
|  |             'triforce feed', default='{}'), video_id, fatal=False) | ||||||
|  |  | ||||||
|  |         data_zone = self._search_regex( | ||||||
|  |             r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage, | ||||||
|  |             'data zone', default=data_zone, group='zone') | ||||||
|  |  | ||||||
|  |         feed_url = try_get( | ||||||
|  |             triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'], | ||||||
|  |             compat_str) | ||||||
|  |         if not feed_url: | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         feed = self._download_json(feed_url, video_id, fatal=False) | ||||||
|  |         if not feed: | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |         return try_get(feed, lambda x: x['result']['data']['id'], compat_str) | ||||||
|  |  | ||||||
|  |     def _extract_mgid(self, webpage): | ||||||
|         try: |         try: | ||||||
|             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf |             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf | ||||||
|             # or http://media.mtvnservices.com/{mgid} |             # or http://media.mtvnservices.com/{mgid} | ||||||
| @@ -232,7 +250,11 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|             sm4_embed = self._html_search_meta( |             sm4_embed = self._html_search_meta( | ||||||
|                 'sm4:video:embed', webpage, 'sm4 embed', default='') |                 'sm4:video:embed', webpage, 'sm4 embed', default='') | ||||||
|             mgid = self._search_regex( |             mgid = self._search_regex( | ||||||
|                 r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default) |                 r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None) | ||||||
|  |  | ||||||
|  |         if not mgid: | ||||||
|  |             mgid = self._extract_triforce_mgid(webpage) | ||||||
|  |  | ||||||
|         return mgid |         return mgid | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|   | |||||||
| @@ -46,7 +46,7 @@ class SpikeIE(MTVServicesInfoExtractor): | |||||||
|     _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') |     _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') | ||||||
|  |  | ||||||
|     def _extract_mgid(self, webpage): |     def _extract_mgid(self, webpage): | ||||||
|         mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) |         mgid = super(SpikeIE, self)._extract_mgid(webpage) | ||||||
|         if mgid is None: |         if mgid is None: | ||||||
|             url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') |             url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') | ||||||
|             video_type, episode_id = url_parts.split('/', 1) |             video_type, episode_id = url_parts.split('/', 1) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․