mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-26 04:00:57 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			275 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			275 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from __future__ import unicode_literals
 | |
| 
 | |
| import re
 | |
| 
 | |
| from .mtv import MTVServicesInfoExtractor
 | |
| from ..compat import (
 | |
|     compat_str,
 | |
|     compat_urllib_parse,
 | |
| )
 | |
| from ..utils import (
 | |
|     ExtractorError,
 | |
|     float_or_none,
 | |
|     unified_strdate,
 | |
| )
 | |
| 
 | |
| 
 | |
class ComedyCentralIE(MTVServicesInfoExtractor):
    """Extractor configuration for video pages hosted on cc.com.

    This class declares only data (URL pattern, feed URL and one self-test)
    and defines no methods of its own, so every piece of extraction logic is
    inherited from ``MTVServicesInfoExtractor``.
    """

    # Accepted page URLs: http(s), optional "www.", host cc.com, one of the
    # listed section names, then an arbitrary remainder captured as ``title``.
    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
        (video-clips|episodes|cc-studios|video-collections|full-episodes)
        /(?P<title>.*)'''

    # MRSS feed endpoint. It is not referenced in this class; presumably the
    # base class reads it -- confirm against MTVServicesInfoExtractor.
    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'

    # Self-test: one clip URL together with the metadata expected for it.
    _TEST = {
        'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
        'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
        'info_dict': {
            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
            'ext': 'mp4',
            'title': 'CC:Stand-Up|Greg Fitzsimmons: Life on Stage|Uncensored - Too Good of a Mother',
            'description': 'After a certain point, breastfeeding becomes c**kblocking.',
        },
    }
 | |
| 
 | |
| 
 | |
class ComedyCentralShowsIE(MTVServicesInfoExtractor):
    """Extractor for The Daily Show / The Colbert Report pages.

    A page is resolved to a media URI, the show's MRSS index is fetched for
    that URI, and every ``<item>`` of the index becomes one playlist entry
    ("part N") carrying an HTTP and an RTMP variant of each rendition.
    """

    IE_DESC = 'The Daily Show / The Colbert Report'

    # Accepted inputs (hosts: thedailyshow / thecolbertreport, with an
    # optional "www." prefix and an optional ".cc" before ".com"):
    #   * the abbreviations ":tds" and ":thedailyshow"
    #   * episode URLs, e.g.
    #       http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary
    #   * clip URLs, e.g.
    #       http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
    #     and the legacy Colbert path layouts
    #       /the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
    #       /the-colbert-report-collections/422008/festival-of-lights/79524
    #   * extended-interview URLs
    # Only the named groups are read by the code below.
    _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
                      |https?://(?:www\.)?
                          (?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/
                         ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
                          (?P<clip>
                              (?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
                              |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                              |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
                          )|
                          (?P<interview>
                              extended-interviews/(?P<interID>[0-9a-z]+)/
                              (?:playlist_tds_extended_)?(?P<interview_title>[^/?#]*?)
                              (?:/[^/?#]?|[?#]|$))))
                     '''
    _TESTS = [{
        'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart',
        'md5': '4e2f5cb088a83cd8cdb7756132f9739d',
        'info_dict': {
            'id': 'ab9ab3e7-5a98-4dbe-8b21-551dc0523d55',
            'ext': 'mp4',
            'upload_date': '20121213',
            'description': 'Kristen Stewart learns to let loose in "On the Road."',
            'uploader': 'thedailyshow',
            'title': 'thedailyshow kristen-stewart part 1',
        }
    }, {
        'url': 'http://thedailyshow.cc.com/extended-interviews/b6364d/sarah-chayes-extended-interview',
        'info_dict': {
            'id': 'sarah-chayes-extended-interview',
            'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
            'title': 'thedailyshow Sarah Chayes Extended Interview',
        },
        'playlist': [
            {
                'info_dict': {
                    'id': '0baad492-cbec-4ec1-9e50-ad91c291127f',
                    'ext': 'mp4',
                    'upload_date': '20150129',
                    'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
                    'uploader': 'thedailyshow',
                    'title': 'thedailyshow sarah-chayes-extended-interview part 1',
                },
            },
            {
                'info_dict': {
                    'id': '1e4fb91b-8ce7-4277-bd7c-98c9f1bbd283',
                    'ext': 'mp4',
                    'upload_date': '20150129',
                    'description': 'Carnegie Endowment Senior Associate Sarah Chayes discusses how corrupt institutions function throughout the world in her book "Thieves of State: Why Corruption Threatens Global Security."',
                    'uploader': 'thedailyshow',
                    'title': 'thedailyshow sarah-chayes-extended-interview part 2',
                },
            },
        ],
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview',
        'only_matching': True,
    }, {
        'url': 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news',
        'only_matching': True,
    }, {
        'url': 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/guests/michael-lewis/3efna8/exclusive---michael-lewis-extended-interview-pt--3',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/episodes/sy7yv0/april-8--2014---denis-leary',
        'only_matching': True,
    }, {
        'url': 'http://thecolbertreport.cc.com/episodes/8ase07/april-8--2014---jane-goodall',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/video-playlists/npde3s/the-daily-show-19088-highlights',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/video-playlists/t6d9sg/the-daily-show-20038-highlights/be3cwo',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/special-editions/2l8fdb/special-edition---a-look-back-at-food',
        'only_matching': True,
    }, {
        'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
        'only_matching': True,
    }, {
        # "www." host prefix; was rejected while the pattern used "(:www\.)?"
        'url': 'http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day',
        'only_matching': True,
    }]

    # Known rendition bitrates; not referenced elsewhere in this class.
    _available_formats = ['3500', '2200', '1700', '1200', '750', '400']

    # Container extension per rendition bitrate (lookups default to 'mp4').
    _video_extensions = {
        '3500': 'mp4',
        '2200': 'mp4',
        '1700': 'mp4',
        '1200': 'mp4',
        '750': 'mp4',
        '400': 'mp4',
    }
    # (width, height) per rendition bitrate; unknown bitrates get (None, None).
    _video_dimensions = {
        '3500': (1280, 720),
        '2200': (960, 540),
        '1700': (768, 432),
        '1200': (640, 360),
        '750': (512, 288),
        '400': (384, 216),
    }

    def _real_extract(self, url):
        """Build a playlist dict for the episode, clip or interview at *url*.

        Short names (":tds") and bare episode listings are first resolved
        through the URL the site redirects to. The page is then searched for
        a media reference, the show's MRSS index is downloaded for it, and one
        entry is produced per ``<item>`` of that index.

        Returns a dict with ``_type`` 'playlist' and the keys ``id``,
        ``entries``, ``title`` and ``description``.

        Raises ExtractorError when a redirect does not lead to a specific
        episode/video URL or when the page holds no media reference.
        """
        # _VALID_URL carries an inline (?x), so no extra flag is needed.
        mobj = re.match(self._VALID_URL, url)

        if mobj.group('shortname'):
            # The pattern currently admits only the Daily Show short names,
            # so the ``else`` arm is not reachable with this _VALID_URL.
            if mobj.group('shortname') in ('tds', 'thedailyshow'):
                url = 'http://thedailyshow.cc.com/full-episodes/'
            else:
                url = 'http://thecolbertreport.cc.com/full-episodes/'
            mobj = re.match(self._VALID_URL, url)
            assert mobj is not None

        if mobj.group('clip'):
            if mobj.group('videotitle'):
                ep_title = mobj.group('videotitle')
            elif mobj.group('showname') == 'thedailyshow':
                # Fall back to the other layout's title instead of None when
                # a path layout is used on the "other" show's host.
                ep_title = mobj.group('tdstitle') or mobj.group('cntitle')
            else:
                ep_title = mobj.group('cntitle')
                if ep_title is None:
                    ep_title = mobj.group('tdstitle')
            dl_newest = False
        elif mobj.group('interview'):
            ep_title = mobj.group('interview_title')
            dl_newest = False
        else:
            # An empty episode part means "whatever the site redirects to".
            dl_newest = not mobj.group('episode')
            if dl_newest:
                ep_title = mobj.group('showname')
            else:
                ep_title = mobj.group('episode')
        # Captured before ``mobj`` is possibly replaced by the redirect match.
        show_name = mobj.group('showname')

        webpage, html_handle = self._download_webpage_handle(url, ep_title)
        if dl_newest:
            url = html_handle.geturl()
            mobj = re.match(self._VALID_URL, url)
            if mobj is None:
                raise ExtractorError('Invalid redirected URL: ' + url)
            redirected_title = mobj.group('episode') or mobj.group('videotitle')
            if not redirected_title:
                # Covers both an empty episode part and a redirect target
                # that matched neither the episode nor the videotitle group.
                raise ExtractorError('Redirected URL is still not specific: ' + url)
            ep_title = redirected_title.rpartition('/')[-1]

        movie_params = re.findall(
            '(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"',
            webpage)
        if not movie_params:
            # Some pages carry the media reference in a data-mgid attribute,
            # without the URL prefix; extract that alternate reference and
            # add the URL prefix manually.
            alt_movie_params = re.findall(
                'data-mgid="([^"]*(?:episode|video|playlist).*?:.*?)"', webpage)
            if not alt_movie_params:
                raise ExtractorError('unable to find Flash URL in webpage ' + url)
            movie_params = [
                ('http://media.mtvnservices.com/' + alt_movie_params[0], alt_movie_params[0])]

        uri = movie_params[0][1]
        # Correct cc.com in uri
        uri = re.sub(r'(episode:[^.]+)(\.cc)?\.com', r'\1.com', uri)

        index_url = 'http://%s.cc.com/feeds/mrss?%s' % (
            show_name, compat_urllib_parse.urlencode({'uri': uri}))
        idoc = self._download_xml(
            index_url, ep_title,
            'Downloading show index', 'Unable to download episode index')

        title = idoc.find('./channel/title').text
        description = idoc.find('./channel/description').text

        entries = []
        item_els = idoc.findall('.//item')
        for part_num, item_el in enumerate(item_els):
            upload_date = unified_strdate(item_el.findall('./pubDate')[0].text)

            # A missing thumbnail element is not fatal for the segment.
            thumbnail_el = item_el.find('.//{http://search.yahoo.com/mrss/}thumbnail')
            thumbnail = thumbnail_el.attrib.get('url') if thumbnail_el is not None else None

            content = item_el.find('.//{http://search.yahoo.com/mrss/}content')
            duration = float_or_none(content.attrib.get('duration'))
            mediagen_url = content.attrib['url']
            guid = item_el.find('./guid').text.rpartition(':')[-1]

            cdoc = self._download_xml(
                mediagen_url, ep_title,
                'Downloading configuration for segment %d / %d' % (part_num + 1, len(item_els)))

            # Each rendition yields two formats: an HTTP variant derived via
            # _transform_rtmp_url and the RTMP URL with its stream name swapped.
            formats = []
            for rendition in cdoc.findall('.//rendition'):
                bitrate = rendition.attrib['bitrate']
                rtmp_video_url = rendition.findall('./src')[0].text
                width, height = self._video_dimensions.get(bitrate, (None, None))
                ext = self._video_extensions.get(bitrate, 'mp4')
                formats.append({
                    'format_id': 'vhttp-%s' % bitrate,
                    'url': self._transform_rtmp_url(rtmp_video_url),
                    'ext': ext,
                    'height': height,
                    'width': width,
                })
                formats.append({
                    'format_id': 'rtmp-%s' % bitrate,
                    'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'),
                    'ext': ext,
                    'height': height,
                    'width': width,
                })
            # Sort once per segment rather than after every rendition.
            # NOTE(review): with no renditions this now hands an empty list to
            # _sort_formats; how that is treated is defined by the base class.
            self._sort_formats(formats)

            subtitles = self._extract_subtitles(cdoc, guid)

            virtual_id = show_name + ' ' + ep_title + ' part ' + compat_str(part_num + 1)
            entries.append({
                'id': guid,
                'title': virtual_id,
                'formats': formats,
                'uploader': show_name,
                'upload_date': upload_date,
                'duration': duration,
                'thumbnail': thumbnail,
                'description': description,
                'subtitles': subtitles,
            })

        return {
            '_type': 'playlist',
            'id': ep_title,
            'entries': entries,
            'title': show_name + ' ' + title,
            'description': description,
        }
 | 
