[channel9] Cleanup
@@ -3,10 +3,7 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    format_bytes,
-    ExtractorError,
-)
+from ..utils import ExtractorError
 
 class Channel9IE(InfoExtractor):
     '''
@@ -51,7 +48,6 @@ class Channel9IE(InfoExtractor):
     ]
 
     _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
-    _EXTRACT_ENTRY_ITEMS_FROM_RSS = False
 
     # Sorted by quality
     _known_formats = ['MP3', 'MP4', 'Mid Quality WMV', 'Mid Quality MP4', 'High Quality WMV', 'High Quality MP4']
@@ -90,37 +86,6 @@ class Channel9IE(InfoExtractor):
         formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))
         return formats
 
-    def _formats_from_rss_item(self, item):
-
-        def process_formats(elem):
-            formats = []
-            for media_content in elem.findall('./{http://search.yahoo.com/mrss/}content'):
-                url = media_content.attrib['url']
-                # Ignore unrelated media
-                if url.endswith('.ism/manifest'):
-                    continue
-                format_note = media_content.attrib['type']
-                filesize = int(media_content.attrib['fileSize'])
-                formats.append({'url': url,
-                                'format_note': format_note,
-                                'format': '%s %s' % (format_note, format_bytes(filesize)),
-                                'filesize': filesize,
-                                })
-            return formats
-
-        formats = []
-
-        for media_group in item.findall('./{http://search.yahoo.com/mrss/}group'):
-            formats.extend(process_formats(media_group))
-
-        # Sometimes there are no media:groups in item, but there is media:content
-        # right in item (usually when there is the only media source)
-        formats.extend(process_formats(item))
-
-        # Sort by file size
-        formats.sort(key=lambda fmt: fmt['filesize'])
-        return formats
-
     def _extract_title(self, html):
         title = self._html_search_meta(u'title', html, u'title')
         if title is None:
@@ -274,61 +239,12 @@ class Channel9IE(InfoExtractor):
 
         return contents
 
-    def _extract_content_rss(self, rss):
-        '''
-        Extracts links to entry items right out of RSS feed.
-        This approach is faster than extracting from web pages
-        one by one, but suffers from some problems.
-        Pros:
-         - no need to download additional pages
-         - provides more media links
-         - accurate file size
-        Cons:
-         - fewer meta data provided
-         - links to media files have no appropriate data that may be used as format_id
-         - RSS does not contain links to presentation materials (slides, zip)
-        '''
-        entries = []
-        for item in rss.findall('./channel/item'):
-            url = item.find('./link').text
-            video_id = url.split('/')[-1]
-            formats = self._formats_from_rss_item(item)
-
-            if len(formats) == 0:
-                self._downloader.report_warning(u'The recording for session %s is not yet available' % video_id)
-                continue
-
-            title = item.find('./title').text
-            description = item.find('./description').text
-
-            thumbnail = item.find('./{http://search.yahoo.com/mrss/}thumbnail').text
-
-            duration_e = item.find('./{http://www.itunes.com/dtds/podcast-1.0.dtd}duration')
-            duration = duration_e.text if duration_e is not None else 0
-
-            speakers_e = item.find('./{http://purl.org/dc/elements/1.1/}creator')
-            speakers = speakers_e.text.split(', ') if speakers_e is not None and speakers_e.text else []
-
-            entries.append({'_type': 'video',
-                            'id': video_id,
-                            'formats': formats,
-                            'title': title,
-                            'description': description,
-                            'thumbnail': thumbnail,
-                            'duration': duration,
-                            'session_speakers': speakers,
-                            })
-        return entries
-
     def _extract_list(self, content_path):
         rss = self._download_xml(self._RSS_URL % content_path, content_path, u'Downloading RSS')
-        if self._EXTRACT_ENTRY_ITEMS_FROM_RSS:
-            return self._extract_content_rss(rss)
-        else:
-            entries = [self.url_result(session_url.text, 'Channel9')
-                       for session_url in rss.findall('./channel/item/link')]
-            title_text = rss.find('./channel/title').text
-            return self.playlist_result(entries, content_path, title_text)
+        entries = [self.url_result(session_url.text, 'Channel9')
+                   for session_url in rss.findall('./channel/item/link')]
+        title_text = rss.find('./channel/title').text
+        return self.playlist_result(entries, content_path, title_text)
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
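For reference, the code path this cleanup keeps simply turns every <item><link> element of the RSS feed into a url_result and wraps the list in a playlist; per-video formats are then extracted from the individual session pages rather than from the feed. A minimal standalone sketch of that ElementTree traversal (the SAMPLE_RSS string and build_playlist helper below are illustrative stand-ins, not part of the extractor):

import xml.etree.ElementTree as ET

# Illustrative feed snippet; a real Channel 9 RSS feed carries many more elements.
SAMPLE_RSS = '''<rss><channel>
  <title>Sample Channel 9 folder</title>
  <item><link>http://channel9.msdn.com/Events/Build/2013/1-001</link></item>
  <item><link>http://channel9.msdn.com/Events/Build/2013/1-002</link></item>
</channel></rss>'''

def build_playlist(rss_text):
    # Collect one entry URL per <item><link>, mirroring the
    # rss.findall('./channel/item/link') call retained in _extract_list.
    rss = ET.fromstring(rss_text)
    entries = [link.text for link in rss.findall('./channel/item/link')]
    title = rss.find('./channel/title').text
    return {'title': title, 'entries': entries}

print(build_playlist(SAMPLE_RSS))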