mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[bloomberg] Extract the available formats (closes #2776)
The extractor uses a new helper method, `_extract_f4m_formats`, in the InfoExtractor class. The f4m downloader then picks the requested format by matching its bitrate against the `tbr` value in the info dict.
This commit is contained in:
		| @@ -220,6 +220,7 @@ class F4mFD(FileDownloader): | |||||||
|  |  | ||||||
|     def real_download(self, filename, info_dict): |     def real_download(self, filename, info_dict): | ||||||
|         man_url = info_dict['url'] |         man_url = info_dict['url'] | ||||||
|  |         requested_bitrate = info_dict.get('tbr') | ||||||
|         self.to_screen('[download] Downloading f4m manifest') |         self.to_screen('[download] Downloading f4m manifest') | ||||||
|         manifest = self.ydl.urlopen(man_url).read() |         manifest = self.ydl.urlopen(man_url).read() | ||||||
|         self.report_destination(filename) |         self.report_destination(filename) | ||||||
| @@ -233,8 +234,14 @@ class F4mFD(FileDownloader): | |||||||
|  |  | ||||||
|         doc = etree.fromstring(manifest) |         doc = etree.fromstring(manifest) | ||||||
|         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] |         formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))] | ||||||
|         formats = sorted(formats, key=lambda f: f[0]) |         if requested_bitrate is None: | ||||||
|         rate, media = formats[-1] |             # get the best format | ||||||
|  |             formats = sorted(formats, key=lambda f: f[0]) | ||||||
|  |             rate, media = formats[-1] | ||||||
|  |         else: | ||||||
|  |             rate, media = list(filter( | ||||||
|  |                 lambda f: int(f[0]) == requested_bitrate, formats))[0] | ||||||
|  |  | ||||||
|         base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) |         base_url = compat_urlparse.urljoin(man_url, media.attrib['url']) | ||||||
|         bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text) |         bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text) | ||||||
|         metadata = base64.b64decode(media.find(_add_ns('metadata')).text) |         metadata = base64.b64decode(media.find(_add_ns('metadata')).text) | ||||||
|   | |||||||
| @@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor): | |||||||
|  |  | ||||||
|     _TEST = { |     _TEST = { | ||||||
|         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', |         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', | ||||||
|         'md5': '7bf08858ff7c203c870e8a6190e221e5', |         # The md5 checksum changes | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'qurhIVlJSB6hzkVi229d8g', |             'id': 'qurhIVlJSB6hzkVi229d8g', | ||||||
|             'ext': 'flv', |             'ext': 'flv', | ||||||
| @@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor): | |||||||
|         return { |         return { | ||||||
|             'id': name.split('-')[-1], |             'id': name.split('-')[-1], | ||||||
|             'title': title, |             'title': title, | ||||||
|             'url': f4m_url, |             'formats': self._extract_f4m_formats(f4m_url, name), | ||||||
|             'ext': 'flv', |  | ||||||
|             'description': self._og_search_description(webpage), |             'description': self._og_search_description(webpage), | ||||||
|             'thumbnail': self._og_search_thumbnail(webpage), |             'thumbnail': self._og_search_thumbnail(webpage), | ||||||
|         } |         } | ||||||
|   | |||||||
| @@ -18,6 +18,7 @@ from ..utils import ( | |||||||
|     clean_html, |     clean_html, | ||||||
|     compiled_regex_type, |     compiled_regex_type, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     int_or_none, | ||||||
|     RegexNotFoundError, |     RegexNotFoundError, | ||||||
|     sanitize_filename, |     sanitize_filename, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
| @@ -590,6 +591,22 @@ class InfoExtractor(object): | |||||||
|         self.to_screen(msg) |         self.to_screen(msg) | ||||||
|         time.sleep(timeout) |         time.sleep(timeout) | ||||||
|  |  | ||||||
|  |     def _extract_f4m_formats(self, manifest_url, video_id): | ||||||
|  |         manifest = self._download_xml(manifest_url, video_id) | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |         for media_el in manifest.findall('{http://ns.adobe.com/f4m/1.0}media'): | ||||||
|  |             formats.append({ | ||||||
|  |                 'url': manifest_url, | ||||||
|  |                 'ext': 'flv', | ||||||
|  |                 'tbr': int_or_none(media_el.attrib.get('bitrate')), | ||||||
|  |                 'width': int_or_none(media_el.attrib.get('width')), | ||||||
|  |                 'height': int_or_none(media_el.attrib.get('height')), | ||||||
|  |             }) | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|  |         return formats | ||||||
|  |  | ||||||
|  |  | ||||||
| class SearchInfoExtractor(InfoExtractor): | class SearchInfoExtractor(InfoExtractor): | ||||||
|     """ |     """ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Jaime Marquínez Ferrándiz
					Jaime Marquínez Ferrándiz