mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[youtube] Download DASH manifest
If the video info provides a DASH manifest URL, download and parse the DASH manifest file in order to get ultra-HQ formats. Fixes #2166
This commit is contained in:
		| @@ -1091,6 +1091,8 @@ class YoutubeDL(object): | |||||||
|                 res += 'audio' |                 res += 'audio' | ||||||
|             if fdict.get('abr') is not None: |             if fdict.get('abr') is not None: | ||||||
|                 res += '@%3dk' % fdict['abr'] |                 res += '@%3dk' % fdict['abr'] | ||||||
|  |             if fdict.get('asr') is not None: | ||||||
|  |                 res += ' (%5dHz)' % fdict['asr'] | ||||||
|             if fdict.get('filesize') is not None: |             if fdict.get('filesize') is not None: | ||||||
|                 if res: |                 if res: | ||||||
|                     res += ', ' |                     res += ', ' | ||||||
|   | |||||||
| @@ -63,6 +63,7 @@ class InfoExtractor(object): | |||||||
|                     * tbr        Average bitrate of audio and video in KBit/s |                     * tbr        Average bitrate of audio and video in KBit/s | ||||||
|                     * abr        Average audio bitrate in KBit/s |                     * abr        Average audio bitrate in KBit/s | ||||||
|                     * acodec     Name of the audio codec in use |                     * acodec     Name of the audio codec in use | ||||||
|  |                     * asr        Audio sampling rate in Hertz | ||||||
|                     * vbr        Average video bitrate in KBit/s |                     * vbr        Average video bitrate in KBit/s | ||||||
|                     * vcodec     Name of the video codec in use |                     * vcodec     Name of the video codec in use | ||||||
|                     * filesize   The number of bytes, if known in advance |                     * filesize   The number of bytes, if known in advance | ||||||
|   | |||||||
| @@ -27,6 +27,7 @@ from ..utils import ( | |||||||
|     get_element_by_id, |     get_element_by_id, | ||||||
|     get_element_by_attribute, |     get_element_by_attribute, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     int_or_none, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
|     orderedSet, |     orderedSet, | ||||||
| @@ -269,6 +270,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|                 u"uploader_id": u"setindia" |                 u"uploader_id": u"setindia" | ||||||
|             } |             } | ||||||
|         }, |         }, | ||||||
|  |         { | ||||||
|  |             u"url": u"http://www.youtube.com/watch?v=a9LDPn-MO4I", | ||||||
|  |             u"file": u"a9LDPn-MO4I.m4a", | ||||||
|  |             u"note": u"256k DASH audio (format 141) via DASH manifest", | ||||||
|  |             u"params": { | ||||||
|  |                 u"format": "141" | ||||||
|  |             }, | ||||||
|  |             u"info_dict": { | ||||||
|  |                 u"upload_date": "20121002", | ||||||
|  |                 u"uploader_id": "8KVIDEO", | ||||||
|  |                 u"description": "No description available.", | ||||||
|  |                 u"uploader": "8KVIDEO", | ||||||
|  |                 u"title": "UHDTV TEST 8K VIDEO.mp4" | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -1066,18 +1082,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         video_id = mobj.group(2) |         video_id = mobj.group(2) | ||||||
|         return video_id |         return video_id | ||||||
|  |  | ||||||
|     def _get_video_url_list(self, url_map): |  | ||||||
|         """ |  | ||||||
|         Transform a dictionary in the format {itag:url} to a list of (itag, url) |  | ||||||
|         with the requested formats. |  | ||||||
|         """ |  | ||||||
|         existing_formats = [x for x in self._formats if x in url_map] |  | ||||||
|         if len(existing_formats) == 0: |  | ||||||
|             raise ExtractorError(u'no known formats available for video') |  | ||||||
|         video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats |  | ||||||
|         video_url_list.reverse() # order worst to best |  | ||||||
|         return video_url_list |  | ||||||
|  |  | ||||||
|     def _extract_from_m3u8(self, manifest_url, video_id): |     def _extract_from_m3u8(self, manifest_url, video_id): | ||||||
|         url_map = {} |         url_map = {} | ||||||
|         def _get_urls(_manifest): |         def _get_urls(_manifest): | ||||||
| @@ -1251,7 +1255,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|                 video_annotations = self._extract_annotations(video_id) |                 video_annotations = self._extract_annotations(video_id) | ||||||
|  |  | ||||||
|         # Decide which formats to download |         # Decide which formats to download | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) |             mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage) | ||||||
|             if not mobj: |             if not mobj: | ||||||
| @@ -1276,9 +1279,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|         except ValueError: |         except ValueError: | ||||||
|             pass |             pass | ||||||
|  |  | ||||||
|  |         def _map_to_format_list(urlmap): | ||||||
|  |             formats = [] | ||||||
|  |             for itag, video_real_url in urlmap.items(): | ||||||
|  |                 dct = { | ||||||
|  |                     'format_id': itag, | ||||||
|  |                     'url': video_real_url, | ||||||
|  |                     'player_url': player_url, | ||||||
|  |                 } | ||||||
|  |                 dct.update(self._formats[itag]) | ||||||
|  |                 formats.append(dct) | ||||||
|  |             return formats | ||||||
|  |  | ||||||
|         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): |         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): | ||||||
|             self.report_rtmp_download() |             self.report_rtmp_download() | ||||||
|             video_url_list = [('_rtmp', video_info['conn'][0])] |             formats = [{ | ||||||
|  |                 'format_id': '_rtmp', | ||||||
|  |                 'protocol': 'rtmp', | ||||||
|  |                 'url': video_info['conn'][0], | ||||||
|  |                 'player_url': player_url, | ||||||
|  |             }] | ||||||
|         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: |         elif len(video_info.get('url_encoded_fmt_stream_map', [])) >= 1 or len(video_info.get('adaptive_fmts', [])) >= 1: | ||||||
|             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] |             encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts',[''])[0] | ||||||
|             if 'rtmpe%3Dyes' in encoded_url_map: |             if 'rtmpe%3Dyes' in encoded_url_map: | ||||||
| @@ -1323,23 +1343,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | |||||||
|                     if 'ratebypass' not in url: |                     if 'ratebypass' not in url: | ||||||
|                         url += '&ratebypass=yes' |                         url += '&ratebypass=yes' | ||||||
|                     url_map[url_data['itag'][0]] = url |                     url_map[url_data['itag'][0]] = url | ||||||
|             video_url_list = self._get_video_url_list(url_map) |             formats = _map_to_format_list(url_map) | ||||||
|         elif video_info.get('hlsvp'): |         elif video_info.get('hlsvp'): | ||||||
|             manifest_url = video_info['hlsvp'][0] |             manifest_url = video_info['hlsvp'][0] | ||||||
|             url_map = self._extract_from_m3u8(manifest_url, video_id) |             url_map = self._extract_from_m3u8(manifest_url, video_id) | ||||||
|             video_url_list = self._get_video_url_list(url_map) |             formats = _map_to_format_list(url_map) | ||||||
|         else: |         else: | ||||||
|             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') |             raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') | ||||||
|  |  | ||||||
|         formats = [] |         # Look for the DASH manifest | ||||||
|         for itag, video_real_url in video_url_list: |         dash_manifest_url_lst = video_info.get('dashmpd') | ||||||
|             dct = { |         if dash_manifest_url_lst and dash_manifest_url_lst[0]: | ||||||
|                 'format_id': itag, |             try: | ||||||
|                 'url': video_real_url, |                 dash_doc = self._download_xml( | ||||||
|                 'player_url': player_url, |                     dash_manifest_url_lst[0], video_id, | ||||||
|  |                     note=u'Downloading DASH manifest', | ||||||
|  |                     errnote=u'Could not download DASH manifest') | ||||||
|  |                 for r in dash_doc.findall(u'.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): | ||||||
|  |                     url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') | ||||||
|  |                     if url_el is None: | ||||||
|  |                         continue | ||||||
|  |                     format_id = r.attrib['id'] | ||||||
|  |                     video_url = url_el.text | ||||||
|  |                     filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) | ||||||
|  |                     f = { | ||||||
|  |                         'format_id': format_id, | ||||||
|  |                         'url': video_url, | ||||||
|  |                         'width': int_or_none(r.attrib.get('width')), | ||||||
|  |                         'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), | ||||||
|  |                         'asr': int_or_none(r.attrib.get('audioSamplingRate')), | ||||||
|  |                         'filesize': filesize, | ||||||
|                     } |                     } | ||||||
|             dct.update(self._formats[itag]) |                     try: | ||||||
|             formats.append(dct) |                         existing_format = next( | ||||||
|  |                             fo for fo in formats | ||||||
|  |                             if fo['format_id'] == format_id) | ||||||
|  |                     except StopIteration: | ||||||
|  |                         f.update(self._formats.get(format_id, {})) | ||||||
|  |                         formats.append(f) | ||||||
|  |                     else: | ||||||
|  |                         existing_format.update(f) | ||||||
|  |  | ||||||
|  |             except (ExtractorError, KeyError) as e: | ||||||
|  |                 self.report_warning(u'Skipping DASH manifest: %s' % e, video_id) | ||||||
|  |  | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1128,8 +1128,8 @@ class HEADRequest(compat_urllib_request.Request): | |||||||
|         return "HEAD" |         return "HEAD" | ||||||
|  |  | ||||||
|  |  | ||||||
| def int_or_none(v): | def int_or_none(v, scale=1): | ||||||
|     return v if v is None else int(v) |     return v if v is None else (int(v) // scale) | ||||||
|  |  | ||||||
|  |  | ||||||
| def parse_duration(s): | def parse_duration(s): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister