mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[YoutubeDL] Fix format selection with filters (Closes #10083)
This commit is contained in:
		| @@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase): | ||||
|             downloaded = ydl.downloaded_info_dicts[0] | ||||
|             self.assertEqual(downloaded['format_id'], f1['format_id']) | ||||
|  | ||||
|     def test_audio_only_extractor_format_selection(self): | ||||
|         # For extractors with incomplete formats (all formats are audio-only or | ||||
|         # video-only) best and worst should fallback to corresponding best/worst | ||||
|         # video-only or audio-only formats (as per | ||||
|         # https://github.com/rg3/youtube-dl/pull/5556) | ||||
|         formats = [ | ||||
|             {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, | ||||
|             {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, | ||||
|         ] | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         ydl = YDL({'format': 'best'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'high') | ||||
|  | ||||
|         ydl = YDL({'format': 'worst'}) | ||||
|         ydl.process_ie_result(info_dict.copy()) | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'low') | ||||
|  | ||||
|     def test_format_not_available(self): | ||||
|         formats = [ | ||||
|             {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}, | ||||
|             {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, | ||||
|         ] | ||||
|         info_dict = _make_result(formats) | ||||
|  | ||||
|         # This must fail since complete video-audio format does not match filter | ||||
|         # and extractor does not provide incomplete only formats (i.e. only | ||||
|         # video-only or audio-only). | ||||
|         ydl = YDL({'format': 'best[height>360]'}) | ||||
|         self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) | ||||
|  | ||||
|     def test_invalid_format_specs(self): | ||||
|         def assert_syntax_error(format_spec): | ||||
|             ydl = YDL({'format': format_spec}) | ||||
|   | ||||
| @@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals | ||||
|  | ||||
| import collections | ||||
| import contextlib | ||||
| import copy | ||||
| import datetime | ||||
| import errno | ||||
| import fileinput | ||||
| @@ -1051,9 +1052,9 @@ class YoutubeDL(object): | ||||
|             if isinstance(selector, list): | ||||
|                 fs = [_build_selector_function(s) for s in selector] | ||||
|  | ||||
|                 def selector_function(formats): | ||||
|                 def selector_function(ctx): | ||||
|                     for f in fs: | ||||
|                         for format in f(formats): | ||||
|                         for format in f(ctx): | ||||
|                             yield format | ||||
|                 return selector_function | ||||
|             elif selector.type == GROUP: | ||||
| @@ -1061,17 +1062,17 @@ class YoutubeDL(object): | ||||
|             elif selector.type == PICKFIRST: | ||||
|                 fs = [_build_selector_function(s) for s in selector.selector] | ||||
|  | ||||
|                 def selector_function(formats): | ||||
|                 def selector_function(ctx): | ||||
|                     for f in fs: | ||||
|                         picked_formats = list(f(formats)) | ||||
|                         picked_formats = list(f(ctx)) | ||||
|                         if picked_formats: | ||||
|                             return picked_formats | ||||
|                     return [] | ||||
|             elif selector.type == SINGLE: | ||||
|                 format_spec = selector.selector | ||||
|  | ||||
|                 def selector_function(formats): | ||||
|                     formats = list(formats) | ||||
|                 def selector_function(ctx): | ||||
|                     formats = list(ctx['formats']) | ||||
|                     if not formats: | ||||
|                         return | ||||
|                     if format_spec == 'all': | ||||
| @@ -1084,9 +1085,10 @@ class YoutubeDL(object): | ||||
|                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] | ||||
|                         if audiovideo_formats: | ||||
|                             yield audiovideo_formats[format_idx] | ||||
|                         # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format | ||||
|                         elif (all(f.get('acodec') != 'none' for f in formats) or | ||||
|                               all(f.get('vcodec') != 'none' for f in formats)): | ||||
|                         # for extractors with incomplete formats (audio only (soundcloud) | ||||
|                         # or video only (imgur)) we will fallback to best/worst | ||||
|                         # {video,audio}-only format | ||||
|                         elif ctx['incomplete_formats']: | ||||
|                             yield formats[format_idx] | ||||
|                     elif format_spec == 'bestaudio': | ||||
|                         audio_formats = [ | ||||
| @@ -1160,17 +1162,18 @@ class YoutubeDL(object): | ||||
|                     } | ||||
|                 video_selector, audio_selector = map(_build_selector_function, selector.selector) | ||||
|  | ||||
|                 def selector_function(formats): | ||||
|                     formats = list(formats) | ||||
|                     for pair in itertools.product(video_selector(formats), audio_selector(formats)): | ||||
|                 def selector_function(ctx): | ||||
|                     for pair in itertools.product( | ||||
|                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))): | ||||
|                         yield _merge(pair) | ||||
|  | ||||
|             filters = [self._build_format_filter(f) for f in selector.filters] | ||||
|  | ||||
|             def final_selector(formats): | ||||
|             def final_selector(ctx): | ||||
|                 ctx_copy = copy.deepcopy(ctx) | ||||
|                 for _filter in filters: | ||||
|                     formats = list(filter(_filter, formats)) | ||||
|                 return selector_function(formats) | ||||
|                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) | ||||
|                 return selector_function(ctx_copy) | ||||
|             return final_selector | ||||
|  | ||||
|         stream = io.BytesIO(format_spec.encode('utf-8')) | ||||
| @@ -1377,7 +1380,35 @@ class YoutubeDL(object): | ||||
|             req_format_list.append('best') | ||||
|             req_format = '/'.join(req_format_list) | ||||
|         format_selector = self.build_format_selector(req_format) | ||||
|         formats_to_download = list(format_selector(formats)) | ||||
|  | ||||
|         # While in format selection we may need to have an access to the original | ||||
|         # format set in order to calculate some metrics or do some processing. | ||||
|         # For now we need to be able to guess whether original formats provided | ||||
|         # by extractor are incomplete or not (i.e. whether extractor provides only | ||||
|         # video-only or audio-only formats) for proper formats selection for | ||||
|         # extractors with such incomplete formats (see | ||||
|         # https://github.com/rg3/youtube-dl/pull/5556). | ||||
|         # Since formats may be filtered during format selection and may not match | ||||
|         # the original formats the results may be incorrect. Thus original formats | ||||
|         # or pre-calculated metrics should be passed to format selection routines | ||||
|         # as well. | ||||
|         # We will pass a context object containing all necessary additional data | ||||
|         # instead of just formats. | ||||
|         # This fixes incorrect format selection issue (see | ||||
|         # https://github.com/rg3/youtube-dl/issues/10083). | ||||
|         incomplete_formats = all( | ||||
|             # All formats are video-only or | ||||
|             f.get('vcodec') != 'none' and f.get('acodec') == 'none' or | ||||
|             # all formats are audio-only | ||||
|             f.get('vcodec') == 'none' and f.get('acodec') != 'none' | ||||
|             for f in formats) | ||||
|  | ||||
|         ctx = { | ||||
|             'formats': formats, | ||||
|             'incomplete_formats': incomplete_formats, | ||||
|         } | ||||
|  | ||||
|         formats_to_download = list(format_selector(ctx)) | ||||
|         if not formats_to_download: | ||||
|             raise ExtractorError('requested format not available', | ||||
|                                  expected=True) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․