mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	[YoutubeDL] Fix format selection with filters (Closes #10083)
This commit is contained in:
		| @@ -335,6 +335,40 @@ class TestFormatSelection(unittest.TestCase): | |||||||
|             downloaded = ydl.downloaded_info_dicts[0] |             downloaded = ydl.downloaded_info_dicts[0] | ||||||
|             self.assertEqual(downloaded['format_id'], f1['format_id']) |             self.assertEqual(downloaded['format_id'], f1['format_id']) | ||||||
|  |  | ||||||
|  |     def test_audio_only_extractor_format_selection(self): | ||||||
|  |         # For extractors with incomplete formats (all formats are audio-only or | ||||||
|  |         # video-only) best and worst should fallback to corresponding best/worst | ||||||
|  |         # video-only or audio-only formats (as per | ||||||
|  |         # https://github.com/rg3/youtube-dl/pull/5556) | ||||||
|  |         formats = [ | ||||||
|  |             {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, | ||||||
|  |             {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, | ||||||
|  |         ] | ||||||
|  |         info_dict = _make_result(formats) | ||||||
|  |  | ||||||
|  |         ydl = YDL({'format': 'best'}) | ||||||
|  |         ydl.process_ie_result(info_dict.copy()) | ||||||
|  |         downloaded = ydl.downloaded_info_dicts[0] | ||||||
|  |         self.assertEqual(downloaded['format_id'], 'high') | ||||||
|  |  | ||||||
|  |         ydl = YDL({'format': 'worst'}) | ||||||
|  |         ydl.process_ie_result(info_dict.copy()) | ||||||
|  |         downloaded = ydl.downloaded_info_dicts[0] | ||||||
|  |         self.assertEqual(downloaded['format_id'], 'low') | ||||||
|  |  | ||||||
|  |     def test_format_not_available(self): | ||||||
|  |         formats = [ | ||||||
|  |             {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}, | ||||||
|  |             {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, | ||||||
|  |         ] | ||||||
|  |         info_dict = _make_result(formats) | ||||||
|  |  | ||||||
|  |         # This must fail since complete video-audio format does not match filter | ||||||
|  |         # and extractor does not provide incomplete only formats (i.e. only | ||||||
|  |         # video-only or audio-only). | ||||||
|  |         ydl = YDL({'format': 'best[height>360]'}) | ||||||
|  |         self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) | ||||||
|  |  | ||||||
|     def test_invalid_format_specs(self): |     def test_invalid_format_specs(self): | ||||||
|         def assert_syntax_error(format_spec): |         def assert_syntax_error(format_spec): | ||||||
|             ydl = YDL({'format': format_spec}) |             ydl = YDL({'format': format_spec}) | ||||||
|   | |||||||
| @@ -5,6 +5,7 @@ from __future__ import absolute_import, unicode_literals | |||||||
|  |  | ||||||
| import collections | import collections | ||||||
| import contextlib | import contextlib | ||||||
|  | import copy | ||||||
| import datetime | import datetime | ||||||
| import errno | import errno | ||||||
| import fileinput | import fileinput | ||||||
| @@ -1051,9 +1052,9 @@ class YoutubeDL(object): | |||||||
|             if isinstance(selector, list): |             if isinstance(selector, list): | ||||||
|                 fs = [_build_selector_function(s) for s in selector] |                 fs = [_build_selector_function(s) for s in selector] | ||||||
|  |  | ||||||
|                 def selector_function(formats): |                 def selector_function(ctx): | ||||||
|                     for f in fs: |                     for f in fs: | ||||||
|                         for format in f(formats): |                         for format in f(ctx): | ||||||
|                             yield format |                             yield format | ||||||
|                 return selector_function |                 return selector_function | ||||||
|             elif selector.type == GROUP: |             elif selector.type == GROUP: | ||||||
| @@ -1061,17 +1062,17 @@ class YoutubeDL(object): | |||||||
|             elif selector.type == PICKFIRST: |             elif selector.type == PICKFIRST: | ||||||
|                 fs = [_build_selector_function(s) for s in selector.selector] |                 fs = [_build_selector_function(s) for s in selector.selector] | ||||||
|  |  | ||||||
|                 def selector_function(formats): |                 def selector_function(ctx): | ||||||
|                     for f in fs: |                     for f in fs: | ||||||
|                         picked_formats = list(f(formats)) |                         picked_formats = list(f(ctx)) | ||||||
|                         if picked_formats: |                         if picked_formats: | ||||||
|                             return picked_formats |                             return picked_formats | ||||||
|                     return [] |                     return [] | ||||||
|             elif selector.type == SINGLE: |             elif selector.type == SINGLE: | ||||||
|                 format_spec = selector.selector |                 format_spec = selector.selector | ||||||
|  |  | ||||||
|                 def selector_function(formats): |                 def selector_function(ctx): | ||||||
|                     formats = list(formats) |                     formats = list(ctx['formats']) | ||||||
|                     if not formats: |                     if not formats: | ||||||
|                         return |                         return | ||||||
|                     if format_spec == 'all': |                     if format_spec == 'all': | ||||||
| @@ -1084,9 +1085,10 @@ class YoutubeDL(object): | |||||||
|                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] |                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] | ||||||
|                         if audiovideo_formats: |                         if audiovideo_formats: | ||||||
|                             yield audiovideo_formats[format_idx] |                             yield audiovideo_formats[format_idx] | ||||||
|                         # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format |                         # for extractors with incomplete formats (audio only (soundcloud) | ||||||
|                         elif (all(f.get('acodec') != 'none' for f in formats) or |                         # or video only (imgur)) we will fallback to best/worst | ||||||
|                               all(f.get('vcodec') != 'none' for f in formats)): |                         # {video,audio}-only format | ||||||
|  |                         elif ctx['incomplete_formats']: | ||||||
|                             yield formats[format_idx] |                             yield formats[format_idx] | ||||||
|                     elif format_spec == 'bestaudio': |                     elif format_spec == 'bestaudio': | ||||||
|                         audio_formats = [ |                         audio_formats = [ | ||||||
| @@ -1160,17 +1162,18 @@ class YoutubeDL(object): | |||||||
|                     } |                     } | ||||||
|                 video_selector, audio_selector = map(_build_selector_function, selector.selector) |                 video_selector, audio_selector = map(_build_selector_function, selector.selector) | ||||||
|  |  | ||||||
|                 def selector_function(formats): |                 def selector_function(ctx): | ||||||
|                     formats = list(formats) |                     for pair in itertools.product( | ||||||
|                     for pair in itertools.product(video_selector(formats), audio_selector(formats)): |                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))): | ||||||
|                         yield _merge(pair) |                         yield _merge(pair) | ||||||
|  |  | ||||||
|             filters = [self._build_format_filter(f) for f in selector.filters] |             filters = [self._build_format_filter(f) for f in selector.filters] | ||||||
|  |  | ||||||
|             def final_selector(formats): |             def final_selector(ctx): | ||||||
|  |                 ctx_copy = copy.deepcopy(ctx) | ||||||
|                 for _filter in filters: |                 for _filter in filters: | ||||||
|                     formats = list(filter(_filter, formats)) |                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) | ||||||
|                 return selector_function(formats) |                 return selector_function(ctx_copy) | ||||||
|             return final_selector |             return final_selector | ||||||
|  |  | ||||||
|         stream = io.BytesIO(format_spec.encode('utf-8')) |         stream = io.BytesIO(format_spec.encode('utf-8')) | ||||||
| @@ -1377,7 +1380,35 @@ class YoutubeDL(object): | |||||||
|             req_format_list.append('best') |             req_format_list.append('best') | ||||||
|             req_format = '/'.join(req_format_list) |             req_format = '/'.join(req_format_list) | ||||||
|         format_selector = self.build_format_selector(req_format) |         format_selector = self.build_format_selector(req_format) | ||||||
|         formats_to_download = list(format_selector(formats)) |  | ||||||
|  |         # While in format selection we may need to have an access to the original | ||||||
|  |         # format set in order to calculate some metrics or do some processing. | ||||||
|  |         # For now we need to be able to guess whether original formats provided | ||||||
|  |         # by extractor are incomplete or not (i.e. whether extractor provides only | ||||||
|  |         # video-only or audio-only formats) for proper formats selection for | ||||||
|  |         # extractors with such incomplete formats (see | ||||||
|  |         # https://github.com/rg3/youtube-dl/pull/5556). | ||||||
|  |         # Since formats may be filtered during format selection and may not match | ||||||
|  |         # the original formats the results may be incorrect. Thus original formats | ||||||
|  |         # or pre-calculated metrics should be passed to format selection routines | ||||||
|  |         # as well. | ||||||
|  |         # We will pass a context object containing all necessary additional data | ||||||
|  |         # instead of just formats. | ||||||
|  |         # This fixes incorrect format selection issue (see | ||||||
|  |         # https://github.com/rg3/youtube-dl/issues/10083). | ||||||
|  |         incomplete_formats = all( | ||||||
|  |             # All formats are video-only or | ||||||
|  |             f.get('vcodec') != 'none' and f.get('acodec') == 'none' or | ||||||
|  |             # all formats are audio-only | ||||||
|  |             f.get('vcodec') == 'none' and f.get('acodec') != 'none' | ||||||
|  |             for f in formats) | ||||||
|  |  | ||||||
|  |         ctx = { | ||||||
|  |             'formats': formats, | ||||||
|  |             'incomplete_formats': incomplete_formats, | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         formats_to_download = list(format_selector(ctx)) | ||||||
|         if not formats_to_download: |         if not formats_to_download: | ||||||
|             raise ExtractorError('requested format not available', |             raise ExtractorError('requested format not available', | ||||||
|                                  expected=True) |                                  expected=True) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․