mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	Add pre-processor stage after_filter
				
					
				
			* Move `_match_entry` and `post_extract` to `process_video_result`. They are also left in `process_info` for API compatibility
* `--list-...` options and `--force-write-archive` now obey filtering options
* Move `SponsorBlockPP` to `after_filter`. Closes https://github.com/yt-dlp/yt-dlp/issues/2536
* Reverts 4ec82a72bb since this commit addresses the issue it was solving
			
			
This commit is contained in:
		
							
								
								
									
										20
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								README.md
									
									
									
									
									
								
							| @@ -982,15 +982,17 @@ You can also fork the project on github and run your fork's [build workflow](.gi | ||||
|                                      semicolon ";" delimited list of NAME=VALUE. | ||||
|                                      The "when" argument determines when the | ||||
|                                      postprocessor is invoked. It can be one of | ||||
|                                      "pre_process" (after extraction), | ||||
|                                      "before_dl" (before video download), | ||||
|                                      "post_process" (after video download; | ||||
|                                      default), "after_move" (after moving file | ||||
|                                      to their final locations), "after_video" | ||||
|                                      (after downloading and processing all | ||||
|                                      formats of a video), or "playlist" (end of | ||||
|                                      playlist). This option can be used multiple | ||||
|                                      times to add different postprocessors | ||||
|                                      "pre_process" (after video extraction), | ||||
|                                      "after_filter" (after video passes filter), | ||||
|                                      "before_dl" (before each video download), | ||||
|                                      "post_process" (after each video download; | ||||
|                                      default), "after_move" (after moving video | ||||
|                                      file to its final location), | ||||
|                                      "after_video" (after downloading and | ||||
|                                      processing all formats of a video), or | ||||
|                                      "playlist" (at end of playlist). This | ||||
|                                      option can be used multiple times to add | ||||
|                                      different postprocessors | ||||
|  | ||||
| ## SponsorBlock Options: | ||||
| Make chapter entries for, or remove various segments (sponsor, | ||||
|   | ||||
| @@ -30,9 +30,7 @@ class YDL(FakeYDL): | ||||
|         self.msgs = [] | ||||
|  | ||||
|     def process_info(self, info_dict): | ||||
|         info_dict = info_dict.copy() | ||||
|         info_dict.pop('__original_infodict', None) | ||||
|         self.downloaded_info_dicts.append(info_dict) | ||||
|         self.downloaded_info_dicts.append(info_dict.copy()) | ||||
|  | ||||
|     def to_screen(self, msg): | ||||
|         self.msgs.append(msg) | ||||
| @@ -898,20 +896,6 @@ class TestYoutubeDL(unittest.TestCase): | ||||
|         os.unlink(filename) | ||||
|  | ||||
|     def test_match_filter(self): | ||||
|         class FilterYDL(YDL): | ||||
|             def __init__(self, *args, **kwargs): | ||||
|                 super(FilterYDL, self).__init__(*args, **kwargs) | ||||
|                 self.params['simulate'] = True | ||||
|  | ||||
|             def process_info(self, info_dict): | ||||
|                 super(YDL, self).process_info(info_dict) | ||||
|  | ||||
|             def _match_entry(self, info_dict, incomplete=False): | ||||
|                 res = super(FilterYDL, self)._match_entry(info_dict, incomplete) | ||||
|                 if res is None: | ||||
|                     self.downloaded_info_dicts.append(info_dict.copy()) | ||||
|                 return res | ||||
|  | ||||
|         first = { | ||||
|             'id': '1', | ||||
|             'url': TEST_URL, | ||||
| @@ -939,7 +923,7 @@ class TestYoutubeDL(unittest.TestCase): | ||||
|         videos = [first, second] | ||||
|  | ||||
|         def get_videos(filter_=None): | ||||
|             ydl = FilterYDL({'match_filter': filter_}) | ||||
|             ydl = YDL({'match_filter': filter_, 'simulate': True}) | ||||
|             for v in videos: | ||||
|                 ydl.process_ie_result(v, download=True) | ||||
|             return [v['id'] for v in ydl.downloaded_info_dicts] | ||||
|   | ||||
| @@ -1037,8 +1037,7 @@ class YoutubeDL(object): | ||||
|     @staticmethod | ||||
|     def _copy_infodict(info_dict): | ||||
|         info_dict = dict(info_dict) | ||||
|         for key in ('__original_infodict', '__postprocessors'): | ||||
|             info_dict.pop(key, None) | ||||
|         info_dict.pop('__postprocessors', None) | ||||
|         return info_dict | ||||
|  | ||||
|     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): | ||||
| @@ -2512,8 +2511,6 @@ class YoutubeDL(object): | ||||
|         if '__x_forwarded_for_ip' in info_dict: | ||||
|             del info_dict['__x_forwarded_for_ip'] | ||||
|  | ||||
|         # TODO Central sorting goes here | ||||
|  | ||||
|         if self.params.get('check_formats') is True: | ||||
|             formats = LazyList(self._check_formats(formats[::-1]), reverse=True) | ||||
|  | ||||
| @@ -2526,6 +2523,12 @@ class YoutubeDL(object): | ||||
|  | ||||
|         info_dict, _ = self.pre_process(info_dict) | ||||
|  | ||||
|         if self._match_entry(info_dict) is not None: | ||||
|             return info_dict | ||||
|  | ||||
|         self.post_extract(info_dict) | ||||
|         info_dict, _ = self.pre_process(info_dict, 'after_filter') | ||||
|  | ||||
|         # The pre-processors may have modified the formats | ||||
|         formats = info_dict.get('formats', [info_dict]) | ||||
|  | ||||
| @@ -2610,15 +2613,12 @@ class YoutubeDL(object): | ||||
|                     + ', '.join([f['format_id'] for f in formats_to_download])) | ||||
|             max_downloads_reached = False | ||||
|             for i, fmt in enumerate(formats_to_download): | ||||
|                 formats_to_download[i] = new_info = dict(info_dict) | ||||
|                 # Save a reference to the original info_dict so that it can be modified in process_info if needed | ||||
|                 formats_to_download[i] = new_info = self._copy_infodict(info_dict) | ||||
|                 new_info.update(fmt) | ||||
|                 new_info['__original_infodict'] = info_dict | ||||
|                 try: | ||||
|                     self.process_info(new_info) | ||||
|                 except MaxDownloadsReached: | ||||
|                     max_downloads_reached = True | ||||
|                 new_info.pop('__original_infodict') | ||||
|                 # Remove copied info | ||||
|                 for key, val in tuple(new_info.items()): | ||||
|                     if info_dict.get(key) == val: | ||||
| @@ -2826,7 +2826,7 @@ class YoutubeDL(object): | ||||
|         return None | ||||
|  | ||||
|     def process_info(self, info_dict): | ||||
|         """Process a single resolved IE result. (Modified it in-place)""" | ||||
|         """Process a single resolved IE result. (Modifies it in-place)""" | ||||
|  | ||||
|         assert info_dict.get('_type', 'video') == 'video' | ||||
|         original_infodict = info_dict | ||||
| @@ -2834,18 +2834,22 @@ class YoutubeDL(object): | ||||
|         if 'format' not in info_dict and 'ext' in info_dict: | ||||
|             info_dict['format'] = info_dict['ext'] | ||||
|  | ||||
|         # This is mostly just for backward compatibility of process_info | ||||
|         # As a side-effect, this allows for format-specific filters | ||||
|         if self._match_entry(info_dict) is not None: | ||||
|             info_dict['__write_download_archive'] = 'ignore' | ||||
|             return | ||||
|  | ||||
|         # Does nothing under normal operation - for backward compatibility of process_info | ||||
|         self.post_extract(info_dict) | ||||
|         self._num_downloads += 1 | ||||
|  | ||||
|         # info_dict['_filename'] needs to be set for backward compatibility | ||||
|         info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) | ||||
|         temp_filename = self.prepare_filename(info_dict, 'temp') | ||||
|         files_to_move = {} | ||||
|  | ||||
|         self._num_downloads += 1 | ||||
|  | ||||
|         # Forced printings | ||||
|         self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) | ||||
|  | ||||
| @@ -3259,17 +3263,14 @@ class YoutubeDL(object): | ||||
|             return info_dict | ||||
|         info_dict.setdefault('epoch', int(time.time())) | ||||
|         info_dict.setdefault('_type', 'video') | ||||
|         remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict | ||||
|         keep_keys = ['_type']  # Always keep this to facilitate load-info-json | ||||
|  | ||||
|         if remove_private_keys: | ||||
|             remove_keys |= { | ||||
|             reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in { | ||||
|                 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', | ||||
|                 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', | ||||
|             } | ||||
|             reject = lambda k, v: k not in keep_keys and ( | ||||
|                 k.startswith('_') or k in remove_keys or v is None) | ||||
|         else: | ||||
|             reject = lambda k, v: k in remove_keys | ||||
|             reject = lambda k, v: False | ||||
|  | ||||
|         def filter_fn(obj): | ||||
|             if isinstance(obj, dict): | ||||
| @@ -3296,14 +3297,8 @@ class YoutubeDL(object): | ||||
|                     actual_post_extract(video_dict or {}) | ||||
|                 return | ||||
|  | ||||
|             post_extractor = info_dict.get('__post_extractor') or (lambda: {}) | ||||
|             extra = post_extractor().items() | ||||
|             info_dict.update(extra) | ||||
|             info_dict.pop('__post_extractor', None) | ||||
|  | ||||
|             original_infodict = info_dict.get('__original_infodict') or {} | ||||
|             original_infodict.update(extra) | ||||
|             original_infodict.pop('__post_extractor', None) | ||||
|             post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) | ||||
|             info_dict.update(post_extractor()) | ||||
|  | ||||
|         actual_post_extract(info_dict or {}) | ||||
|  | ||||
|   | ||||
| @@ -474,8 +474,8 @@ def _real_main(argv=None): | ||||
|             'key': 'SponsorBlock', | ||||
|             'categories': sponsorblock_query, | ||||
|             'api': opts.sponsorblock_api, | ||||
|             # Run this immediately after extraction is complete | ||||
|             'when': 'pre_process' | ||||
|             # Run this after filtering videos | ||||
|             'when': 'after_filter' | ||||
|         }) | ||||
|     if opts.parse_metadata: | ||||
|         postprocessors.append({ | ||||
|   | ||||
| @@ -1550,11 +1550,11 @@ def create_parser(): | ||||
|             'and (optionally) arguments to be passed to it, separated by a colon ":". ' | ||||
|             'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' | ||||
|             'The "when" argument determines when the postprocessor is invoked. ' | ||||
|             'It can be one of "pre_process" (after extraction), ' | ||||
|             '"before_dl" (before video download), "post_process" (after video download; default), ' | ||||
|             '"after_move" (after moving file to their final locations), ' | ||||
|             'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' | ||||
|             '"before_dl" (before each video download), "post_process" (after each video download; default), ' | ||||
|             '"after_move" (after moving video file to it\'s final locations), ' | ||||
|             '"after_video" (after downloading and processing all formats of a video), ' | ||||
|             'or "playlist" (end of playlist). ' | ||||
|             'or "playlist" (at end of playlist). ' | ||||
|             'This option can be used multiple times to add different postprocessors')) | ||||
|  | ||||
|     sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=( | ||||
|   | ||||
| @@ -3166,7 +3166,7 @@ def qualities(quality_ids): | ||||
|     return q | ||||
|  | ||||
|  | ||||
| POSTPROCESS_WHEN = {'pre_process', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'} | ||||
| POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'} | ||||
|  | ||||
|  | ||||
| DEFAULT_OUTTMPL = { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan