mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	Improve --download-sections
				
					
				
			* Support negative time-ranges
			* Add `*from-url` to obey time-ranges in URL
			Closes #7248
This commit is contained in:
		
							
								
								
									
										14
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								README.md
									
									
									
									
									
								
							| @@ -610,12 +610,14 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git | ||||
|     --no-hls-use-mpegts             Do not use the mpegts container for HLS | ||||
|                                     videos. This is default when not downloading | ||||
|                                     live streams | ||||
|     --download-sections REGEX       Download only chapters whose title matches | ||||
|                                     the given regular expression. Time ranges | ||||
|                                     prefixed by a "*" can also be used in place | ||||
|                                     of chapters to download the specified range. | ||||
|                                     Needs ffmpeg. This option can be used | ||||
|                                     multiple times to download multiple | ||||
|     --download-sections REGEX       Download only chapters that match the | ||||
|                                     regular expression. A "*" prefix denotes | ||||
|                                     time-range instead of chapter. Negative | ||||
|                                     timestamps are calculated from the end. | ||||
|                                     "*from-url" can be used to download between | ||||
|                                     the "start_time" and "end_time" extracted | ||||
|                                     from the URL. Needs ffmpeg. This option can | ||||
|                                     be used multiple times to download multiple | ||||
|                                     sections, e.g. --download-sections | ||||
|                                     "*10:15-inf" --download-sections "intro" | ||||
|     --downloader [PROTO:]NAME       Name or path of the external downloader to | ||||
|   | ||||
| @@ -2806,11 +2806,13 @@ class YoutubeDL: | ||||
|                 new_info.update(fmt) | ||||
|                 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf') | ||||
|                 end_time = offset + min(chapter.get('end_time', duration), duration) | ||||
|                 # duration may not be accurate. So allow deviations <1sec | ||||
|                 if end_time == float('inf') or end_time > offset + duration + 1: | ||||
|                     end_time = None | ||||
|                 if chapter or offset: | ||||
|                     new_info.update({ | ||||
|                         'section_start': offset + chapter.get('start_time', 0), | ||||
|                         # duration may not be accurate. So allow deviations <1sec | ||||
|                         'section_end': end_time if end_time <= offset + duration + 1 else None, | ||||
|                         'section_end': end_time, | ||||
|                         'section_title': chapter.get('title'), | ||||
|                         'section_number': chapter.get('index'), | ||||
|                     }) | ||||
|   | ||||
| @@ -320,26 +320,49 @@ def validate_options(opts): | ||||
|         opts.skip_download = None | ||||
|         del opts.outtmpl['default'] | ||||
| 
 | ||||
|     def parse_chapters(name, value): | ||||
|         chapters, ranges = [], [] | ||||
|     def parse_chapters(name, value, advanced=False): | ||||
|         parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x) | ||||
|         TIMESTAMP_RE = r'''(?x)(?: | ||||
|             (?P<start_sign>-?)(?P<start>[^-]+) | ||||
|         )?\s*-\s*(?: | ||||
|             (?P<end_sign>-?)(?P<end>[^-]+) | ||||
|         )?''' | ||||
| 
 | ||||
|         chapters, ranges, from_url = [], [], False | ||||
|         for regex in value or []: | ||||
|             if regex.startswith('*'): | ||||
|                 for range_ in map(str.strip, regex[1:].split(',')): | ||||
|                     mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_) | ||||
|                     dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf')) | ||||
|                     if None in (dur or [None]): | ||||
|                         raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"') | ||||
|                     ranges.append(dur) | ||||
|             if advanced and regex == '*from-url': | ||||
|                 from_url = True | ||||
|                 continue | ||||
|             elif not regex.startswith('*'): | ||||
|                 try: | ||||
|                     chapters.append(re.compile(regex)) | ||||
|                 except re.error as err: | ||||
|                     raise ValueError(f'invalid {name} regex "{regex}" - {err}') | ||||
|         return chapters, ranges | ||||
|                 continue | ||||
| 
 | ||||
|     opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters) | ||||
|     opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges)) | ||||
|             for range_ in map(str.strip, regex[1:].split(',')): | ||||
|                 mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_) | ||||
|                 dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')] | ||||
|                 signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign')) | ||||
| 
 | ||||
|                 err = None | ||||
|                 if None in (dur or [None]): | ||||
|                     err = 'Must be of the form "*start-end"' | ||||
|                 elif not advanced and any(signs): | ||||
|                     err = 'Negative timestamps are not allowed' | ||||
|                 else: | ||||
|                     dur[0] *= -1 if signs[0] else 1 | ||||
|                     dur[1] *= -1 if signs[1] else 1 | ||||
|                     if dur[1] == float('-inf'): | ||||
|                         err = '"-inf" is not a valid end' | ||||
|                 if err: | ||||
|                     raise ValueError(f'invalid {name} time range "{regex}". {err}') | ||||
|                 ranges.append(dur) | ||||
| 
 | ||||
|         return chapters, ranges, from_url | ||||
| 
 | ||||
|     opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters) | ||||
|     opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True)) | ||||
| 
 | ||||
|     # Cookies from browser | ||||
|     if opts.cookiesfrombrowser: | ||||
|   | ||||
| @@ -1012,8 +1012,9 @@ def create_parser(): | ||||
|         '--download-sections', | ||||
|         metavar='REGEX', dest='download_ranges', action='append', | ||||
|         help=( | ||||
|             'Download only chapters whose title matches the given regular expression. ' | ||||
|             'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. ' | ||||
|             'Download only chapters that match the regular expression. ' | ||||
|             'A "*" prefix denotes time-range instead of chapter. Negative timestamps are calculated from the end. ' | ||||
|             '"*from-url" can be used to download between the "start_time" and "end_time" extracted from the URL. ' | ||||
|             'Needs ffmpeg. This option can be used multiple times to download multiple sections, ' | ||||
|             'e.g. --download-sections "*10:15-inf" --download-sections "intro"')) | ||||
|     downloader.add_option( | ||||
|   | ||||
| @@ -3753,11 +3753,11 @@ def match_filter_func(filters, breaking_filters=None): | ||||
| 
 | ||||
| 
 | ||||
| class download_range_func: | ||||
|     def __init__(self, chapters, ranges): | ||||
|         self.chapters, self.ranges = chapters, ranges | ||||
|     def __init__(self, chapters, ranges, from_info=False): | ||||
|         self.chapters, self.ranges, self.from_info = chapters, ranges, from_info | ||||
| 
 | ||||
|     def __call__(self, info_dict, ydl): | ||||
|         if not self.ranges and not self.chapters: | ||||
|         if not any((self.ranges, self.chapters, self.from_info)): | ||||
|             yield {} | ||||
| 
 | ||||
|         warning = ('There are no chapters matching the regex' if info_dict.get('chapters') | ||||
| @@ -3770,7 +3770,21 @@ class download_range_func: | ||||
|         if self.chapters and warning: | ||||
|             ydl.to_screen(f'[info] {info_dict["id"]}: {warning}') | ||||
| 
 | ||||
|         yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or []) | ||||
|         for start, end in self.ranges or []: | ||||
|             yield { | ||||
|                 'start_time': self._handle_negative_timestamp(start, info_dict), | ||||
|                 'end_time': self._handle_negative_timestamp(end, info_dict), | ||||
|             } | ||||
| 
 | ||||
|         if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')): | ||||
|             yield { | ||||
|                 'start_time': info_dict.get('start_time'), | ||||
|                 'end_time': info_dict.get('end_time'), | ||||
|             } | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _handle_negative_timestamp(time, info): | ||||
|         return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time | ||||
| 
 | ||||
|     def __eq__(self, other): | ||||
|         return (isinstance(other, download_range_func) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan