mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	[youtube] Extract chapters
This commit is contained in:
		| @@ -1257,6 +1257,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id | ||||
|         return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_chapters(description, duration): | ||||
|         if not description: | ||||
|             return None | ||||
|         chapter_lines = re.findall( | ||||
|             r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)', | ||||
|             description) | ||||
|         if not chapter_lines: | ||||
|             return None | ||||
|         chapters = [] | ||||
|         for next_num, (chapter_line, time_point) in enumerate( | ||||
|                 chapter_lines, start=1): | ||||
|             start_time = parse_duration(time_point) | ||||
|             if start_time is None: | ||||
|                 continue | ||||
|             end_time = (duration if next_num == len(chapter_lines) | ||||
|                         else parse_duration(chapter_lines[next_num][1])) | ||||
|             if end_time is None: | ||||
|                 continue | ||||
|             chapter_title = re.sub( | ||||
|                 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-') | ||||
|             chapter_title = re.sub(r'\s+', ' ', chapter_title) | ||||
|             chapters.append({ | ||||
|                 'start_time': start_time, | ||||
|                 'end_time': end_time, | ||||
|                 'title': chapter_title, | ||||
|             }) | ||||
|         return chapters | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|  | ||||
| @@ -1399,9 +1428,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             video_title = '_' | ||||
|  | ||||
|         # description | ||||
|         video_description = get_element_by_id("eow-description", video_webpage) | ||||
|         description_original = video_description = get_element_by_id("eow-description", video_webpage) | ||||
|         if video_description: | ||||
|             video_description = re.sub(r'''(?x) | ||||
|             description_original = video_description = re.sub(r'''(?x) | ||||
|                 <a\s+ | ||||
|                     (?:[a-zA-Z-]+="[^"]*"\s+)*? | ||||
|                     (?:title|href)="([^"]+)"\s+ | ||||
| @@ -1558,6 +1587,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|         if self._downloader.params.get('writeannotations', False): | ||||
|             video_annotations = self._extract_annotations(video_id) | ||||
|  | ||||
|         chapters = self._extract_chapters(description_original, video_duration) | ||||
|  | ||||
|         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): | ||||
|             self.report_rtmp_download() | ||||
|             formats = [{ | ||||
| @@ -1790,6 +1821,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             'duration': video_duration, | ||||
|             'age_limit': 18 if age_gate else 0, | ||||
|             'annotations': video_annotations, | ||||
|             'chapters': chapters, | ||||
|             'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id, | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Sergey M․
					Sergey M․