mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-11-01 07:05:14 +00:00
[extractor/rutube] Extract chapters from description (#6345)
Authored by: mushbite
This commit is contained in:
@@ -3205,11 +3205,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
|
||||
), expected_type=list)
|
||||
|
||||
return self._extract_chapters(
|
||||
return self._extract_chapters_helper(
|
||||
chapter_list,
|
||||
chapter_time=lambda chapter: float_or_none(
|
||||
start_function=lambda chapter: float_or_none(
|
||||
traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
|
||||
chapter_title=lambda chapter: traverse_obj(
|
||||
title_function=lambda chapter: traverse_obj(
|
||||
chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
|
||||
duration=duration)
|
||||
|
||||
@@ -3222,42 +3222,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
chapter_title = lambda chapter: self._get_text(chapter, 'title')
|
||||
|
||||
return next(filter(None, (
|
||||
self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
|
||||
chapter_time, chapter_title, duration)
|
||||
self._extract_chapters_helper(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
|
||||
chapter_time, chapter_title, duration)
|
||||
for contents in content_list)), [])
|
||||
|
||||
def _extract_chapters_from_description(self, description, duration):
    """Look for chapter markers written as lines of a free-text description.

    Two line layouts are tried in order: "<timestamp> <title>" first and,
    only if that produces no chapters, "<title> <timestamp>". A timestamp
    has the form M:SS, MM:SS or H:MM:SS.

    :param description: Description text to scan; a falsy value is
        treated as an empty string.
    :param duration: Passed through unchanged to ``_extract_chapters``.
    :return: Whatever ``_extract_chapters`` returns for the first layout
        that yields a truthy result, otherwise its result for the second
        layout.
    """
    text = description or ''
    timestamp = r'(?:\d+:)?\d{1,2}:\d{2}'
    anything = r'.+?'
    # One marker per line: two captured fields separated by optional
    # punctuation followed by at least one whitespace character.
    line_template = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'

    # Matches are unsorted free text, hence strict=False in both passes.
    time_first = self._extract_chapters(
        re.findall(line_template % (timestamp, anything), text),
        chapter_time=lambda m: parse_duration(m[0]), chapter_title=lambda m: m[1],
        duration=duration, strict=False)
    if time_first:
        return time_first

    return self._extract_chapters(
        re.findall(line_template % (anything, timestamp), text),
        chapter_time=lambda m: parse_duration(m[1]), chapter_title=lambda m: m[0],
        duration=duration, strict=False)
|
||||
|
||||
def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
    """Turn raw chapter entries into a validated list of chapter dicts.

    :param chapter_list: Iterable of raw chapter entries; a falsy value
        is treated as empty.
    :param chapter_time: Callable mapping a raw entry to its start time
        (a number comparable with ``duration``) or ``None``.
    :param chapter_title: Callable mapping a raw entry to its title.
    :param duration: Total duration; chapters starting after it are
        rejected. When falsy, nothing is extracted.
    :param strict: When false, entries are sorted by start time (with
        ``None`` sorting as 0) before validation.
    :return: ``None`` if ``duration`` is falsy, otherwise a list of
        ``{'start_time': ..., 'title': ...}`` dicts with non-decreasing
        start times that do not exceed ``duration``.
    """
    if not duration:
        return
    chapter_list = [{
        'start_time': chapter_time(chapter),
        'title': chapter_title(chapter),
    } for chapter in chapter_list or []]
    if not strict:
        chapter_list.sort(key=lambda c: c['start_time'] or 0)

    # Sentinel entry so the first real chapter has a predecessor to be
    # compared against; it is dropped from the result.
    chapters = [{'start_time': 0}]
    for idx, chapter in enumerate(chapter_list):
        start_time = chapter['start_time']
        if start_time is None:
            self.report_warning(f'Incomplete chapter {idx}')
        elif chapters[-1]['start_time'] <= start_time <= duration:
            chapters.append(chapter)
        elif chapter not in chapters:
            # Name the bound that was actually violated; the chapter may be
            # past the end of the media rather than before its predecessor.
            issue = (f'{start_time} > {duration}' if start_time > duration
                     else f'{start_time} < {chapters[-1]["start_time"]}')
            self.report_warning(
                f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
    return chapters[1:]
|
||||
|
||||
def _extract_comment(self, comment_renderer, parent=None):
|
||||
comment_id = comment_renderer.get('commentId')
|
||||
if not comment_id:
|
||||
|
||||
Reference in New Issue
Block a user