1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-03-04 05:09:40 +00:00

[ie/youtube] Fix tracking of parent comment among replies (#15439)

Fix d22436e5dc

Closes #15438
Authored by: bashonly
This commit is contained in:
bashonly
2025-12-30 14:53:33 -06:00
committed by GitHub
parent 6c918c5071
commit 468aa6a9b4

View File

@@ -2437,7 +2437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return info return info
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None): def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None, depth=1):
get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0] get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
@@ -2469,15 +2469,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break break
return _continuation return _continuation
def extract_thread(contents, entity_payloads): def extract_thread(contents, entity_payloads, thread_parent, thread_depth):
if not parent: if not thread_parent:
tracker['current_page_thread'] = 0 tracker['current_page_thread'] = 0
if max_depth < tracker['current_depth']: if max_depth < thread_depth:
return return
for content in contents: for content in contents:
if not parent and tracker['total_parent_comments'] >= max_parents: if not thread_parent and tracker['total_parent_comments'] >= max_parents:
yield yield
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer']) comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
@@ -2487,7 +2487,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]], (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
expected_type=dict, default={}) expected_type=dict, default={})
comment = self._extract_comment_old(comment_renderer, parent) comment = self._extract_comment_old(comment_renderer, thread_parent)
# new comment format # new comment format
else: else:
@@ -2498,7 +2498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not comment_keys: if not comment_keys:
continue continue
entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys) entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
comment = self._extract_comment(entities, parent) comment = self._extract_comment(entities, thread_parent)
if comment: if comment:
comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
@@ -2517,14 +2517,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue continue
self.report_warning( self.report_warning(
'Detected YouTube comments looping. Stopping comment extraction ' 'Detected YouTube comments looping. Stopping comment extraction '
f'{"for this thread" if parent else ""} as we probably cannot get any more.') f'{"for this thread" if thread_parent else ""} as we probably cannot get any more.')
yield yield
break # Safeguard for recursive call in subthreads code path below break # Safeguard for recursive call in subthreads code path below
else: else:
tracker['seen_comment_ids'].add(comment['id']) tracker['seen_comment_ids'].add(comment_id)
tracker['running_total'] += 1 tracker['running_total'] += 1
tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1 tracker['total_reply_comments' if thread_parent else 'total_parent_comments'] += 1
yield comment yield comment
# Attempt to get the replies # Attempt to get the replies
@@ -2536,24 +2536,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'subThreads', lambda _, v: v['commentThreadRenderer'])) 'subThreads', lambda _, v: v['commentThreadRenderer']))
# Recursively extract from `commentThreadRenderer`s in `subThreads` # Recursively extract from `commentThreadRenderer`s in `subThreads`
if subthreads: if subthreads:
tracker['current_depth'] += 1 for entry in extract_thread(subthreads, entity_payloads, comment_id, thread_depth + 1):
for entry in extract_thread(subthreads, entity_payloads):
if entry: if entry:
yield entry yield entry
tracker['current_depth'] -= 1
# All of the subThreads' `continuationItemRenderer`s were within the nested # All of the subThreads' `continuationItemRenderer`s were within the nested
# `commentThreadRenderer`s and are now exhausted, so avoid unnecessary recursion below # `commentThreadRenderer`s and are now exhausted, so avoid unnecessary recursion below
continue continue
tracker['current_page_thread'] += 1 tracker['current_page_thread'] += 1
tracker['current_depth'] += 1
# Recursively extract from `continuationItemRenderer`s in `subThreads` # Recursively extract from `continuationItemRenderer`s in `subThreads`
comment_entries_iter = self._comment_entries( comment_entries_iter = self._comment_entries(
comment_replies_renderer, ytcfg, video_id, comment_replies_renderer, ytcfg, video_id,
parent=comment_id, tracker=tracker) parent=comment_id, tracker=tracker, depth=thread_depth + 1)
yield from itertools.islice(comment_entries_iter, min( yield from itertools.islice(comment_entries_iter, min(
max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))) max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
tracker['current_depth'] -= 1
# Keeps track of counts across recursive calls # Keeps track of counts across recursive calls
if not tracker: if not tracker:
@@ -2565,13 +2561,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'total_reply_comments': 0, 'total_reply_comments': 0,
'seen_comment_ids': set(), 'seen_comment_ids': set(),
'pinned_comment_ids': set(), 'pinned_comment_ids': set(),
'current_depth': 1,
} }
_max_comments, max_parents, max_replies, max_replies_per_thread, max_depth, *_ = ( _max_comments, max_parents, max_replies, max_replies_per_thread, max_depth, *_ = (
int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 5) int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 5)
if max_depth < tracker['current_depth']: if max_depth < depth:
return return
continuation = self._extract_continuation(root_continuation_data) continuation = self._extract_continuation(root_continuation_data)
@@ -2645,7 +2640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break break
continue continue
for entry in extract_thread(continuation_items, mutations): for entry in extract_thread(continuation_items, mutations, parent, depth):
if not entry: if not entry:
return return
yield entry yield entry