1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-01-09 08:21:24 +00:00

[ie/youtube] Fix tracking of parent comment among replies (#15439)

Fix d22436e5dc

Closes #15438
Authored by: bashonly
This commit is contained in:
bashonly
2025-12-30 14:53:33 -06:00
committed by GitHub
parent 6c918c5071
commit 468aa6a9b4

View File

@@ -2437,7 +2437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return info
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None, depth=1):
get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
@@ -2469,15 +2469,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
return _continuation
def extract_thread(contents, entity_payloads):
if not parent:
def extract_thread(contents, entity_payloads, thread_parent, thread_depth):
if not thread_parent:
tracker['current_page_thread'] = 0
if max_depth < tracker['current_depth']:
if max_depth < thread_depth:
return
for content in contents:
if not parent and tracker['total_parent_comments'] >= max_parents:
if not thread_parent and tracker['total_parent_comments'] >= max_parents:
yield
comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
@@ -2487,7 +2487,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
expected_type=dict, default={})
comment = self._extract_comment_old(comment_renderer, parent)
comment = self._extract_comment_old(comment_renderer, thread_parent)
# new comment format
else:
@@ -2498,7 +2498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not comment_keys:
continue
entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
comment = self._extract_comment(entities, parent)
comment = self._extract_comment(entities, thread_parent)
if comment:
comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
@@ -2517,14 +2517,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue
self.report_warning(
'Detected YouTube comments looping. Stopping comment extraction '
f'{"for this thread" if parent else ""} as we probably cannot get any more.')
f'{"for this thread" if thread_parent else ""} as we probably cannot get any more.')
yield
break # Safeguard for recursive call in subthreads code path below
else:
tracker['seen_comment_ids'].add(comment['id'])
tracker['seen_comment_ids'].add(comment_id)
tracker['running_total'] += 1
tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
tracker['total_reply_comments' if thread_parent else 'total_parent_comments'] += 1
yield comment
# Attempt to get the replies
@@ -2536,24 +2536,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'subThreads', lambda _, v: v['commentThreadRenderer']))
# Recursively extract from `commentThreadRenderer`s in `subThreads`
if subthreads:
tracker['current_depth'] += 1
for entry in extract_thread(subthreads, entity_payloads):
for entry in extract_thread(subthreads, entity_payloads, comment_id, thread_depth + 1):
if entry:
yield entry
tracker['current_depth'] -= 1
# All of the subThreads' `continuationItemRenderer`s were within the nested
# `commentThreadRenderer`s and are now exhausted, so avoid unnecessary recursion below
continue
tracker['current_page_thread'] += 1
tracker['current_depth'] += 1
# Recursively extract from `continuationItemRenderer`s in `subThreads`
comment_entries_iter = self._comment_entries(
comment_replies_renderer, ytcfg, video_id,
parent=comment_id, tracker=tracker)
parent=comment_id, tracker=tracker, depth=thread_depth + 1)
yield from itertools.islice(comment_entries_iter, min(
max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
tracker['current_depth'] -= 1
# Keeps track of counts across recursive calls
if not tracker:
@@ -2565,13 +2561,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'total_reply_comments': 0,
'seen_comment_ids': set(),
'pinned_comment_ids': set(),
'current_depth': 1,
}
_max_comments, max_parents, max_replies, max_replies_per_thread, max_depth, *_ = (
int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 5)
if max_depth < tracker['current_depth']:
if max_depth < depth:
return
continuation = self._extract_continuation(root_continuation_data)
@@ -2645,7 +2640,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
break
continue
for entry in extract_thread(continuation_items, mutations):
for entry in extract_thread(continuation_items, mutations, parent, depth):
if not entry:
return
yield entry