mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[extractor] Generalize getcomments implementation
				
					
				
			This commit is contained in:
		| @@ -2241,7 +2241,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|     def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None): | ||||
|  | ||||
|         def extract_header(contents): | ||||
|-            _total_comments = 0 | ||||
|             _continuation = None | ||||
|             for content in contents: | ||||
|                 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer']) | ||||
| @@ -2251,7 +2250,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 if expected_comment_count: | ||||
|                     comment_counts[1] = expected_comment_count | ||||
|                     self.to_screen('Downloading ~%d comments' % expected_comment_count) | ||||
|-                    _total_comments = comment_counts[1] | ||||
|                 sort_mode_str = self._configuration_arg('comment_sort', [''])[0] | ||||
|                 comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top | ||||
|  | ||||
| @@ -2271,7 +2269,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                     sort_text = 'top comments' if comment_sort_index == 0 else 'newest first' | ||||
|                 self.to_screen('Sorting comments by %s' % sort_text) | ||||
|                 break | ||||
|-            return _total_comments, _continuation | ||||
|+            return _continuation | ||||
|  | ||||
|         def extract_thread(contents): | ||||
|             if not parent: | ||||
| @@ -2359,9 +2357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                          lambda x: x['appendContinuationItemsAction']['continuationItems']), | ||||
|                         list) or [] | ||||
|                     if is_first_continuation: | ||||
|-                        total_comments, continuation = extract_header(continuation_items) | ||||
|-                        if total_comments: | ||||
|-                            yield total_comments | ||||
|+                        continuation = extract_header(continuation_items) | ||||
|                         is_first_continuation = False | ||||
|                         if continuation: | ||||
|                             break | ||||
| @@ -2389,9 +2385,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                         continue | ||||
|                     if is_first_continuation: | ||||
|                         header_continuation_items = [continuation_renderer.get('header') or {}] | ||||
|-                        total_comments, continuation = extract_header(header_continuation_items) | ||||
|-                        if total_comments: | ||||
|-                            yield total_comments | ||||
|+                        continuation = extract_header(header_continuation_items) | ||||
|                         is_first_continuation = False | ||||
|                         if continuation: | ||||
|                             break | ||||
| @@ -2419,35 +2413,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             [bytes_to_intlist(base64.b64decode(part)) for part in parts])) | ||||
|         return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8') | ||||
|  | ||||
|-    def _extract_comments(self, ytcfg, video_id, contents, webpage): | ||||
|+    def _get_comments(self, ytcfg, video_id, contents, webpage): | ||||
|         """Entry for comment extraction""" | ||||
|         def _real_comment_extract(contents): | ||||
|             yield from self._comment_entries( | ||||
|                 traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id) | ||||
|  | ||||
|-        comments = [] | ||||
|-        estimated_total = 0 | ||||
|-        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf') | ||||
|+        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) | ||||
|         # Force English regardless of account setting to prevent parsing issues | ||||
|         # See: https://github.com/yt-dlp/yt-dlp/issues/532 | ||||
|         ytcfg = copy.deepcopy(ytcfg) | ||||
|         traverse_obj( | ||||
|             ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en' | ||||
|-        try: | ||||
|-            for comment in _real_comment_extract(contents): | ||||
|-                if len(comments) >= max_comments: | ||||
|-                    break | ||||
|-                if isinstance(comment, int): | ||||
|-                    estimated_total = comment | ||||
|-                    continue | ||||
|-                comments.append(comment) | ||||
|-        except KeyboardInterrupt: | ||||
|-            self.to_screen('Interrupted by user') | ||||
|-        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total)) | ||||
|-        return { | ||||
|-            'comments': comments, | ||||
|-            'comment_count': len(comments), | ||||
|-        } | ||||
|+        return itertools.islice(_real_comment_extract(contents), 0, max_comments) | ||||
|  | ||||
|     @staticmethod | ||||
|     def _get_checkok_params(): | ||||
| @@ -3209,8 +3187,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             needs_auth=info['age_limit'] >= 18, | ||||
|             is_unlisted=None if is_private is None else is_unlisted) | ||||
|  | ||||
|-        if self.get_param('getcomments', False): | ||||
|-            info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage) | ||||
|+        info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage) | ||||
|  | ||||
|         self.mark_watched(video_id, player_responses) | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan