mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[extractor/youtube] Extract more metadata for comments (#7179)
Adds new comment fields:
* `author_url` - The URL to the comment author's page
* `author_is_verified` - Whether the author is verified on the platform
* `is_pinned` - Whether the comment is pinned to the top of the comments

Closes https://github.com/yt-dlp/yt-dlp/issues/5411
Authored by: coletdjnz
This commit is contained in:
		| @@ -314,6 +314,11 @@ class InfoExtractor: | |||||||
|                         * "author" - human-readable name of the comment author |                         * "author" - human-readable name of the comment author | ||||||
|                         * "author_id" - user ID of the comment author |                         * "author_id" - user ID of the comment author | ||||||
|                         * "author_thumbnail" - The thumbnail of the comment author |                         * "author_thumbnail" - The thumbnail of the comment author | ||||||
|  |                         * "author_url" - The url to the comment author's page | ||||||
|  |                         * "author_is_verified" - Whether the author is verified | ||||||
|  |                                                  on the platform | ||||||
|  |                         * "author_is_uploader" - Whether the comment is made by | ||||||
|  |                                                  the video uploader | ||||||
|                         * "id" - Comment ID |                         * "id" - Comment ID | ||||||
|                         * "html" - Comment as HTML |                         * "html" - Comment as HTML | ||||||
|                         * "text" - Plain text of the comment |                         * "text" - Plain text of the comment | ||||||
| @@ -325,8 +330,8 @@ class InfoExtractor: | |||||||
|                         * "dislike_count" - Number of negative ratings of the comment |                         * "dislike_count" - Number of negative ratings of the comment | ||||||
|                         * "is_favorited" - Whether the comment is marked as |                         * "is_favorited" - Whether the comment is marked as | ||||||
|                                            favorite by the video uploader |                                            favorite by the video uploader | ||||||
|                         * "author_is_uploader" - Whether the comment is made by |                         * "is_pinned" - Whether the comment is pinned to | ||||||
|                                                  the video uploader |                                         the top of the comments | ||||||
|     age_limit:      Age restriction for the video, as an integer (years) |     age_limit:      Age restriction for the video, as an integer (years) | ||||||
|     webpage_url:    The URL to the video webpage, if given to yt-dlp it |     webpage_url:    The URL to the video webpage, if given to yt-dlp it | ||||||
|                     should allow to get the same result again. (It will be set |                     should allow to get the same result again. (It will be set | ||||||
|   | |||||||
| @@ -3271,37 +3271,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||||||
|         if not comment_id: |         if not comment_id: | ||||||
|             return |             return | ||||||
| 
 | 
 | ||||||
|         text = self._get_text(comment_renderer, 'contentText') |         info = { | ||||||
|  |             'id': comment_id, | ||||||
|  |             'text': self._get_text(comment_renderer, 'contentText'), | ||||||
|  |             'like_count': self._get_count(comment_renderer, 'voteCount'), | ||||||
|  |             'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})), | ||||||
|  |             'author': self._get_text(comment_renderer, 'authorText'), | ||||||
|  |             'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})), | ||||||
|  |             'parent': parent or 'root', | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         # Timestamp is an estimate calculated from the current time and time_text |         # Timestamp is an estimate calculated from the current time and time_text | ||||||
|         time_text = self._get_text(comment_renderer, 'publishedTimeText') or '' |         time_text = self._get_text(comment_renderer, 'publishedTimeText') or '' | ||||||
|         timestamp = self._parse_time_text(time_text) |         timestamp = self._parse_time_text(time_text) | ||||||
| 
 | 
 | ||||||
|         author = self._get_text(comment_renderer, 'authorText') |         info.update({ | ||||||
|         author_id = try_get(comment_renderer, |             # FIXME: non-standard, but we need a way of showing that it is an estimate. | ||||||
|                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str) |             '_time_text': time_text, | ||||||
| 
 |  | ||||||
|         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'], |  | ||||||
|                                                        lambda x: x['likeCount']), str)) or 0 |  | ||||||
|         author_thumbnail = try_get(comment_renderer, |  | ||||||
|                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str) |  | ||||||
| 
 |  | ||||||
|         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool) |  | ||||||
|         is_favorited = 'creatorHeart' in (try_get( |  | ||||||
|             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}) |  | ||||||
|         return { |  | ||||||
|             'id': comment_id, |  | ||||||
|             'text': text, |  | ||||||
|             'timestamp': timestamp, |             'timestamp': timestamp, | ||||||
|             'time_text': time_text, |         }) | ||||||
|             'like_count': votes, | 
 | ||||||
|             'is_favorited': is_favorited, |         info['author_url'] = urljoin( | ||||||
|             'author': author, |             'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', ( | ||||||
|             'author_id': author_id, |                 ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))), | ||||||
|             'author_thumbnail': author_thumbnail, |                 expected_type=str, get_all=False)) | ||||||
|             'author_is_uploader': author_is_uploader, | 
 | ||||||
|             'parent': parent or 'root' |         author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner') | ||||||
|         } |         if author_is_uploader is not None: | ||||||
|  |             info['author_is_uploader'] = author_is_uploader | ||||||
|  | 
 | ||||||
|  |         comment_abr = traverse_obj( | ||||||
|  |             comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict) | ||||||
|  |         if comment_abr is not None: | ||||||
|  |             info['is_favorited'] = 'creatorHeart' in comment_abr | ||||||
|  | 
 | ||||||
|  |         comment_ab_icontype = traverse_obj( | ||||||
|  |             comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType')) | ||||||
|  |         if comment_ab_icontype is not None: | ||||||
|  |             info['author_is_verified'] = comment_ab_icontype in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE') | ||||||
|  | 
 | ||||||
|  |         is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge') | ||||||
|  |         if is_pinned: | ||||||
|  |             info['is_pinned'] = True | ||||||
|  | 
 | ||||||
|  |         return info | ||||||
| 
 | 
 | ||||||
|     def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None): |     def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None): | ||||||
| 
 | 
 | ||||||
| @@ -3349,14 +3362,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||||||
|                 comment = self._extract_comment(comment_renderer, parent) |                 comment = self._extract_comment(comment_renderer, parent) | ||||||
|                 if not comment: |                 if not comment: | ||||||
|                     continue |                     continue | ||||||
|                 is_pinned = bool(traverse_obj(comment_renderer, 'pinnedCommentBadge')) |  | ||||||
|                 comment_id = comment['id'] |                 comment_id = comment['id'] | ||||||
|                 if is_pinned: |                 if comment.get('is_pinned'): | ||||||
|                     tracker['pinned_comment_ids'].add(comment_id) |                     tracker['pinned_comment_ids'].add(comment_id) | ||||||
|                 # Sometimes YouTube may break and give us infinite looping comments. |                 # Sometimes YouTube may break and give us infinite looping comments. | ||||||
|                 # See: https://github.com/yt-dlp/yt-dlp/issues/6290 |                 # See: https://github.com/yt-dlp/yt-dlp/issues/6290 | ||||||
|                 if comment_id in tracker['seen_comment_ids']: |                 if comment_id in tracker['seen_comment_ids']: | ||||||
|                     if comment_id in tracker['pinned_comment_ids'] and not is_pinned: |                     if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'): | ||||||
|                         # Pinned comments may appear a second time in newest first sort |                         # Pinned comments may appear a second time in newest first sort | ||||||
|                         # See: https://github.com/yt-dlp/yt-dlp/issues/6712 |                         # See: https://github.com/yt-dlp/yt-dlp/issues/6712 | ||||||
|                         continue |                         continue | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 coletdjnz
					coletdjnz