mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[extractor/biliIntl] Add comment extraction (#6079)
Authored by: HobbyistDev
This commit is contained in:
		| @@ -26,6 +26,7 @@ from ..utils import ( | ||||
|     srt_subtitles_timecode, | ||||
|     str_or_none, | ||||
|     traverse_obj, | ||||
|     unified_timestamp, | ||||
|     unsmuggle_url, | ||||
|     url_or_none, | ||||
|     urlencode_postdata, | ||||
| @@ -996,6 +997,53 @@ class BiliIntlIE(BiliIntlBaseIE): | ||||
|             'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$', | ||||
|             'upload_date': '20221212', | ||||
|             'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation', | ||||
|         }, | ||||
|     }, { | ||||
|         # episode comment extraction | ||||
|         'url': 'https://www.bilibili.tv/en/play/34580/340317', | ||||
|         'info_dict': { | ||||
|             'id': '340317', | ||||
|             'ext': 'mp4', | ||||
|             'timestamp': 1604057820, | ||||
|             'upload_date': '20201030', | ||||
|             'episode_number': 5, | ||||
|             'title': 'E5 - My Own Steel', | ||||
|             'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2', | ||||
|             'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$', | ||||
|             'episode': 'Episode 5', | ||||
|             'comment_count': int, | ||||
|             'chapters': [{ | ||||
|                 'start_time': 0, | ||||
|                 'end_time': 61.0, | ||||
|                 'title': '<Untitled Chapter 1>' | ||||
|             }, { | ||||
|                 'start_time': 61.0, | ||||
|                 'end_time': 134.0, | ||||
|                 'title': 'Intro' | ||||
|             }, { | ||||
|                 'start_time': 1290.0, | ||||
|                 'end_time': 1379.0, | ||||
|                 'title': 'Outro' | ||||
|             }], | ||||
|         }, | ||||
|         'params': { | ||||
|             'getcomments': True | ||||
|         } | ||||
|     }, { | ||||
|         # user generated content comment extraction | ||||
|         'url': 'https://www.bilibili.tv/en/video/2045730385', | ||||
|         'info_dict': { | ||||
|             'id': '2045730385', | ||||
|             'ext': 'mp4', | ||||
|             'description': 'md5:693b6f3967fb4e7e7764ea817857c33a', | ||||
|             'timestamp': 1667891924, | ||||
|             'upload_date': '20221108', | ||||
|             'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation', | ||||
|             'comment_count': int, | ||||
|             'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg', | ||||
|         }, | ||||
|         'params': { | ||||
|             'getcomments': True | ||||
|         } | ||||
|     }, { | ||||
|         # episode id without intro and outro | ||||
| @@ -1055,11 +1103,69 @@ class BiliIntlIE(BiliIntlBaseIE): | ||||
| 
 | ||||
|         # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found | ||||
|         return merge_dicts( | ||||
|             self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), { | ||||
|             self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), { | ||||
|                 'title': self._html_search_meta('og:title', webpage), | ||||
|                 'description': self._html_search_meta('og:description', webpage) | ||||
|             }) | ||||
| 
 | ||||
|     def _get_comments_reply(self, root_id, next_id=0, display_id=None): | ||||
|         comment_api_raw_data = self._download_json( | ||||
|             'https://api.bilibili.tv/reply/web/detail', display_id, | ||||
|             note=f'Downloading reply comment of {root_id} - {next_id}', | ||||
|             query={ | ||||
|                 'platform': 'web', | ||||
|                 'ps': 20,  # comment's reply per page (default: 3) | ||||
|                 'root': root_id, | ||||
|                 'next': next_id, | ||||
|             }) | ||||
| 
 | ||||
|         for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)): | ||||
|             yield { | ||||
|                 'author': traverse_obj(replies, ('member', 'name')), | ||||
|                 'author_id': traverse_obj(replies, ('member', 'mid')), | ||||
|                 'author_thumbnail': traverse_obj(replies, ('member', 'face')), | ||||
|                 'text': traverse_obj(replies, ('content', 'message')), | ||||
|                 'id': replies.get('rpid'), | ||||
|                 'like_count': int_or_none(replies.get('like_count')), | ||||
|                 'parent': replies.get('parent'), | ||||
|                 'timestamp': unified_timestamp(replies.get('ctime_text')) | ||||
|             } | ||||
| 
 | ||||
|         if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')): | ||||
|             yield from self._get_comments_reply( | ||||
|                 root_id, comment_api_raw_data['data']['cursor']['next'], display_id) | ||||
| 
 | ||||
|     def _get_comments(self, video_id, ep_id): | ||||
|         for i in itertools.count(0): | ||||
|             comment_api_raw_data = self._download_json( | ||||
|                 'https://api.bilibili.tv/reply/web/root', video_id, | ||||
|                 note=f'Downloading comment page {i + 1}', | ||||
|                 query={ | ||||
|                     'platform': 'web', | ||||
|                     'pn': i,  # page number | ||||
|                     'ps': 20,  # comment per page (default: 20) | ||||
|                     'oid': video_id, | ||||
|                     'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content | ||||
|                     'sort_type': 1,  # 1: best, 2: recent | ||||
|                 }) | ||||
| 
 | ||||
|             for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)): | ||||
|                 yield { | ||||
|                     'author': traverse_obj(replies, ('member', 'name')), | ||||
|                     'author_id': traverse_obj(replies, ('member', 'mid')), | ||||
|                     'author_thumbnail': traverse_obj(replies, ('member', 'face')), | ||||
|                     'text': traverse_obj(replies, ('content', 'message')), | ||||
|                     'id': replies.get('rpid'), | ||||
|                     'like_count': int_or_none(replies.get('like_count')), | ||||
|                     'timestamp': unified_timestamp(replies.get('ctime_text')), | ||||
|                     'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))), | ||||
|                 } | ||||
|                 if replies.get('count'): | ||||
|                     yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id) | ||||
| 
 | ||||
|             if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')): | ||||
|                 break | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') | ||||
|         video_id = ep_id or aid | ||||
| @@ -1087,7 +1193,8 @@ class BiliIntlIE(BiliIntlBaseIE): | ||||
|             **self._extract_video_metadata(url, video_id, season_id), | ||||
|             'formats': self._get_formats(ep_id=ep_id, aid=aid), | ||||
|             'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid), | ||||
|             'chapters': chapters | ||||
|             'chapters': chapters, | ||||
|             '__post_extractor': self.extract_comments(video_id, ep_id) | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 HobbyistDev
					HobbyistDev