mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[ie/BiliBiliBangumi] Fix extractors (#7337)
- Overhaul BiliBiliBangumi extractor for the site's new API
- Add BiliBiliBangumiSeason extractor
- Refactor BiliBiliBangumiMedia extractor

Closes #6701, Closes #7400
Authored by: GD-Slime
This commit is contained in:
		| @@ -214,6 +214,7 @@ from .bild import BildIE | ||||
| from .bilibili import ( | ||||
|     BiliBiliIE, | ||||
|     BiliBiliBangumiIE, | ||||
|     BiliBiliBangumiSeasonIE, | ||||
|     BiliBiliBangumiMediaIE, | ||||
|     BiliBiliSearchIE, | ||||
|     BilibiliCategoryIE, | ||||
|   | ||||
| @@ -18,6 +18,7 @@ from ..utils import ( | ||||
|     float_or_none, | ||||
|     format_field, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     make_archive_id, | ||||
|     merge_dicts, | ||||
|     mimetype2ext, | ||||
| @@ -135,6 +136,17 @@ class BilibiliBaseIE(InfoExtractor): | ||||
|         for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))): | ||||
|             yield from children | ||||
| 
 | ||||
|     def _get_episodes_from_season(self, ss_id, url): | ||||
|         season_info = self._download_json( | ||||
|             'https://api.bilibili.com/pgc/web/season/section', ss_id, | ||||
|             note='Downloading season info', query={'season_id': ss_id}, | ||||
|             headers={'Referer': url, **self.geo_verification_headers()}) | ||||
| 
 | ||||
|         for entry in traverse_obj(season_info, ( | ||||
|                 'result', 'main_section', 'episodes', | ||||
|                 lambda _, v: url_or_none(v['share_url']) and v['id'])): | ||||
|             yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}') | ||||
| 
 | ||||
| 
 | ||||
| class BiliBiliIE(BilibiliBaseIE): | ||||
|     _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' | ||||
| @@ -403,76 +415,93 @@ class BiliBiliIE(BilibiliBaseIE): | ||||
| 
 | ||||
| 
 | ||||
| class BiliBiliBangumiIE(BilibiliBaseIE): | ||||
|     _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)' | ||||
| 
 | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bilibili.com/bangumi/play/ss897', | ||||
|         'url': 'https://www.bilibili.com/bangumi/play/ep267851', | ||||
|         'info_dict': { | ||||
|             'id': 'ss897', | ||||
|             'id': '267851', | ||||
|             'ext': 'mp4', | ||||
|             'series': '神的记事本', | ||||
|             'season': '神的记事本', | ||||
|             'season_id': 897, | ||||
|             'series': '鬼灭之刃', | ||||
|             'series_id': '4358', | ||||
|             'season': '鬼灭之刃', | ||||
|             'season_id': '26801', | ||||
|             'season_number': 1, | ||||
|             'episode': '你与旅行包', | ||||
|             'episode_number': 2, | ||||
|             'title': '神的记事本:第2话 你与旅行包', | ||||
|             'duration': 1428.487, | ||||
|             'timestamp': 1310809380, | ||||
|             'upload_date': '20110716', | ||||
|             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', | ||||
|             'episode': '残酷', | ||||
|             'episode_id': '267851', | ||||
|             'episode_number': 1, | ||||
|             'title': '1 残酷', | ||||
|             'duration': 1425.256, | ||||
|             'timestamp': 1554566400, | ||||
|             'upload_date': '20190406', | ||||
|             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://www.bilibili.com/bangumi/play/ep508406', | ||||
|         'only_matching': True, | ||||
|         'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.' | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         episode_id = video_id[2:] | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
| 
 | ||||
|         if '您所在的地区无法观看本片' in webpage: | ||||
|             raise GeoRestrictedError('This video is restricted') | ||||
|         elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage | ||||
|                 or '正在观看预览,大会员免费看全片' in webpage): | ||||
|         elif '正在观看预览,大会员免费看全片' in webpage: | ||||
|             self.raise_login_required('This video is for premium members only') | ||||
| 
 | ||||
|         play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] | ||||
|         headers = {'Referer': url, **self.geo_verification_headers()} | ||||
|         play_info = self._download_json( | ||||
|             'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id, | ||||
|             'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, | ||||
|             headers=headers) | ||||
|         premium_only = play_info.get('code') == -10403 | ||||
|         play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} | ||||
| 
 | ||||
|         formats = self.extract_formats(play_info) | ||||
|         if (not formats and '成为大会员抢先看' in webpage | ||||
|                 and play_info.get('durl') and not play_info.get('dash')): | ||||
|         if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage): | ||||
|             self.raise_login_required('This video is for premium members only') | ||||
| 
 | ||||
|         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) | ||||
|         bangumi_info = self._download_json( | ||||
|             'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details', | ||||
|             query={'ep_id': episode_id}, headers=headers)['result'] | ||||
| 
 | ||||
|         season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id')) | ||||
|         episode_number, episode_info = next(( | ||||
|             (idx, ep) for idx, ep in enumerate(traverse_obj( | ||||
|                 bangumi_info, ('episodes', ..., {dict})), 1) | ||||
|             if str_or_none(ep.get('id')) == episode_id), (1, {})) | ||||
| 
 | ||||
|         season_id = bangumi_info.get('season_id') | ||||
|         season_number = season_id and next(( | ||||
|             idx + 1 for idx, e in enumerate( | ||||
|                 traverse_obj(initial_state, ('mediaInfo', 'seasons', ...))) | ||||
|                 traverse_obj(bangumi_info, ('seasons', ...))) | ||||
|             if e.get('season_id') == season_id | ||||
|         ), None) | ||||
| 
 | ||||
|         aid = episode_info.get('aid') | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': traverse_obj(initial_state, 'h1Title'), | ||||
|             'episode': traverse_obj(initial_state, ('epInfo', 'long_title')), | ||||
|             'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))), | ||||
|             'series': traverse_obj(initial_state, ('mediaInfo', 'series')), | ||||
|             'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')), | ||||
|             'season_id': season_id, | ||||
|             **traverse_obj(bangumi_info, { | ||||
|                 'series': ('series', 'series_title', {str}), | ||||
|                 'series_id': ('series', 'series_id', {str_or_none}), | ||||
|                 'thumbnail': ('square_cover', {url_or_none}), | ||||
|             }), | ||||
|             'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info), | ||||
|             'episode': episode_info.get('long_title'), | ||||
|             'episode_id': episode_id, | ||||
|             'episode_number': int_or_none(episode_info.get('title')) or episode_number, | ||||
|             'season_id': str_or_none(season_id), | ||||
|             'season_number': season_number, | ||||
|             'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')), | ||||
|             'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')), | ||||
|             'timestamp': int_or_none(episode_info.get('pub_time')), | ||||
|             'duration': float_or_none(play_info.get('timelength'), scale=1000), | ||||
|             'subtitles': self.extract_subtitles( | ||||
|                 video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))), | ||||
|             '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))), | ||||
|             'http_headers': {'Referer': url, **self.geo_verification_headers()}, | ||||
|             'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')), | ||||
|             '__post_extractor': self.extract_comments(aid), | ||||
|             'http_headers': headers, | ||||
|         } | ||||
| 
 | ||||
| 
 | ||||
| class BiliBiliBangumiMediaIE(InfoExtractor): | ||||
| class BiliBiliBangumiMediaIE(BilibiliBaseIE): | ||||
|     _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bilibili.com/bangumi/media/md24097891', | ||||
| @@ -485,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         media_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, media_id) | ||||
|         ss_id = self._search_json( | ||||
|             r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id'] | ||||
| 
 | ||||
|         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id) | ||||
|         episode_list = self._download_json( | ||||
|             'https://api.bilibili.com/pgc/web/season/section', media_id, | ||||
|             query={'season_id': initial_state['mediaInfo']['season_id']}, | ||||
|             note='Downloading season info')['result']['main_section']['episodes'] | ||||
|         return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id) | ||||
| 
 | ||||
|         return self.playlist_result(( | ||||
|             self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid']) | ||||
|             for entry in episode_list), media_id) | ||||
| 
 | ||||
| class BiliBiliBangumiSeasonIE(BilibiliBaseIE): | ||||
|     _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://www.bilibili.com/bangumi/play/ss26801', | ||||
|         'info_dict': { | ||||
|             'id': '26801' | ||||
|         }, | ||||
|         'playlist_mincount': 26 | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         ss_id = self._match_id(url) | ||||
| 
 | ||||
|         return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id) | ||||
| 
 | ||||
| 
 | ||||
| class BilibiliSpaceBaseIE(InfoExtractor): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 GD-Slime
					GD-Slime