mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[ie/BiliBiliBangumi] Fix extractors (#7337)
- Overhaul BiliBiliBangumi extractor for the site's new API
- Add BiliBiliBangumiSeason extractor
- Refactor BiliBiliBangumiMedia extractor

Closes #6701, Closes #7400
Authored by: GD-Slime
This commit is contained in:
		| @@ -214,6 +214,7 @@ from .bild import BildIE | |||||||
| from .bilibili import ( | from .bilibili import ( | ||||||
|     BiliBiliIE, |     BiliBiliIE, | ||||||
|     BiliBiliBangumiIE, |     BiliBiliBangumiIE, | ||||||
|  |     BiliBiliBangumiSeasonIE, | ||||||
|     BiliBiliBangumiMediaIE, |     BiliBiliBangumiMediaIE, | ||||||
|     BiliBiliSearchIE, |     BiliBiliSearchIE, | ||||||
|     BilibiliCategoryIE, |     BilibiliCategoryIE, | ||||||
|   | |||||||
| @@ -18,6 +18,7 @@ from ..utils import ( | |||||||
|     float_or_none, |     float_or_none, | ||||||
|     format_field, |     format_field, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     join_nonempty, | ||||||
|     make_archive_id, |     make_archive_id, | ||||||
|     merge_dicts, |     merge_dicts, | ||||||
|     mimetype2ext, |     mimetype2ext, | ||||||
| @@ -135,6 +136,17 @@ class BilibiliBaseIE(InfoExtractor): | |||||||
|         for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))): |         for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))): | ||||||
|             yield from children |             yield from children | ||||||
| 
 | 
 | ||||||
|  |     def _get_episodes_from_season(self, ss_id, url): | ||||||
|  |         season_info = self._download_json( | ||||||
|  |             'https://api.bilibili.com/pgc/web/season/section', ss_id, | ||||||
|  |             note='Downloading season info', query={'season_id': ss_id}, | ||||||
|  |             headers={'Referer': url, **self.geo_verification_headers()}) | ||||||
|  | 
 | ||||||
|  |         for entry in traverse_obj(season_info, ( | ||||||
|  |                 'result', 'main_section', 'episodes', | ||||||
|  |                 lambda _, v: url_or_none(v['share_url']) and v['id'])): | ||||||
|  |             yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}') | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class BiliBiliIE(BilibiliBaseIE): | class BiliBiliIE(BilibiliBaseIE): | ||||||
|     _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' |     _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' | ||||||
| @@ -403,76 +415,93 @@ class BiliBiliIE(BilibiliBaseIE): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class BiliBiliBangumiIE(BilibiliBaseIE): | class BiliBiliBangumiIE(BilibiliBaseIE): | ||||||
|     _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)' |     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)' | ||||||
| 
 | 
 | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.bilibili.com/bangumi/play/ss897', |         'url': 'https://www.bilibili.com/bangumi/play/ep267851', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': 'ss897', |             'id': '267851', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'series': '神的记事本', |             'series': '鬼灭之刃', | ||||||
|             'season': '神的记事本', |             'series_id': '4358', | ||||||
|             'season_id': 897, |             'season': '鬼灭之刃', | ||||||
|  |             'season_id': '26801', | ||||||
|             'season_number': 1, |             'season_number': 1, | ||||||
|             'episode': '你与旅行包', |             'episode': '残酷', | ||||||
|             'episode_number': 2, |             'episode_id': '267851', | ||||||
|             'title': '神的记事本:第2话 你与旅行包', |             'episode_number': 1, | ||||||
|             'duration': 1428.487, |             'title': '1 残酷', | ||||||
|             'timestamp': 1310809380, |             'duration': 1425.256, | ||||||
|             'upload_date': '20110716', |             'timestamp': 1554566400, | ||||||
|             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', |             'upload_date': '20190406', | ||||||
|  |             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' | ||||||
|         }, |         }, | ||||||
|     }, { |         'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.' | ||||||
|         'url': 'https://www.bilibili.com/bangumi/play/ep508406', |  | ||||||
|         'only_matching': True, |  | ||||||
|     }] |     }] | ||||||
| 
 | 
 | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |         episode_id = video_id[2:] | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
| 
 | 
 | ||||||
|         if '您所在的地区无法观看本片' in webpage: |         if '您所在的地区无法观看本片' in webpage: | ||||||
|             raise GeoRestrictedError('This video is restricted') |             raise GeoRestrictedError('This video is restricted') | ||||||
|         elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage |         elif '正在观看预览,大会员免费看全片' in webpage: | ||||||
|                 or '正在观看预览,大会员免费看全片' in webpage): |  | ||||||
|             self.raise_login_required('This video is for premium members only') |             self.raise_login_required('This video is for premium members only') | ||||||
| 
 | 
 | ||||||
|         play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] |         headers = {'Referer': url, **self.geo_verification_headers()} | ||||||
|  |         play_info = self._download_json( | ||||||
|  |             'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id, | ||||||
|  |             'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, | ||||||
|  |             headers=headers) | ||||||
|  |         premium_only = play_info.get('code') == -10403 | ||||||
|  |         play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} | ||||||
|  | 
 | ||||||
|         formats = self.extract_formats(play_info) |         formats = self.extract_formats(play_info) | ||||||
|         if (not formats and '成为大会员抢先看' in webpage |         if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage): | ||||||
|                 and play_info.get('durl') and not play_info.get('dash')): |  | ||||||
|             self.raise_login_required('This video is for premium members only') |             self.raise_login_required('This video is for premium members only') | ||||||
| 
 | 
 | ||||||
|         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) |         bangumi_info = self._download_json( | ||||||
|  |             'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details', | ||||||
|  |             query={'ep_id': episode_id}, headers=headers)['result'] | ||||||
| 
 | 
 | ||||||
|         season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id')) |         episode_number, episode_info = next(( | ||||||
|  |             (idx, ep) for idx, ep in enumerate(traverse_obj( | ||||||
|  |                 bangumi_info, ('episodes', ..., {dict})), 1) | ||||||
|  |             if str_or_none(ep.get('id')) == episode_id), (1, {})) | ||||||
|  | 
 | ||||||
|  |         season_id = bangumi_info.get('season_id') | ||||||
|         season_number = season_id and next(( |         season_number = season_id and next(( | ||||||
|             idx + 1 for idx, e in enumerate( |             idx + 1 for idx, e in enumerate( | ||||||
|                 traverse_obj(initial_state, ('mediaInfo', 'seasons', ...))) |                 traverse_obj(bangumi_info, ('seasons', ...))) | ||||||
|             if e.get('season_id') == season_id |             if e.get('season_id') == season_id | ||||||
|         ), None) |         ), None) | ||||||
| 
 | 
 | ||||||
|  |         aid = episode_info.get('aid') | ||||||
|  | 
 | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'title': traverse_obj(initial_state, 'h1Title'), |             **traverse_obj(bangumi_info, { | ||||||
|             'episode': traverse_obj(initial_state, ('epInfo', 'long_title')), |                 'series': ('series', 'series_title', {str}), | ||||||
|             'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))), |                 'series_id': ('series', 'series_id', {str_or_none}), | ||||||
|             'series': traverse_obj(initial_state, ('mediaInfo', 'series')), |                 'thumbnail': ('square_cover', {url_or_none}), | ||||||
|             'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')), |             }), | ||||||
|             'season_id': season_id, |             'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info), | ||||||
|  |             'episode': episode_info.get('long_title'), | ||||||
|  |             'episode_id': episode_id, | ||||||
|  |             'episode_number': int_or_none(episode_info.get('title')) or episode_number, | ||||||
|  |             'season_id': str_or_none(season_id), | ||||||
|             'season_number': season_number, |             'season_number': season_number, | ||||||
|             'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')), |             'timestamp': int_or_none(episode_info.get('pub_time')), | ||||||
|             'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')), |  | ||||||
|             'duration': float_or_none(play_info.get('timelength'), scale=1000), |             'duration': float_or_none(play_info.get('timelength'), scale=1000), | ||||||
|             'subtitles': self.extract_subtitles( |             'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')), | ||||||
|                 video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))), |             '__post_extractor': self.extract_comments(aid), | ||||||
|             '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))), |             'http_headers': headers, | ||||||
|             'http_headers': {'Referer': url, **self.geo_verification_headers()}, |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class BiliBiliBangumiMediaIE(InfoExtractor): | class BiliBiliBangumiMediaIE(BilibiliBaseIE): | ||||||
|     _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)' |     _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.bilibili.com/bangumi/media/md24097891', |         'url': 'https://www.bilibili.com/bangumi/media/md24097891', | ||||||
| @@ -485,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         media_id = self._match_id(url) |         media_id = self._match_id(url) | ||||||
|         webpage = self._download_webpage(url, media_id) |         webpage = self._download_webpage(url, media_id) | ||||||
|  |         ss_id = self._search_json( | ||||||
|  |             r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id'] | ||||||
| 
 | 
 | ||||||
|         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id) |         return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id) | ||||||
|         episode_list = self._download_json( |  | ||||||
|             'https://api.bilibili.com/pgc/web/season/section', media_id, |  | ||||||
|             query={'season_id': initial_state['mediaInfo']['season_id']}, |  | ||||||
|             note='Downloading season info')['result']['main_section']['episodes'] |  | ||||||
| 
 | 
 | ||||||
|         return self.playlist_result(( | 
 | ||||||
|             self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid']) | class BiliBiliBangumiSeasonIE(BilibiliBaseIE): | ||||||
|             for entry in episode_list), media_id) |     _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)' | ||||||
|  |     _TESTS = [{ | ||||||
|  |         'url': 'https://www.bilibili.com/bangumi/play/ss26801', | ||||||
|  |         'info_dict': { | ||||||
|  |             'id': '26801' | ||||||
|  |         }, | ||||||
|  |         'playlist_mincount': 26 | ||||||
|  |     }] | ||||||
|  | 
 | ||||||
|  |     def _real_extract(self, url): | ||||||
|  |         ss_id = self._match_id(url) | ||||||
|  | 
 | ||||||
|  |         return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class BilibiliSpaceBaseIE(InfoExtractor): | class BilibiliSpaceBaseIE(InfoExtractor): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 GD-Slime
					GD-Slime