[Bilibili] Add 8k support (#1964)

Closes #1898, #1819
Authored by: u-spec-png
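Bilibili only exposes its 8K (and other high-bitrate DASH) renditions through the JSON embedded in the page as window.__playinfo__, so this change reads the dash.video and dash.audio arrays from that object and emits each entry as a separate format, keeping the old signed playurl API as a fallback. A minimal sketch of the per-entry mapping, assuming the field names visible in the diff below (the helper name and sample data are illustrative only, not part of the extractor):

from yt_dlp.utils import float_or_none, int_or_none, mimetype2ext

def dash_entry_to_format(entry, is_audio=False):
    # One window.__playinfo__ dash entry -> one yt-dlp format dict.
    # Simplified: the actual patch only sets a video entry's acodec to 'none'
    # when separate DASH audio streams exist.
    return {
        'url': entry.get('baseUrl') or entry.get('base_url') or entry.get('url'),
        'ext': mimetype2ext(entry.get('mimeType') or entry.get('mime_type')),
        'fps': int_or_none(entry.get('frameRate') or entry.get('frame_rate')),
        'width': int_or_none(entry.get('width')),
        'height': int_or_none(entry.get('height')),
        'vcodec': 'none' if is_audio else entry.get('codecs'),
        'acodec': entry.get('codecs') if is_audio else 'none',
        'tbr': float_or_none(entry.get('bandwidth'), scale=1000),
        'filesize': int_or_none(entry.get('size')),
    }

# Hypothetical 8K video entry as it might appear in window.__playinfo__:
sample = {'baseUrl': 'https://example.com/video.m4s', 'mimeType': 'video/mp4',
          'codecs': 'hev1.1.6.L153.90', 'bandwidth': 24000000,
          'width': 7680, 'height': 4320, 'frameRate': '30'}
print(dash_entry_to_format(sample))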
@@ -17,9 +17,9 @@ from ..utils import (
    ExtractorError,
    int_or_none,
    float_or_none,
    mimetype2ext,
    parse_iso8601,
    traverse_obj,
    try_get,
    parse_count,
    smuggle_url,
    srt_subtitles_timecode,
@@ -53,15 +53,13 @@ class BiliBiliIE(InfoExtractor):
        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
        'info_dict': {
            'id': '1074402',
            'ext': 'flv',
            'ext': 'mp4',
            'title': '【金坷垃】金泡沫',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'duration': 308.067,
            'timestamp': 1398012678,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'uploader': '菊子桑',
            'upload_date': '20140420',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'timestamp': 1398012678,
        },
    }, {
        # Tested in BiliBiliBangumiIE
@@ -82,42 +80,20 @@ class BiliBiliIE(InfoExtractor):
        },
        'skip': 'Geo-restricted to China',
    }, {
        # Title with double quotes
        'url': 'http://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': '8903802',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'timestamp': 1488382634,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
        },
        'params': {
            'skip_download': True,
        },
        'playlist': [{
            'info_dict': {
                'id': '8903802_part1',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            'info_dict': {
                'id': '8903802_part2',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,
            },
        }]
    }, {
        # new BV video id format
        'url': 'https://www.bilibili.com/video/BV1JE411F741',
@@ -152,6 +128,7 @@ class BiliBiliIE(InfoExtractor):
        av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
        video_id = av_id

        info = {}
        anime_id = mobj.group('anime_id')
        page_id = mobj.group('page')
        webpage = self._download_webpage(url, video_id)
@@ -203,35 +180,48 @@ class BiliBiliIE(InfoExtractor):
        }
        headers.update(self.geo_verification_headers())

        video_info = self._parse_json(
            self._search_regex(r'window.__playinfo__\s*=\s*({.+?})</script>', webpage, 'video info', default=None),
            video_id, fatal=False) or {}
        video_info = video_info.get('data') or {}

        durl = traverse_obj(video_info, ('dash', 'video'))
        audios = traverse_obj(video_info, ('dash', 'audio')) or []
        entries = []

        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
        for num, rendition in enumerate(RENDITIONS, start=1):
            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

            video_info = self._download_json(
                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                video_id, note='Downloading video info page',
                headers=headers, fatal=num == len(RENDITIONS))

            if not video_info:
                continue
                video_info = self._download_json(
                    'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                    video_id, note='Downloading video info page',
                    headers=headers, fatal=num == len(RENDITIONS))
                if not video_info:
                    continue

            if 'durl' not in video_info:
            if not durl and 'durl' not in video_info:
                if num < len(RENDITIONS):
                    continue
                self._report_error(video_info)

            for idx, durl in enumerate(video_info['durl']):
                formats = [{
                    'url': durl['url'],
                    'filesize': int_or_none(durl['size']),
                }]
                for backup_url in durl.get('backup_url', []):
            formats = []
            for idx, durl in enumerate(durl or video_info['durl']):
                formats.append({
                    'url': durl.get('baseUrl') or durl.get('base_url') or durl.get('url'),
                    'ext': mimetype2ext(durl.get('mimeType') or durl.get('mime_type')),
                    'fps': int_or_none(durl.get('frameRate') or durl.get('frame_rate')),
                    'width': int_or_none(durl.get('width')),
                    'height': int_or_none(durl.get('height')),
                    'vcodec': durl.get('codecs'),
                    'acodec': 'none' if audios else None,
                    'tbr': float_or_none(durl.get('bandwidth'), scale=1000),
                    'filesize': int_or_none(durl.get('size')),
                })
                for backup_url in traverse_obj(durl, 'backup_url', expected_type=list) or []:
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'quality': -2 if 'hd.mp4' in backup_url else -3,
                    })

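The fallback branch kept in the hunk above still goes through Bilibili's signed playurl API: the query is assembled from the app key, cid and rendition parameters, and the request is signed with the MD5 of that query string concatenated with the app secret. A standalone sketch of the signing step, using obviously fake key values (the real _APP_KEY and _BILIBILI_KEY live in the extractor):

import hashlib

APP_KEY = 'fake-app-key'          # placeholder, not the extractor's real key
BILIBILI_KEY = 'fake-app-secret'  # placeholder, not the extractor's real secret

def signed_playurl(cid, rendition='qn=80&quality=80&type='):
    # Same construction as the loop above: sign = md5(payload + secret)
    payload = 'appkey=%s&cid=%s&otype=json&%s' % (APP_KEY, cid, rendition)
    sign = hashlib.md5((payload + BILIBILI_KEY).encode('utf-8')).hexdigest()
    return 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign)

print(signed_playurl(1074402))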
@@ -239,30 +229,46 @@ class BiliBiliIE(InfoExtractor):
                    a_format.setdefault('http_headers', {}).update({
                        'Referer': url,
                    })

                self._sort_formats(formats)

                entries.append({
                    'id': '%s_part%s' % (video_id, idx),
                    'duration': float_or_none(durl.get('length'), 1000),
                    'formats': formats,
            for audio in audios:
                formats.append({
                    'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'),
                    'ext': mimetype2ext(audio.get('mimeType') or audio.get('mime_type')),
                    'fps': int_or_none(audio.get('frameRate') or audio.get('frame_rate')),
                    'width': int_or_none(audio.get('width')),
                    'height': int_or_none(audio.get('height')),
                    'acodec': audio.get('codecs'),
                    'vcodec': 'none',
                    'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
                    'filesize': int_or_none(audio.get('size'))
                })
                for backup_url in traverse_obj(audio, 'backup_url', expected_type=list) or []:
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'quality': -3,
                    })

            info.update({
                'id': video_id,
                'duration': float_or_none(durl.get('length'), 1000),
                'formats': formats,
            })
            break

        self._sort_formats(formats)

        title = self._html_search_regex(
            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
            (r'<h1[^>]+title=(["\'])(?P<title>[^"\']+)',
             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
            group='title', fatal=False)

        # Get part title for anthologies
        if page_id is not None:
            # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
            part_title = try_get(
                self._download_json(
                    f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
                    video_id, note='Extracting videos in anthology'),
                lambda x: x['data'][int(page_id) - 1]['part'])
            title = part_title or title
            # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video.
            part_info = traverse_obj(self._download_json(
                f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
                video_id, note='Extracting videos in anthology'), 'data', expected_type=list)
            title = title if len(part_info) == 1 else traverse_obj(part_info, (int(page_id) - 1, 'part')) or title

        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
@@ -272,7 +278,7 @@ class BiliBiliIE(InfoExtractor):
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        info = {
        info.update({
            'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id),
            'cid': cid,
            'title': title,
@@ -280,7 +286,7 @@ class BiliBiliIE(InfoExtractor):
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        }
        })

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
@@ -301,7 +307,7 @@ class BiliBiliIE(InfoExtractor):
                video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')),
        }

        entries[0]['subtitles'] = {
        info['subtitles'] = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
@@ -336,12 +342,10 @@ class BiliBiliIE(InfoExtractor):
            entry['id'] = '%s_part%d' % (video_id, (idx + 1))

        return {
            '_type': 'multi_video',
            'id': str(video_id),
            'bv_id': bv_id,
            'title': title,
            'description': description,
            'entries': entries,
            **info, **top_level_info
        }

@@ -482,9 +486,9 @@ class BilibiliChannelIE(InfoExtractor):
            data = self._download_json(
                self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data']

            max_count = max_count or try_get(data, lambda x: x['page']['count'])
            max_count = max_count or traverse_obj(data, ('page', 'count'))

            entries = try_get(data, lambda x: x['list']['vlist'])
            entries = traverse_obj(data, ('list', 'vlist'))
            if not entries:
                return
            for entry in entries:
@@ -522,7 +526,7 @@ class BilibiliCategoryIE(InfoExtractor):
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note='Extracting results from page %s of %s' % (page_num, num_pages))

        video_list = try_get(parsed_json, lambda x: x['data']['archives'], list)
        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError('Failed to retrieve video list for page %d' % page_num)

@@ -552,7 +556,7 @@ class BilibiliCategoryIE(InfoExtractor):

        api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        page_data = try_get(page_json, lambda x: x['data']['page'], dict)
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')
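The remaining hunks swap try_get with a lambda for traverse_obj with a key path; both return None when an intermediate key is missing, but the path form is shorter and can filter by type via expected_type. A small self-contained illustration with made-up data:

from yt_dlp.utils import traverse_obj, try_get

data = {'page': {'count': 42}, 'list': {}}

# Old style: explicit lambda, exceptions swallowed by try_get
count_old = try_get(data, lambda x: x['page']['count'])

# New style: declarative key path, optional expected_type check
count_new = traverse_obj(data, ('page', 'count'))
vlist = traverse_obj(data, ('list', 'vlist'), expected_type=list)  # missing -> None

assert count_old == count_new == 42 and vlist is None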