From bcdba198103b60171ce91c6d556694a7b5e096d8 Mon Sep 17 00:00:00 2001 From: _Grqz <173015200+grqz@users.noreply.github.com> Date: Mon, 14 Jul 2025 17:51:16 +1200 Subject: [PATCH 1/8] Apply patch --- yt_dlp/extractor/bilibili.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 0c6535fc72..3c1e57baa4 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -720,13 +720,15 @@ def _real_extract(self, url): duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), __post_extractor=self.extract_comments(aid)) - play_info = None - if self.is_logged_in: - play_info = traverse_obj( - self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None), - ('data', {dict})) - if not play_info: - play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}) + play_info = traverse_obj( + self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None), + ('data', {dict})) + if not self.is_logged_in or not play_info: + dl_play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}) + if not dl_play_info.get('v_voucher'): + play_info = dl_play_info + else: + self.report_warning('Failed to download play info, falling back to the playinfo embedded in the webpage.', video_id=video_id) formats = self.extract_formats(play_info) if video_data.get('is_upower_exclusive'): From d93663451dc45f1d4e8d200ab23b535bffaa913d Mon Sep 17 00:00:00 2001 From: _Grqz <173015200+grqz@users.noreply.github.com> Date: Mon, 14 Jul 2025 22:11:19 +1200 Subject: [PATCH 2/8] raise early on playinfo unavailable --- yt_dlp/extractor/bilibili.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 3c1e57baa4..668d80296e 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -175,9 +175,14 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): else: note = f'Downloading video formats for cid {cid}' - return self._download_json( + playurl_raw = self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, - query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] + query=self._sign_wbi(params, bvid), headers=headers, note=note) + if playurl_raw.get('v_voucher'): + return playurl_raw['data'] + else: + self.report_warning('Received a captcha from Bilibili while downloading play info') + return None def json2srt(self, json_data): srt_data = '' @@ -724,11 +729,10 @@ def _real_extract(self, url): self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None), ('data', {dict})) if not self.is_logged_in or not play_info: - dl_play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}) - if not dl_play_info.get('v_voucher'): + if dl_play_info := self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}): play_info = dl_play_info - else: - self.report_warning('Failed to download play info, falling back to the playinfo embedded in the webpage.', video_id=video_id) + if not play_info: + raise ExtractorError('Unable to download play info') formats = self.extract_formats(play_info) if video_data.get('is_upower_exclusive'): From 92173d7bc9880eebb8b9577736fb58226d31a05f Mon Sep 17 00:00:00 2001 From: _Grqz <173015200+grqz@users.noreply.github.com> Date: Tue, 15 Jul 2025 14:44:05 +1200 Subject: [PATCH 3/8] add dm params --- yt_dlp/extractor/bilibili.py | 53 ++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 668d80296e..7f7eebdd9c 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -166,6 +166,48 @@ def _sign_wbi(self, params, video_id): params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() return params + @staticmethod + @functools.cache + def __screen_dimensions(): + DIMENSIONS = [ + ((1920, 1080), 18), + ((1366, 768), 18), + ((1536, 864), 17), + ((1280, 720), 8), + ((2560, 1440), 7), + ((1440, 900), 5), + ((1600, 900), 5), + ] + dims = [dim for dim, _ in DIMENSIONS] + prefs = [pref for _, pref in DIMENSIONS] + return random.choices(dims, weights=prefs)[0] + + @property + def _dm_params(self): + def get_wh(width=1920, height=1080): + # return [6093, 6631, 31] + res0, res1 = width, height + rnd = math.floor(114 * random.random()) + return [2 * res0 + 2 * res1 + 3 * rnd, 4 * res0 - res1 + rnd, rnd] + + def get_of(scroll_top=10, scroll_left=10): + # return [430, 760, 380] + res0, res1 = scroll_top, scroll_left + rnd = math.floor(514 * random.random()) + return [3 * res0 + 2 * res1 + rnd, 4 * res0 - 4 * res1 + 2 * rnd, rnd] + + # .dm_img_list and .dm_img_inter.ds are more troublesome as user interactions are involved. + # Leave them empty for now as the site isn't checking them yet. + # Reference: https://github.com/SocialSisterYi/bilibili-API-collect/issues/868#issuecomment-1908690516 + return { + 'dm_img_list': '[]', + 'dm_img_str': base64.b64encode( + ''.join(random.choices(string.printable, k=random.randint(16, 64))).encode())[:-2].decode(), + 'dm_cover_img_str': base64.b64encode( + ''.join(random.choices(string.printable, k=random.randint(32, 128))).encode())[:-2].decode(), + 'dm_img_inter': {'ds': [], 'wh': get_wh(*self.__screen_dimensions()), 'of': get_of(random.randint(0, 100), 0)}, + } + def _download_playinfo(self, bvid, cid, headers=None, query=None): params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})} if self.is_logged_in: @@ -175,14 +217,15 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): else: note = f'Downloading video formats for cid {cid}' - playurl_raw = self._download_json( + playurl_data = self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, - query=self._sign_wbi(params, bvid), headers=headers, note=note) - if playurl_raw.get('v_voucher'): - return playurl_raw['data'] - else: + query=self._sign_wbi(merge_dicts(params, self._dm_params), bvid), headers=headers, note=note)['data'] + if playurl_data.get('v_voucher'): self.report_warning('Received a captcha from Bilibili while downloading play info') + self.write_debug(playurl_data) return None + else: + return playurl_data def json2srt(self, json_data): srt_data = '' From 8385332556916966e34223631304149f4be4b1ee Mon Sep 17 00:00:00 2001 From: _Grqz <173015200+grqz@users.noreply.github.com> Date: Tue, 15 Jul 2025 14:47:28 +1200 Subject: [PATCH 4/8] rename --- yt_dlp/extractor/bilibili.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 7f7eebdd9c..7aece2d5dd 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -169,7 +169,7 @@ def _sign_wbi(self, params, video_id): @staticmethod @functools.cache def __screen_dimensions(): - DIMENSIONS = [ + DIMS_AND_PREFS = [ ((1920, 1080), 18), ((1366, 768), 18), ((1536, 864), 17), @@ -178,8 +178,8 @@ def __screen_dimensions(): ((1440, 900), 5), ((1600, 900), 5), ] - dims = [dim for dim, _ in DIMENSIONS] - prefs = [pref for _, pref in DIMENSIONS] + dims = [dim for dim, _ in DIMS_AND_PREFS] + prefs = [pref for _, pref in DIMS_AND_PREFS] return random.choices(dims, weights=prefs)[0] @property From 43c7d5077ff2bfd7e3e975562adc1a1c2b5c98e1 Mon Sep 17 00:00:00 2001 From: _Grqz <173015200+grqz@users.noreply.github.com> Date: Tue, 15 Jul 2025 14:50:22 +1200 Subject: [PATCH 5/8] remove write_debug --- yt_dlp/extractor/bilibili.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 7aece2d5dd..2d99e4573b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -222,7 +222,6 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): query=self._sign_wbi(merge_dicts(params, self._dm_params), bvid), headers=headers, note=note)['data'] if playurl_data.get('v_voucher'): self.report_warning('Received a captcha from Bilibili while downloading play info') - self.write_debug(playurl_data) return None else: return playurl_data From 128a73d21b84228db014900ba47f246c89297bf3 Mon Sep 17 00:00:00 2001 From: _Grqz <173015200+grqz@users.noreply.github.com> Date: Tue, 15 Jul 2025 16:49:32 +1200 Subject: [PATCH 6/8] Fix dm params, apply suggestions from code review Co-authored-by: doe1080 <98906116+doe1080@users.noreply.github.com> --- yt_dlp/extractor/bilibili.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2d99e4573b..4f9dd0e96e 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -169,7 +169,7 @@ def _sign_wbi(self, params, video_id): @staticmethod @functools.cache def __screen_dimensions(): - DIMS_AND_PREFS = [ + dims, prefs = zip( ((1920, 1080), 18), ((1366, 768), 18), ((1536, 864), 17), @@ -177,21 +177,17 @@ def __screen_dimensions(): ((2560, 1440), 7), ((1440, 900), 5), ((1600, 900), 5), - ] - dims = [dim for dim, _ in DIMS_AND_PREFS] - prefs = [pref for _, pref in DIMS_AND_PREFS] + ) return random.choices(dims, weights=prefs)[0] @property def _dm_params(self): def get_wh(width=1920, height=1080): - # return [6093, 6631, 31] res0, res1 = width, height rnd = math.floor(114 * random.random()) return [2 * res0 + 2 * res1 + 3 * rnd, 4 * res0 - res1 + rnd, rnd] def get_of(scroll_top=10, scroll_left=10): - # return [430, 760, 380] res0, res1 = scroll_top, scroll_left rnd = math.floor(514 * random.random()) return [3 * res0 + 2 * res1 + rnd, 4 * res0 - 4 * res1 + 2 * rnd, rnd] @@ -205,7 +201,8 @@ def get_of(scroll_top=10, scroll_left=10): ''.join(random.choices(string.printable, k=random.randint(16, 64))).encode())[:-2].decode(), 'dm_cover_img_str': base64.b64encode( ''.join(random.choices(string.printable, k=random.randint(32, 128))).encode())[:-2].decode(), - 'dm_img_inter': {'ds': [], 'wh': get_wh(*self.__screen_dimensions()), 'of': get_of(random.randint(0, 100), 0)}, + # Bilibili expects dm_img_inter to be a compact JSON (without spaces) + 'dm_img_inter': json.dumps({'ds': [], 'wh': get_wh(*self.__screen_dimensions()), 'of': get_of(random.randint(0, 100), 0)}).replace(' ', ''), } def _download_playinfo(self, bvid, cid, headers=None, query=None): @@ -1314,13 +1311,8 @@ def fetch_page(page_idx): 'pn': page_idx + 1, 'ps': 30, 'tid': 0, - 'web_location': 1550101, - 'dm_img_list': '[]', - 'dm_img_str': base64.b64encode( - ''.join(random.choices(string.printable, k=random.randint(16, 64))).encode())[:-2].decode(), - 'dm_cover_img_str': base64.b64encode( - ''.join(random.choices(string.printable, k=random.randint(32, 128))).encode())[:-2].decode(), - 'dm_img_inter': '{"ds":[],"wh":[6093,6631,31],"of":[430,760,380]}', + 'web_location': '333.1387', + **self._dm_params, } try: From 459ef828576ff73bb9a92150f633c94abf63c33f Mon Sep 17 00:00:00 2001 From: _Grqz <173015200+grqz@users.noreply.github.com> Date: Tue, 15 Jul 2025 17:50:52 +1200 Subject: [PATCH 7/8] better err handling --- yt_dlp/extractor/bilibili.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 4f9dd0e96e..57d45d6317 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -205,7 +205,7 @@ def get_of(scroll_top=10, scroll_left=10): 'dm_img_inter': json.dumps({'ds': [], 'wh': get_wh(*self.__screen_dimensions()), 'of': get_of(random.randint(0, 100), 0)}).replace(' ', ''), } - def _download_playinfo(self, bvid, cid, headers=None, query=None): + def _download_playinfo(self, bvid, cid, headers=None, query=None, fatal=True): params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})} if self.is_logged_in: params.pop('try_look', None) @@ -214,14 +214,23 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None): else: note = f'Downloading video formats for cid {cid}' - playurl_data = self._download_json( + playurl_raw = self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, - query=self._sign_wbi(merge_dicts(params, self._dm_params), bvid), headers=headers, note=note)['data'] - if playurl_data.get('v_voucher'): - self.report_warning('Received a captcha from Bilibili while downloading play info') - return None + query=self._sign_wbi(merge_dicts(params, self._dm_params), bvid), headers=headers, note=note) + code = -playurl_raw['code'] + if code == 0: + return playurl_raw['data'] else: - return playurl_data + breakpoint() + err_desc = playurl_raw['message'] + msg = f'Unable to download video info({code}: {err_desc})' + expected = code in (401, 352) + if expected: + msg += ', please wait and try later' + if fatal: + raise ExtractorError(msg, expected=expected) + else: + self.report_warning(msg) def json2srt(self, json_data): srt_data = '' @@ -768,7 +777,7 @@ def _real_extract(self, url): self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None), ('data', {dict})) if not self.is_logged_in or not play_info: - if dl_play_info := self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}): + if dl_play_info := self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}, fatal=False): play_info = dl_play_info if not play_info: raise ExtractorError('Unable to download play info') From d1de068ea1032352b6f3977da3ac017463a5d49d Mon Sep 17 00:00:00 2001 From: _Grqz <173015200+grqz@users.noreply.github.com> Date: Tue, 15 Jul 2025 22:45:37 +1200 Subject: [PATCH 8/8] fix bilibilispacevideo --- yt_dlp/extractor/bilibili.py | 50 +++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 57d45d6317..82390a84c6 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -25,8 +25,10 @@ float_or_none, format_field, get_element_by_class, + get_element_by_id, int_or_none, join_nonempty, + jwt_decode_hs256, make_archive_id, merge_dicts, mimetype2ext, @@ -51,6 +53,7 @@ class BilibiliBaseIE(InfoExtractor): _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?') _WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session _wbi_key_cache = {} + _W_WEBID = None @property def is_logged_in(self): @@ -166,6 +169,31 @@ def _sign_wbi(self, params, video_id): params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() return params + @staticmethod + def _validate_w_webid(w_webid): + if not w_webid: + return False + decoded = jwt_decode_hs256(w_webid) + created_at, ttl = decoded.get('created_at'), decoded.get('ttl') + if not isinstance(created_at, int) or not isinstance(ttl, int): + return False + return time.time() < created_at + ttl + + def _get_w_webid(self, url, video_id): + if self._W_WEBID and self._validate_w_webid(self._W_WEBID): + return self._W_WEBID + + self._W_WEBID = self.cache.load(self.ie_key(), 'w_webid') + if self._W_WEBID and self._validate_w_webid(self._W_WEBID): + return self._W_WEBID + webpage = self._download_webpage(url, video_id) + render_data = get_element_by_id('__RENDER_DATA__', webpage) + self._W_WEBID = traverse_obj(render_data, ({urllib.parse.unquote}, {json.loads}, 'access_id')) + if self._W_WEBID and self._validate_w_webid(self._W_WEBID): + self.cache.store(self.ie_key(), 'w_webid', self._W_WEBID) + return self._W_WEBID + return None + @staticmethod @functools.cache def __screen_dimensions(): @@ -202,7 +230,7 @@ def get_of(scroll_top=10, scroll_left=10): 'dm_cover_img_str': base64.b64encode( ''.join(random.choices(string.printable, k=random.randint(32, 128))).encode())[:-2].decode(), # Bilibili expects dm_img_inter to be a compact JSON (without spaces) - 'dm_img_inter': json.dumps({'ds': [], 'wh': get_wh(*self.__screen_dimensions()), 'of': get_of(random.randint(0, 100), 0)}).replace(' ', ''), + 'dm_img_inter': json.dumps({'ds': [], 'wh': get_wh(*self.__screen_dimensions()), 'of': get_of(random.randint(0, 100), 0)}, separators=(',', ':')), } def _download_playinfo(self, bvid, cid, headers=None, query=None, fatal=True): @@ -216,12 +244,14 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None, fatal=True): playurl_raw = self._download_json( 'https://api.bilibili.com/x/player/wbi/playurl', bvid, - query=self._sign_wbi(merge_dicts(params, self._dm_params), bvid), headers=headers, note=note) + query=self._sign_wbi(merge_dicts(params, self._dm_params), bvid), headers={ + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36', + **headers, + }, note=note) code = -playurl_raw['code'] if code == 0: return playurl_raw['data'] else: - breakpoint() err_desc = playurl_raw['message'] msg = f'Unable to download video info({code}: {err_desc})' expected = code in (401, 352) @@ -780,7 +810,7 @@ def _real_extract(self, url): if dl_play_info := self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1}, fatal=False): play_info = dl_play_info if not play_info: - raise ExtractorError('Unable to download play info') + raise ExtractorError('Unable to extract or download play info') formats = self.extract_formats(play_info) if video_data.get('is_upower_exclusive'): @@ -1321,14 +1351,22 @@ def fetch_page(page_idx): 'ps': 30, 'tid': 0, 'web_location': '333.1387', + 'special_type': '', + 'index': 0, **self._dm_params, + 'w_webid': self._get_w_webid(url, playlist_id), } try: response = self._download_json( 'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id, query=self._sign_wbi(query, playlist_id), - note=f'Downloading space page {page_idx}', headers={'Referer': url}) + note=f'Downloading space page {page_idx}', headers={ + 'Referer': url, + 'Origin': 'https://space.bilibili.com', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36', + 'Accept-Language': 'en,zh-CN;q=0.9,zh;q=0.8', + }) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 412: raise ExtractorError( @@ -2028,7 +2066,7 @@ def _real_extract(self, url): post_data = self._download_json( 'https://api.bilibili.com/x/polymer/web-dynamic/v1/detail', post_id, query={'id': post_id}, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36', }) video_url = traverse_obj(post_data, ( 'data', 'item', (None, 'orig'), 'modules', 'module_dynamic',