1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-17 09:58:29 +00:00

weibo: fix 403 by sending Referer header; fix regex truncating alphanumeric video IDs

This commit is contained in:
AzartX47 2025-08-12 15:05:32 -10:00
parent bdeb3eb3f2
commit 2b9453f357

View File

@ -53,7 +53,6 @@ def _update_visitor_cookies(self, visitor_url, video_id):
}) })
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs): def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
# XXX: Always fatal; _download_webpage_handle only returns False (not a tuple) on error
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs) webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com': if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
self._update_visitor_cookies(urlh.url, video_id) self._update_visitor_cookies(urlh.url, video_id)
@ -188,8 +187,8 @@ class WeiboIE(WeiboBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
headers = {'Referer': 'https://weibo.com/'}
meta = self._weibo_download_json(f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id) meta = self._weibo_download_json(f'https://weibo.com/ajax/statuses/show?id={video_id}', video_id, headers=headers)
mix_media_info = traverse_obj(meta, ('mix_media_info', 'items', ...)) mix_media_info = traverse_obj(meta, ('mix_media_info', 'items', ...))
if not mix_media_info: if not mix_media_info:
return self._parse_video_info(meta) return self._parse_video_info(meta)
@ -205,7 +204,7 @@ def _entries(self, mix_media_info):
class WeiboVideoIE(WeiboBaseIE): class WeiboVideoIE(WeiboBaseIE):
_VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:\d+)' _VALID_URL = r'https?://(?:www\.)?weibo\.com/tv/show/(?P<id>\d+:[0-9A-Za-z]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow', 'url': 'https://weibo.com/tv/show/1034:4797699866951785?from=old_pc_videoshow',
'info_dict': { 'info_dict': {
@ -253,10 +252,11 @@ class WeiboUserIE(WeiboBaseIE):
}] }]
def _fetch_page(self, uid, cursor=0, page=1): def _fetch_page(self, uid, cursor=0, page=1):
headers = {'Referer': 'https://weibo.com/'}
return self._weibo_download_json( return self._weibo_download_json(
'https://weibo.com/ajax/profile/getWaterFallContent', 'https://weibo.com/ajax/profile/getWaterFallContent',
uid, note=f'Downloading videos page {page}', uid, note=f'Downloading videos page {page}',
query={'uid': uid, 'cursor': cursor})['data'] query={'uid': uid, 'cursor': cursor}, headers=headers)['data']
def _entries(self, uid, first_page): def _entries(self, uid, first_page):
cursor = 0 cursor = 0