mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	| @@ -947,12 +947,11 @@ class BiliIntlIE(BiliIntlBaseIE): | |||||||
|         video_id = ep_id or aid |         video_id = ep_id or aid | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         # Bstation layout |         # Bstation layout | ||||||
|         initial_data = self._parse_json(self._search_regex( |         initial_data = ( | ||||||
|             r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage, |             self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={}) | ||||||
|             'preload state', default='{}'), video_id, fatal=False) or {} |             or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None)) | ||||||
|         video_data = ( |         video_data = traverse_obj( | ||||||
|             traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict) |             initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) | ||||||
|             or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {}) |  | ||||||
| 
 | 
 | ||||||
|         if season_id and not video_data: |         if season_id and not video_data: | ||||||
|             # Non-Bstation layout, read through episode list |             # Non-Bstation layout, read through episode list | ||||||
| @@ -960,7 +959,7 @@ class BiliIntlIE(BiliIntlBaseIE): | |||||||
|             video_data = traverse_obj(season_json, |             video_data = traverse_obj(season_json, | ||||||
|                                       ('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), |                                       ('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), | ||||||
|                                       expected_type=dict, get_all=False) |                                       expected_type=dict, get_all=False) | ||||||
|         return self._extract_video_info(video_data, ep_id=ep_id, aid=aid) |         return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class BiliIntlSeriesIE(BiliIntlBaseIE): | class BiliIntlSeriesIE(BiliIntlBaseIE): | ||||||
|   | |||||||
| @@ -1588,15 +1588,13 @@ class InfoExtractor: | |||||||
|                 webpage, 'next.js data', fatal=fatal, **kw), |                 webpage, 'next.js data', fatal=fatal, **kw), | ||||||
|             video_id, transform_source=transform_source, fatal=fatal) |             video_id, transform_source=transform_source, fatal=fatal) | ||||||
| 
 | 
 | ||||||
|     def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', return_full_data=False): |     def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): | ||||||
|         ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. ''' |         """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" | ||||||
|         # not all website do this, but it can be changed |  | ||||||
|         # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source |  | ||||||
|         rectx = re.escape(context_name) |         rectx = re.escape(context_name) | ||||||
|  |         FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)' | ||||||
|         js, arg_keys, arg_vals = self._search_regex( |         js, arg_keys, arg_vals = self._search_regex( | ||||||
|             (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx, |             (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'), | ||||||
|              r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx), |             webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal) | ||||||
|             webpage, context_name, group=['js', 'arg_keys', 'arg_vals']) |  | ||||||
| 
 | 
 | ||||||
|         args = dict(zip(arg_keys.split(','), arg_vals.split(','))) |         args = dict(zip(arg_keys.split(','), arg_vals.split(','))) | ||||||
| 
 | 
 | ||||||
| @@ -1604,10 +1602,8 @@ class InfoExtractor: | |||||||
|             if val in ('undefined', 'void 0'): |             if val in ('undefined', 'void 0'): | ||||||
|                 args[key] = 'null' |                 args[key] = 'null' | ||||||
| 
 | 
 | ||||||
|         ret = self._parse_json(js_to_json(js, args), video_id) |         ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal) | ||||||
|         if return_full_data: |         return traverse_obj(ret, traverse) or {} | ||||||
|             return ret |  | ||||||
|         return ret['data'][0] |  | ||||||
| 
 | 
 | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def _hidden_inputs(html): |     def _hidden_inputs(html): | ||||||
|   | |||||||
| @@ -1,8 +1,5 @@ | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import traverse_obj, unified_timestamp | ||||||
|     traverse_obj, |  | ||||||
|     unified_timestamp, |  | ||||||
| ) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class FourZeroStudioArchiveIE(InfoExtractor): | class FourZeroStudioArchiveIE(InfoExtractor): | ||||||
| @@ -25,7 +22,7 @@ class FourZeroStudioArchiveIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') |         video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         nuxt_data = self._search_nuxt_data(webpage, video_id, return_full_data=True) |         nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None) | ||||||
| 
 | 
 | ||||||
|         pcb = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'), get_all=False) |         pcb = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'), get_all=False) | ||||||
|         uploader_internal_id = traverse_obj(nuxt_data, ( |         uploader_internal_id = traverse_obj(nuxt_data, ( | ||||||
| @@ -82,7 +79,7 @@ class FourZeroStudioClipIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') |         video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         nuxt_data = self._search_nuxt_data(webpage, video_id, return_full_data=True) |         nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None) | ||||||
| 
 | 
 | ||||||
|         clip_info = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'), get_all=False) |         clip_info = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'), get_all=False) | ||||||
| 
 | 
 | ||||||
|   | |||||||
| @@ -3216,7 +3216,11 @@ def js_to_json(code, vars={}): | |||||||
| 
 | 
 | ||||||
|         return '"%s"' % v |         return '"%s"' % v | ||||||
| 
 | 
 | ||||||
|  |     def create_map(mobj): | ||||||
|  |         return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars)))) | ||||||
|  | 
 | ||||||
|     code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) |     code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) | ||||||
|  |     code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code) | ||||||
| 
 | 
 | ||||||
|     return re.sub(r'''(?sx) |     return re.sub(r'''(?sx) | ||||||
|         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| |         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan