mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	| @@ -3,6 +3,8 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
|     get_element_by_class, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     parse_duration, | ||||
| @@ -45,25 +47,36 @@ class NhkBaseIE(InfoExtractor): | ||||
|         self.cache.store('nhk', 'api_info', api_info) | ||||
|         return api_info | ||||
| 
 | ||||
|     def _extract_formats_and_subtitles(self, vod_id): | ||||
|     def _extract_stream_info(self, vod_id): | ||||
|         for refresh in (False, True): | ||||
|             api_info = self._get_api_info(refresh) | ||||
|             if not api_info: | ||||
|                 continue | ||||
| 
 | ||||
|             api_url = api_info.pop('url') | ||||
|             stream_url = traverse_obj( | ||||
|             meta = traverse_obj( | ||||
|                 self._download_json( | ||||
|                     api_url, vod_id, 'Downloading stream url info', fatal=False, query={ | ||||
|                         **api_info, | ||||
|                         'type': 'json', | ||||
|                         'optional_id': vod_id, | ||||
|                         'active_flg': 1, | ||||
|                     }), | ||||
|                 ('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False) | ||||
|             if stream_url: | ||||
|                 return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id) | ||||
|                     }), ('meta', 0)) | ||||
|             stream_url = traverse_obj( | ||||
|                 meta, ('movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False) | ||||
| 
 | ||||
|             if stream_url: | ||||
|                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, vod_id) | ||||
|                 return { | ||||
|                     **traverse_obj(meta, { | ||||
|                         'duration': ('duration', {int_or_none}), | ||||
|                         'timestamp': ('publication_date', {unified_timestamp}), | ||||
|                         'release_timestamp': ('insert_date', {unified_timestamp}), | ||||
|                         'modified_timestamp': ('update_date', {unified_timestamp}), | ||||
|                     }), | ||||
|                     'formats': formats, | ||||
|                     'subtitles': subtitles, | ||||
|                 } | ||||
|         raise ExtractorError('Unable to extract stream url') | ||||
| 
 | ||||
|     def _extract_episode_info(self, url, episode=None): | ||||
| @@ -77,11 +90,11 @@ class NhkBaseIE(InfoExtractor): | ||||
|         if fetch_episode: | ||||
|             episode = self._call_api( | ||||
|                 episode_id, lang, is_video, True, episode_id[:4] == '9999')[0] | ||||
|         title = episode.get('sub_title_clean') or episode['sub_title'] | ||||
| 
 | ||||
|         def get_clean_field(key): | ||||
|             return episode.get(key + '_clean') or episode.get(key) | ||||
|             return clean_html(episode.get(key + '_clean') or episode.get(key)) | ||||
| 
 | ||||
|         title = get_clean_field('sub_title') | ||||
|         series = get_clean_field('title') | ||||
| 
 | ||||
|         thumbnails = [] | ||||
| @@ -96,22 +109,30 @@ class NhkBaseIE(InfoExtractor): | ||||
|                 'url': 'https://www3.nhk.or.jp' + img_path, | ||||
|             }) | ||||
| 
 | ||||
|         episode_name = title | ||||
|         if series and title: | ||||
|             title = f'{series} - {title}' | ||||
|         elif series and not title: | ||||
|             title = series | ||||
|             series = None | ||||
|             episode_name = None | ||||
|         else:  # title, no series | ||||
|             episode_name = None | ||||
| 
 | ||||
|         info = { | ||||
|             'id': episode_id + '-' + lang, | ||||
|             'title': '%s - %s' % (series, title) if series and title else title, | ||||
|             'title': title, | ||||
|             'description': get_clean_field('description'), | ||||
|             'thumbnails': thumbnails, | ||||
|             'series': series, | ||||
|             'episode': title, | ||||
|             'episode': episode_name, | ||||
|         } | ||||
| 
 | ||||
|         if is_video: | ||||
|             vod_id = episode['vod_id'] | ||||
|             formats, subs = self._extract_formats_and_subtitles(vod_id) | ||||
| 
 | ||||
|             info.update({ | ||||
|                 **self._extract_stream_info(vod_id), | ||||
|                 'id': vod_id, | ||||
|                 'formats': formats, | ||||
|                 'subtitles': subs, | ||||
|             }) | ||||
| 
 | ||||
|         else: | ||||
| @@ -148,6 +169,14 @@ class NhkVodIE(NhkBaseIE): | ||||
|             'thumbnail': 'md5:51bcef4a21936e7fea1ff4e06353f463', | ||||
|             'episode': 'The Tohoku Shinkansen: Full Speed Ahead', | ||||
|             'series': 'Japan Railway Journal', | ||||
|             'modified_timestamp': 1694243656, | ||||
|             'timestamp': 1681428600, | ||||
|             'release_timestamp': 1693883728, | ||||
|             'duration': 1679, | ||||
|             'upload_date': '20230413', | ||||
|             'modified_date': '20230909', | ||||
|             'release_date': '20230905', | ||||
| 
 | ||||
|         }, | ||||
|     }, { | ||||
|         # video clip | ||||
| @@ -161,6 +190,13 @@ class NhkVodIE(NhkBaseIE): | ||||
|             'thumbnail': 'md5:d6a4d9b6e9be90aaadda0bcce89631ed', | ||||
|             'series': 'Dining with the Chef', | ||||
|             'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU', | ||||
|             'duration': 148, | ||||
|             'upload_date': '20190816', | ||||
|             'release_date': '20230902', | ||||
|             'release_timestamp': 1693619292, | ||||
|             'modified_timestamp': 1694168033, | ||||
|             'modified_date': '20230908', | ||||
|             'timestamp': 1565997540, | ||||
|         }, | ||||
|     }, { | ||||
|         # radio | ||||
| @@ -170,7 +206,7 @@ class NhkVodIE(NhkBaseIE): | ||||
|             'ext': 'm4a', | ||||
|             'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines', | ||||
|             'series': 'Living in Japan', | ||||
|             'description': 'md5:850611969932874b4a3309e0cae06c2f', | ||||
|             'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab', | ||||
|             'thumbnail': 'md5:960622fb6e06054a4a1a0c97ea752545', | ||||
|             'episode': 'Tips for Travelers to Japan / Ramen Vending Machines' | ||||
|         }, | ||||
| @@ -212,6 +248,23 @@ class NhkVodIE(NhkBaseIE): | ||||
|             'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0', | ||||
|         }, | ||||
|         'skip': 'expires 2023-10-15', | ||||
|     }, { | ||||
|         # a one-off (single-episode series). title from the api is just '<p></p>' | ||||
|         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/3004952/', | ||||
|         'info_dict': { | ||||
|             'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Barakan Discovers AMAMI OSHIMA: Isson\'s Treasure Island', | ||||
|             'description': 'md5:5db620c46a0698451cc59add8816b797', | ||||
|             'thumbnail': 'md5:67d9ff28009ba379bfa85ad1aaa0e2bd', | ||||
|             'release_date': '20230905', | ||||
|             'timestamp': 1690103400, | ||||
|             'duration': 2939, | ||||
|             'release_timestamp': 1693898699, | ||||
|             'modified_timestamp': 1698057495, | ||||
|             'modified_date': '20231023', | ||||
|             'upload_date': '20230723', | ||||
|         }, | ||||
|     }] | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
| @@ -226,13 +279,15 @@ class NhkVodProgramIE(NhkBaseIE): | ||||
|         'info_dict': { | ||||
|             'id': 'sumo', | ||||
|             'title': 'GRAND SUMO Highlights', | ||||
|             'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf', | ||||
|         }, | ||||
|         'playlist_mincount': 12, | ||||
|         'playlist_mincount': 0, | ||||
|     }, { | ||||
|         'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway', | ||||
|         'info_dict': { | ||||
|             'id': 'japanrailway', | ||||
|             'title': 'Japan Railway Journal', | ||||
|             'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f', | ||||
|         }, | ||||
|         'playlist_mincount': 12, | ||||
|     }, { | ||||
| @@ -241,6 +296,7 @@ class NhkVodProgramIE(NhkBaseIE): | ||||
|         'info_dict': { | ||||
|             'id': 'japanrailway', | ||||
|             'title': 'Japan Railway Journal', | ||||
|             'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f', | ||||
|         }, | ||||
|         'playlist_mincount': 5, | ||||
|     }, { | ||||
| @@ -265,11 +321,11 @@ class NhkVodProgramIE(NhkBaseIE): | ||||
|             entries.append(self._extract_episode_info( | ||||
|                 urljoin(url, episode_path), episode)) | ||||
| 
 | ||||
|         program_title = None | ||||
|         if entries: | ||||
|             program_title = entries[0].get('series') | ||||
|         html = self._download_webpage(url, program_id) | ||||
|         program_title = clean_html(get_element_by_class('p-programDetail__title', html)) | ||||
|         program_description = clean_html(get_element_by_class('p-programDetail__text', html)) | ||||
| 
 | ||||
|         return self.playlist_result(entries, program_id, program_title) | ||||
|         return self.playlist_result(entries, program_id, program_title, program_description) | ||||
| 
 | ||||
| 
 | ||||
| class NhkForSchoolBangumiIE(InfoExtractor): | ||||
| @@ -421,6 +477,7 @@ class NhkRadiruIE(InfoExtractor): | ||||
|         'skip': 'Episode expired on 2023-04-16', | ||||
|         'info_dict': { | ||||
|             'channel': 'NHK-FM', | ||||
|             'uploader': 'NHK-FM', | ||||
|             'description': 'md5:94b08bdeadde81a97df4ec882acce3e9', | ||||
|             'ext': 'm4a', | ||||
|             'id': '0449_01_3853544', | ||||
| @@ -441,6 +498,7 @@ class NhkRadiruIE(InfoExtractor): | ||||
|             'title': 'ベストオブクラシック', | ||||
|             'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。', | ||||
|             'channel': 'NHK-FM', | ||||
|             'uploader': 'NHK-FM', | ||||
|             'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
| @@ -454,6 +512,7 @@ class NhkRadiruIE(InfoExtractor): | ||||
|             'title': '有島武郎「一房のぶどう」', | ||||
|             'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)', | ||||
|             'channel': 'NHKラジオ第1、NHK-FM', | ||||
|             'uploader': 'NHKラジオ第1、NHK-FM', | ||||
|             'timestamp': 1635757200, | ||||
|             'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg', | ||||
|             'release_date': '20161207', | ||||
| @@ -469,6 +528,7 @@ class NhkRadiruIE(InfoExtractor): | ||||
|             'id': 'F261_01_3855109', | ||||
|             'ext': 'm4a', | ||||
|             'channel': 'NHKラジオ第1', | ||||
|             'uploader': 'NHKラジオ第1', | ||||
|             'timestamp': 1681635900, | ||||
|             'release_date': '20230416', | ||||
|             'series': 'NHKラジオニュース', | ||||
| @@ -513,6 +573,7 @@ class NhkRadiruIE(InfoExtractor): | ||||
|         series_meta = traverse_obj(meta, { | ||||
|             'title': 'program_name', | ||||
|             'channel': 'media_name', | ||||
|             'uploader': 'media_name', | ||||
|             'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}), | ||||
|         }, get_all=False) | ||||
| 
 | ||||
| @@ -541,6 +602,7 @@ class NhkRadioNewsPageIE(InfoExtractor): | ||||
|             'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg', | ||||
|             'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d', | ||||
|             'channel': 'NHKラジオ第1', | ||||
|             'uploader': 'NHKラジオ第1', | ||||
|             'title': 'NHKラジオニュース', | ||||
|         } | ||||
|     }] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 garret
					garret