mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[extractor/niconico:live] Add extractor (#5764)
Authored by: Lesmiscore
This commit is contained in:
		| @@ -5,13 +5,17 @@ import json | ||||
| import re | ||||
| import time | ||||
| 
 | ||||
| from urllib.parse import urlparse | ||||
| 
 | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
| ) | ||||
| from ..dependencies import websockets | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     OnDemandPagedList, | ||||
|     WebSocketsWrapper, | ||||
|     bug_reports_message, | ||||
|     clean_html, | ||||
|     float_or_none, | ||||
| @@ -895,3 +899,162 @@ class NiconicoUserIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         list_id = self._match_id(url) | ||||
|         return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key()) | ||||
| 
 | ||||
| 
 | ||||
class NiconicoLiveIE(InfoExtractor):
    """Extractor for Niconico live broadcast pages (``/watch/lv...`` and ``/gate/lv...``).

    The watch page carries a JSON blob (``embedded-data``) with the programme
    metadata and a WebSocket URL. The HLS playlist URL is requested over that
    WebSocket with a ``startWatching`` message. The connected WebSocket object is
    stored in every returned format under the ``ws`` key
    (presumably for the ``niconico_live`` downloader -- confirm against it).
    """

    IE_NAME = 'niconico:live'
    IE_DESC = 'ニコニコ生放送'
    _VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)'
    _TESTS = [{
        'note': 'this test case includes invisible characters for title, pasting them as-is',
        'url': 'https://live.nicovideo.jp/watch/lv339533123',
        'info_dict': {
            'id': 'lv339533123',
            'title': '激辛ペヤング食べます( ;ᯅ; )(歌枠オーディション参加中)',
            'view_count': 1526,
            'comment_count': 1772,
            'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます',
            'uploader': 'もか',
            'channel': 'ゲストさんのコミュニティ',
            'channel_id': 'co5776900',
            'channel_url': 'https://com.nicovideo.jp/community/co5776900',
            'timestamp': 1670677328,
            'is_live': True,
        },
        'skip': 'livestream',
    }, {
        'url': 'https://live2.nicovideo.jp/watch/lv339533123',
        'only_matching': True,
    }, {
        'url': 'https://sp.live.nicovideo.jp/watch/lv339533123',
        'only_matching': True,
    }, {
        'url': 'https://sp.live2.nicovideo.jp/watch/lv339533123',
        'only_matching': True,
    }]

    # Accepted values of the "latency" extractor argument; anything else falls back to 'high'
    _KNOWN_LATENCY = ('high', 'low')

    def _real_extract(self, url):
        """Extract metadata and HLS formats for the live programme at *url*.

        Raises ExtractorError when the websockets library is missing, when the
        page advertises no WebSocket URL (not started yet / already ended), or
        when the server answers with a ``disconnect`` or ``error`` message
        before it sends the stream information.
        """
        if not websockets:
            raise ExtractorError('websockets library is not available. Please install it.', expected=True)
        video_id = self._match_id(url)
        # Always fetch the canonical watch page, whichever host/path variant was matched
        webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)

        embedded_data = self._parse_json(unescapeHTML(self._search_regex(
            r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)

        ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
        if not ws_url:
            raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
        ws_url = update_url_query(ws_url, {
            'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
        })

        # Origin is derived from the final (post-redirect) URL, without the mobile "sp." prefix
        hostname = remove_start(urlparse(urlh.geturl()).hostname, 'sp.')
        cookies = try_get(urlh.geturl(), self._downloader._calc_cookies)
        latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
        if latency not in self._KNOWN_LATENCY:
            latency = 'high'

        ws = WebSocketsWrapper(ws_url, {
            # The HTTP request header carrying cookies is "Cookie" (RFC 6265);
            # a "Cookies" header is not recognised by servers
            'Cookie': str_or_none(cookies) or '',
            'Origin': f'https://{hostname}',
            'Accept': '*/*',
            'User-Agent': self.get_param('http_headers')['User-Agent'],
        })

        # No literal "[debug]" here, matching the other write_debug calls in this method
        self.write_debug('Sending HLS server request')
        ws.send(json.dumps({
            'type': 'startWatching',
            'data': {
                'stream': {
                    'quality': 'abr',
                    'protocol': 'hls+fmp4',
                    'latency': latency,
                    'chasePlay': False
                },
                'room': {
                    'protocol': 'webSocket',
                    'commentable': True
                },
                'reconnect': False,
            }
        }))

        # Read messages until the server announces the stream; other message types are skipped
        while True:
            recv = ws.recv()
            if not recv:
                continue
            data = json.loads(recv)
            if not isinstance(data, dict):
                continue
            if data.get('type') == 'stream':
                m3u8_url = data['data']['uri']
                qualities = data['data']['availableQualities']
                break
            elif data.get('type') == 'disconnect':
                self.write_debug(recv)
                raise ExtractorError('Disconnected at middle of extraction')
            elif data.get('type') == 'error':
                self.write_debug(recv)
                message = traverse_obj(data, ('body', 'code')) or recv
                raise ExtractorError(message)
            elif self.get_param('verbose', False):
                # Truncate long unrelated messages to keep the verbose log readable
                if len(recv) > 100:
                    recv = recv[:100] + '...'
                self.write_debug(f'Server said: {recv}')

        title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
            ('og:title', 'twitter:title'), webpage, 'live title', fatal=False)

        # Each thumbnail entry is either a plain URL or a mapping of size label -> URL
        raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
        thumbnails = []
        for name, value in raw_thumbs.items():
            if not isinstance(value, dict):
                thumbnails.append({
                    'id': name,
                    'url': value,
                    **parse_resolution(value, lenient=True),
                })
                continue

            for k, img_url in value.items():
                # Prefer a resolution parsed from the key, else try the URL itself
                res = parse_resolution(k, lenient=True) or parse_resolution(img_url, lenient=True)
                width, height = res.get('width'), res.get('height')

                thumbnails.append({
                    'id': f'{name}_{width}x{height}',
                    'url': img_url,
                    **res,
                })

        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
        # NOTE(review): pairing assumes the first advertised quality has no playlist variant
        # (presumably the adaptive 'abr' entry) and that the playlist lists the rest in
        # reverse order -- confirm against a live response
        for fmt, q in zip(formats, reversed(qualities[1:])):
            fmt.update({
                'format_id': q,
                'protocol': 'niconico_live',
                'ws': ws,
                'video_id': video_id,
                'cookies': cookies,
                'live_latency': latency,
                'origin': hostname,
            })

        return {
            'id': video_id,
            'title': title,
            **traverse_obj(embedded_data, {
                'view_count': ('program', 'statistics', 'watchCount'),
                'comment_count': ('program', 'statistics', 'commentCount'),
                'uploader': ('program', 'supplier', 'name'),
                'channel': ('socialGroup', 'name'),
                'channel_id': ('socialGroup', 'id'),
                'channel_url': ('socialGroup', 'socialGroupPageUrl'),
            }),
            'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))),
            'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))),
            'is_live': True,
            'thumbnails': thumbnails,
            'formats': formats,
        }
|   | ||||
		Reference in New Issue
	
	Block a user
	 Lesmiscore
					Lesmiscore