mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[extractor/niconico:live] Add extractor (#5764)
Authored by: Lesmiscore
This commit is contained in:
		| @@ -30,7 +30,7 @@ from .hls import HlsFD | ||||
| from .http import HttpFD | ||||
| from .ism import IsmFD | ||||
| from .mhtml import MhtmlFD | ||||
| from .niconico import NiconicoDmcFD | ||||
| from .niconico import NiconicoDmcFD, NiconicoLiveFD | ||||
| from .rtmp import RtmpFD | ||||
| from .rtsp import RtspFD | ||||
| from .websocket import WebSocketFragmentFD | ||||
| @@ -50,6 +50,7 @@ PROTOCOL_MAP = { | ||||
|     'ism': IsmFD, | ||||
|     'mhtml': MhtmlFD, | ||||
|     'niconico_dmc': NiconicoDmcFD, | ||||
|     'niconico_live': NiconicoLiveFD, | ||||
|     'fc2_live': FC2LiveFD, | ||||
|     'websocket_frag': WebSocketFragmentFD, | ||||
|     'youtube_live_chat': YoutubeLiveChatFD, | ||||
|   | ||||
| @@ -1,8 +1,17 @@ | ||||
| import json | ||||
| import threading | ||||
| import time | ||||
| 
 | ||||
| from . import get_suitable_downloader | ||||
| from .common import FileDownloader | ||||
| from ..utils import sanitized_Request | ||||
| from .external import FFmpegFD | ||||
| from ..utils import ( | ||||
|     DownloadError, | ||||
|     str_or_none, | ||||
|     sanitized_Request, | ||||
|     WebSocketsWrapper, | ||||
|     try_get, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| class NiconicoDmcFD(FileDownloader): | ||||
| @@ -50,3 +59,93 @@ class NiconicoDmcFD(FileDownloader): | ||||
|                     timer[0].cancel() | ||||
|                     download_complete = True | ||||
|         return success | ||||
| 
 | ||||
| 
 | ||||
class NiconicoLiveFD(FileDownloader):
    """Downloads niconico live without being stopped.

    The media itself is downloaded by FFmpegFD (the format is re-labelled as
    'm3u8' for it).  While that runs, a daemon thread talks to the niconico
    watch WebSocket: it answers server pings with 'pong' + 'keepSeat' and
    reconnects whenever the connection fails.
    """

    def real_download(self, filename, info_dict):
        """Download the stream to *filename* while servicing the WebSocket.

        Reads from *info_dict*:
          'video_id', 'url', 'ws', 'origin'  -- required
          'cookies'                          -- optional, sent on reconnect
          'live_quality', 'live_latency'     -- optional, default 'high'

        Returns whatever FFmpegFD.download() returns for the copied info dict.
        """
        video_id = info_dict['video_id']
        # NOTE(review): this is the same 'url' that FFmpegFD downloads below
        # (new_info_dict is a plain copy) -- confirm that it is really the
        # WebSocket endpoint expected by the reconnect path.
        ws_url = info_dict['url']
        ws_extractor = info_dict['ws']
        ws_origin_host = info_dict['origin']
        cookies = info_dict.get('cookies')
        live_quality = info_dict.get('live_quality', 'high')
        live_latency = info_dict.get('live_latency', 'high')
        dl = FFmpegFD(self.ydl, self.params or {})

        # Hand a copy to FFmpegFD so the caller's dict keeps its protocol.
        new_info_dict = info_dict.copy()
        new_info_dict.update({
            'protocol': 'm3u8',
        })

        def communicate_ws(reconnect):
            """Service one WebSocket connection until it ends.

            With reconnect=False the socket passed in by the extractor
            (info_dict['ws']) is reused; otherwise a new connection is opened
            and a 'startWatching' request is sent first.

            Returns True when the server sends 'disconnect'.
            Raises DownloadError when the server sends 'error'.
            """
            verbose = self.ydl.params.get('verbose', False)
            if reconnect:
                ws = WebSocketsWrapper(ws_url, {
                    # 'Cookie' is the standard request header name (RFC 6265)
                    'Cookie': str_or_none(cookies) or '',
                    'Origin': f'https://{ws_origin_host}',
                    'Accept': '*/*',
                    'User-Agent': self.params['http_headers']['User-Agent'],
                })
                if verbose:
                    self.to_screen('[debug] Sending startWatching request')
                ws.send(json.dumps({
                    'type': 'startWatching',
                    'data': {
                        'stream': {
                            'quality': live_quality,
                            'protocol': 'hls+fmp4',
                            'latency': live_latency,
                            'chasePlay': False
                        },
                        'room': {
                            'protocol': 'webSocket',
                            'commentable': True
                        },
                        'reconnect': True,
                    }
                }))
            else:
                ws = ws_extractor

            with ws:
                while True:
                    recv = ws.recv()
                    if not recv:
                        continue
                    data = json.loads(recv)
                    if not data or not isinstance(data, dict):
                        continue

                    msg_type = data.get('type')
                    if msg_type == 'ping':
                        # pong back
                        ws.send(r'{"type":"pong"}')
                        ws.send(r'{"type":"keepSeat"}')
                    elif msg_type == 'disconnect':
                        self.write_debug(data)
                        return True
                    elif msg_type == 'error':
                        self.write_debug(data)
                        message = try_get(data, lambda x: x['body']['code'], str) or recv
                        # Must be raised, not returned: ws_main() only looks for
                        # a True return value, so a returned exception object
                        # was dropped and the loop reconnected immediately
                        # without logging or waiting.
                        raise DownloadError(message)
                    elif verbose:
                        if len(recv) > 100:
                            recv = recv[:100] + '...'
                        self.to_screen('[debug] Server said: %s' % recv)

        def ws_main():
            """Thread target: run communicate_ws() until a clean disconnect."""
            reconnect = False
            while True:
                try:
                    if communicate_ws(reconnect) is True:
                        return
                except BaseException as e:
                    # NOTE(review): kept as broad as the original on purpose so
                    # that any failure leads to a reconnect attempt.
                    self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
                    time.sleep(10)
                # The extractor's socket is only usable for the first attempt.
                reconnect = True

        # Daemon thread: it must not keep the process alive once FFmpegFD is done.
        thread = threading.Thread(target=ws_main, daemon=True)
        thread.start()

        return dl.download(filename, new_info_dict)
|   | ||||
| @@ -1275,6 +1275,7 @@ from .niconico import ( | ||||
|     NicovideoSearchIE, | ||||
|     NicovideoSearchURLIE, | ||||
|     NicovideoTagURLIE, | ||||
|     NiconicoLiveIE, | ||||
| ) | ||||
| from .ninecninemedia import ( | ||||
|     NineCNineMediaIE, | ||||
|   | ||||
| @@ -5,13 +5,17 @@ import json | ||||
| import re | ||||
| import time | ||||
| 
 | ||||
| from urllib.parse import urlparse | ||||
| 
 | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
| ) | ||||
| from ..dependencies import websockets | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     OnDemandPagedList, | ||||
|     WebSocketsWrapper, | ||||
|     bug_reports_message, | ||||
|     clean_html, | ||||
|     float_or_none, | ||||
| @@ -895,3 +899,162 @@ class NiconicoUserIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         list_id = self._match_id(url) | ||||
|         return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key()) | ||||
| 
 | ||||
| 
 | ||||
| class NiconicoLiveIE(InfoExtractor): | ||||
|     IE_NAME = 'niconico:live' | ||||
|     IE_DESC = 'ニコニコ生放送' | ||||
|     _VALID_URL = r'https?://(?:sp\.)?live2?\.nicovideo\.jp/(?:watch|gate)/(?P<id>lv\d+)' | ||||
|     _TESTS = [{ | ||||
|         'note': 'this test case includes invisible characters for title, pasting them as-is', | ||||
|         'url': 'https://live.nicovideo.jp/watch/lv339533123', | ||||
|         'info_dict': { | ||||
|             'id': 'lv339533123', | ||||
|             'title': '激辛ペヤング食べます( ;ᯅ; )(歌枠オーディション参加中)', | ||||
|             'view_count': 1526, | ||||
|             'comment_count': 1772, | ||||
|             'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます', | ||||
|             'uploader': 'もか', | ||||
|             'channel': 'ゲストさんのコミュニティ', | ||||
|             'channel_id': 'co5776900', | ||||
|             'channel_url': 'https://com.nicovideo.jp/community/co5776900', | ||||
|             'timestamp': 1670677328, | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'skip': 'livestream', | ||||
|     }, { | ||||
|         'url': 'https://live2.nicovideo.jp/watch/lv339533123', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://sp.live.nicovideo.jp/watch/lv339533123', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://sp.live2.nicovideo.jp/watch/lv339533123', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
| 
 | ||||
|     _KNOWN_LATENCY = ('high', 'low') | ||||
| 
 | ||||
|     def _real_extract(self, url): | ||||
|         if not websockets: | ||||
|             raise ExtractorError('websockets library is not available. Please install it.', expected=True) | ||||
|         video_id = self._match_id(url) | ||||
|         webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id) | ||||
| 
 | ||||
|         embedded_data = self._parse_json(unescapeHTML(self._search_regex( | ||||
|             r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id) | ||||
| 
 | ||||
|         ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl')) | ||||
|         if not ws_url: | ||||
|             raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True) | ||||
|         ws_url = update_url_query(ws_url, { | ||||
|             'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9', | ||||
|         }) | ||||
| 
 | ||||
|         hostname = remove_start(urlparse(urlh.geturl()).hostname, 'sp.') | ||||
|         cookies = try_get(urlh.geturl(), self._downloader._calc_cookies) | ||||
|         latency = try_get(self._configuration_arg('latency'), lambda x: x[0]) | ||||
|         if latency not in self._KNOWN_LATENCY: | ||||
|             latency = 'high' | ||||
| 
 | ||||
|         ws = WebSocketsWrapper(ws_url, { | ||||
|             'Cookies': str_or_none(cookies) or '', | ||||
|             'Origin': f'https://{hostname}', | ||||
|             'Accept': '*/*', | ||||
|             'User-Agent': self.get_param('http_headers')['User-Agent'], | ||||
|         }) | ||||
| 
 | ||||
|         self.write_debug('[debug] Sending HLS server request') | ||||
|         ws.send(json.dumps({ | ||||
|             'type': 'startWatching', | ||||
|             'data': { | ||||
|                 'stream': { | ||||
|                     'quality': 'abr', | ||||
|                     'protocol': 'hls+fmp4', | ||||
|                     'latency': latency, | ||||
|                     'chasePlay': False | ||||
|                 }, | ||||
|                 'room': { | ||||
|                     'protocol': 'webSocket', | ||||
|                     'commentable': True | ||||
|                 }, | ||||
|                 'reconnect': False, | ||||
|             } | ||||
|         })) | ||||
| 
 | ||||
|         while True: | ||||
|             recv = ws.recv() | ||||
|             if not recv: | ||||
|                 continue | ||||
|             data = json.loads(recv) | ||||
|             if not isinstance(data, dict): | ||||
|                 continue | ||||
|             if data.get('type') == 'stream': | ||||
|                 m3u8_url = data['data']['uri'] | ||||
|                 qualities = data['data']['availableQualities'] | ||||
|                 break | ||||
|             elif data.get('type') == 'disconnect': | ||||
|                 self.write_debug(recv) | ||||
|                 raise ExtractorError('Disconnected at middle of extraction') | ||||
|             elif data.get('type') == 'error': | ||||
|                 self.write_debug(recv) | ||||
|                 message = traverse_obj(data, ('body', 'code')) or recv | ||||
|                 raise ExtractorError(message) | ||||
|             elif self.get_param('verbose', False): | ||||
|                 if len(recv) > 100: | ||||
|                     recv = recv[:100] + '...' | ||||
|                 self.write_debug('Server said: %s' % recv) | ||||
| 
 | ||||
|         title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta( | ||||
|             ('og:title', 'twitter:title'), webpage, 'live title', fatal=False) | ||||
| 
 | ||||
|         raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {} | ||||
|         thumbnails = [] | ||||
|         for name, value in raw_thumbs.items(): | ||||
|             if not isinstance(value, dict): | ||||
|                 thumbnails.append({ | ||||
|                     'id': name, | ||||
|                     'url': value, | ||||
|                     **parse_resolution(value, lenient=True), | ||||
|                 }) | ||||
|                 continue | ||||
| 
 | ||||
|             for k, img_url in value.items(): | ||||
|                 res = parse_resolution(k, lenient=True) or parse_resolution(img_url, lenient=True) | ||||
|                 width, height = res.get('width'), res.get('height') | ||||
| 
 | ||||
|                 thumbnails.append({ | ||||
|                     'id': f'{name}_{width}x{height}', | ||||
|                     'url': img_url, | ||||
|                     **res, | ||||
|                 }) | ||||
| 
 | ||||
|         formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True) | ||||
|         for fmt, q in zip(formats, reversed(qualities[1:])): | ||||
|             fmt.update({ | ||||
|                 'format_id': q, | ||||
|                 'protocol': 'niconico_live', | ||||
|                 'ws': ws, | ||||
|                 'video_id': video_id, | ||||
|                 'cookies': cookies, | ||||
|                 'live_latency': latency, | ||||
|                 'origin': hostname, | ||||
|             }) | ||||
| 
 | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             **traverse_obj(embedded_data, { | ||||
|                 'view_count': ('program', 'statistics', 'watchCount'), | ||||
|                 'comment_count': ('program', 'statistics', 'commentCount'), | ||||
|                 'uploader': ('program', 'supplier', 'name'), | ||||
|                 'channel': ('socialGroup', 'name'), | ||||
|                 'channel_id': ('socialGroup', 'id'), | ||||
|                 'channel_url': ('socialGroup', 'socialGroupPageUrl'), | ||||
|             }), | ||||
|             'description': clean_html(traverse_obj(embedded_data, ('program', 'description'))), | ||||
|             'timestamp': int_or_none(traverse_obj(embedded_data, ('program', 'openTime'))), | ||||
|             'is_live': True, | ||||
|             'thumbnails': thumbnails, | ||||
|             'formats': formats, | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Lesmiscore
					Lesmiscore