1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 08:58:30 +00:00

[ie/niconico:live] Fix extractor and downloader (#13158)

Authored by: doe1080
This commit is contained in:
doe1080 2025-06-27 02:45:03 +09:00 committed by GitHub
parent 99b85ac102
commit 06c1a8cdff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 66 additions and 69 deletions

View File

@ -5,47 +5,46 @@
from .common import FileDownloader
from .external import FFmpegFD
from ..networking import Request
from ..utils import DownloadError, str_or_none, try_get
from ..networking.websocket import WebSocketResponse
from ..utils import DownloadError, str_or_none, truncate_string
from ..utils.traversal import traverse_obj
class NiconicoLiveFD(FileDownloader):
""" Downloads niconico live without being stopped """
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
ws_url = info_dict['url']
ws_extractor = info_dict['ws']
ws_origin_host = info_dict['origin']
live_quality = info_dict.get('live_quality', 'high')
live_latency = info_dict.get('live_latency', 'high')
video_id = info_dict['id']
opts = info_dict['downloader_options']
quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url']
dl = FFmpegFD(self.ydl, self.params or {})
new_info_dict = info_dict.copy()
new_info_dict.update({
'protocol': 'm3u8',
})
new_info_dict['protocol'] = 'm3u8'
def communicate_ws(reconnect):
if reconnect:
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
# Support --load-info-json as if it is a reconnect attempt
if reconnect or not isinstance(ws_extractor, WebSocketResponse):
ws = self.ydl.urlopen(Request(
ws_url, headers={'Origin': 'https://live.nicovideo.jp'}))
if self.ydl.params.get('verbose', False):
self.to_screen('[debug] Sending startWatching request')
self.write_debug('Sending startWatching request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'reconnect': True,
'room': {
'commentable': True,
'protocol': 'webSocket',
},
'stream': {
'quality': live_quality,
'protocol': 'hls+fmp4',
'latency': live_latency,
'accessRightMethod': 'single_cookie',
'chasePlay': False,
'latency': 'high',
'protocol': 'hls',
'quality': quality,
},
'room': {
'protocol': 'webSocket',
'commentable': True,
},
'reconnect': True,
},
'type': 'startWatching',
}))
else:
ws = ws_extractor
@ -58,7 +57,6 @@ def communicate_ws(reconnect):
if not data or not isinstance(data, dict):
continue
if data.get('type') == 'ping':
# pong back
ws.send(r'{"type":"pong"}')
ws.send(r'{"type":"keepSeat"}')
elif data.get('type') == 'disconnect':
@ -66,12 +64,10 @@ def communicate_ws(reconnect):
return True
elif data.get('type') == 'error':
self.write_debug(data)
message = try_get(data, lambda x: x['body']['code'], str) or recv
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
return DownloadError(message)
elif self.ydl.params.get('verbose', False):
if len(recv) > 100:
recv = recv[:100] + '...'
self.to_screen(f'[debug] Server said: {recv}')
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
def ws_main():
reconnect = False
@ -81,7 +77,8 @@ def ws_main():
if ret is True:
return
except BaseException as e:
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
self.to_screen(
f'[niconico:live] {video_id}: Connection error occured, reconnecting after 10 seconds: {e}')
time.sleep(10)
continue
finally:

View File

@ -263,6 +263,9 @@ class InfoExtractor:
* http_chunk_size Chunk size for HTTP downloads
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
* ws (NiconicoLiveFD only) WebSocketResponse
* ws_url (NiconicoLiveFD only) WebSocket URL
* max_quality (NiconicoLiveFD only) Max stream quality string
* is_dash_periods Whether the format is a result of merging
multiple DASH periods.
RTMP formats can also have the additional fields: page_url,

View File

@ -4,16 +4,15 @@
import json
import re
import time
import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
OnDemandPagedList,
clean_html,
determine_ext,
extract_attributes,
float_or_none,
int_or_none,
parse_bitrate,
@ -22,9 +21,8 @@
parse_qs,
parse_resolution,
qualities,
remove_start,
str_or_none,
unescapeHTML,
truncate_string,
unified_timestamp,
update_url_query,
url_basename,
@ -32,7 +30,11 @@
urlencode_postdata,
urljoin,
)
from ..utils.traversal import find_element, require, traverse_obj
from ..utils.traversal import (
find_element,
require,
traverse_obj,
)
class NiconicoBaseIE(InfoExtractor):
@ -806,41 +808,39 @@ class NiconicoLiveIE(NiconicoBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
webpage = self._download_webpage(url, video_id, expected_status=404)
if err_msg := traverse_obj(webpage, ({find_element(cls='message')}, {clean_html})):
raise ExtractorError(err_msg, expected=True)
embedded_data = self._parse_json(unescapeHTML(self._search_regex(
r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
if not ws_url:
raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
ws_url = update_url_query(ws_url, {
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
})
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
embedded_data = traverse_obj(webpage, (
{find_element(tag='script', id='embedded-data', html=True)},
{extract_attributes}, 'data-props', {json.loads}))
frontend_id = traverse_obj(embedded_data, ('site', 'frontendId', {str_or_none}), default='9')
ws_url = traverse_obj(embedded_data, (
'site', 'relive', 'webSocketUrl', {url_or_none}, {require('websocket URL')}))
ws_url = update_url_query(ws_url, {'frontend_id': frontend_id})
ws = self._request_webpage(
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
video_id=video_id, note='Connecting to WebSocket server')
ws_url, video_id, 'Connecting to WebSocket server',
headers={'Origin': 'https://live.nicovideo.jp'})
self.write_debug('Sending HLS server request')
ws.send(json.dumps({
'type': 'startWatching',
'data': {
'reconnect': False,
'room': {
'commentable': True,
'protocol': 'webSocket',
},
'stream': {
'quality': 'abr',
'protocol': 'hls',
'latency': 'high',
'accessRightMethod': 'single_cookie',
'chasePlay': False,
'latency': 'high',
'protocol': 'hls',
'quality': 'abr',
},
'room': {
'protocol': 'webSocket',
'commentable': True,
},
'reconnect': False,
},
'type': 'startWatching',
}))
while True:
@ -860,17 +860,15 @@ def _real_extract(self, url):
raise ExtractorError('Disconnected at middle of extraction')
elif data.get('type') == 'error':
self.write_debug(recv)
message = traverse_obj(data, ('body', 'code')) or recv
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
raise ExtractorError(message)
elif self.get_param('verbose', False):
if len(recv) > 100:
recv = recv[:100] + '...'
self.write_debug(f'Server said: {recv}')
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail', {dict})) or {}
thumbnails = []
for name, value in raw_thumbs.items():
if not isinstance(value, dict):
@ -897,31 +895,30 @@ def _real_extract(self, url):
cookie['domain'], cookie['name'], cookie['value'],
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
fmt_common = {
'live_latency': 'high',
'origin': hostname,
'protocol': 'niconico_live',
'video_id': video_id,
'ws': ws,
}
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
a_map = {96: 'audio_low', 192: 'audio_high'}
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
for fmt in formats:
fmt['protocol'] = 'niconico_live'
if fmt.get('acodec') == 'none':
fmt['format_id'] = next(q_iter, fmt['format_id'])
elif fmt.get('vcodec') == 'none':
abr = parse_bitrate(fmt['url'].lower())
fmt.update({
'abr': abr,
'acodec': 'mp4a.40.2',
'format_id': a_map.get(abr, fmt['format_id']),
})
fmt.update(fmt_common)
return {
'id': video_id,
'title': title,
'downloader_options': {
'max_quality': traverse_obj(embedded_data, ('program', 'stream', 'maxQuality', {str})) or 'normal',
'ws': ws,
'ws_url': ws_url,
},
**traverse_obj(embedded_data, {
'view_count': ('program', 'statistics', 'watchCount'),
'comment_count': ('program', 'statistics', 'commentCount'),