mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 17:08:32 +00:00
[ie/niconico:live] Fix extractor and downloader (#13158)
Authored by: doe1080
This commit is contained in:
parent
99b85ac102
commit
06c1a8cdff
@ -5,47 +5,46 @@
|
|||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .external import FFmpegFD
|
from .external import FFmpegFD
|
||||||
from ..networking import Request
|
from ..networking import Request
|
||||||
from ..utils import DownloadError, str_or_none, try_get
|
from ..networking.websocket import WebSocketResponse
|
||||||
|
from ..utils import DownloadError, str_or_none, truncate_string
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class NiconicoLiveFD(FileDownloader):
|
class NiconicoLiveFD(FileDownloader):
|
||||||
""" Downloads niconico live without being stopped """
|
""" Downloads niconico live without being stopped """
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
def real_download(self, filename, info_dict):
|
||||||
video_id = info_dict['video_id']
|
video_id = info_dict['id']
|
||||||
ws_url = info_dict['url']
|
opts = info_dict['downloader_options']
|
||||||
ws_extractor = info_dict['ws']
|
quality, ws_extractor, ws_url = opts['max_quality'], opts['ws'], opts['ws_url']
|
||||||
ws_origin_host = info_dict['origin']
|
|
||||||
live_quality = info_dict.get('live_quality', 'high')
|
|
||||||
live_latency = info_dict.get('live_latency', 'high')
|
|
||||||
dl = FFmpegFD(self.ydl, self.params or {})
|
dl = FFmpegFD(self.ydl, self.params or {})
|
||||||
|
|
||||||
new_info_dict = info_dict.copy()
|
new_info_dict = info_dict.copy()
|
||||||
new_info_dict.update({
|
new_info_dict['protocol'] = 'm3u8'
|
||||||
'protocol': 'm3u8',
|
|
||||||
})
|
|
||||||
|
|
||||||
def communicate_ws(reconnect):
|
def communicate_ws(reconnect):
|
||||||
if reconnect:
|
# Support --load-info-json as if it is a reconnect attempt
|
||||||
ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'}))
|
if reconnect or not isinstance(ws_extractor, WebSocketResponse):
|
||||||
|
ws = self.ydl.urlopen(Request(
|
||||||
|
ws_url, headers={'Origin': 'https://live.nicovideo.jp'}))
|
||||||
if self.ydl.params.get('verbose', False):
|
if self.ydl.params.get('verbose', False):
|
||||||
self.to_screen('[debug] Sending startWatching request')
|
self.write_debug('Sending startWatching request')
|
||||||
ws.send(json.dumps({
|
ws.send(json.dumps({
|
||||||
'type': 'startWatching',
|
|
||||||
'data': {
|
'data': {
|
||||||
|
'reconnect': True,
|
||||||
|
'room': {
|
||||||
|
'commentable': True,
|
||||||
|
'protocol': 'webSocket',
|
||||||
|
},
|
||||||
'stream': {
|
'stream': {
|
||||||
'quality': live_quality,
|
|
||||||
'protocol': 'hls+fmp4',
|
|
||||||
'latency': live_latency,
|
|
||||||
'accessRightMethod': 'single_cookie',
|
'accessRightMethod': 'single_cookie',
|
||||||
'chasePlay': False,
|
'chasePlay': False,
|
||||||
|
'latency': 'high',
|
||||||
|
'protocol': 'hls',
|
||||||
|
'quality': quality,
|
||||||
},
|
},
|
||||||
'room': {
|
|
||||||
'protocol': 'webSocket',
|
|
||||||
'commentable': True,
|
|
||||||
},
|
|
||||||
'reconnect': True,
|
|
||||||
},
|
},
|
||||||
|
'type': 'startWatching',
|
||||||
}))
|
}))
|
||||||
else:
|
else:
|
||||||
ws = ws_extractor
|
ws = ws_extractor
|
||||||
@ -58,7 +57,6 @@ def communicate_ws(reconnect):
|
|||||||
if not data or not isinstance(data, dict):
|
if not data or not isinstance(data, dict):
|
||||||
continue
|
continue
|
||||||
if data.get('type') == 'ping':
|
if data.get('type') == 'ping':
|
||||||
# pong back
|
|
||||||
ws.send(r'{"type":"pong"}')
|
ws.send(r'{"type":"pong"}')
|
||||||
ws.send(r'{"type":"keepSeat"}')
|
ws.send(r'{"type":"keepSeat"}')
|
||||||
elif data.get('type') == 'disconnect':
|
elif data.get('type') == 'disconnect':
|
||||||
@ -66,12 +64,10 @@ def communicate_ws(reconnect):
|
|||||||
return True
|
return True
|
||||||
elif data.get('type') == 'error':
|
elif data.get('type') == 'error':
|
||||||
self.write_debug(data)
|
self.write_debug(data)
|
||||||
message = try_get(data, lambda x: x['body']['code'], str) or recv
|
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
|
||||||
return DownloadError(message)
|
return DownloadError(message)
|
||||||
elif self.ydl.params.get('verbose', False):
|
elif self.ydl.params.get('verbose', False):
|
||||||
if len(recv) > 100:
|
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
|
||||||
recv = recv[:100] + '...'
|
|
||||||
self.to_screen(f'[debug] Server said: {recv}')
|
|
||||||
|
|
||||||
def ws_main():
|
def ws_main():
|
||||||
reconnect = False
|
reconnect = False
|
||||||
@ -81,7 +77,8 @@ def ws_main():
|
|||||||
if ret is True:
|
if ret is True:
|
||||||
return
|
return
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
|
self.to_screen(
|
||||||
|
f'[niconico:live] {video_id}: Connection error occured, reconnecting after 10 seconds: {e}')
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
continue
|
continue
|
||||||
finally:
|
finally:
|
||||||
|
@ -263,6 +263,9 @@ class InfoExtractor:
|
|||||||
* http_chunk_size Chunk size for HTTP downloads
|
* http_chunk_size Chunk size for HTTP downloads
|
||||||
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
|
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
|
||||||
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
|
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
|
||||||
|
* ws (NiconicoLiveFD only) WebSocketResponse
|
||||||
|
* ws_url (NiconicoLiveFD only) Websockets URL
|
||||||
|
* max_quality (NiconicoLiveFD only) Max stream quality string
|
||||||
* is_dash_periods Whether the format is a result of merging
|
* is_dash_periods Whether the format is a result of merging
|
||||||
multiple DASH periods.
|
multiple DASH periods.
|
||||||
RTMP formats can also have the additional fields: page_url,
|
RTMP formats can also have the additional fields: page_url,
|
||||||
|
@ -4,16 +4,15 @@
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from ..networking import Request
|
|
||||||
from ..networking.exceptions import HTTPError
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
extract_attributes,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_bitrate,
|
parse_bitrate,
|
||||||
@ -22,9 +21,8 @@
|
|||||||
parse_qs,
|
parse_qs,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
qualities,
|
qualities,
|
||||||
remove_start,
|
|
||||||
str_or_none,
|
str_or_none,
|
||||||
unescapeHTML,
|
truncate_string,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_basename,
|
url_basename,
|
||||||
@ -32,7 +30,11 @@
|
|||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import find_element, require, traverse_obj
|
from ..utils.traversal import (
|
||||||
|
find_element,
|
||||||
|
require,
|
||||||
|
traverse_obj,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NiconicoBaseIE(InfoExtractor):
|
class NiconicoBaseIE(InfoExtractor):
|
||||||
@ -806,41 +808,39 @@ class NiconicoLiveIE(NiconicoBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage, urlh = self._download_webpage_handle(f'https://live.nicovideo.jp/watch/{video_id}', video_id)
|
webpage = self._download_webpage(url, video_id, expected_status=404)
|
||||||
|
if err_msg := traverse_obj(webpage, ({find_element(cls='message')}, {clean_html})):
|
||||||
|
raise ExtractorError(err_msg, expected=True)
|
||||||
|
|
||||||
embedded_data = self._parse_json(unescapeHTML(self._search_regex(
|
embedded_data = traverse_obj(webpage, (
|
||||||
r'<script\s+id="embedded-data"\s*data-props="(.+?)"', webpage, 'embedded data')), video_id)
|
{find_element(tag='script', id='embedded-data', html=True)},
|
||||||
|
{extract_attributes}, 'data-props', {json.loads}))
|
||||||
ws_url = traverse_obj(embedded_data, ('site', 'relive', 'webSocketUrl'))
|
frontend_id = traverse_obj(embedded_data, ('site', 'frontendId', {str_or_none}), default='9')
|
||||||
if not ws_url:
|
|
||||||
raise ExtractorError('The live hasn\'t started yet or already ended.', expected=True)
|
|
||||||
ws_url = update_url_query(ws_url, {
|
|
||||||
'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
|
|
||||||
})
|
|
||||||
|
|
||||||
hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
|
|
||||||
|
|
||||||
|
ws_url = traverse_obj(embedded_data, (
|
||||||
|
'site', 'relive', 'webSocketUrl', {url_or_none}, {require('websocket URL')}))
|
||||||
|
ws_url = update_url_query(ws_url, {'frontend_id': frontend_id})
|
||||||
ws = self._request_webpage(
|
ws = self._request_webpage(
|
||||||
Request(ws_url, headers={'Origin': f'https://{hostname}'}),
|
ws_url, video_id, 'Connecting to WebSocket server',
|
||||||
video_id=video_id, note='Connecting to WebSocket server')
|
headers={'Origin': 'https://live.nicovideo.jp'})
|
||||||
|
|
||||||
self.write_debug('Sending HLS server request')
|
self.write_debug('Sending HLS server request')
|
||||||
ws.send(json.dumps({
|
ws.send(json.dumps({
|
||||||
'type': 'startWatching',
|
|
||||||
'data': {
|
'data': {
|
||||||
|
'reconnect': False,
|
||||||
|
'room': {
|
||||||
|
'commentable': True,
|
||||||
|
'protocol': 'webSocket',
|
||||||
|
},
|
||||||
'stream': {
|
'stream': {
|
||||||
'quality': 'abr',
|
|
||||||
'protocol': 'hls',
|
|
||||||
'latency': 'high',
|
|
||||||
'accessRightMethod': 'single_cookie',
|
'accessRightMethod': 'single_cookie',
|
||||||
'chasePlay': False,
|
'chasePlay': False,
|
||||||
|
'latency': 'high',
|
||||||
|
'protocol': 'hls',
|
||||||
|
'quality': 'abr',
|
||||||
},
|
},
|
||||||
'room': {
|
|
||||||
'protocol': 'webSocket',
|
|
||||||
'commentable': True,
|
|
||||||
},
|
|
||||||
'reconnect': False,
|
|
||||||
},
|
},
|
||||||
|
'type': 'startWatching',
|
||||||
}))
|
}))
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@ -860,17 +860,15 @@ def _real_extract(self, url):
|
|||||||
raise ExtractorError('Disconnected at middle of extraction')
|
raise ExtractorError('Disconnected at middle of extraction')
|
||||||
elif data.get('type') == 'error':
|
elif data.get('type') == 'error':
|
||||||
self.write_debug(recv)
|
self.write_debug(recv)
|
||||||
message = traverse_obj(data, ('body', 'code')) or recv
|
message = traverse_obj(data, ('body', 'code', {str_or_none}), default=recv)
|
||||||
raise ExtractorError(message)
|
raise ExtractorError(message)
|
||||||
elif self.get_param('verbose', False):
|
elif self.get_param('verbose', False):
|
||||||
if len(recv) > 100:
|
self.write_debug(f'Server response: {truncate_string(recv, 100)}')
|
||||||
recv = recv[:100] + '...'
|
|
||||||
self.write_debug(f'Server said: {recv}')
|
|
||||||
|
|
||||||
title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
|
title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta(
|
||||||
('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
|
('og:title', 'twitter:title'), webpage, 'live title', fatal=False)
|
||||||
|
|
||||||
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail')) or {}
|
raw_thumbs = traverse_obj(embedded_data, ('program', 'thumbnail', {dict})) or {}
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for name, value in raw_thumbs.items():
|
for name, value in raw_thumbs.items():
|
||||||
if not isinstance(value, dict):
|
if not isinstance(value, dict):
|
||||||
@ -897,31 +895,30 @@ def _real_extract(self, url):
|
|||||||
cookie['domain'], cookie['name'], cookie['value'],
|
cookie['domain'], cookie['name'], cookie['value'],
|
||||||
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
|
expire_time=unified_timestamp(cookie.get('expires')), path=cookie['path'], secure=cookie['secure'])
|
||||||
|
|
||||||
fmt_common = {
|
|
||||||
'live_latency': 'high',
|
|
||||||
'origin': hostname,
|
|
||||||
'protocol': 'niconico_live',
|
|
||||||
'video_id': video_id,
|
|
||||||
'ws': ws,
|
|
||||||
}
|
|
||||||
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
|
q_iter = (q for q in qualities[1:] if not q.startswith('audio_')) # ignore initial 'abr'
|
||||||
a_map = {96: 'audio_low', 192: 'audio_high'}
|
a_map = {96: 'audio_low', 192: 'audio_high'}
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True)
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
|
fmt['protocol'] = 'niconico_live'
|
||||||
if fmt.get('acodec') == 'none':
|
if fmt.get('acodec') == 'none':
|
||||||
fmt['format_id'] = next(q_iter, fmt['format_id'])
|
fmt['format_id'] = next(q_iter, fmt['format_id'])
|
||||||
elif fmt.get('vcodec') == 'none':
|
elif fmt.get('vcodec') == 'none':
|
||||||
abr = parse_bitrate(fmt['url'].lower())
|
abr = parse_bitrate(fmt['url'].lower())
|
||||||
fmt.update({
|
fmt.update({
|
||||||
'abr': abr,
|
'abr': abr,
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
'format_id': a_map.get(abr, fmt['format_id']),
|
'format_id': a_map.get(abr, fmt['format_id']),
|
||||||
})
|
})
|
||||||
fmt.update(fmt_common)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'downloader_options': {
|
||||||
|
'max_quality': traverse_obj(embedded_data, ('program', 'stream', 'maxQuality', {str})) or 'normal',
|
||||||
|
'ws': ws,
|
||||||
|
'ws_url': ws_url,
|
||||||
|
},
|
||||||
**traverse_obj(embedded_data, {
|
**traverse_obj(embedded_data, {
|
||||||
'view_count': ('program', 'statistics', 'watchCount'),
|
'view_count': ('program', 'statistics', 'watchCount'),
|
||||||
'comment_count': ('program', 'statistics', 'commentCount'),
|
'comment_count': ('program', 'statistics', 'commentCount'),
|
||||||
|
Loading…
Reference in New Issue
Block a user