1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-27 17:08:32 +00:00
This commit is contained in:
doe1080 2025-06-27 11:10:59 +09:00 committed by GitHub
commit cb402776ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3,7 +3,6 @@
import itertools import itertools
import json import json
import re import re
import time
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
@ -16,12 +15,12 @@
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_bitrate, parse_bitrate,
parse_duration,
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
parse_resolution, parse_resolution,
qualities, qualities,
str_or_none, str_or_none,
time_seconds,
truncate_string, truncate_string,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -38,8 +37,14 @@
class NiconicoBaseIE(InfoExtractor): class NiconicoBaseIE(InfoExtractor):
_API_BASE = 'https://nvapi.nicovideo.jp'
_BASE_URL = 'https://www.nicovideo.jp'
_GEO_BYPASS = False _GEO_BYPASS = False
_GEO_COUNTRIES = ['JP'] _GEO_COUNTRIES = ['JP']
_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0',
}
_LOGIN_BASE = 'https://account.nicovideo.jp' _LOGIN_BASE = 'https://account.nicovideo.jp'
_NETRC_MACHINE = 'niconico' _NETRC_MACHINE = 'niconico'
@ -99,6 +104,42 @@ class NiconicoIE(NiconicoBaseIE):
IE_NAME = 'niconico' IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画' IE_DESC = 'ニコニコ動画'
_VALID_URL = r'https?://(?:(?:embed|sp|www)\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?\d+)'
_ERROR_MAP = {
'FORBIDDEN': {
'ADMINISTRATOR_DELETE_VIDEO': 'Video unavailable, possibly removed by admins',
'CHANNEL_MEMBER_ONLY': 'Channel members only',
'DELETED_CHANNEL_VIDEO': 'Video unavailable, channel was closed',
'DELETED_COMMUNITY_VIDEO': 'Video unavailable, community deleted or missing',
'DEFAULT': 'Page unavailable, check the URL',
'HARMFUL_VIDEO': 'Sensitive content, login required',
'HIDDEN_VIDEO': 'Video unavailable, set to private',
'NOT_ALLOWED': 'No parmission',
'PPV_VIDEO': 'PPV video, payment information required',
'PREMIUM_ONLY': 'Premium members only',
},
'INVALID_PARAMETER': {
'DEFAULT': 'Video unavailable, may not exist or was deleted',
},
'MAINTENANCE': {
'DEFAULT': 'Maintenance is in progress',
},
'NOT_FOUND': {
'DEFAULT': 'Video unavailable, may not exist or was deleted',
'RIGHT_HOLDER_DELETE_VIDEO': 'Removed by rights-holder request',
},
'UNAUTHORIZED': {
'DEFAULT': 'Invalid session, re-login required',
},
'UNKNOWN': {
'DEFAULT': 'Failed to fetch content',
},
}
_STATUS_MAP = {
'needs_auth': 'PPV video, payment information required',
'premium_only': 'Premium members only',
'subscriber_only': 'Channel members only',
}
_TESTS = [{ _TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215', 'url': 'http://www.nicovideo.jp/watch/sm22312215',
'info_dict': { 'info_dict': {
@ -118,6 +159,7 @@ class NiconicoIE(NiconicoBaseIE):
'tags': [], 'tags': [],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'temporarily skip to keep the diff small',
}, { }, {
# File downloaded with and without credentials are different, so omit # File downloaded with and without credentials are different, so omit
# the md5 field # the md5 field
@ -139,6 +181,7 @@ class NiconicoIE(NiconicoBaseIE):
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'], 'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'temporarily skip to keep the diff small',
}, { }, {
# 'video exists but is marked as "deleted" # 'video exists but is marked as "deleted"
# md5 is unstable # md5 is unstable
@ -189,6 +232,7 @@ class NiconicoIE(NiconicoBaseIE):
'tags': [], 'tags': [],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'temporarily skip to keep the diff small',
}, { }, {
# "New" HTML5 video # "New" HTML5 video
'url': 'http://www.nicovideo.jp/watch/sm31464864', 'url': 'http://www.nicovideo.jp/watch/sm31464864',
@ -209,6 +253,7 @@ class NiconicoIE(NiconicoBaseIE):
'tags': [], 'tags': [],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'temporarily skip to keep the diff small',
}, { }, {
# Video without owner # Video without owner
'url': 'http://www.nicovideo.jp/watch/sm18238488', 'url': 'http://www.nicovideo.jp/watch/sm18238488',
@ -227,6 +272,7 @@ class NiconicoIE(NiconicoBaseIE):
'tags': [], 'tags': [],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'temporarily skip to keep the diff small',
}, { }, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
'only_matching': True, 'only_matching': True,
@ -236,9 +282,7 @@ class NiconicoIE(NiconicoBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)' def _extract_formats(self, api_data, video_id):
def _yield_dms_formats(self, api_data, video_id):
fmt_filter = lambda _, v: v['isAvailable'] and v['id'] fmt_filter = lambda _, v: v['isAvailable'] and v['id']
videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter)) videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter)) audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
@ -247,164 +291,137 @@ def _yield_dms_formats(self, api_data, video_id):
if not all((videos, audios, access_key, track_id)): if not all((videos, audios, access_key, track_id)):
return return
dms_m3u8_url = self._download_json( m3u8_url = self._download_json(
f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id, f'{self._API_BASE}/v1/watch/{video_id}/access-rights/hls',
data=json.dumps({ video_id, headers={
'Accept': 'application/json;charset=utf-8',
'Content-Type': 'application/json',
'X-Access-Right-Key': access_key,
'X-Request-With': self._BASE_URL,
**self._HEADERS,
}, query={
'actionTrackId': track_id,
}, data=json.dumps({
'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))), 'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))),
}).encode(), query={'actionTrackId': track_id}, headers={ }).encode(),
'x-access-right-key': access_key, )['data']['contentUrl']
'x-frontend-id': 6, raw_fmts = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
'x-frontend-version': 0,
'x-request-with': 'https://www.nicovideo.jp',
})['data']['contentUrl']
# Getting all audio formats results in duplicate video formats which we filter out later
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4')
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix formats = []
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'): for a_fmt in traverse_obj(raw_fmts, lambda _, v: v['vcodec'] == 'none'):
yield { formats.append({
**audio_fmt, **a_fmt,
**traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), { **traverse_obj(audios, (lambda _, v: a_fmt['format_id'].startswith(v['id']), {
'format_id': ('id', {str}),
'abr': ('bitRate', {float_or_none(scale=1000)}), 'abr': ('bitRate', {float_or_none(scale=1000)}),
'asr': ('samplingRate', {int_or_none}), 'asr': ('samplingRate', {int_or_none}),
'format_id': ('id', {str}),
'quality': ('qualityLevel', {int_or_none}), 'quality': ('qualityLevel', {int_or_none}),
}), get_all=False), }, any)),
'acodec': 'aac', 'acodec': 'aac',
} })
# Sort before removing dupes to keep the format dicts with the lowest tbr # Sort first, keeping the lowest-tbr formats
video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr']) v_fmts = sorted((fmt for fmt in raw_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
self._remove_duplicate_formats(video_fmts) self._remove_duplicate_formats(v_fmts)
# Calculate the true vbr/tbr by subtracting the lowest abr # Calculate the true vbr/tbr by subtracting the lowest abr
min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000 min_abr = traverse_obj(audios, (
for video_fmt in video_fmts: ..., 'bitRate', {float_or_none(scale=1000)}, all, {min}), default=0)
video_fmt['tbr'] -= min_abr for v_fmt in v_fmts:
video_fmt['format_id'] = url_basename(video_fmt['url']).rpartition('.')[0] v_fmt['format_id'] = url_basename(v_fmt['url']).rpartition('.')[0]
video_fmt['quality'] = traverse_obj(videos, ( v_fmt['quality'] = traverse_obj(videos, (
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1 lambda _, v: v['id'] == v_fmt['format_id'], 'qualityLevel', {int_or_none}, any), default=-1)
yield video_fmt v_fmt['tbr'] -= min_abr
formats.extend(v_fmts)
def _extract_server_response(self, webpage, video_id, fatal=True): return formats
try:
return traverse_obj(
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
('data', 'response', {dict}, {require('server response')}))
except ExtractorError:
if not fatal:
return {}
raise
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
try: path = 'v3' if self.is_logged_in else 'v3_guest'
webpage, handle = self._download_webpage_handle( api_resp = self._download_json(
f'https://www.nicovideo.jp/watch/{video_id}', video_id, f'{self._BASE_URL}/api/watch/{path}/{video_id}', video_id,
headers=self.geo_verification_headers()) 'Downloading API JSON', 'Unable to fetch data', headers={
if video_id.startswith('so'): **self._HEADERS,
video_id = self._match_id(handle.url) **self.geo_verification_headers(),
}, query={
'actionTrackId': f'AAAAAAAAAA_{round(time_seconds() * 1000)}',
}, expected_status=[400, 404])
api_data = self._extract_server_response(webpage, video_id) api_data = api_resp['data']
except ExtractorError as e: release_timestamp = traverse_obj(api_data, ('publishScheduledAt', {parse_iso8601}))
try:
api_data = self._download_json(
f'https://www.nicovideo.jp/api/watch/v3/{video_id}', video_id,
'Downloading API JSON', 'Unable to fetch data', query={
'_frontendId': '6',
'_frontendVersion': '0',
'actionTrackId': f'AAAAAAAAAA_{round(time.time() * 1000)}',
}, headers=self.geo_verification_headers())['data']
except ExtractorError:
if not isinstance(e.cause, HTTPError):
# Raise if original exception was from _parse_json or utils.traversal.require
raise
# The webpage server response has more detailed error info than the API response
webpage = e.cause.response.read().decode('utf-8', 'replace')
reason_code = self._extract_server_response(
webpage, video_id, fatal=False).get('reasonCode')
if not reason_code:
raise
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif reason_code == 'HIDDEN_VIDEO':
raise ExtractorError(
'The viewing period of this video has expired', expected=True)
elif reason_code == 'DELETED_VIDEO':
raise ExtractorError('This video has been deleted', expected=True)
raise ExtractorError(f'Niconico says: {reason_code}')
availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', { meta = api_resp['meta']
'needs_premium': ('isPremium', {bool}), if meta.get('status') != 200:
'needs_subscription': ('isAdmission', {bool}), err_code = meta['errorCode']
})) or {'needs_auth': True})) reason_code = traverse_obj(api_data, ('reasonCode', {str_or_none}))
err_msg = 'Server busy, service temporarily unavailable'
formats = list(self._yield_dms_formats(api_data, video_id)) if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif reason_code == 'HARMFUL_VIDEO' and traverse_obj(api_data, (
'viewer', 'allowSensitiveContents', {bool},
)) is False:
err_msg = 'Sensitive content, adjust display settings to watch'
elif reason_code == 'HIDDEN_VIDEO' and release_timestamp:
err_msg = f'Scheduled release, please wait. Release time: {release_timestamp}'
elif msg := traverse_obj(self._ERROR_MAP, (
err_code.upper(), (reason_code, 'DEFAULT'), {str}, any,
)):
err_msg = msg
raise ExtractorError(err_msg, expected=True)
availability = self._availability(**{
**dict.fromkeys(('is_private', 'is_unlisted'), False),
**traverse_obj(api_data, ('payment', 'video', {
'needs_auth': (('isContinuationBenefit', 'isPpv'), {bool}, any),
'needs_premium': ('isPremium', {bool}),
'needs_subscription': ('isAdmission', {bool}),
})),
})
formats = self._extract_formats(api_data, video_id)
if not formats: if not formats:
fail_msg = clean_html(self._html_search_regex( if err_msg := self._STATUS_MAP.get(availability):
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>', self.raise_login_required(err_msg, metadata_available=True)
webpage, 'fail message', default=None, group='msg'))
if fail_msg:
self.to_screen(f'Niconico said: {fail_msg}')
if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
availability = None
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
elif availability == 'premium_only':
self.raise_login_required('This video requires premium', metadata_available=True)
elif availability == 'subscriber_only':
self.raise_login_required('This video is for members only', metadata_available=True)
elif availability == 'needs_auth':
self.raise_login_required(metadata_available=False)
# Start extracting information
tags = None
if webpage:
# use og:video:tag (not logged in)
og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
if not tags:
# use keywords and split with comma (not logged in)
kwds = self._html_search_meta('keywords', webpage, default=None)
if kwds:
tags = [x for x in kwds.split(',') if x]
if not tags:
# find in json (logged in)
tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp']) thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
def get_video_info(*items, get_first=True, **kwargs):
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
return { return {
'id': video_id,
'_api_data': api_data,
'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
'formats': formats,
'availability': availability, 'availability': availability,
'thumbnails': [{ 'display_id': video_id,
'id': key, 'formats': formats,
'url': url, 'genres': traverse_obj(api_data, ('genre', 'label', {str}, filter, all, filter)),
'ext': 'jpg', 'release_timestamp': release_timestamp,
'preference': thumb_prefs(key),
**parse_resolution(url, lenient=True),
} for key, url in (get_video_info('thumbnail') or {}).items() if url],
'description': clean_html(get_video_info('description')),
'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
'view_count': int_or_none(get_video_info('count', 'view')),
'tags': tags,
'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
'comment_count': get_video_info('count', 'comment', expected_type=int),
'duration': (
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
or get_video_info('duration')),
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
'subtitles': self.extract_subtitles(video_id, api_data), 'subtitles': self.extract_subtitles(video_id, api_data),
'tags': traverse_obj(api_data, ('tag', 'items', ..., 'name', {str}, filter, all, filter)),
'thumbnails': [{
'ext': 'jpg',
'id': key,
'preference': thumb_prefs(key),
'url': url,
**parse_resolution(url, lenient=True),
} for key, url in traverse_obj(api_data, (
'video', 'thumbnail', {dict}), default={}).items()],
**traverse_obj(api_data, (('channel', 'owner'), any, {
'channel': (('name', 'nickname'), {str}, any),
'channel_id': ('id', {str_or_none}),
'uploader': (('name', 'nickname'), {str}, any),
'uploader_id': ('id', {str_or_none}),
})),
**traverse_obj(api_data, ('video', {
'id': ('id', {str_or_none}),
'title': ('title', {str}),
'description': ('description', {clean_html}, filter),
'duration': ('duration', {int_or_none}),
'timestamp': ('registeredAt', {parse_iso8601}),
})),
**traverse_obj(api_data, ('video', 'count', {
'comment_count': ('comment', {int_or_none}),
'like_count': ('like', {int_or_none}),
'view_count': ('view', {int_or_none}),
})),
} }
def _get_subtitles(self, video_id, api_data): def _get_subtitles(self, video_id, api_data):
@ -413,21 +430,19 @@ def _get_subtitles(self, video_id, api_data):
return return
danmaku = traverse_obj(self._download_json( danmaku = traverse_obj(self._download_json(
f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({ f'{comments_info["server"]}/v1/threads', video_id,
'Downloading comments', 'Failed to download comments', headers={
'Content-Type': 'text/plain;charset=UTF-8',
'Origin': self._BASE_URL,
'Referer': f'{self._BASE_URL}/',
'X-Client-Os-Type': 'others',
**self._HEADERS,
}, data=json.dumps({
'additionals': {}, 'additionals': {},
'params': comments_info.get('params'), 'params': comments_info.get('params'),
'threadKey': comments_info.get('threadKey'), 'threadKey': comments_info.get('threadKey'),
}).encode(), fatal=False, }).encode(), fatal=False,
headers={ ), ('data', 'threads', ..., 'comments', ...))
'Referer': 'https://www.nicovideo.jp/',
'Origin': 'https://www.nicovideo.jp',
'Content-Type': 'text/plain;charset=UTF-8',
'x-client-os-type': 'others',
'x-frontend-id': '6',
'x-frontend-version': '0',
},
note='Downloading comments', errnote='Failed to download comments'),
('data', 'threads', ..., 'comments', ...))
return { return {
'comments': [{ 'comments': [{