Mirror of https://github.com/yt-dlp/yt-dlp.git, synced 2026-02-07 22:47:24 +00:00.
Merge branch 'yt-dlp:master' into ciscolive
This commit is contained in:
@@ -495,8 +495,6 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
|
||||
class BrightcoveNewBaseIE(AdobePassIE):
|
||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||
title = json_data['name'].strip()
|
||||
|
||||
formats, subtitles = [], {}
|
||||
sources = json_data.get('sources') or []
|
||||
for source in sources:
|
||||
@@ -600,16 +598,18 @@ class BrightcoveNewBaseIE(AdobePassIE):
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': clean_html(json_data.get('description')),
|
||||
'thumbnails': thumbnails,
|
||||
'duration': duration,
|
||||
'timestamp': parse_iso8601(json_data.get('published_at')),
|
||||
'uploader_id': json_data.get('account_id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'tags': json_data.get('tags', []),
|
||||
'is_live': is_live,
|
||||
**traverse_obj(json_data, {
|
||||
'title': ('name', {clean_html}),
|
||||
'description': ('description', {clean_html}),
|
||||
'tags': ('tags', ..., {str}, filter, all, filter),
|
||||
'timestamp': ('published_at', {parse_iso8601}),
|
||||
'uploader_id': ('account_id', {str}),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
@@ -645,10 +645,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
'uploader_id': '4036320279001',
|
||||
'formats': 'mincount:39',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# playlist stream
|
||||
'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
|
||||
@@ -709,7 +706,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'TGD_01-032_5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'tags': [],
|
||||
'timestamp': 1646078943,
|
||||
'uploader_id': '1569565978001',
|
||||
'upload_date': '20220228',
|
||||
@@ -721,7 +717,6 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'TGD 01-087 (Airs 05.25.22)_Segment 5',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'tags': [],
|
||||
'timestamp': 1651604591,
|
||||
'uploader_id': '1569565978001',
|
||||
'upload_date': '20220503',
|
||||
|
||||
@@ -101,6 +101,7 @@ from ..utils import (
|
||||
xpath_with_ns,
|
||||
)
|
||||
from ..utils._utils import _request_dump_filename
|
||||
from ..utils.jslib import devalue
|
||||
|
||||
|
||||
class InfoExtractor:
|
||||
@@ -1795,6 +1796,63 @@ class InfoExtractor:
|
||||
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
|
||||
return traverse_obj(ret, traverse) or {}
|
||||
|
||||
def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
    """Resolves Nuxt rich JSON payload arrays

    Drains ``devalue.parse_iter`` over ``array``, applying the Nuxt-specific
    revivers; recoverable errors yielded by the parser are raised, warned or
    logged according to the ``fatal``/``default`` policy.

    @param array     the JSON-decoded __NUXT_DATA__ payload (non-empty list)
    @param fatal     raise ExtractorError on any resolution problem
    @param default   value returned on failure/empty result; passing a default
                     implies non-fatal operation
    """
    # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
    # https://github.com/nuxt/nuxt/pull/19205
    # A caller-supplied default implies best-effort (non-fatal) resolution
    if default is not NO_DEFAULT:
        fatal = False

    if not isinstance(array, list) or not array:
        error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
        if fatal:
            raise ExtractorError(error_msg, video_id=video_id)
        elif default is NO_DEFAULT:
            # no default given: non-fatal callers still get a visible warning
            self.report_warning(error_msg, video_id=video_id)
        return {} if default is NO_DEFAULT else default

    # *Ref/*Reactive wrappers store their payload directly (pass-through);
    # Empty*Ref wrappers store it as a JSON-encoded string (decode it)
    def indirect_reviver(data):
        return data

    def json_reviver(data):
        return json.loads(data)

    gen = devalue.parse_iter(array, revivers={
        'NuxtError': indirect_reviver,
        'EmptyShallowRef': json_reviver,
        'EmptyRef': json_reviver,
        'ShallowRef': indirect_reviver,
        'ShallowReactive': indirect_reviver,
        'Ref': indirect_reviver,
        'Reactive': indirect_reviver,
    })

    # parse_iter is a generator that yields recoverable errors and delivers
    # the final result via StopIteration.value; surface each yielded error
    # per the fatal/default policy, then return the resolved value
    while True:
        try:
            error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}'
            if fatal:
                raise ExtractorError(error_msg, video_id=video_id)
            elif default is NO_DEFAULT:
                self.report_warning(error_msg, video_id=video_id, only_once=True)
            else:
                self.write_debug(f'{video_id}: {error_msg}', only_once=True)
        except StopIteration as error:
            return error.value or ({} if default is NO_DEFAULT else default)
|
||||
|
||||
def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
    """Parses metadata from Nuxt rich JSON payloads embedded in HTML"""
    has_default = default is not NO_DEFAULT

    # Locate the flattened devalue payload in the __NUXT_DATA__ script tag;
    # when the caller supplied a default, search non-fatally (default=None)
    payload = self._search_json(
        r'<script\b[^>]+\bid="__NUXT_DATA__"[^>]*>', webpage,
        'Nuxt JSON data', video_id, contains_pattern=r'\[(?s:.+)\]',
        fatal=fatal, default=None if has_default else NO_DEFAULT)
    if not payload:
        return {} if not has_default else default

    # Resolve index references and revive wrapper types
    return self._resolve_nuxt_array(payload, video_id, fatal=fatal, default=default)
|
||||
|
||||
@staticmethod
|
||||
def _hidden_inputs(html):
|
||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||
|
||||
@@ -1,32 +1,66 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json, traverse_obj
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class MonsterSirenHypergryphMusicIE(InfoExtractor):
    # Extractor for the Monster Siren (塞壬唱片) music site; metadata comes
    # from the site's JSON API, with album details fetched separately.
    IE_NAME = 'monstersiren'
    IE_DESC = '塞壬唱片'
    _API_BASE = 'https://monster-siren.hypergryph.com/api'
    _VALID_URL = r'https?://monster-siren\.hypergryph\.com/music/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://monster-siren.hypergryph.com/music/514562',
        'info_dict': {
            'id': '514562',
            'ext': 'wav',
            # NOTE(review): 'artists' and 'album' appear twice in this dict —
            # looks like a diff-merge artifact; the later occurrence silently
            # wins at runtime. Confirm against the intended revision.
            'artists': ['塞壬唱片-MSR'],
            'album': 'Flame Shadow',
            'title': 'Flame Shadow',
            'album': 'Flame Shadow',
            'artists': ['塞壬唱片-MSR'],
            'description': 'md5:19e2acfcd1b65b41b29e8079ab948053',
            'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg',
        },
    }, {
        'url': 'https://monster-siren.hypergryph.com/music/514518',
        'info_dict': {
            'id': '514518',
            'ext': 'wav',
            'title': 'Heavenly Me (Instrumental)',
            'album': 'Heavenly Me',
            'artists': ['塞壬唱片-MSR', 'AIYUE blessed : 理名'],
            'description': 'md5:ce790b41c932d1ad72eb791d1d8ae598',
            'thumbnail': r're:https?://web\.hycdn\.cn/siren/pic/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        audio_id = self._match_id(url)
        # NOTE(review): the webpage/json_data lookups below appear to be
        # leftovers from an older revision merged into this one — every value
        # they produce is overridden by the API-derived spreads in the return
        # dict whenever the API provides the corresponding key. Verify whether
        # they should be removed entirely.
        webpage = self._download_webpage(url, audio_id)
        json_data = self._search_json(
            r'window\.g_initialProps\s*=', webpage, 'data', audio_id, transform_source=js_to_json)
        # Song metadata from the JSON API; non-zero 'code' signals an API error
        song = self._download_json(f'{self._API_BASE}/song/{audio_id}', audio_id)
        if traverse_obj(song, 'code') != 0:
            msg = traverse_obj(song, ('msg', {str}, filter))
            raise ExtractorError(
                msg or 'API returned an error response', expected=bool(msg))

        # Album details (name/description/cover) live behind a second,
        # best-effort API call keyed by the song's album CID
        album = None
        if album_id := traverse_obj(song, ('data', 'albumCid', {str})):
            album = self._download_json(
                f'{self._API_BASE}/album/{album_id}/detail', album_id, fatal=False)

        return {
            'id': audio_id,
            'title': traverse_obj(json_data, ('player', 'songDetail', 'name')),
            'url': traverse_obj(json_data, ('player', 'songDetail', 'sourceUrl')),
            'ext': 'wav',
            'vcodec': 'none',
            'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)),
            'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')),
            # API data takes precedence over the webpage-derived values above
            **traverse_obj(song, ('data', {
                'title': ('name', {str}),
                'artists': ('artists', ..., {str}),
                # lyricUrl becomes a single subtitle track (language fixed to 'en')
                'subtitles': ({'url': 'lyricUrl'}, all, {subs_list_to_dict(lang='en')}),
                'url': ('sourceUrl', {url_or_none}),
            })),
            **traverse_obj(album, ('data', {
                'album': ('name', {str}),
                'description': ('intro', {clean_html}),
                'thumbnail': ('coverUrl', {url_or_none}),
            })),
        }
|
||||
|
||||
1
yt_dlp/utils/jslib/__init__.py
Normal file
1
yt_dlp/utils/jslib/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Utility functions for handling web input based on commonly used JavaScript libraries
|
||||
167
yt_dlp/utils/jslib/devalue.py
Normal file
167
yt_dlp/utils/jslib/devalue.py
Normal file
@@ -0,0 +1,167 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import array
|
||||
import base64
|
||||
import datetime as dt
|
||||
import math
|
||||
import re
|
||||
|
||||
from .._utils import parse_iso8601
|
||||
|
||||
TYPE_CHECKING = False
|
||||
if TYPE_CHECKING:
|
||||
import collections.abc
|
||||
import typing
|
||||
|
||||
T = typing.TypeVar('T')
|
||||
|
||||
|
||||
_ARRAY_TYPE_LOOKUP = {
|
||||
'Int8Array': 'b',
|
||||
'Uint8Array': 'B',
|
||||
'Uint8ClampedArray': 'B',
|
||||
'Int16Array': 'h',
|
||||
'Uint16Array': 'H',
|
||||
'Int32Array': 'i',
|
||||
'Uint32Array': 'I',
|
||||
'Float32Array': 'f',
|
||||
'Float64Array': 'd',
|
||||
'BigInt64Array': 'l',
|
||||
'BigUint64Array': 'L',
|
||||
'ArrayBuffer': 'B',
|
||||
}
|
||||
|
||||
|
||||
def parse_iter(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Callable[[list], typing.Any]] | None = None):
    """Resolve a devalue-flattened payload into native Python values.

    This is a generator: each recoverable decoding problem is *yielded* as an
    Exception instance (the caller decides whether to raise, warn or ignore),
    and the fully resolved root value is delivered via ``StopIteration.value``.

    @param parsed    JSON-decoded devalue payload: either a bare negative
                     integer (a sentinel constant, see below) or a non-empty
                     flat list whose entries reference each other by index
    @param revivers  optional mapping of custom type tag -> callable applied
                     to the resolved payload of ``[tag, index]`` entries
    """
    # based on https://github.com/Rich-Harris/devalue/blob/f3fd2aa93d79f21746555671f955a897335edb1b/src/parse.js
    # Sentinel indices defined by the devalue format (per the reference above):
    # -1 undefined, -2 hole, then NaN/+inf/-inf/-0.0. Also serves as the
    # memo of already-resolved list indices (index -> resolved value).
    resolved = {
        -1: None,
        -2: None,
        -3: math.nan,
        -4: math.inf,
        -5: -math.inf,
        -6: -0.0,
    }

    if isinstance(parsed, int) and not isinstance(parsed, bool):
        # -2 (hole) is not a valid standalone root value
        if parsed not in resolved or parsed == -2:
            raise ValueError('invalid integer input')
        return resolved[parsed]
    elif not isinstance(parsed, list):
        raise ValueError('expected int or list as input')
    elif not parsed:
        raise ValueError('expected a non-empty list as input')

    if revivers is None:
        revivers = {}
    # One-slot container so the root (index 0) can be filled in by the loop
    return_value = [None]
    # Work stack of (container, key-or-offset, source) where `source` is either
    # an int index into `parsed`, or a (tag, index, reviver) tuple scheduled to
    # run after the referenced value has been resolved
    stack: list[tuple] = [(return_value, 0, 0)]

    while stack:
        target, index, source = stack.pop()
        if isinstance(source, tuple):
            # Deferred reviver step: target[index] already holds the resolved
            # payload; replace it with the revived value (memoized)
            name, source, reviver = source
            try:
                resolved[source] = target[index] = reviver(target[index])
            except Exception as error:
                yield TypeError(f'failed to parse {source} as {name!r}: {error}')
                resolved[source] = target[index] = None
            continue

        if source in resolved:
            # Sentinel or already-resolved index: reuse the memoized value
            target[index] = resolved[source]
            continue

        # guard against Python negative indexing
        if source < 0:
            yield IndexError(f'invalid index: {source!r}')
            continue

        try:
            value = parsed[source]
        except IndexError as error:
            yield error
            continue

        if isinstance(value, list):
            if value and isinstance(value[0], str):
                # Tagged entry: [tag, payload-index, ...]
                # TODO: implement zips `strict=True`
                if reviver := revivers.get(value[0]):
                    if value[1] == source:
                        # XXX: avoid infinite loop
                        yield IndexError(f'{value[0]!r} cannot point to itself (index: {source})')
                        continue
                    # inverse order: resolve index, revive value
                    stack.append((target, index, (value[0], value[1], reviver)))
                    stack.append((target, index, value[1]))
                    continue

                elif value[0] == 'Date':
                    try:
                        result = dt.datetime.fromtimestamp(parse_iso8601(value[1]), tz=dt.timezone.utc)
                    except Exception:
                        yield ValueError(f'invalid date: {value[1]!r}')
                        result = None

                elif value[0] == 'Set':
                    # JS Set is represented as a plain list of its members
                    result = [None] * (len(value) - 1)
                    for offset, new_source in enumerate(value[1:]):
                        stack.append((result, offset, new_source))

                elif value[0] == 'Map':
                    # JS Map is represented as a list of [key, value] pairs;
                    # entries alternate key-index, value-index in the payload
                    result = []
                    for key, new_source in zip(*(iter(value[1:]),) * 2):
                        pair = [None, None]
                        stack.append((pair, 0, key))
                        stack.append((pair, 1, new_source))
                        result.append(pair)

                elif value[0] == 'RegExp':
                    # XXX: use jsinterp to translate regex flags
                    # currently ignores `value[2]`
                    result = re.compile(value[1])

                elif value[0] == 'Object':
                    # Boxed primitive wrapper: payload is stored inline, not by index
                    result = value[1]

                elif value[0] == 'BigInt':
                    result = int(value[1])

                elif value[0] == 'null':
                    # null-prototype object: alternating key, value-index pairs -> dict
                    result = {}
                    for key, new_source in zip(*(iter(value[1:]),) * 2):
                        stack.append((result, key, new_source))

                elif value[0] in _ARRAY_TYPE_LOOKUP:
                    # TypedArray/ArrayBuffer: base64 payload decoded via `array`
                    typecode = _ARRAY_TYPE_LOOKUP[value[0]]
                    data = base64.b64decode(value[1])
                    result = array.array(typecode, data).tolist()

                else:
                    yield TypeError(f'invalid type at {source}: {value[0]!r}')
                    result = None
            else:
                # Plain JS array: each element is an index to resolve
                result = len(value) * [None]
                for offset, new_source in enumerate(value):
                    stack.append((result, offset, new_source))

        elif isinstance(value, dict):
            # Plain JS object: values are indices to resolve
            result = {}
            for key, new_source in value.items():
                stack.append((result, key, new_source))

        else:
            # Primitive (str/int/float/bool/None): used as-is
            result = value

        # Memoize so later references to the same index reuse this value
        target[index] = resolved[source] = result

    return return_value[0]
|
||||
|
||||
|
||||
def parse(parsed: typing.Any, /, *, revivers: dict[str, collections.abc.Callable[[typing.Any], typing.Any]] | None = None):
|
||||
generator = parse_iter(parsed, revivers=revivers)
|
||||
while True:
|
||||
try:
|
||||
raise generator.send(None)
|
||||
except StopIteration as error:
|
||||
return error.value
|
||||
Reference in New Issue
Block a user