commit cdc5d703b2
Merge branch 'yt-dlp:master' into misc-2025-07
@@ -10,9 +10,13 @@ __yt_dlp()
     diropts="--cache-dir"
 
     if [[ ${prev} =~ ${fileopts} ]]; then
+        local IFS=$'\n'
+        type compopt &>/dev/null && compopt -o filenames
         COMPREPLY=( $(compgen -f -- ${cur}) )
         return 0
     elif [[ ${prev} =~ ${diropts} ]]; then
+        local IFS=$'\n'
+        type compopt &>/dev/null && compopt -o dirnames
        COMPREPLY=( $(compgen -d -- ${cur}) )
        return 0
    fi
@@ -75,7 +75,7 @@ dev = [
 ]
 static-analysis = [
     "autopep8~=2.0",
-    "ruff~=0.11.0",
+    "ruff~=0.12.0",
 ]
 test = [
     "pytest~=8.1",
@@ -210,10 +210,12 @@ ignore = [
     "TD001", # invalid-todo-tag
     "TD002", # missing-todo-author
     "TD003", # missing-todo-link
+    "PLC0415", # import-outside-top-level
     "PLE0604", # invalid-all-object (false positives)
     "PLE0643", # potential-index-error (false positives)
     "PLW0603", # global-statement
     "PLW1510", # subprocess-run-without-check
+    "PLW1641", # eq-without-hash
     "PLW2901", # redefined-loop-name
     "RUF001", # ambiguous-unicode-character-string
     "RUF012", # mutable-class-default
@@ -36,6 +36,18 @@ def do_GET(self):
             self.send_header('Content-Type', 'text/html; charset=utf-8')
             self.end_headers()
             self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+        elif self.path == '/fake.m3u8':
+            self.send_response(200)
+            self.send_header('Content-Length', '1024')
+            self.end_headers()
+            self.wfile.write(1024 * b'\x00')
+        elif self.path == '/bipbop.m3u8':
+            with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f:
+                data = f.read()
+            self.send_response(200)
+            self.send_header('Content-Length', str(len(data)))
+            self.end_headers()
+            self.wfile.write(data)
         else:
             assert False
 
@@ -2079,5 +2091,45 @@ def test_search_nuxt_json(self):
             self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
 
 
+class TestInfoExtractorNetwork(unittest.TestCase):
+    def setUp(self, /):
+        self.httpd = http.server.HTTPServer(
+            ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+        self.port = http_server_port(self.httpd)
+
+        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+        self.server_thread.daemon = True
+        self.server_thread.start()
+
+        self.called = False
+
+        def require_warning(*args, **kwargs):
+            self.called = True
+
+        self.ydl = FakeYDL()
+        self.ydl.report_warning = require_warning
+        self.ie = DummyIE(self.ydl)
+
+    def tearDown(self, /):
+        self.ydl.close()
+        self.httpd.shutdown()
+        self.httpd.server_close()
+        self.server_thread.join(1)
+
+    def test_extract_m3u8_formats(self):
+        formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
+            f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False)
+        self.assertFalse(self.called)
+        self.assertTrue(formats)
+        self.assertTrue(subtitles)
+
+    def test_extract_m3u8_formats_warning(self):
+        formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
+            f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False)
+        self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file')
+        self.assertFalse(formats)
+        self.assertFalse(subtitles)
+
+
 if __name__ == '__main__':
     unittest.main()
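
Annotation (not part of the commit): the new test class binds its throwaway HTTP server to port 0 so the OS assigns a free ephemeral port, then serves from a daemon thread. A minimal standalone sketch of that fixture pattern, using stdlib names only (the handler here is a stand-in for InfoExtractorTestRequestHandler):

```python
import http.server
import threading

# Port 0 asks the OS for any free port; server_address[1] reveals the choice
httpd = http.server.HTTPServer(('127.0.0.1', 0), http.server.BaseHTTPRequestHandler)
port = httpd.server_address[1]

thread = threading.Thread(target=httpd.serve_forever, daemon=True)
thread.start()
print(f'test server listening on http://127.0.0.1:{port}')

httpd.shutdown()      # unblocks serve_forever()
httpd.server_close()  # releases the socket
thread.join(1)
```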
@@ -14,6 +14,7 @@
 
 from test.helper import (
     assertGreaterEqual,
+    assertLessEqual,
     expect_info_dict,
     expect_warnings,
     get_params,
@@ -121,10 +122,13 @@ def print_skipping(reason):
         params = get_params(test_case.get('params', {}))
         params['outtmpl'] = tname + '_' + params['outtmpl']
         if is_playlist and 'playlist' not in test_case:
-            params.setdefault('extract_flat', 'in_playlist')
-            params.setdefault('playlistend', test_case.get(
-                'playlist_mincount', test_case.get('playlist_count', -2) + 1))
+            params.setdefault('playlistend', max(
+                test_case.get('playlist_mincount', -1),
+                test_case.get('playlist_count', -2) + 1,
+                test_case.get('playlist_maxcount', -2) + 1))
             params.setdefault('skip_download', True)
+            if 'playlist_duration_sum' not in test_case:
+                params.setdefault('extract_flat', 'in_playlist')
 
         ydl = YoutubeDL(params, auto_init=False)
         ydl.add_default_info_extractors()
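
Annotation (not part of the commit): the rewritten playlistend computation downloads just enough entries to validate whichever count keys the test case declares; exact and maximum counts need one extra entry to prove the playlist does not overshoot. A worked example under assumed test-case values:

```python
# Hypothetical test case declaring a minimum and a maximum entry count
test_case = {'playlist_mincount': 5, 'playlist_maxcount': 10}

playlistend = max(
    test_case.get('playlist_mincount', -1),
    test_case.get('playlist_count', -2) + 1,     # +1 entry disproves an exact count
    test_case.get('playlist_maxcount', -2) + 1)  # +1 entry proves the maximum exceeded

assert playlistend == 11  # fetch 11 entries: enough to check both bounds
```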
@@ -159,6 +163,7 @@ def try_rm_tcs_files(tcs=None):
                 try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
         try_rm_tcs_files()
         try:
+            test_url = test_case['url']
             try_num = 1
             while True:
                 try:
@@ -166,7 +171,7 @@ def try_rm_tcs_files(tcs=None):
                     # for outside error handling, and returns the exit code
                     # instead of the result dict.
                     res_dict = ydl.extract_info(
-                        test_case['url'],
+                        test_url,
                         force_generic_extractor=params.get('force_generic_extractor', False))
                 except (DownloadError, ExtractorError) as err:
                     # Check if the exception is not a network related one
@@ -194,23 +199,23 @@ def try_rm_tcs_files(tcs=None):
             self.assertTrue('entries' in res_dict)
             expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
 
+            num_entries = len(res_dict.get('entries', []))
             if 'playlist_mincount' in test_case:
+                mincount = test_case['playlist_mincount']
                 assertGreaterEqual(
-                    self,
-                    len(res_dict['entries']),
-                    test_case['playlist_mincount'],
-                    'Expected at least %d in playlist %s, but got only %d' % (
-                        test_case['playlist_mincount'], test_case['url'],
-                        len(res_dict['entries'])))
+                    self, num_entries, mincount,
+                    f'Expected at least {mincount} entries in playlist {test_url}, but got only {num_entries}')
             if 'playlist_count' in test_case:
+                count = test_case['playlist_count']
+                got = num_entries if num_entries <= count else 'more'
                 self.assertEqual(
-                    len(res_dict['entries']),
-                    test_case['playlist_count'],
-                    'Expected %d entries in playlist %s, but got %d.' % (
-                        test_case['playlist_count'],
-                        test_case['url'],
-                        len(res_dict['entries']),
-                    ))
+                    num_entries, count,
+                    f'Expected exactly {count} entries in playlist {test_url}, but got {got}')
+            if 'playlist_maxcount' in test_case:
+                maxcount = test_case['playlist_maxcount']
+                assertLessEqual(
+                    self, num_entries, maxcount,
+                    f'Expected at most {maxcount} entries in playlist {test_url}, but got more')
             if 'playlist_duration_sum' in test_case:
                 got_duration = sum(e['duration'] for e in res_dict['entries'])
                 self.assertEqual(
@@ -490,6 +490,57 @@ def test_increment_decrement(self):
         self._test('function f() { var a = "test--"; return a; }', 'test--')
         self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--')
 
+    def test_nested_function_scoping(self):
+        self._test(R'''
+            function f() {
+                var g = function() {
+                    var P = 2;
+                    return P;
+                };
+                var P = 1;
+                g();
+                return P;
+            }
+        ''', 1)
+        self._test(R'''
+            function f() {
+                var x = function() {
+                    for (var w = 1, M = []; w < 2; w++) switch (w) {
+                        case 1:
+                            M.push("a");
+                        case 2:
+                            M.push("b");
+                    }
+                    return M
+                };
+                var w = "c";
+                var M = "d";
+                var y = x();
+                y.push(w);
+                y.push(M);
+                return y;
+            }
+        ''', ['a', 'b', 'c', 'd'])
+        self._test(R'''
+            function f() {
+                var P, Q;
+                var z = 100;
+                var g = function() {
+                    var P, Q; P = 2; Q = 15;
+                    z = 0;
+                    return P+Q;
+                };
+                P = 1; Q = 10;
+                var x = g(), y = 3;
+                return P+Q+x+y+z;
+            }
+        ''', 31)
+
+    def test_undefined_varnames(self):
+        jsi = JSInterpreter('function f(){ var a; return [a, b]; }')
+        self._test(jsi, [JS_Undefined, JS_Undefined])
+        self.assertEqual(jsi._undefined_varnames, {'b'})
+
+
 if __name__ == '__main__':
     unittest.main()
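
Annotation (not part of the commit): the scoping tests can be reproduced outside the suite. Assuming yt_dlp is importable, the first case reduces to checking that a var declared inside an inner function shadows, rather than overwrites, the outer binding:

```python
from yt_dlp.jsinterp import JSInterpreter

jsi = JSInterpreter('''
function f() {
    var g = function() { var P = 2; return P; };
    var P = 1;
    g();
    return P;
}
''')
# g() must not clobber the outer P
assert jsi.call_function('f') == 1
```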
@@ -22,7 +22,6 @@
 import tempfile
 import threading
 import time
-import urllib.error
 import urllib.request
 import warnings
 import zlib
@@ -223,10 +222,7 @@ def do_GET(self):
                 if encoding == 'br' and brotli:
                     payload = brotli.compress(payload)
                 elif encoding == 'gzip':
-                    buf = io.BytesIO()
-                    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
-                        f.write(payload)
-                    payload = buf.getvalue()
+                    payload = gzip.compress(payload, mtime=0)
                 elif encoding == 'deflate':
                     payload = zlib.compress(payload)
                 elif encoding == 'unsupported':
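
Annotation (not part of the commit): gzip.compress builds the header and trailer itself, and its mtime parameter (Python 3.8+) zeroes the header timestamp, so the one-liner is reproducible across runs where the old GzipFile dance embedded the current time. A quick equivalence check:

```python
import gzip
import io

payload = b'<html><video src="/vid.mp4" /></html>'

buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as f:  # old approach
    f.write(payload)
old_way = buf.getvalue()

new_way = gzip.compress(payload, mtime=0)  # new approach, deterministic header

assert gzip.decompress(old_way) == gzip.decompress(new_way) == payload
```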
@@ -729,6 +725,17 @@ def test_keep_header_casing(self, handler):
 
         assert 'X-test-heaDer: test' in res
 
+    def test_partial_read_then_full_read(self, handler):
+        with handler() as rh:
+            for encoding in ('', 'gzip', 'deflate'):
+                res = validate_and_send(rh, Request(
+                    f'http://127.0.0.1:{self.http_port}/content-encoding',
+                    headers={'ytdl-encoding': encoding}))
+                assert res.headers.get('Content-Encoding') == encoding
+                assert res.read(6) == b'<html>'
+                assert res.read(0) == b''
+                assert res.read() == b'<video src="/vid.mp4" /></html>'
+
 
 @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
 class TestClientCertificate:
@@ -333,6 +333,50 @@
         'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
         'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
     ),
+    (
+        'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
+    ),
+    (
+        'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
+    ),
+    (
+        'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
+    ),
+    (
+        'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
+    ),
+    (
+        'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
+    ),
+    (
+        'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
+    ),
+    (
+        'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
+    ),
+    (
+        'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
+    ),
+    (
+        'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
+    ),
+    (
+        'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
+        'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
+    ),
+    (
+        'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js',
+        'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA',
+    ),
 ]
 
 
@@ -435,7 +435,7 @@ def sub_bytes_inv(data):
 
 
 def rotate(data):
-    return data[1:] + [data[0]]
+    return [*data[1:], data[0]]
 
 
 def key_schedule_core(data, rcon_iteration):
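
Annotation (not part of the commit): the unpacking form builds a new list for any input sequence, whereas slice concatenation requires the left operand to already be a list. A two-line demonstration:

```python
def rotate(data):
    return [*data[1:], data[0]]

assert rotate([1, 2, 3, 4]) == [2, 3, 4, 1]
assert rotate((1, 2, 3, 4)) == [2, 3, 4, 1]  # data[1:] + [data[0]] would raise TypeError here
```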
@@ -94,12 +94,19 @@ def real_download(self, filename, info_dict):
         can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
         if can_download:
             has_ffmpeg = FFmpegFD.available()
-            no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
-            if no_crypto and has_ffmpeg:
-                can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
-            elif no_crypto:
-                message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
-                           'Decryption will be performed natively, but will be extremely slow')
+            if not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s:
+                # Even if pycryptodomex isn't available, force HlsFD for m3u8s that won't work with ffmpeg
+                ffmpeg_can_dl = not traverse_obj(info_dict, ((
+                    'extra_param_to_segment_url', 'extra_param_to_key_url',
+                    'hls_media_playlist_data', ('hls_aes', ('uri', 'key', 'iv')),
+                ), any))
+                message = 'The stream has AES-128 encryption and {} available'.format(
+                    'neither ffmpeg nor pycryptodomex are' if ffmpeg_can_dl and not has_ffmpeg else
+                    'pycryptodomex is not')
+                if has_ffmpeg and ffmpeg_can_dl:
+                    can_download = False
+                else:
+                    message += '; decryption will be performed natively, but will be extremely slow'
         elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
             install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
             message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
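
Annotation (not part of the commit): the new branch keeps the native HlsFD selected whenever the info dict carries parameters ffmpeg's HLS handling cannot honour. A simplified sketch of that gate — the real code also reaches into the nested hls_aes uri/key/iv fields via traverse_obj:

```python
# Hypothetical info_dict: pre-resolved AES key material forces the native downloader
info_dict = {'hls_aes': {'key': '00112233445566778899aabbccddeeff'}}

FFMPEG_INCOMPATIBLE_KEYS = (
    'extra_param_to_segment_url', 'extra_param_to_key_url',
    'hls_media_playlist_data', 'hls_aes')

ffmpeg_can_dl = not any(info_dict.get(k) for k in FFMPEG_INCOMPATIBLE_KEYS)
assert not ffmpeg_can_dl  # ffmpeg is skipped; HlsFD decrypts natively (slowly)
```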
@@ -1147,6 +1147,7 @@
     MindsIE,
 )
 from .minoto import MinotoIE
+from .mir24tv import Mir24TvIE
 from .mirrativ import (
     MirrativIE,
     MirrativUserIE,
@@ -900,7 +900,9 @@ def _real_extract(self, url):
             headers=headers))
 
         geo_blocked = traverse_obj(play_info, (
-            'raw', 'data', 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
+            ('result', ('raw', 'data')), 'plugins',
+            lambda _, v: v['name'] == 'AreaLimitPanel',
+            'config', 'is_block', {bool}, any))
         premium_only = play_info.get('code') == -10403
 
         video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {}
@@ -914,7 +916,7 @@ def _real_extract(self, url):
 
         if traverse_obj(play_info, ((
             ('result', 'play_check', 'play_detail'),  # 'PLAY_PREVIEW' vs 'PLAY_WHOLE'
-            ('raw', 'data', 'play_video_type'),  # 'preview' vs 'whole'
+            (('result', ('raw', 'data')), 'play_video_type'),  # 'preview' vs 'whole' vs 'none'
         ), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
             self.report_warning(
                 'Only preview format is available, '
@@ -1,5 +1,6 @@
 import base64
 import collections
+import contextlib
 import functools
 import getpass
 import http.client
@@ -2129,21 +2130,33 @@ def _extract_m3u8_formats_and_subtitles(
                     raise ExtractorError(errnote, video_id=video_id)
                 self.report_warning(f'{errnote}{bug_reports_message()}')
             return [], {}
 
-        res = self._download_webpage_handle(
-            m3u8_url, video_id,
-            note='Downloading m3u8 information' if note is None else note,
-            errnote='Failed to download m3u8 information' if errnote is None else errnote,
+        if note is None:
+            note = 'Downloading m3u8 information'
+        if errnote is None:
+            errnote = 'Failed to download m3u8 information'
+        response = self._request_webpage(
+            m3u8_url, video_id, note=note, errnote=errnote,
             fatal=fatal, data=data, headers=headers, query=query)
-
-        if res is False:
+        if response is False:
             return [], {}
 
-        m3u8_doc, urlh = res
-        m3u8_url = urlh.url
+        with contextlib.closing(response):
+            prefix = response.read(512)
+            if not prefix.startswith(b'#EXTM3U'):
+                msg = 'Response data has no m3u header'
+                if fatal:
+                    raise ExtractorError(msg, video_id=video_id)
+                self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id)
+                return [], {}
+
+            content = self._webpage_read_content(
+                response, m3u8_url, video_id, note=note, errnote=errnote,
+                fatal=fatal, prefix=prefix, data=data)
+        if content is False:
+            return [], {}
 
         return self._parse_m3u8_formats_and_subtitles(
-            m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+            content, response.url, ext=ext, entry_protocol=entry_protocol,
             preference=preference, quality=quality, m3u8_id=m3u8_id,
             note=note, errnote=errnote, fatal=fatal, live=live, data=data,
             headers=headers, query=query, video_id=video_id)
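
Annotation (not part of the commit): every valid HLS playlist must begin with the #EXTM3U tag (RFC 8216), so sniffing a short prefix is enough to reject binary responses before parsing — exactly what the new fake.m3u8 test exercises. A standalone sketch of the check:

```python
import io

def looks_like_m3u8(fileobj):
    # RFC 8216: a playlist's first line must be the #EXTM3U tag
    return fileobj.read(512).startswith(b'#EXTM3U')

assert looks_like_m3u8(io.BytesIO(b'#EXTM3U\n#EXT-X-VERSION:3\n'))
assert not looks_like_m3u8(io.BytesIO(1024 * b'\x00'))  # the fake.m3u8 payload
```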
yt_dlp/extractor/mir24tv.py (new file, 37 lines)
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+from ..utils import parse_qs, url_or_none
+from ..utils.traversal import require, traverse_obj
+
+
+class Mir24TvIE(InfoExtractor):
+    IE_NAME = 'mir24.tv'
+    _VALID_URL = r'https?://(?:www\.)?mir24\.tv/news/(?P<id>[0-9]+)/[^/?#]+'
+    _TESTS = [{
+        'url': 'https://mir24.tv/news/16635210/dni-kultury-rossii-otkrylis-v-uzbekistane.-na-prazdnichnom-koncerte-vystupili-zvezdy-rossijskoj-estrada',
+        'info_dict': {
+            'id': '16635210',
+            'title': 'Дни культуры России открылись в Узбекистане. На праздничном концерте выступили звезды российской эстрады',
+            'ext': 'mp4',
+            'thumbnail': r're:https://images\.mir24\.tv/.+\.jpg',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id, impersonate=True)
+
+        iframe_url = self._search_regex(
+            r'<iframe\b[^>]+\bsrc=["\'](https?://mir24\.tv/players/[^"\']+)',
+            webpage, 'iframe URL')
+
+        m3u8_url = traverse_obj(iframe_url, (
+            {parse_qs}, 'source', -1, {self._proto_relative_url}, {url_or_none}, {require('m3u8 URL')}))
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
@@ -1,53 +1,72 @@
-import re
-
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    clean_html,
+    parse_iso8601,
+    parse_qs,
+    url_or_none,
+)
+from ..utils.traversal import require, traverse_obj
 
 
 class NewsPicksIE(InfoExtractor):
-    _VALID_URL = r'https?://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
+    _VALID_URL = r'https?://newspicks\.com/movie-series/(?P<id>[^?/#]+)'
+
     _TESTS = [{
-        'url': 'https://newspicks.com/movie-series/11?movieId=1813',
+        'url': 'https://newspicks.com/movie-series/11/?movieId=1813',
         'info_dict': {
             'id': '1813',
-            'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
-            'description': 'md5:09397aad46d6ded6487ff13f138acadf',
-            'channel': 'HORIE ONE',
-            'channel_id': '11',
-            'release_date': '20220117',
-            'thumbnail': r're:https://.+jpg',
             'ext': 'mp4',
+            'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
+            'cast': 'count:4',
+            'description': 'md5:09397aad46d6ded6487ff13f138acadf',
+            'duration': 2940,
+            'release_date': '20220117',
+            'release_timestamp': 1642424400,
+            'series': 'HORIE ONE',
+            'series_id': '11',
+            'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
+            'timestamp': 1642424420,
+            'upload_date': '20220117',
+        },
+    }, {
+        'url': 'https://newspicks.com/movie-series/158/?movieId=3932',
+        'info_dict': {
+            'id': '3932',
+            'ext': 'mp4',
+            'title': '【検証】専門家は、KADOKAWAをどう見るか',
+            'cast': 'count:3',
+            'description': 'md5:2c2d4bf77484a4333ec995d676f9a91d',
+            'duration': 1320,
+            'release_date': '20240622',
+            'release_timestamp': 1719088080,
+            'series': 'NPレポート',
+            'series_id': '158',
+            'thumbnail': r're:https?://resources\.newspicks\.com/.+\.(?:jpe?g|png)',
+            'timestamp': 1719086400,
+            'upload_date': '20240622',
         },
     }]
 
     def _real_extract(self, url):
-        video_id, channel_id = self._match_valid_url(url).group('id', 'channel_id')
+        series_id = self._match_id(url)
+        video_id = traverse_obj(parse_qs(url), ('movieId', -1, {str}, {require('movie ID')}))
         webpage = self._download_webpage(url, video_id)
-        entries = self._parse_html5_media_entries(
-            url, webpage.replace('movie-for-pc', 'movie'), video_id, 'hls')
-        if not entries:
-            raise ExtractorError('No HTML5 media elements found')
-        info = entries[0]
 
-        title = self._html_search_meta('og:title', webpage, fatal=False)
-        description = self._html_search_meta(
-            ('og:description', 'twitter:title'), webpage, fatal=False)
-        channel = self._html_search_regex(
-            r'value="11".+?<div\s+class="title">(.+?)</div', webpage, 'channel name', fatal=False)
-        if not title or not channel:
-            title, channel = re.split(r'\s*\|\s*', self._html_extract_title(webpage))
+        fragment = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['fragment']
+        m3u8_url = traverse_obj(fragment, ('movie', 'movieUrl', {url_or_none}, {require('m3u8 URL')}))
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
 
-        release_date = self._search_regex(
-            r'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
-            webpage, 'release date', fatal=False, group=(1, 2, 3))
-
-        info.update({
+        return {
             'id': video_id,
-            'title': title,
-            'description': description,
-            'channel': channel,
-            'channel_id': channel_id,
-            'release_date': ('%04d%02d%02d' % tuple(map(int, release_date))) if release_date else None,
-        })
-        return info
+            'formats': formats,
+            'series': traverse_obj(fragment, ('series', 'title', {str})),
+            'series_id': series_id,
+            'subtitles': subtitles,
+            **traverse_obj(fragment, ('movie', {
+                'title': ('title', {str}),
+                'cast': ('relatedUsers', ..., 'displayName', {str}, filter, all, filter),
+                'description': ('explanation', {clean_html}),
+                'release_timestamp': ('onAirStartDate', {parse_iso8601}),
+                'thumbnail': (('image', 'coverImageUrl'), {url_or_none}, any),
+                'timestamp': ('published', {parse_iso8601}),
+            })),
+        }
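
Annotation (not part of the commit): the rewrite swaps fragile HTML scraping for the page's Next.js payload — _search_nextjs_data reads the JSON embedded in the __NEXT_DATA__ script tag. Roughly, under a hypothetical minimal page:

```python
import json
import re

webpage = ('<script id="__NEXT_DATA__" type="application/json">'
           '{"props": {"pageProps": {"fragment": {"movie": '
           '{"movieUrl": "https://example.com/video.m3u8"}}}}}</script>')

nextjs_data = json.loads(re.search(
    r'<script id="__NEXT_DATA__"[^>]*>([^<]+)</script>', webpage).group(1))
fragment = nextjs_data['props']['pageProps']['fragment']
assert fragment['movie']['movieUrl'].endswith('.m3u8')
```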
@@ -8,6 +8,8 @@
     get_element_by_class,
     int_or_none,
     join_nonempty,
+    make_archive_id,
+    orderedSet,
     parse_duration,
     remove_end,
     traverse_obj,
@@ -16,6 +18,7 @@
     unified_timestamp,
     url_or_none,
     urljoin,
+    variadic,
 )
 
 
@@ -495,7 +498,7 @@ def _real_extract(self, url):
         chapters = None
         if chapter_durations and chapter_titles and len(chapter_durations) == len(chapter_titles):
             start_time = chapter_durations
-            end_time = chapter_durations[1:] + [duration]
+            end_time = [*chapter_durations[1:], duration]
             chapters = [{
                 'start_time': s,
                 'end_time': e,
@@ -591,102 +594,179 @@ class NhkRadiruIE(InfoExtractor):
     IE_DESC = 'NHK らじる (Radiru/Rajiru)'
     _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
     _TESTS = [{
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239',
-        'skip': 'Episode expired on 2024-06-09',
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=LG96ZW5KZ4_01_4251382',
+        'skip': 'Episode expires on 2025-07-14',
         'info_dict': {
-            'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集',
-            'id': '0449_01_4003239',
+            'title': 'クラシックの庭\u3000特集「ドボルザークを聴く」(1)交響曲を中心に',
+            'id': 'LG96ZW5KZ4_01_4251382',
             'ext': 'm4a',
-            'uploader': 'NHK FM 東京',
-            'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc',
-            'series': 'ジャズ・トゥナイト',
-            'channel': 'NHK FM 東京',
-            'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
-            'upload_date': '20240601',
-            'series_id': '0449_01',
-            'release_date': '20240601',
-            'timestamp': 1717257600,
-            'release_timestamp': 1717250400,
+            'description': 'md5:652d3c38a25b77959c716421eba1617a',
+            'uploader': 'NHK FM・東京',
+            'channel': 'NHK FM・東京',
+            'duration': 6597.0,
+            'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/LG96ZW5KZ4/LG96ZW5KZ4-eyecatch_a67c6e949325016c0724f2ed3eec8a2f.jpg',
+            'categories': ['音楽', 'クラシック・オペラ'],
+            'cast': ['田添菜穂子'],
+            'series': 'クラシックの庭',
+            'series_id': 'LG96ZW5KZ4',
+            'episode': '特集「ドボルザークを聴く」(1)交響曲を中心に',
+            'episode_id': 'QP1Q2ZXZY3',
+            'timestamp': 1751871000,
+            'upload_date': '20250707',
+            'release_timestamp': 1751864403,
+            'release_date': '20250707',
         },
     }, {
         # playlist, airs every weekday so it should _hopefully_ be okay forever
-        'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
+        'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=Z9L1V2M24L_01',
         'info_dict': {
-            'id': '0458_01',
+            'id': 'Z9L1V2M24L_01',
             'title': 'ベストオブクラシック',
             'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
-            'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
-            'series_id': '0458_01',
+            'thumbnail': 'https://www.nhk.jp/static/assets/images/radioseries/rs/Z9L1V2M24L/Z9L1V2M24L-eyecatch_83ed28b4782907998875965fee60a351.jpg',
+            'series_id': 'Z9L1V2M24L_01',
             'uploader': 'NHK FM',
             'channel': 'NHK FM',
             'series': 'ベストオブクラシック',
         },
         'playlist_mincount': 3,
-    }, {
-        # one with letters in the id
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688',
-        'note': 'Expires on 2025-03-31',
-        'info_dict': {
-            'id': 'F683_01_3910688',
-            'ext': 'm4a',
-            'title': '夏目漱石「文鳥」第1回',
-            'series': '【らじる文庫】夏目漱石「文鳥」(全4回)',
-            'series_id': 'F683_01',
-            'description': '朗読:浅井理アナウンサー',
-            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg',
-            'upload_date': '20240106',
-            'release_date': '20240106',
-            'uploader': 'NHK R1',
-            'release_timestamp': 1704511800,
-            'channel': 'NHK R1',
-            'timestamp': 1704512700,
-        },
-        'expected_warnings': ['Unable to download JSON metadata',
-                              'Failed to get extended metadata. API returned Error 1: Invalid parameters'],
     }, {
         # news
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173',
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=18439M2W42_02_4251212',
+        'skip': 'Expires on 2025-07-15',
         'info_dict': {
-            'id': 'F261_01_4012173',
+            'id': '18439M2W42_02_4251212',
             'ext': 'm4a',
-            'channel': 'NHKラジオ第1',
+            'title': 'マイあさ! 午前5時のNHKニュース 2025年7月8日',
             'uploader': 'NHKラジオ第1',
+            'channel': 'NHKラジオ第1',
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
             'series': 'NHKラジオニュース',
-            'title': '午前0時のNHKニュース',
-            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
-            'release_timestamp': 1718290800,
-            'release_date': '20240613',
-            'timestamp': 1718291400,
-            'upload_date': '20240613',
+            'timestamp': 1751919420,
+            'upload_date': '20250707',
+            'release_timestamp': 1751918400,
+            'release_date': '20250707',
         },
     }, {
         # fallback when extended metadata fails
-        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298',
-        'skip': 'Expires on 2024-06-07',
+        'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=J8792PY43V_20_4253945',
+        'skip': 'Expires on 2025-09-01',
         'info_dict': {
-            'id': '2834_01_4009298',
-            'title': 'まち☆キラ!開成町特集',
+            'id': 'J8792PY43V_20_4253945',
             'ext': 'm4a',
-            'release_date': '20240531',
-            'upload_date': '20240531',
-            'series': 'はま☆キラ!',
-            'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg',
-            'channel': 'NHK R1,FM',
-            'description': '',
-            'timestamp': 1717123800,
-            'uploader': 'NHK R1,FM',
-            'release_timestamp': 1717120800,
-            'series_id': '2834_01',
+            'title': '「後絶たない筋肉増強剤の使用」ワールドリポート',
+            'description': '大濱 敦(ソウル支局)',
+            'uploader': 'NHK R1',
+            'channel': 'NHK R1',
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/J8792PY43V/img/corner/box_31_thumbnail.jpg',
+            'series': 'マイあさ! ワールドリポート',
+            'series_id': 'J8792PY43V_20',
+            'timestamp': 1751837100,
+            'upload_date': '20250706',
+            'release_timestamp': 1751835600,
+            'release_date': '20250706',
         },
-        'expected_warnings': ['Failed to get extended metadata. API returned empty list.'],
+        'expected_warnings': ['Failed to download extended metadata: HTTP Error 404: Not Found'],
     }]
 
     _API_URL_TMPL = None
 
+    # The `_format_*` and `_make_*` functions are ported from: https://www.nhk.or.jp/radio/assets/js/timetable_detail_new.js
+
+    def _format_act_list(self, act_list):
+        role_groups = {}
+        for act in traverse_obj(act_list, (..., {dict})):
+            role = act.get('role')
+            if role not in role_groups:
+                role_groups[role] = []
+            role_groups[role].append(act)
+
+        formatted_roles = []
+        for role, acts in role_groups.items():
+            for i, act in enumerate(acts):
+                res = f'【{role}】' if i == 0 and role is not None else ''
+                if title := act.get('title'):
+                    res += f'{title}…'
+                formatted_roles.append(join_nonempty(res, act.get('name'), delim=''))
+        return join_nonempty(*formatted_roles, delim=',')
+
+    def _make_artists(self, track, key):
+        artists = []
+        for artist in traverse_obj(track, (key, ..., {dict})):
+            if res := join_nonempty(*traverse_obj(artist, ((
+                ('role', filter, {'{}…'.format}),
+                ('part', filter, {'({})'.format}),
+                ('name', filter),
+            ), {str})), delim=''):
+                artists.append(res)
+
+        return '、'.join(artists) or None
+
+    def _make_duration(self, track, key):
+        d = traverse_obj(track, (key, {parse_duration}))
+        if d is None:
+            return None
+        hours, remainder = divmod(d, 3600)
+        minutes, seconds = divmod(remainder, 60)
+        res = '('
+        if hours > 0:
+            res += f'{int(hours)}時間'
+        if minutes > 0:
+            res += f'{int(minutes)}分'
+        res += f'{int(seconds):02}秒)'
+        return res
+
+    def _format_music_list(self, music_list):
+        tracks = []
+        for track in traverse_obj(music_list, (..., {dict})):
+            track_details = traverse_obj(track, ((
+                ('name', filter, {'「{}」'.format}),
+                ('lyricist', filter, {'{}:作詞'.format}),
+                ('composer', filter, {'{}:作曲'.format}),
+                ('arranger', filter, {'{}:編曲'.format}),
+            ), {str}))
+
+            track_details.append(self._make_artists(track, 'byArtist'))
+            track_details.append(self._make_duration(track, 'duration'))
+
+            if label := join_nonempty('label', 'code', delim=' ', from_dict=track):
+                track_details.append(f'<{label}>')
+            if location := traverse_obj(track, ('location', {str})):
+                track_details.append(f'~{location}~')
+            tracks.append(join_nonempty(*track_details, delim='\n'))
+        return '\n\n'.join(tracks)
+
+    def _format_description(self, response):
+        detailed_description = traverse_obj(response, ('detailedDescription', {dict})) or {}
+        return join_nonempty(
+            join_nonempty('epg80', 'epg200', delim='\n\n', from_dict=detailed_description),
+            traverse_obj(response, ('misc', 'actList', {self._format_act_list})),
+            traverse_obj(response, ('misc', 'musicList', {self._format_music_list})),
+            delim='\n\n')
+
+    def _get_thumbnails(self, data, keys, name=None, preference=-1):
+        thumbnails = []
+        for size, thumb in traverse_obj(data, (
+            *variadic(keys, (str, bytes, dict, set)), {dict.items},
+            lambda _, v: v[0] != 'copyright' and url_or_none(v[1]['url']),
+        )):
+            thumbnails.append({
+                'url': thumb['url'],
+                'width': int_or_none(thumb.get('width')),
+                'height': int_or_none(thumb.get('height')),
+                'preference': preference,
+                'id': join_nonempty(name, size),
+            })
+            preference -= 1
+        return thumbnails
+
     def _extract_extended_metadata(self, episode_id, aa_vinfo):
         service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')}))
+        date_id = aa_vinfo[3]
+
         detail_url = try_call(
-            lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3]))
+            lambda: self._API_URL_TMPL.format(broadcastEventId=join_nonempty(service, area, date_id)))
         if not detail_url:
             return {}
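
Annotation (not part of the commit): the old per-service/area/date URL template gives way to a single broadcastEventId path parameter assembled with join_nonempty. A simplified stand-in for that helper (yt_dlp.utils.join_nonempty; its default delimiter is '-'):

```python
def join_nonempty(*values, delim='-'):
    # Skips falsy parts, mirroring the real helper's behaviour
    return delim.join(str(v) for v in values if v)

# Illustrative values only: service, area and date id from aa_vinfo
assert join_nonempty('r1', '130', '2025070800') == 'r1-130-2025070800'
assert join_nonempty('r1', '', '2025070800') == 'r1-2025070800'
```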
@@ -699,36 +779,37 @@ def _extract_extended_metadata(self, episode_id, aa_vinfo):
         if error := traverse_obj(response, ('error', {dict})):
             self.report_warning(
                 'Failed to get extended metadata. API returned '
-                f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
+                f'Error {join_nonempty("statuscode", "message", from_dict=error, delim=": ")}')
             return {}
 
-        full_meta = traverse_obj(response, ('list', service, 0, {dict}))
-        if not full_meta:
-            self.report_warning('Failed to get extended metadata. API returned empty list.')
-            return {}
+        station = traverse_obj(response, ('publishedOn', 'broadcastDisplayName', {str}))
 
-        station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None
-        thumbnails = [{
-            'id': str(id_),
-            'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1,
-            **traverse_obj(thumb, {
-                'url': 'url',
-                'width': ('width', {int_or_none}),
-                'height': ('height', {int_or_none}),
-            }),
-        } for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))]
+        thumbnails = []
+        thumbnails.extend(self._get_thumbnails(response, ('about', 'eyecatch')))
+        for num, dct in enumerate(traverse_obj(response, ('about', 'eyecatchList', ...))):
+            thumbnails.extend(self._get_thumbnails(dct, None, join_nonempty('list', num), -2))
+        thumbnails.extend(
+            self._get_thumbnails(response, ('about', 'partOfSeries', 'eyecatch'), 'series', -3))
 
         return filter_dict({
+            'description': self._format_description(response),
+            'cast': traverse_obj(response, ('misc', 'actList', ..., 'name', {str})),
+            'thumbnails': thumbnails,
+            **traverse_obj(response, {
+                'title': ('name', {str}),
+                'timestamp': ('endDate', {unified_timestamp}),
+                'release_timestamp': ('startDate', {unified_timestamp}),
+                'duration': ('duration', {parse_duration}),
+            }),
+            **traverse_obj(response, ('identifierGroup', {
+                'series': ('radioSeriesName', {str}),
+                'series_id': ('radioSeriesId', {str}),
+                'episode': ('radioEpisodeName', {str}),
+                'episode_id': ('radioEpisodeId', {str}),
+                'categories': ('genre', ..., ['name1', 'name2'], {str}, all, {orderedSet}),
+            })),
             'channel': station,
             'uploader': station,
-            'description': join_nonempty(
-                'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta),
-            'thumbnails': thumbnails,
-            **traverse_obj(full_meta, {
-                'title': ('title', {str}),
-                'timestamp': ('end_time', {unified_timestamp}),
-                'release_timestamp': ('start_time', {unified_timestamp}),
-            }),
         })
 
     def _extract_episode_info(self, episode, programme_id, series_meta):
@@ -782,7 +863,9 @@ def _real_extract(self, url):
         site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
         programme_id = f'{site_id}_{corner_id}'
 
-        if site_id == 'F261':  # XXX: News programmes use old API (for now?)
+        # XXX: News programmes use the old API
+        # Can't move this to NhkRadioNewsPageIE because news items still use the normal URL format
+        if site_id == '18439M2W42':
             meta = self._download_json(
                 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main']
             series_meta = traverse_obj(meta, {
@@ -843,8 +926,8 @@ class NhkRadioNewsPageIE(InfoExtractor):
         'url': 'https://www.nhk.or.jp/radionews/',
         'playlist_mincount': 5,
         'info_dict': {
-            'id': 'F261_01',
-            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
+            'id': '18439M2W42_01',
+            'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/18439M2W42/img/series_945_thumbnail.jpg',
             'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
             'channel': 'NHKラジオ第1',
             'uploader': 'NHKラジオ第1',
@@ -853,7 +936,7 @@ class NhkRadioNewsPageIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
+        return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=18439M2W42_01', NhkRadiruIE)
 
 
 class NhkRadiruLiveIE(InfoExtractor):
@@ -863,11 +946,12 @@ class NhkRadiruLiveIE(InfoExtractor):
         # radio 1, no area specified
         'url': 'https://www.nhk.or.jp/radio/player/?ch=r1',
         'info_dict': {
-            'id': 'r1-tokyo',
-            'title': 're:^NHKネットラジオ第1 東京.+$',
+            'id': 'bs-r1-130',
+            'title': 're:^NHKラジオ第1・東京.+$',
             'ext': 'm4a',
-            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r1-200x200.png',
+            'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r1/r1-logo.svg',
             'live_status': 'is_live',
+            '_old_archive_ids': ['nhkradirulive r1-tokyo'],
         },
     }, {
         # radio 2, area specified
@@ -875,26 +959,28 @@ class NhkRadiruLiveIE(InfoExtractor):
         'url': 'https://www.nhk.or.jp/radio/player/?ch=r2',
         'params': {'extractor_args': {'nhkradirulive': {'area': ['fukuoka']}}},
         'info_dict': {
-            'id': 'r2-fukuoka',
-            'title': 're:^NHKネットラジオ第2 福岡.+$',
+            'id': 'bs-r2-400',
+            'title': 're:^NHKラジオ第2.+$',
             'ext': 'm4a',
-            'thumbnail': 'https://www.nhk.or.jp/common/img/media/r2-200x200.png',
+            'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r2/r2-logo.svg',
             'live_status': 'is_live',
+            '_old_archive_ids': ['nhkradirulive r2-fukuoka'],
         },
     }, {
         # fm, area specified
         'url': 'https://www.nhk.or.jp/radio/player/?ch=fm',
         'params': {'extractor_args': {'nhkradirulive': {'area': ['sapporo']}}},
         'info_dict': {
-            'id': 'fm-sapporo',
-            'title': 're:^NHKネットラジオFM 札幌.+$',
+            'id': 'bs-r3-010',
+            'title': 're:^NHK FM・札幌.+$',
             'ext': 'm4a',
-            'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png',
+            'thumbnail': 'https://www.nhk.jp/assets/images/broadcastservice/bs/r3/r3-logo.svg',
             'live_status': 'is_live',
+            '_old_archive_ids': ['nhkradirulive fm-sapporo'],
         },
     }]
 
-    _NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'}
+    _NOA_STATION_IDS = {'r1': 'r1', 'r2': 'r2', 'fm': 'r3'}
 
     def _real_extract(self, url):
         station = self._match_id(url)
@@ -911,12 +997,15 @@ def _real_extract(self, url):
         noa_info = self._download_json(
             f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text),
             station, note=f'Downloading {area} station metadata', fatal=False)
-        present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present'))
+        broadcast_service = traverse_obj(noa_info, (self._NOA_STATION_IDS.get(station), 'publishedOn'))
 
         return {
-            'title': ' '.join(traverse_obj(present_info, (('service', 'area'), 'name', {str}))),
-            'id': join_nonempty(station, area),
-            'thumbnails': traverse_obj(present_info, ('service', 'images', ..., {
+            **traverse_obj(broadcast_service, {
+                'title': ('broadcastDisplayName', {str}),
+                'id': ('id', {str}),
+            }),
+            '_old_archive_ids': [make_archive_id(self, join_nonempty(station, area))],
+            'thumbnails': traverse_obj(broadcast_service, ('logo', ..., {
                 'url': 'url',
                 'width': ('width', {int_or_none}),
                 'height': ('height', {int_or_none}),
@@ -1,6 +1,5 @@
 from .common import InfoExtractor
 from ..utils import (
-    ExtractorError,
     determine_ext,
     int_or_none,
     traverse_obj,
@@ -61,10 +60,10 @@ def _real_extract(self, url):
         post = self._download_json(
             'https://9gag.com/v1/post', post_id, query={
                 'id': post_id,
-            })['data']['post']
+            }, impersonate=True)['data']['post']
 
         if post.get('type') != 'Animated':
-            raise ExtractorError(
+            self.raise_no_formats(
                 'The given url does not contain a video',
                 expected=True)
 
@@ -101,7 +101,7 @@ def _real_extract(self, url):
         webpage = self._download_webpage(
             url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True)
         data = self._search_json(
-            r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id,
+            r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id,
             contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
             transform_source=lambda x: base64.b64decode(x).decode())
 
@@ -6,6 +6,7 @@
 import urllib.parse
 
 from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     UserNotLive,
@@ -188,19 +189,39 @@ def _get_thumbnails(self, thumbnail):
         }] if thumbnail else None
 
     def _extract_twitch_m3u8_formats(self, path, video_id, token, signature, live_from_start=False):
-        formats = self._extract_m3u8_formats(
-            f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
-                'allow_source': 'true',
-                'allow_audio_only': 'true',
-                'allow_spectre': 'true',
-                'p': random.randint(1000000, 10000000),
-                'platform': 'web',
-                'player': 'twitchweb',
-                'supported_codecs': 'av1,h265,h264',
-                'playlist_include_framerate': 'true',
-                'sig': signature,
-                'token': token,
-            })
+        try:
+            formats = self._extract_m3u8_formats(
+                f'{self._USHER_BASE}/{path}/{video_id}.m3u8', video_id, 'mp4', query={
+                    'allow_source': 'true',
+                    'allow_audio_only': 'true',
+                    'allow_spectre': 'true',
+                    'p': random.randint(1000000, 10000000),
+                    'platform': 'web',
+                    'player': 'twitchweb',
+                    'supported_codecs': 'av1,h265,h264',
+                    'playlist_include_framerate': 'true',
+                    'sig': signature,
+                    'token': token,
+                })
+        except ExtractorError as e:
+            if (
+                not isinstance(e.cause, HTTPError)
+                or e.cause.status != 403
+                or e.cause.response.get_header('content-type') != 'application/json'
+            ):
+                raise
+
+            error_info = traverse_obj(e.cause.response.read(), ({json.loads}, 0, {dict})) or {}
+            if error_info.get('error_code') in ('vod_manifest_restricted', 'unauthorized_entitlements'):
+                common_msg = 'access to this subscriber-only content'
+                if self._get_cookies('https://gql.twitch.tv').get('auth-token'):
+                    raise ExtractorError(f'Your account does not have {common_msg}', expected=True)
+                self.raise_login_required(f'You must be logged into an account that has {common_msg}')
+
+            if error_msg := join_nonempty('error_code', 'error', from_dict=error_info, delim=': '):
+                raise ExtractorError(error_msg, expected=True)
+            raise
+
         for fmt in formats:
             if fmt.get('vcodec') and fmt['vcodec'].startswith('av01'):
                 # mpegts does not yet have proper support for av1
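
Annotation (not part of the commit): the usher endpoint answers 403 for subscriber-only VODs with a JSON array of error objects, and the new except branch inspects the first element to pick between a login prompt and a plain error. A sketch with a made-up body of that shape:

```python
import json

# Hypothetical 403 response body following the observed array-of-objects shape
body = b'[{"error": "restricted", "error_code": "vod_manifest_restricted"}]'

error_info = next(iter(json.loads(body)), {})
if error_info.get('error_code') in ('vod_manifest_restricted', 'unauthorized_entitlements'):
    print('subscriber-only content: a logged-in, subscribed account is required')
```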
@@ -317,17 +317,31 @@ def _extract_lockup_view_model(self, view_model):
         content_id = view_model.get('contentId')
         if not content_id:
             return
 
         content_type = view_model.get('contentType')
-        if content_type not in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
+        if content_type == 'LOCKUP_CONTENT_TYPE_VIDEO':
+            ie = YoutubeIE
+            url = f'https://www.youtube.com/watch?v={content_id}'
+            thumb_keys = (None,)
+        elif content_type in ('LOCKUP_CONTENT_TYPE_PLAYLIST', 'LOCKUP_CONTENT_TYPE_PODCAST'):
+            ie = YoutubeTabIE
+            url = f'https://www.youtube.com/playlist?list={content_id}'
+            thumb_keys = ('collectionThumbnailViewModel', 'primaryThumbnail')
+        else:
             self.report_warning(
-                f'Unsupported lockup view model content type "{content_type}"{bug_reports_message()}', only_once=True)
+                f'Unsupported lockup view model content type "{content_type}"{bug_reports_message()}',
+                only_once=True)
             return
 
         return self.url_result(
-            f'https://www.youtube.com/playlist?list={content_id}', ie=YoutubeTabIE, video_id=content_id,
+            url, ie, content_id,
             title=traverse_obj(view_model, (
                 'metadata', 'lockupMetadataViewModel', 'title', 'content', {str})),
             thumbnails=self._extract_thumbnails(view_model, (
-                'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail', 'thumbnailViewModel', 'image'), final_key='sources'))
+                'contentImage', *thumb_keys, 'thumbnailViewModel', 'image'), final_key='sources'),
+            duration=traverse_obj(view_model, (
+                'contentImage', 'thumbnailViewModel', 'overlays', ..., 'thumbnailOverlayBadgeViewModel',
+                'thumbnailBadges', ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)))
 
     def _rich_entries(self, rich_grid_renderer):
         if lockup_view_model := traverse_obj(rich_grid_renderer, ('content', 'lockupViewModel', {dict})):
|
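The added `duration` field reads the badge overlay off the lockup thumbnail. A runnable sketch against a trimmed-down view model -- the nested dict is invented, but the key path is the one from the diff:

from yt_dlp.utils import parse_duration, traverse_obj

view_model = {'contentImage': {'thumbnailViewModel': {'overlays': [
    {'thumbnailOverlayBadgeViewModel': {'thumbnailBadges': [
        {'thumbnailBadgeViewModel': {'text': '1:23:45'}},
    ]}},
]}}}

# `...` branches over every overlay and badge, {parse_duration} maps the
# badge text to seconds, and `any` collapses to the first non-None branch
print(traverse_obj(view_model, (
    'contentImage', 'thumbnailViewModel', 'overlays', ..., 'thumbnailOverlayBadgeViewModel',
    'thumbnailBadges', ..., 'thumbnailBadgeViewModel', 'text', {parse_duration}, any)))
# -> 5025.0 (i.e. 1:23:45 in seconds)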
@@ -26,7 +26,7 @@
 from .pot._director import initialize_pot_director
 from .pot.provider import PoTokenContext, PoTokenRequest
 from ..openload import PhantomJSwrapper
-from ...jsinterp import JSInterpreter
+from ...jsinterp import JSInterpreter, LocalNameSpace
 from ...networking.exceptions import HTTPError
 from ...utils import (
     NO_DEFAULT,
@@ -1801,6 +1801,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         'tablet': 'player-plasma-ias-tablet-en_US.vflset/base.js',
     }
     _INVERSE_PLAYER_JS_VARIANT_MAP = {v: k for k, v in _PLAYER_JS_VARIANT_MAP.items()}
+    _NSIG_FUNC_CACHE_ID = 'nsig func'
+    _DUMMY_STRING = 'dlp_wins'
 
     @classmethod
     def suitable(cls, url):
@@ -2204,7 +2206,7 @@ def _decrypt_nsig(self, s, video_id, player_url):
             self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
 
         try:
-            extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
+            extract_nsig = self._cached(self._extract_n_function_from_code, self._NSIG_FUNC_CACHE_ID, player_url)
             ret = extract_nsig(jsi, func_code)(s)
         except JSInterpreter.Exception as e:
             try:
@@ -2312,16 +2314,18 @@ def _interpret_player_js_global_var(self, jscode, player_url):
 
         jsi = JSInterpreter(varcode)
         interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
-        return varname, interpret_global_var(varvalue, {}, allow_recursion=10)
+        return varname, interpret_global_var(varvalue, LocalNameSpace(), allow_recursion=10)
 
     def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
+        # Fixup global array
         varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
         if varname and global_list:
             nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
         else:
-            varname = 'dlp_wins'
+            varname = self._DUMMY_STRING
             global_list = []
 
+        # Fixup typeof check
         undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
         fixed_code = re.sub(
             fr'''(?x)
@@ -2334,6 +2338,32 @@ def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
             self.write_debug(join_nonempty(
                 'No typeof statement found in nsig function code',
                 player_url and f' player = {player_url}', delim='\n'), only_once=True)
 
+        # Fixup global funcs
+        jsi = JSInterpreter(fixed_code)
+        cache_id = (self._NSIG_FUNC_CACHE_ID, player_url)
+        try:
+            self._cached(
+                self._extract_n_function_from_code, *cache_id)(jsi, (argnames, fixed_code))(self._DUMMY_STRING)
+        except JSInterpreter.Exception:
+            self._player_cache.pop(cache_id, None)
+
+        global_funcnames = jsi._undefined_varnames
+        debug_names = []
+        jsi = JSInterpreter(jscode)
+        for func_name in global_funcnames:
+            try:
+                func_args, func_code = jsi.extract_function_code(func_name)
+                fixed_code = f'var {func_name} = function({", ".join(func_args)}) {{ {func_code} }}; {fixed_code}'
+                debug_names.append(func_name)
+            except Exception:
+                self.report_warning(join_nonempty(
+                    f'Unable to extract global nsig function {func_name} from player JS',
+                    player_url and f' player = {player_url}', delim='\n'), only_once=True)
+
+        if debug_names:
+            self.write_debug(f'Extracted global nsig functions: {", ".join(debug_names)}')
+
         return argnames, fixed_code
 
     def _extract_n_function_code(self, video_id, player_url):
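The "Fixup global funcs" block runs the patched nsig code once as a probe so the interpreter can record every name that never resolved; those names are then looked up in the full player JS and inlined. A minimal sketch of that mechanism, assuming a checkout that includes this patch (`_undefined_varnames` is introduced here):

import contextlib

from yt_dlp.jsinterp import JSInterpreter

# Probe run: someGlobal is not defined anywhere in this snippet
probe = JSInterpreter('function n(a){ return a + someGlobal }')
with contextlib.suppress(probe.Exception):
    probe.call_function('n', 1)
print(probe._undefined_varnames)  # {'someGlobal'} -- a candidate for inlining

# _fixup_n_function_code then extracts each such function from the player JS
# and prepends it to the nsig code as `var <name> = function(...){ ... };`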
@@ -2347,7 +2377,7 @@ def _extract_n_function_code(self, video_id, player_url):
 
         func_name = self._extract_n_function_name(jscode, player_url=player_url)
 
-        # XXX: Workaround for the global array variable and lack of `typeof` implementation
+        # XXX: Work around (a) global array variable, (b) `typeof` short-circuit, (c) global functions
         func_code = self._fixup_n_function_code(*jsi.extract_function_code(func_name), jscode, player_url)
 
         return jsi, player_id, func_code
@@ -3243,6 +3273,10 @@ def append_client(*client_names):
                 # web_creator may work around age-verification for all videos but requires PO token
                 append_client('tv_embedded', 'web_creator')
 
+            status = traverse_obj(pr, ('playabilityStatus', 'status', {str}))
+            if status not in ('OK', 'LIVE_STREAM_OFFLINE', 'AGE_CHECK_REQUIRED', 'AGE_VERIFICATION_REQUIRED'):
+                self.write_debug(f'{video_id}: {client} player response playability status: {status}')
+
         prs.extend(deprioritized_prs)
 
         if skipped_clients:
@@ -3948,7 +3982,9 @@ def get_lang_code(track):
         def process_language(container, base_url, lang_code, sub_name, client_name, query):
             lang_subs = container.setdefault(lang_code, [])
             for fmt in self._SUBTITLE_FORMATS:
-                query = {**query, 'fmt': fmt}
+                # xosf=1 results in undesirable text position data for vtt, json3 & srv* subtitles
+                # See: https://github.com/yt-dlp/yt-dlp/issues/13654
+                query = {**query, 'fmt': fmt, 'xosf': []}
                 lang_subs.append({
                     'ext': fmt,
                     'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
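Why `'xosf': []` rather than an explicit value: `update_url_query` urlencodes with `doseq=True`, so an empty sequence drops the parameter from the URL instead of sending anything back. A small sketch -- the timedtext URL is a toy:

from yt_dlp.utils import update_url_query

base_url = 'https://www.youtube.com/api/timedtext?v=abc123&xosf=1&lang=en'
print(update_url_query(base_url, {'fmt': 'vtt', 'xosf': []}))
# -> https://www.youtube.com/api/timedtext?v=abc123&lang=en&fmt=vtt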
@@ -222,6 +222,14 @@ def __setitem__(self, key, value):
     def __delitem__(self, key):
         raise NotImplementedError('Deleting is not supported')
 
+    def set_local(self, key, value):
+        self.maps[0][key] = value
+
+    def get_local(self, key):
+        if key in self.maps[0]:
+            return self.maps[0][key]
+        return JS_Undefined
+
 
 class Debugger:
     import sys
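`LocalNameSpace` subclasses `collections.ChainMap`, so `maps[0]` is the innermost scope. A rough standalone model of what the two helpers add, with a stand-in object for jsinterp's `JS_Undefined` sentinel:

from collections import ChainMap

JS_Undefined = object()  # stand-in for the real jsinterp sentinel


class LocalNameSpace(ChainMap):
    def set_local(self, key, value):
        self.maps[0][key] = value

    def get_local(self, key):
        if key in self.maps[0]:
            return self.maps[0][key]
        return JS_Undefined


outer = {'x': 1}
scope = LocalNameSpace({}, outer)
print(scope['x'])                            # 1 -- lookups fall through the chain
print(scope.get_local('x') is JS_Undefined)  # True -- `var x` must not see outer x
scope.set_local('x', 2)                      # a declaration writes only the innermost map
print(scope['x'], outer['x'])                # 2 1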
@@ -271,6 +279,7 @@ class JSInterpreter:
     def __init__(self, code, objects=None):
         self.code, self._functions = code, {}
         self._objects = {} if objects is None else objects
+        self._undefined_varnames = set()
 
     class Exception(ExtractorError):  # noqa: A001
         def __init__(self, msg, expr=None, *args, **kwargs):
@@ -381,7 +390,7 @@ def _dump(self, obj, namespace):
         return self._named_object(namespace, obj)
 
     @Debugger.wrap_interpreter
-    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
+    def interpret_statement(self, stmt, local_vars, allow_recursion=100, _is_var_declaration=False):
         if allow_recursion < 0:
             raise self.Exception('Recursion limit reached')
         allow_recursion -= 1
@@ -401,6 +410,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if m.group('throw'):
             raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
         should_return = not m.group('var')
+        _is_var_declaration = _is_var_declaration or bool(m.group('var'))
         if not expr:
             return None, should_return
 
@@ -585,7 +595,8 @@ def dict_item(key, val):
         sub_expressions = list(self._separate(expr))
         if len(sub_expressions) > 1:
             for sub_expr in sub_expressions:
-                ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
+                ret, should_abort = self.interpret_statement(
+                    sub_expr, local_vars, allow_recursion, _is_var_declaration=_is_var_declaration)
                 if should_abort:
                     return ret, True
             return ret, False
@@ -599,8 +610,12 @@ def dict_item(key, val):
             left_val = local_vars.get(m.group('out'))
 
             if not m.group('index'):
-                local_vars[m.group('out')] = self._operator(
+                eval_result = self._operator(
                     m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
+                if _is_var_declaration:
+                    local_vars.set_local(m.group('out'), eval_result)
+                else:
+                    local_vars[m.group('out')] = eval_result
                 return local_vars[m.group('out')], should_return
             elif left_val in (None, JS_Undefined):
                 raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)
@@ -654,7 +669,19 @@ def dict_item(key, val):
             return float('NaN'), should_return
 
         elif m and m.group('return'):
-            return local_vars.get(m.group('name'), JS_Undefined), should_return
+            var = m.group('name')
+            # Declared variables
+            if _is_var_declaration:
+                ret = local_vars.get_local(var)
+                # Register varname in local namespace
+                # Set value as JS_Undefined or its pre-existing value
+                local_vars.set_local(var, ret)
+            else:
+                ret = local_vars.get(var, NO_DEFAULT)
+                if ret is NO_DEFAULT:
+                    ret = JS_Undefined
+                    self._undefined_varnames.add(var)
+            return ret, should_return
 
         with contextlib.suppress(ValueError):
             return json.loads(js_to_json(expr, strict=True)), should_return
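Net effect of the `_is_var_declaration` plumbing: names declared with `var`/`let`/`const` are registered in the innermost scope via `set_local`, while only genuinely unresolved names land in `_undefined_varnames` for the global-function fixup above. A sketch, again assuming this patch is applied:

from yt_dlp.jsinterp import JSInterpreter

jsi = JSInterpreter('function f(){ var declared; return missing }')
jsi.call_function('f')          # `missing` resolves to JS_Undefined, not an error
print(jsi._undefined_varnames)  # {'missing'} -- `declared` is not flagged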
|
@ -140,6 +140,12 @@ def __init__(self, res: requests.models.Response):
|
|||||||
|
|
||||||
def read(self, amt: int | None = None):
|
def read(self, amt: int | None = None):
|
||||||
try:
|
try:
|
||||||
|
# Work around issue with `.read(amt)` then `.read()`
|
||||||
|
# See: https://github.com/urllib3/urllib3/issues/3636
|
||||||
|
if amt is None:
|
||||||
|
# Python 3.9 preallocates the whole read buffer, read in chunks
|
||||||
|
read_chunk = functools.partial(self.fp.read, 1 << 20, decode_content=True)
|
||||||
|
return b''.join(iter(read_chunk, b''))
|
||||||
# Interact with urllib3 response directly.
|
# Interact with urllib3 response directly.
|
||||||
return self.fp.read(amt, decode_content=True)
|
return self.fp.read(amt, decode_content=True)
|
||||||
|
|
||||||
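The `amt is None` path sidesteps the full-body `read()` because, per the comment, Python 3.9 preallocates the whole buffer up front; reading 1 MiB at a time via two-arg `iter()` avoids that. The pattern in isolation, with `io.BytesIO` standing in for the urllib3 response:

import functools
import io

fp = io.BytesIO(b'x' * (5 << 20))                 # stand-in response body
read_chunk = functools.partial(fp.read, 1 << 20)  # read 1 MiB per call
data = b''.join(iter(read_chunk, b''))            # call until b'' signals EOF
print(len(data) == (5 << 20))                     # True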