diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in
index 21f52798ed..bb66c20956 100644
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -10,9 +10,13 @@ __yt_dlp()
diropts="--cache-dir"
if [[ ${prev} =~ ${fileopts} ]]; then
+ local IFS=$'\n'
+ type compopt &>/dev/null && compopt -o filenames
COMPREPLY=( $(compgen -f -- ${cur}) )
return 0
elif [[ ${prev} =~ ${diropts} ]]; then
+ local IFS=$'\n'
+ type compopt &>/dev/null && compopt -o dirnames
COMPREPLY=( $(compgen -d -- ${cur}) )
return 0
fi
diff --git a/pyproject.toml b/pyproject.toml
index 3775251e10..41d5ec3b0f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,7 +75,7 @@ dev = [
]
static-analysis = [
"autopep8~=2.0",
- "ruff~=0.11.0",
+ "ruff~=0.12.0",
]
test = [
"pytest~=8.1",
@@ -210,10 +210,12 @@ ignore = [
"TD001", # invalid-todo-tag
"TD002", # missing-todo-author
"TD003", # missing-todo-link
+ "PLC0415", # import-outside-top-level
"PLE0604", # invalid-all-object (false positives)
"PLE0643", # potential-index-error (false positives)
"PLW0603", # global-statement
"PLW1510", # subprocess-run-without-check
+ "PLW1641", # eq-without-hash
"PLW2901", # redefined-loop-name
"RUF001", # ambiguous-unicode-character-string
"RUF012", # mutable-class-default
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index e6c8d574e0..c9f70431f7 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -36,6 +36,18 @@ def do_GET(self):
self.send_header('Content-Type', 'text/html; charset=utf-8')
self.end_headers()
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+ elif self.path == '/fake.m3u8':
+ self.send_response(200)
+ self.send_header('Content-Length', '1024')
+ self.end_headers()
+ self.wfile.write(1024 * b'\x00')
+ elif self.path == '/bipbop.m3u8':
+ with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f:
+ data = f.read()
+ self.send_response(200)
+ self.send_header('Content-Length', str(len(data)))
+ self.end_headers()
+ self.wfile.write(data)
else:
assert False
@@ -2079,5 +2091,45 @@ def test_search_nuxt_json(self):
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
+class TestInfoExtractorNetwork(unittest.TestCase):
+ def setUp(self, /):
+ self.httpd = http.server.HTTPServer(
+ ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+ self.port = http_server_port(self.httpd)
+
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ self.called = False
+
+ def require_warning(*args, **kwargs):
+ self.called = True
+
+ self.ydl = FakeYDL()
+ self.ydl.report_warning = require_warning
+ self.ie = DummyIE(self.ydl)
+
+ def tearDown(self, /):
+ self.ydl.close()
+ self.httpd.shutdown()
+ self.httpd.server_close()
+ self.server_thread.join(1)
+
+ def test_extract_m3u8_formats(self):
+ formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
+ f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False)
+ self.assertFalse(self.called)
+ self.assertTrue(formats)
+ self.assertTrue(subtitles)
+
+ def test_extract_m3u8_formats_warning(self):
+ formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
+ f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False)
+ self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file')
+ self.assertFalse(formats)
+ self.assertFalse(subtitles)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_download.py b/test/test_download.py
index 3f36869d9d..c7842735c2 100755
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -14,6 +14,7 @@
from test.helper import (
assertGreaterEqual,
+ assertLessEqual,
expect_info_dict,
expect_warnings,
get_params,
@@ -121,10 +122,13 @@ def print_skipping(reason):
params = get_params(test_case.get('params', {}))
params['outtmpl'] = tname + '_' + params['outtmpl']
if is_playlist and 'playlist' not in test_case:
- params.setdefault('extract_flat', 'in_playlist')
- params.setdefault('playlistend', test_case.get(
- 'playlist_mincount', test_case.get('playlist_count', -2) + 1))
+ params.setdefault('playlistend', max(
+ test_case.get('playlist_mincount', -1),
+ test_case.get('playlist_count', -2) + 1,
+ test_case.get('playlist_maxcount', -2) + 1))
params.setdefault('skip_download', True)
+ if 'playlist_duration_sum' not in test_case:
+ params.setdefault('extract_flat', 'in_playlist')
ydl = YoutubeDL(params, auto_init=False)
ydl.add_default_info_extractors()
@@ -159,6 +163,7 @@ def try_rm_tcs_files(tcs=None):
try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
try_rm_tcs_files()
try:
+ test_url = test_case['url']
try_num = 1
while True:
try:
@@ -166,7 +171,7 @@ def try_rm_tcs_files(tcs=None):
# for outside error handling, and returns the exit code
# instead of the result dict.
res_dict = ydl.extract_info(
- test_case['url'],
+ test_url,
force_generic_extractor=params.get('force_generic_extractor', False))
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
@@ -194,23 +199,23 @@ def try_rm_tcs_files(tcs=None):
self.assertTrue('entries' in res_dict)
expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
+ num_entries = len(res_dict.get('entries', []))
if 'playlist_mincount' in test_case:
+ mincount = test_case['playlist_mincount']
assertGreaterEqual(
- self,
- len(res_dict['entries']),
- test_case['playlist_mincount'],
- 'Expected at least %d in playlist %s, but got only %d' % (
- test_case['playlist_mincount'], test_case['url'],
- len(res_dict['entries'])))
+ self, num_entries, mincount,
+ f'Expected at least {mincount} entries in playlist {test_url}, but got only {num_entries}')
if 'playlist_count' in test_case:
+ count = test_case['playlist_count']
+ got = num_entries if num_entries <= count else 'more'
self.assertEqual(
- len(res_dict['entries']),
- test_case['playlist_count'],
- 'Expected %d entries in playlist %s, but got %d.' % (
- test_case['playlist_count'],
- test_case['url'],
- len(res_dict['entries']),
- ))
+ num_entries, count,
+ f'Expected exactly {count} entries in playlist {test_url}, but got {got}')
+ if 'playlist_maxcount' in test_case:
+ maxcount = test_case['playlist_maxcount']
+ assertLessEqual(
+ self, num_entries, maxcount,
+ f'Expected at most {maxcount} entries in playlist {test_url}, but got more')
if 'playlist_duration_sum' in test_case:
got_duration = sum(e['duration'] for e in res_dict['entries'])
self.assertEqual(
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 4268e890b8..43b1d0fdee 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -490,6 +490,57 @@ def test_increment_decrement(self):
self._test('function f() { var a = "test--"; return a; }', 'test--')
self._test('function f() { var b = 1; var a = "b--"; return a; }', 'b--')
+ def test_nested_function_scoping(self):
+ self._test(R'''
+ function f() {
+ var g = function() {
+ var P = 2;
+ return P;
+ };
+ var P = 1;
+ g();
+ return P;
+ }
+ ''', 1)
+ self._test(R'''
+ function f() {
+ var x = function() {
+ for (var w = 1, M = []; w < 2; w++) switch (w) {
+ case 1:
+ M.push("a");
+ case 2:
+ M.push("b");
+ }
+ return M
+ };
+ var w = "c";
+ var M = "d";
+ var y = x();
+ y.push(w);
+ y.push(M);
+ return y;
+ }
+ ''', ['a', 'b', 'c', 'd'])
+ self._test(R'''
+ function f() {
+ var P, Q;
+ var z = 100;
+ var g = function() {
+ var P, Q; P = 2; Q = 15;
+ z = 0;
+ return P+Q;
+ };
+ P = 1; Q = 10;
+ var x = g(), y = 3;
+ return P+Q+x+y+z;
+ }
+ ''', 31)
+
+ def test_undefined_varnames(self):
+ jsi = JSInterpreter('function f(){ var a; return [a, b]; }')
+ self._test(jsi, [JS_Undefined, JS_Undefined])
+ self.assertEqual(jsi._undefined_varnames, {'b'})
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_networking.py b/test/test_networking.py
index 2f441fced2..afdd0c7aa7 100644
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -22,7 +22,6 @@
import tempfile
import threading
import time
-import urllib.error
import urllib.request
import warnings
import zlib
@@ -223,10 +222,7 @@ def do_GET(self):
if encoding == 'br' and brotli:
payload = brotli.compress(payload)
elif encoding == 'gzip':
- buf = io.BytesIO()
- with gzip.GzipFile(fileobj=buf, mode='wb') as f:
- f.write(payload)
- payload = buf.getvalue()
+ payload = gzip.compress(payload, mtime=0)
elif encoding == 'deflate':
payload = zlib.compress(payload)
elif encoding == 'unsupported':
@@ -729,6 +725,17 @@ def test_keep_header_casing(self, handler):
assert 'X-test-heaDer: test' in res
+ def test_partial_read_then_full_read(self, handler):
+ with handler() as rh:
+ for encoding in ('', 'gzip', 'deflate'):
+ res = validate_and_send(rh, Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': encoding}))
+ assert res.headers.get('Content-Encoding') == encoding
+ assert res.read(6) == b'<html>'
+ assert res.read(0) == b''
+ assert res.read() == b'<video src="media/vid.mp4" /></html>'
+
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
class TestClientCertificate:
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 5e67926798..4562467534 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -333,6 +333,50 @@
'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
),
+ (
+ 'https://www.youtube.com/s/player/a74bf670/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', 'hQP7k1hA22OrNTnq',
+ ),
+ (
+ 'https://www.youtube.com/s/player/6275f73c/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
+ ),
+ (
+ 'https://www.youtube.com/s/player/20c72c18/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', '-I03XF0iyf6I_X0A',
+ ),
+ (
+ 'https://www.youtube.com/s/player/9fe2e06e/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', '6r5ekNIiEMPutZy',
+ ),
+ (
+ 'https://www.youtube.com/s/player/680f8c75/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', '0ml9caTwpa55Jf',
+ ),
+ (
+ 'https://www.youtube.com/s/player/14397202/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', 'ozZFAN21okDdJTa',
+ ),
+ (
+ 'https://www.youtube.com/s/player/5dcb2c1f/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', 'p7iTbRZDYAF',
+ ),
+ (
+ 'https://www.youtube.com/s/player/a10d7fcc/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', '9Zue7DDHJSD',
+ ),
+ (
+ 'https://www.youtube.com/s/player/8e20cb06/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', '5-4tTneTROTpMzba',
+ ),
+ (
+ 'https://www.youtube.com/s/player/e12fbea4/player_ias_tce.vflset/en_US/base.js',
+ 'kM5r52fugSZRAKHfo3', 'XkeRfXIPOkSwfg',
+ ),
+ (
+ 'https://www.youtube.com/s/player/ef259203/player_ias_tce.vflset/en_US/base.js',
+ 'rPqBC01nJpqhhi2iA2U', 'hY7dbiKFT51UIA',
+ ),
]
diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py
index 065901d68d..600cb12a89 100644
--- a/yt_dlp/aes.py
+++ b/yt_dlp/aes.py
@@ -435,7 +435,7 @@ def sub_bytes_inv(data):
def rotate(data):
- return data[1:] + [data[0]]
+ return [*data[1:], data[0]]
def key_schedule_core(data, rcon_iteration):
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index 1f36a07f5f..2256305785 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -94,12 +94,19 @@ def real_download(self, filename, info_dict):
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
if can_download:
has_ffmpeg = FFmpegFD.available()
- no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s
- if no_crypto and has_ffmpeg:
- can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
- elif no_crypto:
- message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
- 'Decryption will be performed natively, but will be extremely slow')
+ if not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s:
+ # Even if pycryptodomex isn't available, force HlsFD for m3u8s that won't work with ffmpeg
+ ffmpeg_can_dl = not traverse_obj(info_dict, ((
+ 'extra_param_to_segment_url', 'extra_param_to_key_url',
+ 'hls_media_playlist_data', ('hls_aes', ('uri', 'key', 'iv')),
+ ), any))
+ message = 'The stream has AES-128 encryption and {} available'.format(
+ 'neither ffmpeg nor pycryptodomex are' if ffmpeg_can_dl and not has_ffmpeg else
+ 'pycryptodomex is not')
+ if has_ffmpeg and ffmpeg_can_dl:
+ can_download = False
+ else:
+ message += '; decryption will be performed natively, but will be extremely slow'
elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index ada12b3a8a..84da570b0a 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1147,6 +1147,7 @@
MindsIE,
)
from .minoto import MinotoIE
+from .mir24tv import Mir24TvIE
from .mirrativ import (
MirrativIE,
MirrativUserIE,
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 0f5c2c97e4..0c6535fc72 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -900,7 +900,9 @@ def _real_extract(self, url):
headers=headers))
geo_blocked = traverse_obj(play_info, (
- 'raw', 'data', 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
+ ('result', ('raw', 'data')), 'plugins',
+ lambda _, v: v['name'] == 'AreaLimitPanel',
+ 'config', 'is_block', {bool}, any))
premium_only = play_info.get('code') == -10403
video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {}
@@ -914,7 +916,7 @@ def _real_extract(self, url):
if traverse_obj(play_info, ((
('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE'
- ('raw', 'data', 'play_video_type'), # 'preview' vs 'whole'
+ (('result', ('raw', 'data')), 'play_video_type'), # 'preview' vs 'whole' vs 'none'
), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
self.report_warning(
'Only preview format is available, '
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 32b4680b73..b75e806233 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,5 +1,6 @@
import base64
import collections
+import contextlib
import functools
import getpass
import http.client
@@ -2129,21 +2130,33 @@ def _extract_m3u8_formats_and_subtitles(
raise ExtractorError(errnote, video_id=video_id)
self.report_warning(f'{errnote}{bug_reports_message()}')
return [], {}
-
- res = self._download_webpage_handle(
- m3u8_url, video_id,
- note='Downloading m3u8 information' if note is None else note,
- errnote='Failed to download m3u8 information' if errnote is None else errnote,
+ if note is None:
+ note = 'Downloading m3u8 information'
+ if errnote is None:
+ errnote = 'Failed to download m3u8 information'
+ response = self._request_webpage(
+ m3u8_url, video_id, note=note, errnote=errnote,
fatal=fatal, data=data, headers=headers, query=query)
-
- if res is False:
+ if response is False:
return [], {}
- m3u8_doc, urlh = res
- m3u8_url = urlh.url
+ with contextlib.closing(response):
+ prefix = response.read(512)
+ if not prefix.startswith(b'#EXTM3U'):
+ msg = 'Response data has no m3u header'
+ if fatal:
+ raise ExtractorError(msg, video_id=video_id)
+ self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id)
+ return [], {}
+
+ content = self._webpage_read_content(
+ response, m3u8_url, video_id, note=note, errnote=errnote,
+ fatal=fatal, prefix=prefix, data=data)
+ if content is False:
+ return [], {}
return self._parse_m3u8_formats_and_subtitles(
- m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+ content, response.url, ext=ext, entry_protocol=entry_protocol,
preference=preference, quality=quality, m3u8_id=m3u8_id,
note=note, errnote=errnote, fatal=fatal, live=live, data=data,
headers=headers, query=query, video_id=video_id)
diff --git a/yt_dlp/extractor/mir24tv.py b/yt_dlp/extractor/mir24tv.py
new file mode 100644
index 0000000000..5832901bf1
--- /dev/null
+++ b/yt_dlp/extractor/mir24tv.py
@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+from ..utils import parse_qs, url_or_none
+from ..utils.traversal import require, traverse_obj
+
+
+class Mir24TvIE(InfoExtractor):
+ IE_NAME = 'mir24.tv'
+ _VALID_URL = r'https?://(?:www\.)?mir24\.tv/news/(?P<id>[0-9]+)/[^/?#]+'
+ _TESTS = [{
+ 'url': 'https://mir24.tv/news/16635210/dni-kultury-rossii-otkrylis-v-uzbekistane.-na-prazdnichnom-koncerte-vystupili-zvezdy-rossijskoj-estrada',
+ 'info_dict': {
+ 'id': '16635210',
+ 'title': 'Дни культуры России открылись в Узбекистане. На праздничном концерте выступили звезды российской эстрады',
+ 'ext': 'mp4',
+ 'thumbnail': r're:https://images\.mir24\.tv/.+\.jpg',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id, impersonate=True)
+
+ iframe_url = self._search_regex(
+ r'<iframe\b[^>]+\bsrc=["\'](https?://mir24\.tv/players/[^"\']+)',