From 500761e41acb96953a5064e951d41d190c287e46 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 30 Jun 2025 18:06:22 -0500 Subject: [PATCH] [ie] Fix m3u8 playlist data corruption (#13588) Revert 7b81634fb1d15999757e7a9883daa6ef09ea785b Closes #13581 Authored by: bashonly --- test/test_InfoExtractor.py | 52 -------------------------------------- yt_dlp/extractor/common.py | 33 ++++++++---------------- 2 files changed, 10 insertions(+), 75 deletions(-) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c9f70431f..e6c8d574e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -36,18 +36,6 @@ def do_GET(self): self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) - elif self.path == '/fake.m3u8': - self.send_response(200) - self.send_header('Content-Length', '1024') - self.end_headers() - self.wfile.write(1024 * b'\x00') - elif self.path == '/bipbop.m3u8': - with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f: - data = f.read() - self.send_response(200) - self.send_header('Content-Length', str(len(data))) - self.end_headers() - self.wfile.write(data) else: assert False @@ -2091,45 +2079,5 @@ def test_search_nuxt_json(self): self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) -class TestInfoExtractorNetwork(unittest.TestCase): - def setUp(self, /): - self.httpd = http.server.HTTPServer( - ('127.0.0.1', 0), InfoExtractorTestRequestHandler) - self.port = http_server_port(self.httpd) - - self.server_thread = threading.Thread(target=self.httpd.serve_forever) - self.server_thread.daemon = True - self.server_thread.start() - - self.called = False - - def require_warning(*args, **kwargs): - self.called = True - - self.ydl = FakeYDL() - self.ydl.report_warning = require_warning - self.ie = DummyIE(self.ydl) - - def tearDown(self, /): - self.ydl.close() - self.httpd.shutdown() - self.httpd.server_close() - self.server_thread.join(1) - - def test_extract_m3u8_formats(self): - formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( - f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False) - self.assertFalse(self.called) - self.assertTrue(formats) - self.assertTrue(subtitles) - - def test_extract_m3u8_formats_warning(self): - formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( - f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False) - self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file') - self.assertFalse(formats) - self.assertFalse(subtitles) - - if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b75e80623..32b4680b7 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,6 +1,5 @@ import base64 import collections -import contextlib import functools import getpass import http.client @@ -2130,33 +2129,21 @@ def _extract_m3u8_formats_and_subtitles( raise ExtractorError(errnote, video_id=video_id) self.report_warning(f'{errnote}{bug_reports_message()}') return [], {} - if note is None: - note = 'Downloading m3u8 information' - if errnote is None: - errnote = 'Failed to download m3u8 information' - response = self._request_webpage( - m3u8_url, video_id, note=note, errnote=errnote, + + res = self._download_webpage_handle( + m3u8_url, video_id, + note='Downloading m3u8 information' if note is None else note, + errnote='Failed to download m3u8 information' if errnote is None else errnote, fatal=fatal, data=data, headers=headers, query=query) - if response is False: + + if res is False: return [], {} - with contextlib.closing(response): - prefix = response.read(512) - if not prefix.startswith(b'#EXTM3U'): - msg = 'Response data has no m3u header' - if fatal: - raise ExtractorError(msg, video_id=video_id) - self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id) - return [], {} - - content = self._webpage_read_content( - response, m3u8_url, video_id, note=note, errnote=errnote, - fatal=fatal, prefix=prefix, data=data) - if content is False: - return [], {} + m3u8_doc, urlh = res + m3u8_url = urlh.url return self._parse_m3u8_formats_and_subtitles( - content, response.url, ext=ext, entry_protocol=entry_protocol, + m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, preference=preference, quality=quality, m3u8_id=m3u8_id, note=note, errnote=errnote, fatal=fatal, live=live, data=data, headers=headers, query=query, video_id=video_id)