Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 06:35:12 +00:00)
			
		
		
		
	[extractor/common] Ensure response handle is not prematurely closed before it can be read if it matches expected_status (resolves #17195, closes #17846, resolves #17447)
This commit is contained in:
		| @@ -7,6 +7,7 @@ import json | |||||||
| import os.path | import os.path | ||||||
| import re | import re | ||||||
| import types | import types | ||||||
|  | import ssl | ||||||
| import sys | import sys | ||||||
|  |  | ||||||
| import youtube_dl.extractor | import youtube_dl.extractor | ||||||
| @@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re): | |||||||
|             real_warning(w) |             real_warning(w) | ||||||
|  |  | ||||||
|     ydl.report_warning = _report_warning |     ydl.report_warning = _report_warning | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def http_server_port(httpd): | ||||||
|  |     if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): | ||||||
|  |         # In Jython SSLSocket is not a subclass of socket.socket | ||||||
|  |         sock = httpd.socket.sock | ||||||
|  |     else: | ||||||
|  |         sock = httpd.socket | ||||||
|  |     return sock.getsockname()[1] | ||||||
|   | |||||||
| @@ -9,11 +9,30 @@ import sys | |||||||
| import unittest | import unittest | ||||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
|  |  | ||||||
| from test.helper import FakeYDL, expect_dict, expect_value | from test.helper import FakeYDL, expect_dict, expect_value, http_server_port | ||||||
| from youtube_dl.compat import compat_etree_fromstring | from youtube_dl.compat import compat_etree_fromstring, compat_http_server | ||||||
| from youtube_dl.extractor.common import InfoExtractor | from youtube_dl.extractor.common import InfoExtractor | ||||||
| from youtube_dl.extractor import YoutubeIE, get_info_extractor | from youtube_dl.extractor import YoutubeIE, get_info_extractor | ||||||
| from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError | from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError | ||||||
|  | import threading | ||||||
|  |  | ||||||
|  |  | ||||||
|  | TEAPOT_RESPONSE_STATUS = 418 | ||||||
|  | TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>" | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | ||||||
|  |     def log_message(self, format, *args): | ||||||
|  |         pass | ||||||
|  |  | ||||||
|  |     def do_GET(self): | ||||||
|  |         if self.path == '/teapot': | ||||||
|  |             self.send_response(TEAPOT_RESPONSE_STATUS) | ||||||
|  |             self.send_header('Content-Type', 'text/html; charset=utf-8') | ||||||
|  |             self.end_headers() | ||||||
|  |             self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) | ||||||
|  |         else: | ||||||
|  |             assert False | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestIE(InfoExtractor): | class TestIE(InfoExtractor): | ||||||
| @@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ | |||||||
|                 for i in range(len(entries)): |                 for i in range(len(entries)): | ||||||
|                     expect_dict(self, entries[i], expected_entries[i]) |                     expect_dict(self, entries[i], expected_entries[i]) | ||||||
|  |  | ||||||
|  |     def test_response_with_expected_status_returns_content(self): | ||||||
|  |         # Checks for mitigations against the effects of | ||||||
|  |         # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which | ||||||
|  |         # manifest as `_download_webpage`, `_download_xml`, `_download_json`, | ||||||
|  |         # or the underlying `_download_webpage_handle` returning no content | ||||||
|  |         # when a response matches `expected_status`. | ||||||
|  |  | ||||||
|  |         httpd = compat_http_server.HTTPServer( | ||||||
|  |             ('127.0.0.1', 0), InfoExtractorTestRequestHandler) | ||||||
|  |         port = http_server_port(httpd) | ||||||
|  |         server_thread = threading.Thread(target=httpd.serve_forever) | ||||||
|  |         server_thread.daemon = True | ||||||
|  |         server_thread.start() | ||||||
|  |  | ||||||
|  |         (content, urlh) = self.ie._download_webpage_handle( | ||||||
|  |             'http://127.0.0.1:%d/teapot' % port, None, | ||||||
|  |             expected_status=TEAPOT_RESPONSE_STATUS) | ||||||
|  |         self.assertEqual(content, TEAPOT_RESPONSE_BODY) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -9,26 +9,16 @@ import sys | |||||||
| import unittest | import unittest | ||||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
|  |  | ||||||
| from test.helper import try_rm | from test.helper import http_server_port, try_rm | ||||||
| from youtube_dl import YoutubeDL | from youtube_dl import YoutubeDL | ||||||
| from youtube_dl.compat import compat_http_server | from youtube_dl.compat import compat_http_server | ||||||
| from youtube_dl.downloader.http import HttpFD | from youtube_dl.downloader.http import HttpFD | ||||||
| from youtube_dl.utils import encodeFilename | from youtube_dl.utils import encodeFilename | ||||||
| import ssl |  | ||||||
| import threading | import threading | ||||||
|  |  | ||||||
| TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | ||||||
|  |  | ||||||
|  |  | ||||||
| def http_server_port(httpd): |  | ||||||
|     if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): |  | ||||||
|         # In Jython SSLSocket is not a subclass of socket.socket |  | ||||||
|         sock = httpd.socket.sock |  | ||||||
|     else: |  | ||||||
|         sock = httpd.socket |  | ||||||
|     return sock.getsockname()[1] |  | ||||||
|  |  | ||||||
|  |  | ||||||
| TEST_SIZE = 10 * 1024 | TEST_SIZE = 10 * 1024 | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ import sys | |||||||
| import unittest | import unittest | ||||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
|  |  | ||||||
|  | from test.helper import http_server_port | ||||||
| from youtube_dl import YoutubeDL | from youtube_dl import YoutubeDL | ||||||
| from youtube_dl.compat import compat_http_server, compat_urllib_request | from youtube_dl.compat import compat_http_server, compat_urllib_request | ||||||
| import ssl | import ssl | ||||||
| @@ -16,15 +17,6 @@ import threading | |||||||
| TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | ||||||
|  |  | ||||||
|  |  | ||||||
| def http_server_port(httpd): |  | ||||||
|     if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): |  | ||||||
|         # In Jython SSLSocket is not a subclass of socket.socket |  | ||||||
|         sock = httpd.socket.sock |  | ||||||
|     else: |  | ||||||
|         sock = httpd.socket |  | ||||||
|     return sock.getsockname()[1] |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | ||||||
|     def log_message(self, format, *args): |     def log_message(self, format, *args): | ||||||
|         pass |         pass | ||||||
|   | |||||||
| @@ -606,6 +606,11 @@ class InfoExtractor(object): | |||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|             if isinstance(err, compat_urllib_error.HTTPError): |             if isinstance(err, compat_urllib_error.HTTPError): | ||||||
|                 if self.__can_accept_status_code(err, expected_status): |                 if self.__can_accept_status_code(err, expected_status): | ||||||
|  |                     # Retain reference to error to prevent file object from | ||||||
|  |                     # being closed before it can be read. Works around the | ||||||
|  |                     # effects of <https://bugs.python.org/issue15002> | ||||||
|  |                     # introduced in Python 3.4.1. | ||||||
|  |                     err.fp._error = err | ||||||
|                     return err.fp |                     return err.fp | ||||||
|  |  | ||||||
|             if errnote is False: |             if errnote is False: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Xiao Di Guan
					Xiao Di Guan