mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-11-01 15:15:15 +00:00
Standardize retry mechanism (#1649)
* [utils] Create `RetryManager` * Migrate all retries to use the manager * [extractor] Add wrapper methods for convenience * Standardize console messages for retries * Add `--retry-sleep` for extractors
This commit is contained in:
@@ -28,7 +28,6 @@ from ..utils import (
|
||||
clean_html,
|
||||
datetime_from_str,
|
||||
dict_get,
|
||||
error_to_compat_str,
|
||||
float_or_none,
|
||||
format_field,
|
||||
get_first,
|
||||
@@ -45,7 +44,6 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
qualities,
|
||||
remove_end,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
@@ -763,74 +761,54 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
|
||||
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
|
||||
default_client='web'):
|
||||
response = None
|
||||
last_error = None
|
||||
count = -1
|
||||
retries = self.get_param('extractor_retries', 3)
|
||||
if check_get_keys is None:
|
||||
check_get_keys = []
|
||||
while count < retries:
|
||||
count += 1
|
||||
if last_error:
|
||||
self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
response = self._call_api(
|
||||
ep=ep, fatal=True, headers=headers,
|
||||
video_id=item_id, query=query,
|
||||
video_id=item_id, query=query, note=note,
|
||||
context=self._extract_context(ytcfg, default_client),
|
||||
api_key=self._extract_api_key(ytcfg, default_client),
|
||||
api_hostname=api_hostname, default_client=default_client,
|
||||
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
|
||||
api_hostname=api_hostname, default_client=default_client)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, network_exceptions):
|
||||
if isinstance(e.cause, urllib.error.HTTPError):
|
||||
first_bytes = e.cause.read(512)
|
||||
if not is_html(first_bytes):
|
||||
yt_error = try_get(
|
||||
self._parse_json(
|
||||
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
|
||||
lambda x: x['error']['message'], str)
|
||||
if yt_error:
|
||||
self._report_alerts([('ERROR', yt_error)], fatal=False)
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
|
||||
# We also want to catch all other network exceptions since errors in later pages can be troublesome
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
|
||||
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
|
||||
last_error = error_to_compat_str(e.cause or e.msg)
|
||||
if count < retries:
|
||||
continue
|
||||
if fatal:
|
||||
raise
|
||||
else:
|
||||
self.report_warning(error_to_compat_str(e))
|
||||
return
|
||||
if not isinstance(e.cause, network_exceptions):
|
||||
return self._error_or_warning(e, fatal=fatal)
|
||||
elif not isinstance(e.cause, urllib.error.HTTPError):
|
||||
retry.error = e
|
||||
continue
|
||||
|
||||
else:
|
||||
try:
|
||||
self._extract_and_report_alerts(response, only_once=True)
|
||||
except ExtractorError as e:
|
||||
# YouTube servers may return errors we want to retry on in a 200 OK response
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/839
|
||||
if 'unknown error' in e.msg.lower():
|
||||
last_error = e.msg
|
||||
continue
|
||||
if fatal:
|
||||
raise
|
||||
self.report_warning(error_to_compat_str(e))
|
||||
return
|
||||
if not check_get_keys or dict_get(response, check_get_keys):
|
||||
break
|
||||
# Youtube sometimes sends incomplete data
|
||||
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
|
||||
last_error = 'Incomplete data received'
|
||||
if count >= retries:
|
||||
if fatal:
|
||||
raise ExtractorError(last_error)
|
||||
else:
|
||||
self.report_warning(last_error)
|
||||
return
|
||||
return response
|
||||
first_bytes = e.cause.read(512)
|
||||
if not is_html(first_bytes):
|
||||
yt_error = try_get(
|
||||
self._parse_json(
|
||||
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
|
||||
lambda x: x['error']['message'], str)
|
||||
if yt_error:
|
||||
self._report_alerts([('ERROR', yt_error)], fatal=False)
|
||||
# Downloading page may result in intermittent 5xx HTTP error
|
||||
# Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
|
||||
# We also want to catch all other network exceptions since errors in later pages can be troublesome
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
|
||||
if e.cause.code not in (403, 429):
|
||||
retry.error = e
|
||||
continue
|
||||
return self._error_or_warning(e, fatal=fatal)
|
||||
|
||||
try:
|
||||
self._extract_and_report_alerts(response, only_once=True)
|
||||
except ExtractorError as e:
|
||||
# YouTube servers may return errors we want to retry on in a 200 OK response
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/839
|
||||
if 'unknown error' in e.msg.lower():
|
||||
retry.error = e
|
||||
continue
|
||||
return self._error_or_warning(e, fatal=fatal)
|
||||
# Youtube sometimes sends incomplete data
|
||||
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
|
||||
if not traverse_obj(response, *variadic(check_get_keys)):
|
||||
retry.error = ExtractorError('Incomplete data received')
|
||||
continue
|
||||
|
||||
return response
|
||||
|
||||
@staticmethod
|
||||
def is_music_url(url):
|
||||
@@ -4522,48 +4500,30 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||
return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
|
||||
|
||||
def _extract_webpage(self, url, item_id, fatal=True):
|
||||
retries = self.get_param('extractor_retries', 3)
|
||||
count = -1
|
||||
webpage = data = last_error = None
|
||||
while count < retries:
|
||||
count += 1
|
||||
# Sometimes youtube returns a webpage with incomplete ytInitialData
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/116
|
||||
if last_error:
|
||||
self.report_warning('%s. Retrying ...' % last_error)
|
||||
webpage, data = None, None
|
||||
for retry in self.RetryManager(fatal=fatal):
|
||||
try:
|
||||
webpage = self._download_webpage(
|
||||
url, item_id,
|
||||
note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
|
||||
webpage = self._download_webpage(url, item_id, note='Downloading webpage')
|
||||
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, network_exceptions):
|
||||
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
|
||||
last_error = error_to_compat_str(e.cause or e.msg)
|
||||
if count < retries:
|
||||
continue
|
||||
if fatal:
|
||||
raise
|
||||
self.report_warning(error_to_compat_str(e))
|
||||
retry.error = e
|
||||
continue
|
||||
self._error_or_warning(e, fatal=fatal)
|
||||
break
|
||||
else:
|
||||
try:
|
||||
self._extract_and_report_alerts(data)
|
||||
except ExtractorError as e:
|
||||
if fatal:
|
||||
raise
|
||||
self.report_warning(error_to_compat_str(e))
|
||||
break
|
||||
|
||||
if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
|
||||
break
|
||||
try:
|
||||
self._extract_and_report_alerts(data)
|
||||
except ExtractorError as e:
|
||||
self._error_or_warning(e, fatal=fatal)
|
||||
break
|
||||
|
||||
last_error = 'Incomplete yt initial data received'
|
||||
if count >= retries:
|
||||
if fatal:
|
||||
raise ExtractorError(last_error)
|
||||
self.report_warning(last_error)
|
||||
break
|
||||
# Sometimes youtube returns a webpage with incomplete ytInitialData
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/116
|
||||
if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
|
||||
retry.error = ExtractorError('Incomplete yt initial data received')
|
||||
continue
|
||||
|
||||
return webpage, data
|
||||
|
||||
|
||||
Reference in New Issue
Block a user