1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-11-01 15:15:15 +00:00

Standardize retry mechanism (#1649)

* [utils] Create `RetryManager`
* Migrate all retries to use the manager
* [extractor] Add wrapper methods for convenience
* Standardize console messages for retries
* Add `--retry-sleep` for extractors
This commit is contained in:
pukkandan
2022-08-02 01:43:18 +05:30
committed by GitHub
parent bfd973ece3
commit be5c1ae862
15 changed files with 256 additions and 277 deletions

View File

@@ -28,7 +28,6 @@ from ..utils import (
clean_html,
datetime_from_str,
dict_get,
error_to_compat_str,
float_or_none,
format_field,
get_first,
@@ -45,7 +44,6 @@ from ..utils import (
parse_iso8601,
parse_qs,
qualities,
remove_end,
remove_start,
smuggle_url,
str_or_none,
@@ -763,74 +761,54 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
response = None
last_error = None
count = -1
retries = self.get_param('extractor_retries', 3)
if check_get_keys is None:
check_get_keys = []
while count < retries:
count += 1
if last_error:
self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
for retry in self.RetryManager():
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
video_id=item_id, query=query,
video_id=item_id, query=query, note=note,
context=self._extract_context(ytcfg, default_client),
api_key=self._extract_api_key(ytcfg, default_client),
api_hostname=api_hostname, default_client=default_client,
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
api_hostname=api_hostname, default_client=default_client)
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
if isinstance(e.cause, urllib.error.HTTPError):
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
if fatal:
raise
else:
self.report_warning(error_to_compat_str(e))
return
if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal)
elif not isinstance(e.cause, urllib.error.HTTPError):
retry.error = e
continue
else:
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
last_error = e.msg
continue
if fatal:
raise
self.report_warning(error_to_compat_str(e))
return
if not check_get_keys or dict_get(response, check_get_keys):
break
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
last_error = 'Incomplete data received'
if count >= retries:
if fatal:
raise ExtractorError(last_error)
else:
self.report_warning(last_error)
return
return response
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if e.cause.code not in (403, 429):
retry.error = e
continue
return self._error_or_warning(e, fatal=fatal)
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
retry.error = e
continue
return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
retry.error = ExtractorError('Incomplete data received')
continue
return response
@staticmethod
def is_music_url(url):
@@ -4522,48 +4500,30 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
def _extract_webpage(self, url, item_id, fatal=True):
retries = self.get_param('extractor_retries', 3)
count = -1
webpage = data = last_error = None
while count < retries:
count += 1
# Sometimes youtube returns a webpage with incomplete ytInitialData
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if last_error:
self.report_warning('%s. Retrying ...' % last_error)
webpage, data = None, None
for retry in self.RetryManager(fatal=fatal):
try:
webpage = self._download_webpage(
url, item_id,
note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
webpage = self._download_webpage(url, item_id, note='Downloading webpage')
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
if fatal:
raise
self.report_warning(error_to_compat_str(e))
retry.error = e
continue
self._error_or_warning(e, fatal=fatal)
break
else:
try:
self._extract_and_report_alerts(data)
except ExtractorError as e:
if fatal:
raise
self.report_warning(error_to_compat_str(e))
break
if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
break
try:
self._extract_and_report_alerts(data)
except ExtractorError as e:
self._error_or_warning(e, fatal=fatal)
break
last_error = 'Incomplete yt initial data received'
if count >= retries:
if fatal:
raise ExtractorError(last_error)
self.report_warning(last_error)
break
# Sometimes youtube returns a webpage with incomplete ytInitialData
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
retry.error = ExtractorError('Incomplete yt initial data received')
continue
return webpage, data