mirror of
https://github.com/yt-dlp/yt-dlp.git
Standardize retry mechanism (#1649)
* [utils] Create `RetryManager`
* Migrate all retries to use the manager
* [extractor] Add wrapper methods for convenience
* Standardize console messages for retries
* Add `--retry-sleep` for extractors
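For orientation, the pattern this commit standardizes on (and which every hunk below migrates to) is: iterate over a retry manager, and mark an attempt as retryable by assigning the caught exception to `retry.error`; anything re-raised instead fails immediately. Below is a minimal, self-contained sketch of that calling convention — an illustrative stand-in, not the actual `RetryManager` added to `yt_dlp/utils.py`:

```python
import time


class ToyRetryManager:
    """Illustrative stand-in for yt_dlp.utils.RetryManager; the real class also
    takes an error callback and an optional sleep function (see the hunks below)."""

    def __init__(self, retries=3, sleep=1.0):
        self.retries, self.sleep, self.error = retries, sleep, None

    def __iter__(self):
        for attempt in range(self.retries + 1):
            self.error = None
            yield self                      # run one attempt of the caller's body
            if self.error is None:          # nothing was marked retryable: success
                return
            if attempt >= self.retries:     # retry budget exhausted: surface the error
                raise self.error
            print(f'{self.error}. Retrying ({attempt + 1}/{self.retries})...')
            time.sleep(self.sleep)


# Fails twice, then succeeds
attempts = iter([OSError('temporary failure'), OSError('temporary failure'), 'ok'])


def flaky():
    value = next(attempts)
    if isinstance(value, Exception):
        raise value
    return value


for retry in ToyRetryManager(retries=3, sleep=0):
    try:
        result = flaky()
    except OSError as e:
        retry.error = e   # mark as retryable; re-raising instead would fail immediately
        continue
print(result)             # -> 'ok' after two retries
```

The real manager additionally takes an error callback and an optional sleep function, which is what the wrapper methods in the first hunk below provide.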
@@ -32,6 +32,7 @@ from ..utils import (
GeoUtils,
LenientJSONDecoder,
RegexNotFoundError,
RetryManager,
UnsupportedError,
age_restricted,
base_url,
@@ -3848,6 +3849,13 @@ class InfoExtractor:
self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}')
return True

def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True):
RetryManager.report_retry(err, _count or int(fatal), _retries, info=self.to_screen, warn=self.report_warning,
sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))

def RetryManager(self, **kwargs):
return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)

@classmethod
def extract_from_webpage(cls, ydl, url, webpage):
ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
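The new `_error_or_warning` helper above reads the sleep function from `self.get_param('retry_sleep_functions', {}).get('extractor')`, which is how the new `--retry-sleep` for extractors reaches this code, and the `RetryManager()` wrapper reads `extractor_retries`. A hedged sketch of supplying both when embedding yt-dlp; the parameter names come from the diff, but passing them as `YoutubeDL` options and the callable receiving the attempt number are assumptions about the surrounding API:

```python
import yt_dlp

ydl_opts = {
    'extractor_retries': 5,                # read via get_param() in the wrapper above
    'retry_sleep_functions': {
        # Assumed callable shape: attempt number in, seconds to sleep out
        'extractor': lambda n: min(2 ** n, 30),
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])  # any supported URL
```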
@@ -19,7 +19,6 @@ from ..utils import (
int_or_none,
KNOWN_EXTENSIONS,
mimetype2ext,
remove_end,
parse_qs,
str_or_none,
try_get,
@@ -661,25 +660,20 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
'offset': 0,
}

retries = self.get_param('extractor_retries', 3)

for i in itertools.count():
attempt, last_error = -1, None
while attempt < retries:
attempt += 1
if last_error:
self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'), playlist_id)
for retry in self.RetryManager():
try:
response = self._download_json(
url, playlist_id, query=query, headers=self._HEADERS,
note='Downloading track page %s%s' % (i + 1, f' (retry #{attempt})' if attempt else ''))
note=f'Downloading track page {i + 1}')
break
except ExtractorError as e:
# Downloading page may result in intermittent 502 HTTP error
# See https://github.com/yt-dlp/yt-dlp/issues/872
if attempt >= retries or not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502:
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502:
raise
last_error = str(e.cause or e.msg)
retry.error = e
continue

def resolve_entry(*candidates):
for cand in candidates:
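Because the old and new lines are interleaved above, here is the post-migration shape of the SoundCloud paging loop on its own, written as a hypothetical extractor for readability. Only `self.RetryManager()`, `retry.error` and the retry-on-502-only policy come from the diff; the class name, URL, query and response keys are illustrative:

```python
import itertools
import urllib.error

from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.utils import ExtractorError


class ExamplePlaylistIE(InfoExtractor):  # hypothetical extractor, for illustration only
    def _entries(self, url, playlist_id):
        for page in itertools.count(1):
            response = None
            for retry in self.RetryManager():
                try:
                    response = self._download_json(
                        url, playlist_id, query={'page': page},
                        note=f'Downloading track page {page}')
                    break
                except ExtractorError as e:
                    # Same policy as the SoundCloud hunk: only intermittent
                    # gateway errors (HTTP 502) are retried; everything else
                    # is re-raised immediately
                    if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 502:
                        raise
                    retry.error = e
            if not response:
                return
            yield from response.get('entries', [])   # response keys are made up
            if not response.get('next_page'):
                return
```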
@@ -630,19 +630,17 @@ class TikTokUserIE(TikTokBaseIE):
'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
}

max_retries = self.get_param('extractor_retries', 3)
for page in itertools.count(1):
for retries in itertools.count():
for retry in self.RetryManager():
try:
post_list = self._call_api('aweme/post', query, username,
note='Downloading user video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
errnote='Unable to download user video list')
post_list = self._call_api(
'aweme/post', query, username, note=f'Downloading user video list page {page}',
errnote='Unable to download user video list')
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
retry.error = e
continue
raise
break
yield from post_list.get('aweme_list', [])
if not post_list.get('has_more'):
break
@@ -680,19 +678,17 @@ class TikTokBaseListIE(TikTokBaseIE):
'device_id': ''.join(random.choice(string.digits) for i in range(19))
}

max_retries = self.get_param('extractor_retries', 3)
for page in itertools.count(1):
for retries in itertools.count():
for retry in self.RetryManager():
try:
post_list = self._call_api(self._API_ENDPOINT, query, display_id,
note='Downloading video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
errnote='Unable to download video list')
post_list = self._call_api(
self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}',
errnote='Unable to download video list')
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
retry.error = e
continue
raise
break
for video in post_list.get('aweme_list', []):
yield {
**self._parse_aweme_video_app(video),
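Both TikTok hunks keep their original retry condition: only an empty or truncated API body, which surfaces as a `json.JSONDecodeError` at position 0, is treated as retryable and assigned to `retry.error`. A standalone illustration of that check (not part of the diff):

```python
import json


def is_empty_json_response(exc):
    """True for the failure mode the TikTok code retries on: JSON parsing failed
    at the very first character, i.e. the API returned an empty or garbage body."""
    cause = getattr(exc, 'cause', exc)   # ExtractorError wraps the original error in .cause
    return isinstance(cause, json.JSONDecodeError) and cause.pos == 0


try:
    json.loads('')                       # simulate an empty response body
except json.JSONDecodeError as e:
    assert is_empty_json_response(e)
```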
@@ -28,7 +28,6 @@ from ..utils import (
clean_html,
datetime_from_str,
dict_get,
error_to_compat_str,
float_or_none,
format_field,
get_first,
@@ -45,7 +44,6 @@ from ..utils import (
parse_iso8601,
parse_qs,
qualities,
remove_end,
remove_start,
smuggle_url,
str_or_none,
@@ -763,74 +761,54 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
response = None
last_error = None
count = -1
retries = self.get_param('extractor_retries', 3)
if check_get_keys is None:
check_get_keys = []
while count < retries:
count += 1
if last_error:
self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
for retry in self.RetryManager():
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
video_id=item_id, query=query,
video_id=item_id, query=query, note=note,
context=self._extract_context(ytcfg, default_client),
api_key=self._extract_api_key(ytcfg, default_client),
api_hostname=api_hostname, default_client=default_client,
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
api_hostname=api_hostname, default_client=default_client)
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
if isinstance(e.cause, urllib.error.HTTPError):
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
if fatal:
raise
else:
self.report_warning(error_to_compat_str(e))
return
if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal)
elif not isinstance(e.cause, urllib.error.HTTPError):
retry.error = e
continue

else:
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
last_error = e.msg
continue
if fatal:
raise
self.report_warning(error_to_compat_str(e))
return
if not check_get_keys or dict_get(response, check_get_keys):
break
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
last_error = 'Incomplete data received'
if count >= retries:
if fatal:
raise ExtractorError(last_error)
else:
self.report_warning(last_error)
return
return response
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if e.cause.code not in (403, 429):
retry.error = e
continue
return self._error_or_warning(e, fatal=fatal)

try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
retry.error = e
continue
return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
retry.error = ExtractorError('Incomplete data received')
continue

return response

@staticmethod
def is_music_url(url):
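One behavioural detail of the `_extract_response` rewrite above: incomplete payloads are no longer tracked through `last_error`; the completeness test is now `traverse_obj(response, *variadic(check_get_keys))`, and a failed test becomes `retry.error = ExtractorError('Incomplete data received')`. A standalone look at that test, with made-up sample data:

```python
from yt_dlp.utils import traverse_obj, variadic

check_get_keys = 'onResponseReceivedActions'   # may also be a tuple of keys/paths
response = {'responseContext': {}}             # made-up payload missing the wanted key

incomplete = not traverse_obj(response, *variadic(check_get_keys))
print(incomplete)  # True -> the code above sets retry.error = ExtractorError('Incomplete data received')
```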
@@ -4522,48 +4500,30 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())

def _extract_webpage(self, url, item_id, fatal=True):
retries = self.get_param('extractor_retries', 3)
count = -1
webpage = data = last_error = None
while count < retries:
count += 1
# Sometimes youtube returns a webpage with incomplete ytInitialData
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if last_error:
self.report_warning('%s. Retrying ...' % last_error)
webpage, data = None, None
for retry in self.RetryManager(fatal=fatal):
try:
webpage = self._download_webpage(
url, item_id,
note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
webpage = self._download_webpage(url, item_id, note='Downloading webpage')
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
if fatal:
raise
self.report_warning(error_to_compat_str(e))
retry.error = e
continue
self._error_or_warning(e, fatal=fatal)
break
else:
try:
self._extract_and_report_alerts(data)
except ExtractorError as e:
if fatal:
raise
self.report_warning(error_to_compat_str(e))
break

if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
break
try:
self._extract_and_report_alerts(data)
except ExtractorError as e:
self._error_or_warning(e, fatal=fatal)
break

last_error = 'Incomplete yt initial data received'
if count >= retries:
if fatal:
raise ExtractorError(last_error)
self.report_warning(last_error)
break
# Sometimes youtube returns a webpage with incomplete ytInitialData
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
retry.error = ExtractorError('Incomplete yt initial data received')
continue

return webpage, data
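`_extract_webpage` above constructs its manager as `self.RetryManager(fatal=fatal)`, so the flag is forwarded to `_error_or_warning` and a non-fatal call degrades to a warning once retries are exhausted, instead of raising. A hedged sketch of that usage in a hypothetical extractor (class name and method are illustrative, not from the diff):

```python
from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.utils import ExtractorError


class ExampleTabIE(InfoExtractor):  # hypothetical, for illustration only
    def _fetch_webpage(self, url, item_id, fatal=True):
        webpage = None
        for retry in self.RetryManager(fatal=fatal):
            try:
                webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            except ExtractorError as e:
                # With fatal=False, exhausting the retries ends in report_warning()
                # (via _error_or_warning) and webpage stays None instead of raising
                retry.error = e
        return webpage
```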