
Standardize retry mechanism (#1649)

* [utils] Create `RetryManager`
* Migrate all retries to use the manager
* [extractor] Add wrapper methods for convenience
* Standardize console messages for retries
* Add `--retry-sleep` for extractors
Author: pukkandan
Date: 2022-08-02 01:43:18 +05:30
Committed by: GitHub
Parent commit: bfd973ece3
Commit: be5c1ae862
15 changed files with 256 additions and 277 deletions
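
The pattern the migration converges on, visible in every extractor hunk below, is: iterate over the manager, assign a retryable failure to `retry.error`, and let the manager handle the warning message, the optional sleep, and the final raise. A minimal sketch of that usage; the download call, `url` and `video_id` are illustrative stand-ins, not code from this commit:

from yt_dlp.utils import ExtractorError

# Inside an InfoExtractor method; `url` and `video_id` are illustrative.
for retry in self.RetryManager():
    try:
        data = self._download_json(url, video_id, note='Downloading API page')
        break
    except ExtractorError as e:
        # Flag the attempt as failed; the manager warns, optionally sleeps,
        # and raises (or only warns, if non-fatal) once retries are exhausted.
        retry.error = e
        continue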

yt_dlp/extractor/common.py

@@ -32,6 +32,7 @@ from ..utils import (
GeoUtils,
LenientJSONDecoder,
RegexNotFoundError,
RetryManager,
UnsupportedError,
age_restricted,
base_url,
@@ -3848,6 +3849,13 @@ class InfoExtractor:
self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}')
return True
def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True):
RetryManager.report_retry(err, _count or int(fatal), _retries, info=self.to_screen, warn=self.report_warning,
sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor'))
def RetryManager(self, **kwargs):
return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs)
@classmethod
def extract_from_webpage(cls, ydl, url, webpage):
ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
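
The `RetryManager(self, **kwargs)` wrapper above builds the new helper from `yt_dlp/utils.py`, whose body is not included in the hunks shown on this page. A simplified sketch of an iterator with a compatible interface, assuming only what the wrapper and `_error_or_warning` imply (an attempt counter, an error callback, forwarding of extra keyword arguments such as `fatal=...`); the actual implementation may differ in detail:

import functools


class RetryManager:
    """for retry in RetryManager(retries, callback): set `retry.error` to request another attempt."""

    def __init__(self, _retries, _error_callback, **kwargs):
        self.attempt, self.retries = 0, _retries or 0
        # Extra keyword arguments (e.g. fatal=...) are forwarded to the callback
        self.error_callback = functools.partial(_error_callback, **kwargs)
        self.error = None

    def __iter__(self):
        while self.attempt <= self.retries:
            self.error = None
            self.attempt += 1
            yield self
            if self.error is None:  # the guarded block succeeded
                return
            # Report the failure; the callback is expected to warn (and sleep)
            # between attempts and to raise once retries are exhausted
            self.error_callback(self.error, self.attempt, self.retries)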

yt_dlp/extractor/soundcloud.py

@@ -19,7 +19,6 @@ from ..utils import (
int_or_none,
KNOWN_EXTENSIONS,
mimetype2ext,
remove_end,
parse_qs,
str_or_none,
try_get,
@@ -661,25 +660,20 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
'offset': 0,
}
retries = self.get_param('extractor_retries', 3)
for i in itertools.count():
attempt, last_error = -1, None
while attempt < retries:
attempt += 1
if last_error:
self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'), playlist_id)
for retry in self.RetryManager():
try:
response = self._download_json(
url, playlist_id, query=query, headers=self._HEADERS,
note='Downloading track page %s%s' % (i + 1, f' (retry #{attempt})' if attempt else ''))
note=f'Downloading track page {i + 1}')
break
except ExtractorError as e:
# Downloading page may result in intermittent 502 HTTP error
# See https://github.com/yt-dlp/yt-dlp/issues/872
if attempt >= retries or not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502:
if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502:
raise
last_error = str(e.cause or e.msg)
retry.error = e
continue
def resolve_entry(*candidates):
for cand in candidates:
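
Both knobs that `_error_or_warning` reads (`extractor_retries` and the new `retry_sleep_functions['extractor']` hook, exposed on the command line as `--retry-sleep`) are ordinary YoutubeDL parameters, so the behaviour shared by the migrated extractors can be tuned when embedding yt-dlp. A sketch; the backoff function and the URL are placeholder choices:

import yt_dlp

ydl_opts = {
    'extractor_retries': 5,  # read via self.get_param('extractor_retries', 3)
    'retry_sleep_functions': {
        # Takes the attempt number and returns seconds to sleep between extractor retries.
        # Exponential backoff capped at 20 seconds; an illustrative choice, not from this commit.
        'extractor': lambda n: min(2 ** n, 20),
    },
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://soundcloud.com/example-artist/example-track'])  # placeholder URL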

yt_dlp/extractor/tiktok.py

@@ -630,19 +630,17 @@ class TikTokUserIE(TikTokBaseIE):
'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api.
}
max_retries = self.get_param('extractor_retries', 3)
for page in itertools.count(1):
for retries in itertools.count():
for retry in self.RetryManager():
try:
post_list = self._call_api('aweme/post', query, username,
note='Downloading user video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
errnote='Unable to download user video list')
post_list = self._call_api(
'aweme/post', query, username, note=f'Downloading user video list page {page}',
errnote='Unable to download user video list')
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
retry.error = e
continue
raise
break
yield from post_list.get('aweme_list', [])
if not post_list.get('has_more'):
break
@@ -680,19 +678,17 @@ class TikTokBaseListIE(TikTokBaseIE):
'device_id': ''.join(random.choice(string.digits) for i in range(19))
}
max_retries = self.get_param('extractor_retries', 3)
for page in itertools.count(1):
for retries in itertools.count():
for retry in self.RetryManager():
try:
post_list = self._call_api(self._API_ENDPOINT, query, display_id,
note='Downloading video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
errnote='Unable to download video list')
post_list = self._call_api(
self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}',
errnote='Unable to download video list')
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
retry.error = e
continue
raise
break
for video in post_list.get('aweme_list', []):
yield {
**self._parse_aweme_video_app(video),
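
Both TikTok hunks, like the SoundCloud one above, nest the retry loop inside a pagination loop so that only the failing page is retried while pagination state stays outside. A condensed sketch of that structure; the endpoint name, response fields and cursor handling are placeholders:

import itertools

from yt_dlp.utils import ExtractorError


def _entries(self, query, display_id):
    # Sketch of an extractor method (self is an InfoExtractor subclass instance)
    for page in itertools.count(1):
        for retry in self.RetryManager():
            try:
                page_data = self._call_api(  # placeholder endpoint and arguments
                    'example/endpoint', query, display_id,
                    note=f'Downloading page {page}')
            except ExtractorError as e:
                retry.error = e  # retry only this page
                continue
        yield from page_data.get('items', [])  # placeholder field name
        if not page_data.get('has_more'):
            break
        query['cursor'] = page_data.get('cursor')  # placeholder pagination token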

yt_dlp/extractor/youtube.py

@@ -28,7 +28,6 @@ from ..utils import (
clean_html,
datetime_from_str,
dict_get,
error_to_compat_str,
float_or_none,
format_field,
get_first,
@@ -45,7 +44,6 @@ from ..utils import (
parse_iso8601,
parse_qs,
qualities,
remove_end,
remove_start,
smuggle_url,
str_or_none,
@@ -763,74 +761,54 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
default_client='web'):
response = None
last_error = None
count = -1
retries = self.get_param('extractor_retries', 3)
if check_get_keys is None:
check_get_keys = []
while count < retries:
count += 1
if last_error:
self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
for retry in self.RetryManager():
try:
response = self._call_api(
ep=ep, fatal=True, headers=headers,
video_id=item_id, query=query,
video_id=item_id, query=query, note=note,
context=self._extract_context(ytcfg, default_client),
api_key=self._extract_api_key(ytcfg, default_client),
api_hostname=api_hostname, default_client=default_client,
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
api_hostname=api_hostname, default_client=default_client)
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
if isinstance(e.cause, urllib.error.HTTPError):
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
if fatal:
raise
else:
self.report_warning(error_to_compat_str(e))
return
if not isinstance(e.cause, network_exceptions):
return self._error_or_warning(e, fatal=fatal)
elif not isinstance(e.cause, urllib.error.HTTPError):
retry.error = e
continue
else:
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
last_error = e.msg
continue
if fatal:
raise
self.report_warning(error_to_compat_str(e))
return
if not check_get_keys or dict_get(response, check_get_keys):
break
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
last_error = 'Incomplete data received'
if count >= retries:
if fatal:
raise ExtractorError(last_error)
else:
self.report_warning(last_error)
return
return response
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if e.cause.code not in (403, 429):
retry.error = e
continue
return self._error_or_warning(e, fatal=fatal)
try:
self._extract_and_report_alerts(response, only_once=True)
except ExtractorError as e:
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if 'unknown error' in e.msg.lower():
retry.error = e
continue
return self._error_or_warning(e, fatal=fatal)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
retry.error = ExtractorError('Incomplete data received')
continue
return response
@staticmethod
def is_music_url(url):
@@ -4522,48 +4500,30 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
def _extract_webpage(self, url, item_id, fatal=True):
retries = self.get_param('extractor_retries', 3)
count = -1
webpage = data = last_error = None
while count < retries:
count += 1
# Sometimes youtube returns a webpage with incomplete ytInitialData
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if last_error:
self.report_warning('%s. Retrying ...' % last_error)
webpage, data = None, None
for retry in self.RetryManager(fatal=fatal):
try:
webpage = self._download_webpage(
url, item_id,
note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
webpage = self._download_webpage(url, item_id, note='Downloading webpage')
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
if fatal:
raise
self.report_warning(error_to_compat_str(e))
retry.error = e
continue
self._error_or_warning(e, fatal=fatal)
break
else:
try:
self._extract_and_report_alerts(data)
except ExtractorError as e:
if fatal:
raise
self.report_warning(error_to_compat_str(e))
break
if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
break
try:
self._extract_and_report_alerts(data)
except ExtractorError as e:
self._error_or_warning(e, fatal=fatal)
break
last_error = 'Incomplete yt initial data received'
if count >= retries:
if fatal:
raise ExtractorError(last_error)
self.report_warning(last_error)
break
# Sometimes youtube returns a webpage with incomplete ytInitialData
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
retry.error = ExtractorError('Incomplete yt initial data received')
continue
return webpage, data
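
Besides exceptions, the YouTube hunks also retry on responses that arrive without an error but are incomplete, by assigning a freshly constructed ExtractorError to retry.error; passing fatal=False to the manager turns the final failure into a warning instead of a raise. A condensed sketch of that shape, with the completeness check as an illustrative stand-in:

from yt_dlp.utils import ExtractorError


def _fetch_data(self, url, item_id, fatal=True):
    # Sketch of an extractor method (self is an InfoExtractor subclass instance)
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            retry.error = e
            continue
        if 'contents' not in data:  # illustrative completeness check
            # No exception was raised, but the payload is unusable; retry anyway
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue
    return webpage, data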