Add option --extractor-retries to retry on known extractor errors

* Currently only used by youtube Fixes https://github.com/ytdl-org/youtube-dl/issues/28194 Possibly also fixes: https://github.com/ytdl-org/youtube-dl/issues/28289 (can not confirm since the issue isn't reliably reproducible)
2026-02-22 00:15:51 +00:00 · 2021-03-01 05:18:37 +05:30
parent f0884c8b3f
commit 62bff2c170
4 changed files with 48 additions and 30 deletions
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2762,28 +2762,36 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
        for page_num in itertools.count(1):
            if not continuation:
                break
-            count = 0
-            retries = 3
-            while count <= retries:
+            retries = self._downloader.params.get('extractor_retries', 3)
+            count = -1
+            last_error = None
+            while count < retries:
+                count += 1
+                if last_error:
+                    self.report_warning('%s. Retrying ...' % last_error)
                try:
-                    # Downloading page may result in intermittent 5xx HTTP error
-                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
-                    break
                except ExtractorError as e:
-                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
-                        count += 1
-                        if count <= retries:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
+                        # Downloading page may result in intermittent 5xx HTTP error
+                        # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+                        last_error = 'HTTP Error %s' % e.cause.code
+                        if count < retries:
                            continue
                    raise
-            if not browse:
-                break
-            response = try_get(browse, lambda x: x[1]['response'], dict)
-            if not response:
+                else:
+                    response = try_get(browse, lambda x: x[1]['response'], dict)
+
+                    # Youtube sometimes sends incomplete data
+                    # See: https://github.com/ytdl-org/youtube-dl/issues/28194
+                    if response.get('continuationContents') or response.get('onResponseReceivedActions'):
+                        break
+                    last_error = 'Incomplete data recieved'
+            if not browse or not response:
                break

            known_continuation_renderers = {
@@ -3004,11 +3012,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

-        count = 0
-        retries = 3
+        retries = self._downloader.params.get('extractor_retries', 3)
+        count = -1
        while count < retries:
+            count += 1
            # Sometimes youtube returns a webpage with incomplete ytInitialData
-            webpage = self._download_webpage(url, item_id)
+            # See: https://github.com/yt-dlp/yt-dlp/issues/116
+            if count:
+                self.report_warning('Incomplete yt initial data recieved. Retrying ...')
+            webpage = self._download_webpage(url, item_id,
+                'Downloading webpage%s' % ' (retry #%d)' % count if count else '')
            identity_token = self._extract_identity_token(webpage, item_id)
            data = self._extract_yt_initial_data(item_id, webpage)
            err_msg = None
@@ -3023,9 +3036,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                raise ExtractorError('YouTube said: %s' % err_msg, expected=True)
            if data.get('contents') or data.get('currentVideoEndpoint'):
                break
-            count += 1
-            self.to_screen(
-                'Incomplete yt initial data recieved. Retrying (attempt %d of %d)...' % (count, retries))

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)