[cleanup] Misc cleanup

2026-03-01 11:49:27 +00:00 · 2022-06-28 10:40:54 +05:30
parent 47046464fa
commit ae61d108dd
10 changed files with 65 additions and 64 deletions
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2825,12 +2825,22 @@ class GenericIE(InfoExtractor):
                        new_url, {'force_videoid': force_videoid})
                return self.url_result(new_url)

+        def request_webpage():
+            request = sanitized_Request(url)
+            # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
+            # making it impossible to download only a chunk of the file (yet we need only the first
+            # 512 bytes to test whether it's HTML or not). With yt-dlp's default Accept-Encoding,
+            # this always results in downloading the whole file, which is not desirable.
+            # Therefore, for the extraction pass we have to override Accept-Encoding to any in order
+            # to accept raw bytes and be able to download only a chunk.
+            # It would probably be better to solve this by checking Content-Type for application/octet-stream
+            # after the HEAD request finishes, but it is not certain that we can rely on this.
+            request.add_header('Accept-Encoding', '*')
+            return self._request_webpage(request, video_id)
+
        full_response = None
        if head_response is False:
-            request = sanitized_Request(url)
-            request.add_header('Accept-Encoding', '*')
-            full_response = self._request_webpage(request, video_id)
-            head_response = full_response
+            head_response = full_response = request_webpage()

        info_dict = {
            'id': video_id,
@@ -2868,19 +2878,7 @@ class GenericIE(InfoExtractor):
            self.report_warning(
                '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))

-        if not full_response:
-            request = sanitized_Request(url)
-            # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
-            # making it impossible to download only chunk of the file (yet we need only 512kB to
-            # test whether it's HTML or not). According to yt-dlp default Accept-Encoding
-            # that will always result in downloading the whole file that is not desirable.
-            # Therefore for extraction pass we have to override Accept-Encoding to any in order
-            # to accept raw bytes and being able to download only a chunk.
-            # It may probably better to solve this by checking Content-Type for application/octet-stream
-            # after HEAD request finishes, but not sure if we can rely on this.
-            request.add_header('Accept-Encoding', '*')
-            full_response = self._request_webpage(request, video_id)
-
+        full_response = full_response or request_webpage()
        first_bytes = full_response.read(512)

        # Is it an M3U playlist?
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2467,6 +2467,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
        assert os.path.basename(func_id) == func_id

+        self.write_debug(f'Extracting signature function {func_id}')
        cache_spec = self.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            return lambda s: ''.join(s[i] for i in cache_spec)
@@ -2714,10 +2715,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

    @classmethod
    def extract_id(cls, url):
-        mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
-        if mobj is None:
-            raise ExtractorError('Invalid URL: %s' % url)
-        return mobj.group('id')
+        video_id = cls.get_temp_id(url)
+        if not video_id:
+            raise ExtractorError(f'Invalid URL: {url}')
+        return video_id

    def _extract_chapters_from_json(self, data, duration):
        chapter_list = traverse_obj(