[extractors] Use new framework for existing embeds (#4307)

`Brightcove` is difficult to migrate because it's subclasses may depend on the signature of the current functions. So it is left as-is for now Note: Tests have not been migrated
2026-02-26 10:20:11 +00:00 · 2022-08-01 06:53:25 +05:30
parent 1e8fe57e5c
commit bfd973ece3
138 changed files with 499 additions and 1909 deletions
--- a/yt_dlp/extractor/indavideo.py
+++ b/yt_dlp/extractor/indavideo.py
@@ -1,5 +1,3 @@
-import re
-
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
@@ -12,6 +10,14 @@ from ..utils import (

 class IndavideoEmbedIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
+    # Some example URLs covered by generic extractor:
+    #   http://indavideo.hu/video/Vicces_cica_1
+    #   http://index.indavideo.hu/video/2015_0728_beregszasz
+    #   http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+    #   http://erotika.indavideo.hu/video/Amator_tini_punci
+    #   http://film.indavideo.hu/video/f_hrom_nagymamm_volt
+    #   http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)']
    _TESTS = [{
        'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
        'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
@@ -37,20 +43,6 @@ class IndavideoEmbedIE(InfoExtractor):
        'only_matching': True,
    }]

-    # Some example URLs covered by generic extractor:
-    #   http://indavideo.hu/video/Vicces_cica_1
-    #   http://index.indavideo.hu/video/2015_0728_beregszasz
-    #   http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
-    #   http://erotika.indavideo.hu/video/Amator_tini_punci
-    #   http://film.indavideo.hu/video/f_hrom_nagymamm_volt
-    #   http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
-
-    @staticmethod
-    def _extract_urls(webpage):
-        return re.findall(
-            r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
-            webpage)
-
    def _real_extract(self, url):
        video_id = self._match_id(url)