mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-11-04 08:35:12 +00:00 
			
		
		
		
	[extractor, test] Basic framework for embed tests (#4307)
and split download tests so they can be more easily run in CI. Authored by: coletdjnz
This commit is contained in:
		@@ -92,6 +92,13 @@ def gettestcases(include_onlymatching=False):
 | 
				
			|||||||
        yield from ie.get_testcases(include_onlymatching)
 | 
					        yield from ie.get_testcases(include_onlymatching)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def getwebpagetestcases():
 | 
				
			||||||
 | 
					    for ie in yt_dlp.extractor.gen_extractors():
 | 
				
			||||||
 | 
					        for tc in ie.get_webpage_testcases():
 | 
				
			||||||
 | 
					            tc.setdefault('add_ie', []).append('Generic')
 | 
				
			||||||
 | 
					            yield tc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
 | 
					md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,6 +8,7 @@ import unittest
 | 
				
			|||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
					sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import collections
 | 
				
			||||||
import hashlib
 | 
					import hashlib
 | 
				
			||||||
import http.client
 | 
					import http.client
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
@@ -20,6 +21,7 @@ from test.helper import (
 | 
				
			|||||||
    expect_warnings,
 | 
					    expect_warnings,
 | 
				
			||||||
    get_params,
 | 
					    get_params,
 | 
				
			||||||
    gettestcases,
 | 
					    gettestcases,
 | 
				
			||||||
 | 
					    getwebpagetestcases,
 | 
				
			||||||
    is_download_test,
 | 
					    is_download_test,
 | 
				
			||||||
    report_warning,
 | 
					    report_warning,
 | 
				
			||||||
    try_rm,
 | 
					    try_rm,
 | 
				
			||||||
@@ -32,6 +34,7 @@ from yt_dlp.utils import (
 | 
				
			|||||||
    ExtractorError,
 | 
					    ExtractorError,
 | 
				
			||||||
    UnavailableVideoError,
 | 
					    UnavailableVideoError,
 | 
				
			||||||
    format_bytes,
 | 
					    format_bytes,
 | 
				
			||||||
 | 
					    join_nonempty,
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
RETRIES = 3
 | 
					RETRIES = 3
 | 
				
			||||||
@@ -57,7 +60,9 @@ def _file_md5(fn):
 | 
				
			|||||||
        return hashlib.md5(f.read()).hexdigest()
 | 
					        return hashlib.md5(f.read()).hexdigest()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
defs = gettestcases()
 | 
					normal_test_cases = gettestcases()
 | 
				
			||||||
 | 
					webpage_test_cases = getwebpagetestcases()
 | 
				
			||||||
 | 
					tests_counter = collections.defaultdict(collections.Counter)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@is_download_test
 | 
					@is_download_test
 | 
				
			||||||
@@ -72,24 +77,13 @@ class TestDownload(unittest.TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def __str__(self):
 | 
					    def __str__(self):
 | 
				
			||||||
        """Identify each test with the `add_ie` attribute, if available."""
 | 
					        """Identify each test with the `add_ie` attribute, if available."""
 | 
				
			||||||
 | 
					        cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie
 | 
				
			||||||
 | 
					        return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def strclass(cls):
 | 
					 | 
				
			||||||
            """From 2.7's unittest; 2.6 had _strclass so we can't import it."""
 | 
					 | 
				
			||||||
            return f'{cls.__module__}.{cls.__name__}'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        add_ie = getattr(self, self._testMethodName).add_ie
 | 
					 | 
				
			||||||
        return '%s (%s)%s:' % (self._testMethodName,
 | 
					 | 
				
			||||||
                               strclass(self.__class__),
 | 
					 | 
				
			||||||
                               ' [%s]' % add_ie if add_ie else '')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def setUp(self):
 | 
					 | 
				
			||||||
        self.defs = defs
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Dynamically generate tests
 | 
					# Dynamically generate tests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
def generator(test_case, tname):
 | 
					def generator(test_case, tname):
 | 
				
			||||||
 | 
					 | 
				
			||||||
    def test_template(self):
 | 
					    def test_template(self):
 | 
				
			||||||
        if self.COMPLETED_TESTS.get(tname):
 | 
					        if self.COMPLETED_TESTS.get(tname):
 | 
				
			||||||
            return
 | 
					            return
 | 
				
			||||||
@@ -255,25 +249,29 @@ def generator(test_case, tname):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# And add them to TestDownload
 | 
					# And add them to TestDownload
 | 
				
			||||||
tests_counter = {}
 | 
					def inject_tests(test_cases, label=''):
 | 
				
			||||||
for test_case in defs:
 | 
					    for test_case in test_cases:
 | 
				
			||||||
        name = test_case['name']
 | 
					        name = test_case['name']
 | 
				
			||||||
    i = tests_counter.get(name, 0)
 | 
					        tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_')
 | 
				
			||||||
    tests_counter[name] = i + 1
 | 
					        tests_counter[name][label] += 1
 | 
				
			||||||
    tname = f'test_{name}_{i}' if i else f'test_{name}'
 | 
					
 | 
				
			||||||
        test_method = generator(test_case, tname)
 | 
					        test_method = generator(test_case, tname)
 | 
				
			||||||
    test_method.__name__ = str(tname)
 | 
					        test_method.__name__ = tname
 | 
				
			||||||
    ie_list = test_case.get('add_ie')
 | 
					        test_method.add_ie = ','.join(test_case.get('add_ie', []))
 | 
				
			||||||
    test_method.add_ie = ie_list and ','.join(ie_list)
 | 
					 | 
				
			||||||
        setattr(TestDownload, test_method.__name__, test_method)
 | 
					        setattr(TestDownload, test_method.__name__, test_method)
 | 
				
			||||||
    del test_method
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def batch_generator(name, num_tests):
 | 
					inject_tests(normal_test_cases)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction
 | 
				
			||||||
 | 
					inject_tests(webpage_test_cases, 'webpage')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def batch_generator(name):
 | 
				
			||||||
    def test_template(self):
 | 
					    def test_template(self):
 | 
				
			||||||
 | 
					        for label, num_tests in tests_counter[name].items():
 | 
				
			||||||
            for i in range(num_tests):
 | 
					            for i in range(num_tests):
 | 
				
			||||||
            test_name = f'test_{name}_{i}' if i else f'test_{name}'
 | 
					                test_name = join_nonempty('test', name, label, i, delim='_')
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    getattr(self, test_name)()
 | 
					                    getattr(self, test_name)()
 | 
				
			||||||
                except unittest.SkipTest:
 | 
					                except unittest.SkipTest:
 | 
				
			||||||
@@ -282,12 +280,12 @@ def batch_generator(name, num_tests):
 | 
				
			|||||||
    return test_template
 | 
					    return test_template
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
for name, num_tests in tests_counter.items():
 | 
					for name in tests_counter:
 | 
				
			||||||
    test_method = batch_generator(name, num_tests)
 | 
					    test_method = batch_generator(name)
 | 
				
			||||||
    test_method.__name__ = f'test_{name}_all'
 | 
					    test_method.__name__ = f'test_{name}_all'
 | 
				
			||||||
    test_method.add_ie = ''
 | 
					    test_method.add_ie = ''
 | 
				
			||||||
    setattr(TestDownload, test_method.__name__, test_method)
 | 
					    setattr(TestDownload, test_method.__name__, test_method)
 | 
				
			||||||
    del test_method
 | 
					del test_method
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3665,11 +3665,18 @@ class InfoExtractor:
 | 
				
			|||||||
            t['name'] = cls.ie_key()
 | 
					            t['name'] = cls.ie_key()
 | 
				
			||||||
            yield t
 | 
					            yield t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @classmethod
 | 
				
			||||||
 | 
					    def get_webpage_testcases(cls):
 | 
				
			||||||
 | 
					        tests = getattr(cls, '_WEBPAGE_TESTS', [])
 | 
				
			||||||
 | 
					        for t in tests:
 | 
				
			||||||
 | 
					            t['name'] = cls.ie_key()
 | 
				
			||||||
 | 
					        return tests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classproperty
 | 
					    @classproperty
 | 
				
			||||||
    def age_limit(cls):
 | 
					    def age_limit(cls):
 | 
				
			||||||
        """Get age limit from the testcases"""
 | 
					        """Get age limit from the testcases"""
 | 
				
			||||||
        return max(traverse_obj(
 | 
					        return max(traverse_obj(
 | 
				
			||||||
            tuple(cls.get_testcases(include_onlymatching=False)),
 | 
					            (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()),
 | 
				
			||||||
            (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
 | 
					            (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
@@ -3844,7 +3851,10 @@ class InfoExtractor:
 | 
				
			|||||||
    def extract_from_webpage(cls, ydl, url, webpage):
 | 
					    def extract_from_webpage(cls, ydl, url, webpage):
 | 
				
			||||||
        ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
 | 
					        ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType)
 | 
				
			||||||
              else ydl.get_info_extractor(cls.ie_key()))
 | 
					              else ydl.get_info_extractor(cls.ie_key()))
 | 
				
			||||||
        yield from ie._extract_from_webpage(url, webpage) or []
 | 
					        for info in ie._extract_from_webpage(url, webpage) or []:
 | 
				
			||||||
 | 
					            # url = None since we do not want to set (webpage/original)_url
 | 
				
			||||||
 | 
					            ydl.add_default_extra_info(info, ie, None)
 | 
				
			||||||
 | 
					            yield info
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def _extract_from_webpage(cls, url, webpage):
 | 
					    def _extract_from_webpage(cls, url, webpage):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -933,21 +933,6 @@ class GenericIE(InfoExtractor):
 | 
				
			|||||||
                'skip_download': True,
 | 
					                'skip_download': True,
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        # YouTube <object> embed
 | 
					 | 
				
			||||||
        {
 | 
					 | 
				
			||||||
            'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
 | 
					 | 
				
			||||||
            'md5': '516718101ec834f74318df76259fb3cc',
 | 
					 | 
				
			||||||
            'info_dict': {
 | 
					 | 
				
			||||||
                'id': 'msN87y-iEx0',
 | 
					 | 
				
			||||||
                'ext': 'webm',
 | 
					 | 
				
			||||||
                'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
 | 
					 | 
				
			||||||
                'upload_date': '20080526',
 | 
					 | 
				
			||||||
                'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
 | 
					 | 
				
			||||||
                'uploader': 'Christopher Sykes',
 | 
					 | 
				
			||||||
                'uploader_id': 'ChristopherJSykes',
 | 
					 | 
				
			||||||
            },
 | 
					 | 
				
			||||||
            'add_ie': ['Youtube'],
 | 
					 | 
				
			||||||
        },
 | 
					 | 
				
			||||||
        # Camtasia studio
 | 
					        # Camtasia studio
 | 
				
			||||||
        {
 | 
					        {
 | 
				
			||||||
            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 | 
					            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -2266,6 +2266,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    _WEBPAGE_TESTS = [
 | 
				
			||||||
 | 
					        # YouTube <object> embed
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
 | 
				
			||||||
 | 
					            'md5': '873c81d308b979f0e23ee7e620b312a3',
 | 
				
			||||||
 | 
					            'info_dict': {
 | 
				
			||||||
 | 
					                'id': 'msN87y-iEx0',
 | 
				
			||||||
 | 
					                'ext': 'mp4',
 | 
				
			||||||
 | 
					                'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
 | 
				
			||||||
 | 
					                'upload_date': '20080526',
 | 
				
			||||||
 | 
					                'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
 | 
				
			||||||
 | 
					                'uploader': 'Christopher Sykes',
 | 
				
			||||||
 | 
					                'uploader_id': 'ChristopherJSykes',
 | 
				
			||||||
 | 
					                'age_limit': 0,
 | 
				
			||||||
 | 
					                'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
 | 
				
			||||||
 | 
					                'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
 | 
				
			||||||
 | 
					                'playable_in_embed': True,
 | 
				
			||||||
 | 
					                'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
 | 
				
			||||||
 | 
					                'like_count': int,
 | 
				
			||||||
 | 
					                'comment_count': int,
 | 
				
			||||||
 | 
					                'channel': 'Christopher Sykes',
 | 
				
			||||||
 | 
					                'live_status': 'not_live',
 | 
				
			||||||
 | 
					                'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
 | 
				
			||||||
 | 
					                'availability': 'public',
 | 
				
			||||||
 | 
					                'duration': 195,
 | 
				
			||||||
 | 
					                'view_count': int,
 | 
				
			||||||
 | 
					                'categories': ['Science & Technology'],
 | 
				
			||||||
 | 
					                'channel_follower_count': int,
 | 
				
			||||||
 | 
					                'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            'params': {
 | 
				
			||||||
 | 
					                'skip_download': True,
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @classmethod
 | 
					    @classmethod
 | 
				
			||||||
    def suitable(cls, url):
 | 
					    def suitable(cls, url):
 | 
				
			||||||
        from ..utils import parse_qs
 | 
					        from ..utils import parse_qs
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user