mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[extractor/html5] Separate into own extractor (#4307)
Closes #4291. Authored by: coletdjnz, pukkandan
This commit is contained in:
		| @@ -85,7 +85,7 @@ class TestHTTPS(unittest.TestCase): | ||||
| 
 | ||||
|         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) | ||||
|         r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) | ||||
|         self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) | ||||
|         self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) | ||||
| 
 | ||||
| 
 | ||||
| class TestClientCert(unittest.TestCase): | ||||
| @@ -113,7 +113,7 @@ class TestClientCert(unittest.TestCase): | ||||
|             **params, | ||||
|         }) | ||||
|         r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) | ||||
|         self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) | ||||
|         self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) | ||||
| 
 | ||||
|     def test_certificate_combined_nopass(self): | ||||
|         self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt')) | ||||
|   | ||||
| @@ -662,6 +662,7 @@ from .hse import ( | ||||
|     HSEShowIE, | ||||
|     HSEProductIE, | ||||
| ) | ||||
| from .genericembeds import HTML5MediaEmbedIE | ||||
| from .huajiao import HuajiaoIE | ||||
| from .huya import HuyaLiveIE | ||||
| from .huffpost import HuffPostIE | ||||
|   | ||||
| @@ -3776,25 +3776,6 @@ class GenericIE(InfoExtractor): | ||||
|         elif embeds: | ||||
|             return self.playlist_result(embeds, **info_dict) | ||||
| 
 | ||||
|         # Look for HTML5 media | ||||
|         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') | ||||
|         if entries: | ||||
|             self.report_detected('HTML5 media') | ||||
|             if len(entries) == 1: | ||||
|                 entries[0].update({ | ||||
|                     'id': video_id, | ||||
|                     'title': video_title, | ||||
|                 }) | ||||
|             else: | ||||
|                 for num, entry in enumerate(entries, start=1): | ||||
|                     entry.update({ | ||||
|                         'id': f'{video_id}-{num}', | ||||
|                         'title': '%s (%d)' % (video_title, num), | ||||
|                     }) | ||||
|             for entry in entries: | ||||
|                 self._sort_formats(entry['formats']) | ||||
|             return self.playlist_result(entries, video_id, video_title) | ||||
| 
 | ||||
|         jwplayer_data = self._find_jwplayer_data( | ||||
|             webpage, video_id, transform_source=js_to_json) | ||||
|         if jwplayer_data: | ||||
|   | ||||
							
								
								
									
										27
									
								
								yt_dlp/extractor/genericembeds.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								yt_dlp/extractor/genericembeds.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| from .common import InfoExtractor | ||||
| 
 | ||||
| 
 | ||||
| class HTML5MediaEmbedIE(InfoExtractor): | ||||
|     _VALID_URL = False | ||||
|     IE_NAME = 'html5' | ||||
|     _WEBPAGE_TESTS = [ | ||||
|         { | ||||
|             'url': 'https://html.com/media/', | ||||
|             'info_dict': { | ||||
|                 'title': 'HTML5 Media', | ||||
|                 'description': 'md5:933b2d02ceffe7a7a0f3c8326d91cc2a', | ||||
|             }, | ||||
|             'playlist_count': 2 | ||||
|         } | ||||
|     ] | ||||
| 
 | ||||
|     def _extract_from_webpage(self, url, webpage): | ||||
|         video_id, title = self._generic_id(url), self._generic_title(url) | ||||
|         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') or [] | ||||
|         for num, entry in enumerate(entries, start=1): | ||||
|             entry.update({ | ||||
|                 'id': f'{video_id}-{num}', | ||||
|                 'title': f'{title} ({num})', | ||||
|             }) | ||||
|             self._sort_formats(entry['formats']) | ||||
|             yield entry | ||||
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan