mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[extractor/html5] Separate into own extractor (#4307)
Closes #4291. Authored by: coletdjnz, pukkandan
This commit is contained in:
		| @@ -85,7 +85,7 @@ class TestHTTPS(unittest.TestCase): | |||||||
| 
 | 
 | ||||||
|         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) |         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) | ||||||
|         r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) |         r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) | ||||||
|         self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) |         self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class TestClientCert(unittest.TestCase): | class TestClientCert(unittest.TestCase): | ||||||
| @@ -113,7 +113,7 @@ class TestClientCert(unittest.TestCase): | |||||||
|             **params, |             **params, | ||||||
|         }) |         }) | ||||||
|         r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) |         r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) | ||||||
|         self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) |         self.assertEqual(r['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) | ||||||
| 
 | 
 | ||||||
|     def test_certificate_combined_nopass(self): |     def test_certificate_combined_nopass(self): | ||||||
|         self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt')) |         self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt')) | ||||||
|   | |||||||
| @@ -662,6 +662,7 @@ from .hse import ( | |||||||
|     HSEShowIE, |     HSEShowIE, | ||||||
|     HSEProductIE, |     HSEProductIE, | ||||||
| ) | ) | ||||||
|  | from .genericembeds import HTML5MediaEmbedIE | ||||||
| from .huajiao import HuajiaoIE | from .huajiao import HuajiaoIE | ||||||
| from .huya import HuyaLiveIE | from .huya import HuyaLiveIE | ||||||
| from .huffpost import HuffPostIE | from .huffpost import HuffPostIE | ||||||
|   | |||||||
| @@ -3776,25 +3776,6 @@ class GenericIE(InfoExtractor): | |||||||
|         elif embeds: |         elif embeds: | ||||||
|             return self.playlist_result(embeds, **info_dict) |             return self.playlist_result(embeds, **info_dict) | ||||||
| 
 | 
 | ||||||
|         # Look for HTML5 media |  | ||||||
|         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') |  | ||||||
|         if entries: |  | ||||||
|             self.report_detected('HTML5 media') |  | ||||||
|             if len(entries) == 1: |  | ||||||
|                 entries[0].update({ |  | ||||||
|                     'id': video_id, |  | ||||||
|                     'title': video_title, |  | ||||||
|                 }) |  | ||||||
|             else: |  | ||||||
|                 for num, entry in enumerate(entries, start=1): |  | ||||||
|                     entry.update({ |  | ||||||
|                         'id': f'{video_id}-{num}', |  | ||||||
|                         'title': '%s (%d)' % (video_title, num), |  | ||||||
|                     }) |  | ||||||
|             for entry in entries: |  | ||||||
|                 self._sort_formats(entry['formats']) |  | ||||||
|             return self.playlist_result(entries, video_id, video_title) |  | ||||||
| 
 |  | ||||||
|         jwplayer_data = self._find_jwplayer_data( |         jwplayer_data = self._find_jwplayer_data( | ||||||
|             webpage, video_id, transform_source=js_to_json) |             webpage, video_id, transform_source=js_to_json) | ||||||
|         if jwplayer_data: |         if jwplayer_data: | ||||||
|   | |||||||
							
								
								
									
										27
									
								
								yt_dlp/extractor/genericembeds.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								yt_dlp/extractor/genericembeds.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | |||||||
|  | from .common import InfoExtractor | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
# Extractor with IE_NAME 'html5'. _VALID_URL is set to False, and the only
# method defined here is _extract_from_webpage, which works on an already
# downloaded page.
class HTML5MediaEmbedIE(InfoExtractor):
    IE_NAME = 'html5'
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'https://html.com/media/',
        'info_dict': {
            'title': 'HTML5 Media',
            'description': 'md5:933b2d02ceffe7a7a0f3c8326d91cc2a',
        },
        'playlist_count': 2,
    }]

    def _extract_from_webpage(self, url, webpage):
        """Yield one info dict per HTML5 media entry parsed from *webpage*.

        Entries are numbered starting at 1. Each entry receives the id
        ``<generic id>-<n>`` and the title ``<generic title> (<n>)``, and its
        formats are sorted just before the entry is yielded.
        """
        base_id = self._generic_id(url)
        base_title = self._generic_title(url)
        media = self._parse_html5_media_entries(url, webpage, base_id, m3u8_id='hls')
        if not media:
            # Nothing was parsed from the page: the generator ends without yielding.
            return
        for index, info in enumerate(media, start=1):
            info.update(
                id=f'{base_id}-{index}',
                title=f'{base_title} ({index})',
            )
            self._sort_formats(info['formats'])
            yield info
		Reference in New Issue
	
	Block a user
	 pukkandan
					pukkandan