mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	[extractor/generic:quoted-html] Add extractor (#5213)
Extracts embeds from escaped HTML within the `data-html` attribute.
Related: https://github.com/ytdl-org/youtube-dl/issues/21294, https://github.com/yt-dlp/yt-dlp/pull/5121
Authored by: coletdjnz
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
This commit is contained in:
		| @@ -1980,22 +1980,6 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'playlist_count': 6, | ||||
|         }, | ||||
|         { | ||||
|             # Squarespace video embed, 2019-08-28 | ||||
|             'url': 'http://ootboxford.com', | ||||
|             'info_dict': { | ||||
|                 'id': 'Tc7b_JGdZfw', | ||||
|                 'title': 'Out of the Blue, at Childish Things 10', | ||||
|                 'ext': 'mp4', | ||||
|                 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f', | ||||
|                 'uploader_id': 'helendouglashouse', | ||||
|                 'uploader': 'Helen & Douglas House', | ||||
|                 'upload_date': '20140328', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # { | ||||
|         #     # Zype embed | ||||
|         #     'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', | ||||
| @@ -2784,12 +2768,6 @@ class GenericIE(InfoExtractor): | ||||
|         # There probably should be a second run of generic extractor on unescaped webpage. | ||||
|         # webpage = urllib.parse.unquote(webpage) | ||||
| 
 | ||||
|         # Unescape squarespace embeds to be detected by generic extractor, | ||||
|         # see https://github.com/ytdl-org/youtube-dl/issues/21294 | ||||
|         webpage = re.sub( | ||||
|             r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>', | ||||
|             lambda x: unescapeHTML(x.group(0)), webpage) | ||||
| 
 | ||||
|         # TODO: Move to respective extractors | ||||
|         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage) | ||||
|         if bc_urls: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Matthew
					Matthew