mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	Use unescapeHTML for OpenGraph properties
These are attribute values, so we don't need the more complex and whitespace-destroying clean_html; we just need to unescape quotes.
This commit is contained in:
		| @@ -14,6 +14,7 @@ from ..utils import ( | |||||||
|     clean_html, |     clean_html, | ||||||
|     compiled_regex_type, |     compiled_regex_type, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     unescapeHTML, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| class InfoExtractor(object): | class InfoExtractor(object): | ||||||
| @@ -270,7 +271,8 @@ class InfoExtractor(object): | |||||||
|     def _og_search_property(self, prop, html, name=None, **kargs): |     def _og_search_property(self, prop, html, name=None, **kargs): | ||||||
|         if name is None: |         if name is None: | ||||||
|             name = 'OpenGraph %s' % prop |             name = 'OpenGraph %s' % prop | ||||||
|         return self._html_search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs) |         escaped = self._search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs) | ||||||
|  |         return unescapeHTML(escaped) | ||||||
|  |  | ||||||
|     def _og_search_thumbnail(self, html, **kargs): |     def _og_search_thumbnail(self, html, **kargs): | ||||||
|         return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs) |         return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister