Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-10-31 14:45:14 +00:00)
			
		
		
		
	Python 3 version of HTMLParser
This commit is contained in:
		| @@ -2,7 +2,6 @@ | |||||||
| # -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||||
|  |  | ||||||
| import gzip | import gzip | ||||||
| import HTMLParser |  | ||||||
| import locale | import locale | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| @@ -41,6 +40,11 @@ try: | |||||||
| except NameError: # Python 2 | except NameError: # Python 2 | ||||||
| 	import htmlentitydefs as compat_html_entities | 	import htmlentitydefs as compat_html_entities | ||||||
|  |  | ||||||
|  | try: | ||||||
|  | 	import html.parser as compat_html_parser | ||||||
|  | except NameError: # Python 2 | ||||||
|  | 	import HTMLParser as compat_html_parser | ||||||
|  |  | ||||||
| try: | try: | ||||||
| 	compat_str = unicode # Python 2 | 	compat_str = unicode # Python 2 | ||||||
| except NameError: | except NameError: | ||||||
| @@ -99,8 +103,8 @@ def htmlentity_transform(matchobj): | |||||||
| 	# Unknown entity in name, return its literal representation | 	# Unknown entity in name, return its literal representation | ||||||
| 	return (u'&%s;' % entity) | 	return (u'&%s;' % entity) | ||||||
|  |  | ||||||
| HTMLParser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix | compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix | ||||||
| class IDParser(HTMLParser.HTMLParser): | class IDParser(compat_html_parser.HTMLParser): | ||||||
| 	"""Modified HTMLParser that isolates a tag with the specified id""" | 	"""Modified HTMLParser that isolates a tag with the specified id""" | ||||||
| 	def __init__(self, id): | 	def __init__(self, id): | ||||||
| 		self.id = id | 		self.id = id | ||||||
| @@ -110,11 +114,11 @@ class IDParser(HTMLParser.HTMLParser): | |||||||
| 		self.html = None | 		self.html = None | ||||||
| 		self.watch_startpos = False | 		self.watch_startpos = False | ||||||
| 		self.error_count = 0 | 		self.error_count = 0 | ||||||
| 		HTMLParser.HTMLParser.__init__(self) | 		compat_html_parser.HTMLParser.__init__(self) | ||||||
|  |  | ||||||
| 	def error(self, message): | 	def error(self, message): | ||||||
| 		if self.error_count > 10 or self.started: | 		if self.error_count > 10 or self.started: | ||||||
| 			raise HTMLParser.HTMLParseError(message, self.getpos()) | 			raise compat_html_parser.HTMLParseError(message, self.getpos()) | ||||||
| 		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line | 		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line | ||||||
| 		self.error_count += 1 | 		self.error_count += 1 | ||||||
| 		self.goahead(1) | 		self.goahead(1) | ||||||
| @@ -170,7 +174,7 @@ def get_element_by_id(id, html): | |||||||
| 	parser = IDParser(id) | 	parser = IDParser(id) | ||||||
| 	try: | 	try: | ||||||
| 		parser.loads(html) | 		parser.loads(html) | ||||||
| 	except HTMLParser.HTMLParseError: | 	except compat_html_parser.HTMLParseError: | ||||||
| 		pass | 		pass | ||||||
| 	return parser.get_result() | 	return parser.get_result() | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister