mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	Correct XML ampersand fixup
This commit is contained in:
		| @@ -16,6 +16,7 @@ from youtube_dl.utils import ( | ||||
|     DateRange, | ||||
|     encodeFilename, | ||||
|     find_xpath_attr, | ||||
|     fix_xml_ampersands, | ||||
|     get_meta_content, | ||||
|     orderedSet, | ||||
|     parse_duration, | ||||
| @@ -200,5 +201,18 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(parse_duration('9:12:43'), 33163) | ||||
|         self.assertEqual(parse_duration('x:y'), None) | ||||
|  | ||||
|     def test_fix_xml_ampersands(self): | ||||
|         self.assertEqual( | ||||
|             fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a') | ||||
|         self.assertEqual( | ||||
|             fix_xml_ampersands('"&amp;x=y&wrong;&z=a'), | ||||
|             '"&amp;x=y&amp;wrong;&amp;z=a') | ||||
|         self.assertEqual( | ||||
|             fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'), | ||||
|             '&amp;&apos;&gt;&lt;&quot;') | ||||
|         self.assertEqual( | ||||
|             fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;') | ||||
|         self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#') | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -3,7 +3,7 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     find_xpath_attr, | ||||
|     fix_xml_all_ampersand, | ||||
|     fix_xml_ampersands | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -33,7 +33,7 @@ class ClipsyndicateIE(InfoExtractor): | ||||
|         pdoc = self._download_xml( | ||||
|             'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, | ||||
|             video_id, u'Downloading video info', | ||||
|             transform_source=fix_xml_all_ampersand)  | ||||
|             transform_source=fix_xml_ampersands) | ||||
|  | ||||
|         track_doc = pdoc.find('trackList/track') | ||||
|         def find_param(name): | ||||
|   | ||||
| @@ -4,7 +4,7 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     fix_xml_all_ampersand, | ||||
|     fix_xml_ampersands, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -27,7 +27,7 @@ class MetacriticIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         # The xml is not well formatted, there are raw '&' | ||||
|         info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id, | ||||
|             video_id, 'Downloading info xml', transform_source=fix_xml_all_ampersand) | ||||
|             video_id, 'Downloading info xml', transform_source=fix_xml_ampersands) | ||||
|  | ||||
|         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id) | ||||
|         formats = [] | ||||
|   | ||||
| @@ -5,6 +5,7 @@ from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     ExtractorError, | ||||
|     fix_xml_ampersands, | ||||
| ) | ||||
|  | ||||
| def _media_xml_tag(tag): | ||||
| @@ -83,12 +84,9 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         video_id = self._id_from_uri(uri) | ||||
|         data = compat_urllib_parse.urlencode({'uri': uri}) | ||||
|  | ||||
|         def fix_ampersand(s): | ||||
|             """ Fix unencoded ampersand in XML """ | ||||
|             return s.replace(u'& ', '&amp; ') | ||||
|         idoc = self._download_xml( | ||||
|             self._FEED_URL + '?' + data, video_id, | ||||
|             u'Downloading info', transform_source=fix_ampersand) | ||||
|             u'Downloading info', transform_source=fix_xml_ampersands) | ||||
|         return [self._get_video_info(item) for item in idoc.findall('.//item')] | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1092,9 +1092,12 @@ def month_by_name(name): | ||||
|         return None | ||||
|  | ||||
|  | ||||
| def fix_xml_all_ampersand(xml_str): | ||||
| def fix_xml_ampersands(xml_str): | ||||
|     """Replace all the '&' by '&amp;' in XML""" | ||||
|     return xml_str.replace(u'&', u'&amp;') | ||||
|     return re.sub( | ||||
|         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', | ||||
|         u'&amp;', | ||||
|         xml_str) | ||||
|  | ||||
|  | ||||
| def setproctitle(title): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister