mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 22:55:18 +00:00 
			
		
		
		
	Correct XML ampersand fixup
This commit is contained in:
		| @@ -16,6 +16,7 @@ from youtube_dl.utils import ( | |||||||
|     DateRange, |     DateRange, | ||||||
|     encodeFilename, |     encodeFilename, | ||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|  |     fix_xml_ampersands, | ||||||
|     get_meta_content, |     get_meta_content, | ||||||
|     orderedSet, |     orderedSet, | ||||||
|     parse_duration, |     parse_duration, | ||||||
| @@ -200,5 +201,18 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual(parse_duration('9:12:43'), 33163) |         self.assertEqual(parse_duration('9:12:43'), 33163) | ||||||
|         self.assertEqual(parse_duration('x:y'), None) |         self.assertEqual(parse_duration('x:y'), None) | ||||||
|  |  | ||||||
|  |     def test_fix_xml_ampersands(self): | ||||||
|  |         self.assertEqual( | ||||||
|  |             fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a') | ||||||
|  |         self.assertEqual( | ||||||
|  |             fix_xml_ampersands('"&x=y&wrong;&z=a'), | ||||||
|  |             '"&amp;x=y&amp;wrong;&amp;z=a') | ||||||
|  |         self.assertEqual( | ||||||
|  |             fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'), | ||||||
|  |             '&amp;&apos;&gt;&lt;&quot;') | ||||||
|  |         self.assertEqual( | ||||||
|  |             fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;') | ||||||
|  |         self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#') | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ import re | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     find_xpath_attr, |     find_xpath_attr, | ||||||
|     fix_xml_all_ampersand, |     fix_xml_ampersands | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -33,7 +33,7 @@ class ClipsyndicateIE(InfoExtractor): | |||||||
|         pdoc = self._download_xml( |         pdoc = self._download_xml( | ||||||
|             'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, |             'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, | ||||||
|             video_id, u'Downloading video info', |             video_id, u'Downloading video info', | ||||||
|             transform_source=fix_xml_all_ampersand)  |             transform_source=fix_xml_ampersands) | ||||||
|  |  | ||||||
|         track_doc = pdoc.find('trackList/track') |         track_doc = pdoc.find('trackList/track') | ||||||
|         def find_param(name): |         def find_param(name): | ||||||
|   | |||||||
| @@ -4,7 +4,7 @@ import re | |||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     fix_xml_all_ampersand, |     fix_xml_ampersands, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -27,7 +27,7 @@ class MetacriticIE(InfoExtractor): | |||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|         # The xml is not well formatted, there are raw '&' |         # The xml is not well formatted, there are raw '&' | ||||||
|         info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id, |         info = self._download_xml('http://www.metacritic.com/video_data?video=' + video_id, | ||||||
|             video_id, 'Downloading info xml', transform_source=fix_xml_all_ampersand) |             video_id, 'Downloading info xml', transform_source=fix_xml_ampersands) | ||||||
|  |  | ||||||
|         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id) |         clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id) | ||||||
|         formats = [] |         formats = [] | ||||||
|   | |||||||
| @@ -5,6 +5,7 @@ from .common import InfoExtractor | |||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|     ExtractorError, |     ExtractorError, | ||||||
|  |     fix_xml_ampersands, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| def _media_xml_tag(tag): | def _media_xml_tag(tag): | ||||||
| @@ -83,12 +84,9 @@ class MTVServicesInfoExtractor(InfoExtractor): | |||||||
|         video_id = self._id_from_uri(uri) |         video_id = self._id_from_uri(uri) | ||||||
|         data = compat_urllib_parse.urlencode({'uri': uri}) |         data = compat_urllib_parse.urlencode({'uri': uri}) | ||||||
|  |  | ||||||
|         def fix_ampersand(s): |  | ||||||
|             """ Fix unencoded ampersand in XML """ |  | ||||||
|             return s.replace(u'& ', '&amp; ') |  | ||||||
|         idoc = self._download_xml( |         idoc = self._download_xml( | ||||||
|             self._FEED_URL + '?' + data, video_id, |             self._FEED_URL + '?' + data, video_id, | ||||||
|             u'Downloading info', transform_source=fix_ampersand) |             u'Downloading info', transform_source=fix_xml_ampersands) | ||||||
|         return [self._get_video_info(item) for item in idoc.findall('.//item')] |         return [self._get_video_info(item) for item in idoc.findall('.//item')] | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1092,9 +1092,12 @@ def month_by_name(name): | |||||||
|         return None |         return None | ||||||
|  |  | ||||||
|  |  | ||||||
| def fix_xml_all_ampersand(xml_str): | def fix_xml_ampersands(xml_str): | ||||||
|     """Replace all the '&' by '&amp;' in XML""" |     """Replace all the '&' by '&amp;' in XML""" | ||||||
|     return xml_str.replace(u'&', u'&amp;') |     return re.sub( | ||||||
|  |         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', | ||||||
|  |         u'&amp;', | ||||||
|  |         xml_str) | ||||||
|  |  | ||||||
|  |  | ||||||
| def setproctitle(title): | def setproctitle(title): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister