mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	[utils] Modernize tests
This commit is contained in:
		| @@ -1,6 +1,8 @@ | |||||||
| #!/usr/bin/env python | #!/usr/bin/env python | ||||||
| # coding: utf-8 | # coding: utf-8 | ||||||
|  |  | ||||||
|  | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| # Allow direct execution | # Allow direct execution | ||||||
| import os | import os | ||||||
| import sys | import sys | ||||||
| @@ -13,7 +15,6 @@ import io | |||||||
| import json | import json | ||||||
| import xml.etree.ElementTree | import xml.etree.ElementTree | ||||||
|  |  | ||||||
| #from youtube_dl.utils import htmlentity_transform |  | ||||||
| from youtube_dl.utils import ( | from youtube_dl.utils import ( | ||||||
|     DateRange, |     DateRange, | ||||||
|     encodeFilename, |     encodeFilename, | ||||||
| @@ -41,11 +42,6 @@ from youtube_dl.utils import ( | |||||||
|     uppercase_escape, |     uppercase_escape, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| if sys.version_info < (3, 0): |  | ||||||
|     _compat_str = lambda b: b.decode('unicode-escape') |  | ||||||
| else: |  | ||||||
|     _compat_str = lambda s: s |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestUtil(unittest.TestCase): | class TestUtil(unittest.TestCase): | ||||||
|     def test_timeconvert(self): |     def test_timeconvert(self): | ||||||
| @@ -67,9 +63,9 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual('this - that', sanitize_filename('this: that')) |         self.assertEqual('this - that', sanitize_filename('this: that')) | ||||||
|  |  | ||||||
|         self.assertEqual(sanitize_filename('AT&T'), 'AT&T') |         self.assertEqual(sanitize_filename('AT&T'), 'AT&T') | ||||||
|         aumlaut = _compat_str('\xe4') |         aumlaut = 'ä' | ||||||
|         self.assertEqual(sanitize_filename(aumlaut), aumlaut) |         self.assertEqual(sanitize_filename(aumlaut), aumlaut) | ||||||
|         tests = _compat_str('\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430') |         tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430' | ||||||
|         self.assertEqual(sanitize_filename(tests), tests) |         self.assertEqual(sanitize_filename(tests), tests) | ||||||
|  |  | ||||||
|         forbidden = '"\0\\/' |         forbidden = '"\0\\/' | ||||||
| @@ -91,9 +87,9 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) |         self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) | ||||||
|         self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) |         self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) | ||||||
|  |  | ||||||
|         tests = _compat_str('a\xe4b\u4e2d\u56fd\u7684c') |         tests = 'a\xe4b\u4e2d\u56fd\u7684c' | ||||||
|         self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') |         self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') | ||||||
|         self.assertTrue(sanitize_filename(_compat_str('\xf6'), restricted=True) != '')  # No empty filename |         self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename | ||||||
|  |  | ||||||
|         forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' |         forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' | ||||||
|         for fc in forbidden: |         for fc in forbidden: | ||||||
| @@ -101,8 +97,8 @@ class TestUtil(unittest.TestCase): | |||||||
|                 self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) |                 self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) | ||||||
|  |  | ||||||
|         # Handle a common case more neatly |         # Handle a common case more neatly | ||||||
|         self.assertEqual(sanitize_filename(_compat_str('\u5927\u58f0\u5e26 - Song'), restricted=True), 'Song') |         self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song') | ||||||
|         self.assertEqual(sanitize_filename(_compat_str('\u603b\u7edf: Speech'), restricted=True), 'Speech') |         self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech') | ||||||
|         # .. but make sure the file name is never empty |         # .. but make sure the file name is never empty | ||||||
|         self.assertTrue(sanitize_filename('-', restricted=True) != '') |         self.assertTrue(sanitize_filename('-', restricted=True) != '') | ||||||
|         self.assertTrue(sanitize_filename(':', restricted=True) != '') |         self.assertTrue(sanitize_filename(':', restricted=True) != '') | ||||||
| @@ -120,7 +116,9 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1]) |         self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1]) | ||||||
|  |  | ||||||
|     def test_unescape_html(self): |     def test_unescape_html(self): | ||||||
|         self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;')) |         self.assertEqual(unescapeHTML('%20;'), '%20;') | ||||||
|  |         self.assertEqual( | ||||||
|  |             unescapeHTML('é'), 'é') | ||||||
|          |          | ||||||
|     def test_daterange(self): |     def test_daterange(self): | ||||||
|         _20century = DateRange("19000101","20000101") |         _20century = DateRange("19000101","20000101") | ||||||
| @@ -138,7 +136,7 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual(unified_strdate('1968-12-10'), '19681210') |         self.assertEqual(unified_strdate('1968-12-10'), '19681210') | ||||||
|  |  | ||||||
|     def test_find_xpath_attr(self): |     def test_find_xpath_attr(self): | ||||||
|         testxml = u'''<root> |         testxml = '''<root> | ||||||
|             <node/> |             <node/> | ||||||
|             <node x="a"/> |             <node x="a"/> | ||||||
|             <node x="a" y="c" /> |             <node x="a" y="c" /> | ||||||
| @@ -151,18 +149,18 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) |         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) | ||||||
|  |  | ||||||
|     def test_meta_parser(self): |     def test_meta_parser(self): | ||||||
|         testhtml = u''' |         testhtml = ''' | ||||||
|         <head> |         <head> | ||||||
|             <meta name="description" content="foo & bar"> |             <meta name="description" content="foo & bar"> | ||||||
|             <meta content='Plato' name='author'/> |             <meta content='Plato' name='author'/> | ||||||
|         </head> |         </head> | ||||||
|         ''' |         ''' | ||||||
|         get_meta = lambda name: get_meta_content(name, testhtml) |         get_meta = lambda name: get_meta_content(name, testhtml) | ||||||
|         self.assertEqual(get_meta('description'), u'foo & bar') |         self.assertEqual(get_meta('description'), 'foo & bar') | ||||||
|         self.assertEqual(get_meta('author'), 'Plato') |         self.assertEqual(get_meta('author'), 'Plato') | ||||||
|  |  | ||||||
|     def test_xpath_with_ns(self): |     def test_xpath_with_ns(self): | ||||||
|         testxml = u'''<root xmlns:media="http://example.com/"> |         testxml = '''<root xmlns:media="http://example.com/"> | ||||||
|             <media:song> |             <media:song> | ||||||
|                 <media:author>The Author</media:author> |                 <media:author>The Author</media:author> | ||||||
|                 <url>http://server.com/download.mp3</url> |                 <url>http://server.com/download.mp3</url> | ||||||
| @@ -171,8 +169,8 @@ class TestUtil(unittest.TestCase): | |||||||
|         doc = xml.etree.ElementTree.fromstring(testxml) |         doc = xml.etree.ElementTree.fromstring(testxml) | ||||||
|         find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) |         find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) | ||||||
|         self.assertTrue(find('media:song') is not None) |         self.assertTrue(find('media:song') is not None) | ||||||
|         self.assertEqual(find('media:song/media:author').text, u'The Author') |         self.assertEqual(find('media:song/media:author').text, 'The Author') | ||||||
|         self.assertEqual(find('media:song/url').text, u'http://server.com/download.mp3') |         self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3') | ||||||
|  |  | ||||||
|     def test_smuggle_url(self): |     def test_smuggle_url(self): | ||||||
|         data = {u"ö": u"ö", u"abc": [3]} |         data = {u"ö": u"ö", u"abc": [3]} | ||||||
| @@ -187,22 +185,22 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual(res_data, None) |         self.assertEqual(res_data, None) | ||||||
|  |  | ||||||
|     def test_shell_quote(self): |     def test_shell_quote(self): | ||||||
|         args = ['ffmpeg', '-i', encodeFilename(u'ñ€ß\'.mp4')] |         args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] | ||||||
|         self.assertEqual(shell_quote(args), u"""ffmpeg -i 'ñ€ß'"'"'.mp4'""") |         self.assertEqual(shell_quote(args), """ffmpeg -i 'ñ€ß'"'"'.mp4'""") | ||||||
|  |  | ||||||
|     def test_str_to_int(self): |     def test_str_to_int(self): | ||||||
|         self.assertEqual(str_to_int('123,456'), 123456) |         self.assertEqual(str_to_int('123,456'), 123456) | ||||||
|         self.assertEqual(str_to_int('123.456'), 123456) |         self.assertEqual(str_to_int('123.456'), 123456) | ||||||
|  |  | ||||||
|     def test_url_basename(self): |     def test_url_basename(self): | ||||||
|         self.assertEqual(url_basename(u'http://foo.de/'), u'') |         self.assertEqual(url_basename('http://foo.de/'), '') | ||||||
|         self.assertEqual(url_basename(u'http://foo.de/bar/baz'), u'baz') |         self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz') | ||||||
|         self.assertEqual(url_basename(u'http://foo.de/bar/baz?x=y'), u'baz') |         self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz') | ||||||
|         self.assertEqual(url_basename(u'http://foo.de/bar/baz#x=y'), u'baz') |         self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz') | ||||||
|         self.assertEqual(url_basename(u'http://foo.de/bar/baz/'), u'baz') |         self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz') | ||||||
|         self.assertEqual( |         self.assertEqual( | ||||||
|             url_basename(u'http://media.w3.org/2010/05/sintel/trailer.mp4'), |             url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'), | ||||||
|             u'trailer.mp4') |             'trailer.mp4') | ||||||
|  |  | ||||||
|     def test_parse_duration(self): |     def test_parse_duration(self): | ||||||
|         self.assertEqual(parse_duration(None), None) |         self.assertEqual(parse_duration(None), None) | ||||||
| @@ -256,16 +254,16 @@ class TestUtil(unittest.TestCase): | |||||||
|         testPL(5, 2, (20, 99), []) |         testPL(5, 2, (20, 99), []) | ||||||
|  |  | ||||||
|     def test_struct_unpack(self): |     def test_struct_unpack(self): | ||||||
|         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) |         self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) | ||||||
|  |  | ||||||
|     def test_read_batch_urls(self): |     def test_read_batch_urls(self): | ||||||
|         f = io.StringIO(u'''\xef\xbb\xbf foo |         f = io.StringIO('''\xef\xbb\xbf foo | ||||||
|             bar\r |             bar\r | ||||||
|             baz |             baz | ||||||
|             # More after this line\r |             # More after this line\r | ||||||
|             ; or after this |             ; or after this | ||||||
|             bam''') |             bam''') | ||||||
|         self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) |         self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam']) | ||||||
|  |  | ||||||
|     def test_urlencode_postdata(self): |     def test_urlencode_postdata(self): | ||||||
|         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) |         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'}) | ||||||
| @@ -282,8 +280,8 @@ class TestUtil(unittest.TestCase): | |||||||
|         self.assertEqual(d, [{"id": "532cb", "x": 3}]) |         self.assertEqual(d, [{"id": "532cb", "x": 3}]) | ||||||
|  |  | ||||||
|     def test_uppercase_escape(self): |     def test_uppercase_escape(self): | ||||||
|         self.assertEqual(uppercase_escape(u'aä'), u'aä') |         self.assertEqual(uppercase_escape('aä'), 'aä') | ||||||
|         self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') |         self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -298,30 +298,6 @@ def xpath_with_ns(path, ns_map): | |||||||
|             replaced.append('{%s}%s' % (ns_map[ns], tag)) |             replaced.append('{%s}%s' % (ns_map[ns], tag)) | ||||||
|     return '/'.join(replaced) |     return '/'.join(replaced) | ||||||
|  |  | ||||||
| def htmlentity_transform(matchobj): |  | ||||||
|     """Transforms an HTML entity to a character. |  | ||||||
|  |  | ||||||
|     This function receives a match object and is intended to be used with |  | ||||||
|     the re.sub() function. |  | ||||||
|     """ |  | ||||||
|     entity = matchobj.group(1) |  | ||||||
|  |  | ||||||
|     # Known non-numeric HTML entity |  | ||||||
|     if entity in compat_html_entities.name2codepoint: |  | ||||||
|         return compat_chr(compat_html_entities.name2codepoint[entity]) |  | ||||||
|  |  | ||||||
|     mobj = re.match(u'(?u)#(x?\\d+)', entity) |  | ||||||
|     if mobj is not None: |  | ||||||
|         numstr = mobj.group(1) |  | ||||||
|         if numstr.startswith(u'x'): |  | ||||||
|             base = 16 |  | ||||||
|             numstr = u'0%s' % numstr |  | ||||||
|         else: |  | ||||||
|             base = 10 |  | ||||||
|         return compat_chr(int(numstr, base)) |  | ||||||
|  |  | ||||||
|     # Unknown entity in name, return its literal representation |  | ||||||
|     return (u'&%s;' % entity) |  | ||||||
|  |  | ||||||
| compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix | compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix | ||||||
| class BaseHTMLParser(compat_html_parser.HTMLParser): | class BaseHTMLParser(compat_html_parser.HTMLParser): | ||||||
| @@ -543,13 +519,33 @@ def orderedSet(iterable): | |||||||
|     return res |     return res | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def _htmlentity_transform(entity): | ||||||
|  |     """Transforms an HTML entity to a character.""" | ||||||
|  |     # Known non-numeric HTML entity | ||||||
|  |     if entity in compat_html_entities.name2codepoint: | ||||||
|  |         return compat_chr(compat_html_entities.name2codepoint[entity]) | ||||||
|  |  | ||||||
|  |     mobj = re.match(r'#(x?[0-9]+)', entity) | ||||||
|  |     if mobj is not None: | ||||||
|  |         numstr = mobj.group(1) | ||||||
|  |         if numstr.startswith(u'x'): | ||||||
|  |             base = 16 | ||||||
|  |             numstr = u'0%s' % numstr | ||||||
|  |         else: | ||||||
|  |             base = 10 | ||||||
|  |         return compat_chr(int(numstr, base)) | ||||||
|  |  | ||||||
|  |     # Unknown entity in name, return its literal representation | ||||||
|  |     return (u'&%s;' % entity) | ||||||
|  |  | ||||||
|  |  | ||||||
| def unescapeHTML(s): | def unescapeHTML(s): | ||||||
|     if s is None: |     if s is None: | ||||||
|         return None |         return None | ||||||
|     assert type(s) == compat_str |     assert type(s) == compat_str | ||||||
|  |  | ||||||
|     result = re.sub(r'(?u)&(.+?);', htmlentity_transform, s) |     return re.sub( | ||||||
|     return result |         r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s) | ||||||
|  |  | ||||||
|  |  | ||||||
| def encodeFilename(s, for_subprocess=False): | def encodeFilename(s, for_subprocess=False): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister