mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Ignore BOM in batch files (Fixes #2450)
This commit is contained in:
		| @@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||||||
|  |  | ||||||
|  |  | ||||||
| # Various small unit tests | # Various small unit tests | ||||||
|  | import io | ||||||
| import xml.etree.ElementTree | import xml.etree.ElementTree | ||||||
|  |  | ||||||
| #from youtube_dl.utils import htmlentity_transform | #from youtube_dl.utils import htmlentity_transform | ||||||
| @@ -21,6 +22,7 @@ from youtube_dl.utils import ( | |||||||
|     orderedSet, |     orderedSet, | ||||||
|     PagedList, |     PagedList, | ||||||
|     parse_duration, |     parse_duration, | ||||||
|  |     read_batch_urls, | ||||||
|     sanitize_filename, |     sanitize_filename, | ||||||
|     shell_quote, |     shell_quote, | ||||||
|     smuggle_url, |     smuggle_url, | ||||||
| @@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase): | |||||||
|     def test_struct_unpack(self): |     def test_struct_unpack(self): | ||||||
|         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) |         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) | ||||||
|  |  | ||||||
|  |     def test_read_batch_urls(self): | ||||||
|  |         f = io.StringIO(u'''\xef\xbb\xbf foo | ||||||
|  |             bar\r | ||||||
|  |             baz | ||||||
|  |             # More after this line\r | ||||||
|  |             ; or after this | ||||||
|  |             bam''') | ||||||
|  |         self.assertEqual(read_batch_urls(f), [u'foo', u'bar', u'baz', u'bam']) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -71,6 +71,7 @@ from .utils import ( | |||||||
|     get_cachedir, |     get_cachedir, | ||||||
|     MaxDownloadsReached, |     MaxDownloadsReached, | ||||||
|     preferredencoding, |     preferredencoding, | ||||||
|  |     read_batch_urls, | ||||||
|     SameFileError, |     SameFileError, | ||||||
|     setproctitle, |     setproctitle, | ||||||
|     std_headers, |     std_headers, | ||||||
| @@ -552,21 +553,19 @@ def _real_main(argv=None): | |||||||
|         sys.exit(0) |         sys.exit(0) | ||||||
|  |  | ||||||
|     # Batch file verification |     # Batch file verification | ||||||
|     batchurls = [] |     batch_urls = [] | ||||||
|     if opts.batchfile is not None: |     if opts.batchfile is not None: | ||||||
|         try: |         try: | ||||||
|             if opts.batchfile == '-': |             if opts.batchfile == '-': | ||||||
|                 batchfd = sys.stdin |                 batchfd = sys.stdin | ||||||
|             else: |             else: | ||||||
|                 batchfd = open(opts.batchfile, 'r') |                 batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore') | ||||||
|             batchurls = batchfd.readlines() |             batch_urls = read_batch_urls(batchfd) | ||||||
|             batchurls = [x.strip() for x in batchurls] |  | ||||||
|             batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] |  | ||||||
|             if opts.verbose: |             if opts.verbose: | ||||||
|                 write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n') |                 write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n') | ||||||
|         except IOError: |         except IOError: | ||||||
|             sys.exit(u'ERROR: batch file could not be read') |             sys.exit(u'ERROR: batch file could not be read') | ||||||
|     all_urls = batchurls + args |     all_urls = batch_urls + args | ||||||
|     all_urls = [url.strip() for url in all_urls] |     all_urls = [url.strip() for url in all_urls] | ||||||
|     _enc = preferredencoding() |     _enc = preferredencoding() | ||||||
|     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] |     all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] | ||||||
|   | |||||||
| @@ -1,6 +1,7 @@ | |||||||
| #!/usr/bin/env python | #!/usr/bin/env python | ||||||
| # -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||||
|  |  | ||||||
|  | import contextlib | ||||||
| import ctypes | import ctypes | ||||||
| import datetime | import datetime | ||||||
| import email.utils | import email.utils | ||||||
| @@ -1245,3 +1246,19 @@ except TypeError: | |||||||
| else: | else: | ||||||
|     struct_pack = struct.pack |     struct_pack = struct.pack | ||||||
|     struct_unpack = struct.unpack |     struct_unpack = struct.unpack | ||||||
|  |  | ||||||
|  |  | ||||||
def read_batch_urls(batch_fd):
    """Return the URLs listed in a batch file, then close the file.

    batch_fd is an iterable of lines (text or bytes) that also provides a
    close() method, e.g. an open file or sys.stdin. It is always closed
    before this function returns.

    Each line is decoded as UTF-8 if it is bytes (undecodable bytes are
    replaced), stripped of a leading byte order mark and of surrounding
    whitespace. Empty lines and lines starting with '#', ';' or ']' are
    dropped. The remaining lines are returned as a list of text strings in
    their original order.
    """
    # A UTF-8 BOM appears as U+FEFF when the input was decoded as UTF-8, and
    # as the three characters \xef\xbb\xbf when its raw bytes were decoded
    # as Latin-1. Both forms must be removed; str.strip() removes neither.
    boms = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        # Lines read from a binary stream arrive as bytes.
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in boms:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        # Comment lines are reported as False so the caller filters them
        # out together with empty lines.
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister