	[utils] Place sanitize url function near other sanitizing functions
Author: Sergey M․
test/test_utils.py
@@ -39,6 +39,7 @@ from youtube_dl.utils import (
     read_batch_urls,
     sanitize_filename,
     sanitize_path,
+    sanitize_url_path_consecutive_slashes,
     shell_quote,
     smuggle_url,
     str_to_int,
@@ -55,7 +56,6 @@ from youtube_dl.utils import (
     xpath_with_ns,
     render_table,
     match_str,
-    url_sanitize_consecutive_slashes,
 )
 
 
@@ -169,6 +169,26 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(sanitize_path('./abc'), 'abc')
         self.assertEqual(sanitize_path('./../abc'), '..\\abc')
 
+    def test_sanitize_url_path_consecutive_slashes(self):
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/foo//bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname//foo/bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname//'),
+            'http://hostname/')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/foo/bar/filename.html'),
+            'http://hostname/foo/bar/filename.html')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/'),
+            'http://hostname/')
+        self.assertEqual(
+            sanitize_url_path_consecutive_slashes('http://hostname/abc//'),
+            'http://hostname/abc/')
+
     def test_ordered_set(self):
         self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
         self.assertEqual(orderedSet([]), [])
@@ -539,21 +559,6 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
             'like_count > 100 & dislike_count <? 50 & description',
             {'like_count': 190, 'dislike_count': 10}))
 
-    def test_url_sanitize_consecutive_slashes(self):
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname/foo//bar/filename.html'),
-            'http://hostname/foo/bar/filename.html')
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname//foo/bar/filename.html'),
-            'http://hostname/foo/bar/filename.html')
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname//'), 'http://hostname/')
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname/foo/bar/filename.html'),
-            'http://hostname/foo/bar/filename.html')
-        self.assertEqual(url_sanitize_consecutive_slashes(
-            'http://hostname/'), 'http://hostname/')
-
 
 if __name__ == '__main__':
     unittest.main()
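The renamed test can also be exercised on its own with the standard unittest runner. A minimal sketch, assuming the test module lives at test/test_utils.py (as in the youtube-dl layout) and is importable from the repository root:

import unittest

from test.test_utils import TestUtil

# Build a one-test suite containing only the renamed test and run it verbosely.
suite = unittest.TestSuite()
suite.addTest(TestUtil('test_sanitize_url_path_consecutive_slashes'))
unittest.TextTestRunner(verbosity=2).run(suite)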
youtube_dl/utils.py
@@ -326,6 +326,13 @@ def sanitize_path(s):
     return os.path.join(*sanitized_path)
 
 
+def sanitize_url_path_consecutive_slashes(url):
+    """Collapses consecutive slashes in URLs' path"""
+    parsed_url = list(compat_urlparse.urlparse(url))
+    parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
+    return compat_urlparse.urlunparse(parsed_url)
+
+
 def orderedSet(iterable):
     """ Remove all duplicates from the input iterable """
     res = []
@@ -1804,18 +1811,3 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
             return None  # No Proxy
         return compat_urllib_request.ProxyHandler.proxy_open(
             self, req, proxy, type)
-
-
-def url_sanitize_consecutive_slashes(url):
-    """Sanitize URLs with consecutive slashes
-
-    For example, transform both
-        http://hostname/foo//bar/filename.html
-    and
-        http://hostname//foo/bar/filename.html
-    into
-        http://hostname/foo/bar/filename.html
-    """
-    parsed_url = list(compat_urlparse.urlparse(url))
-    parsed_url[2] = re.sub(r'/{2,}', '/', parsed_url[2])
-    return compat_urlparse.urlunparse(parsed_url)
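For reference, the moved helper only touches the path component of the parsed URL (index 2 of the 6-tuple), leaving scheme, netloc, query and fragment untouched. Below is a standalone sketch of the same idea using the standard library directly; in youtube-dl, compat_urlparse wraps urlparse/urllib.parse across Python versions, and the helper name here is made up for illustration:

import re
import urllib.parse


def collapse_path_slashes(url):
    # Same approach as sanitize_url_path_consecutive_slashes: parse the URL,
    # collapse runs of '/' in the path component only, then reassemble.
    parts = list(urllib.parse.urlparse(url))
    parts[2] = re.sub(r'/{2,}', '/', parts[2])  # index 2 is the path
    return urllib.parse.urlunparse(parts)


assert collapse_path_slashes('http://hostname//foo//bar/filename.html') == \
    'http://hostname/foo/bar/filename.html'
assert collapse_path_slashes('http://hostname/abc//') == 'http://hostname/abc/'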