mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 06:35:12 +00:00 
			
		
		
		
	New option --restrict-filenames
This commit is contained in:
		| @@ -47,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like. | |||||||
|                              %(extractor)s for the provider (youtube, metacafe, |                              %(extractor)s for the provider (youtube, metacafe, | ||||||
|                              etc), %(id)s for the video id and %% for a literal |                              etc), %(id)s for the video id and %% for a literal | ||||||
|                              percent. Use - to output to stdout. |                              percent. Use - to output to stdout. | ||||||
|  |     --restrict-filenames     Avoid some characters such as "&" and spaces in | ||||||
|  |                              filenames | ||||||
|     -a, --batch-file FILE    file containing URLs to download ('-' for stdin) |     -a, --batch-file FILE    file containing URLs to download ('-' for stdin) | ||||||
|     -w, --no-overwrites      do not overwrite files |     -w, --no-overwrites      do not overwrite files | ||||||
|     -c, --continue           resume partially downloaded files |     -c, --continue           resume partially downloaded files | ||||||
|   | |||||||
| @@ -30,11 +30,34 @@ class TestUtil(unittest.TestCase): | |||||||
| 		self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) | 		self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) | ||||||
| 		self.assertEqual(u'this - that', sanitize_filename(u'this: that')) | 		self.assertEqual(u'this - that', sanitize_filename(u'this: that')) | ||||||
|  |  | ||||||
|  | 		self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T') | ||||||
| 		self.assertEqual(sanitize_filename(u'ä'), u'ä') | 		self.assertEqual(sanitize_filename(u'ä'), u'ä') | ||||||
| 		self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') | 		self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') | ||||||
|  |  | ||||||
| 		for forbidden in u'"\0\\/': | 		forbidden = u'"\0\\/' | ||||||
| 			self.assertTrue(forbidden not in sanitize_filename(forbidden)) | 		for fc in forbidden: | ||||||
|  | 			for fbc in forbidden: | ||||||
|  | 				self.assertTrue(fbc not in sanitize_filename(fc)) | ||||||
|  |  | ||||||
|  | 	def test_sanitize_filename_restricted(self): | ||||||
|  | 		self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc') | ||||||
|  | 		self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e') | ||||||
|  |  | ||||||
|  | 		self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') | ||||||
|  |  | ||||||
|  | 		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True)) | ||||||
|  | 		self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) | ||||||
|  |  | ||||||
|  | 		self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) | ||||||
|  | 		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True)) | ||||||
|  | 		self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True)) | ||||||
|  | 		self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) | ||||||
|  |  | ||||||
|  | 		forbidden = u'"\0\\/&: \'\t\n' | ||||||
|  | 		for fc in forbidden: | ||||||
|  | 			print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True))) | ||||||
|  | 			for fbc in forbidden: | ||||||
|  | 				self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) | ||||||
|  |  | ||||||
| 	def test_ordered_set(self): | 	def test_ordered_set(self): | ||||||
| 		self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) | 		self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) | ||||||
|   | |||||||
| @@ -59,6 +59,8 @@ redistribute it or use it however you like. | |||||||
| \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, | ||||||
| \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal | ||||||
| \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. | ||||||
|  | --restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in | ||||||
|  | \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames | ||||||
| -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) | -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) | ||||||
| -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files | -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files | ||||||
| -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files | -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files | ||||||
| @@ -210,7 +212,7 @@ Please note that Python 2.5 is not supported anymore. | |||||||
| .PP | .PP | ||||||
| Since June 2012 (#342) youtube-dl is packed as an executable zipfile, | Since June 2012 (#342) youtube-dl is packed as an executable zipfile, | ||||||
| simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on | simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on | ||||||
| some systems) or clone the git repo to see the code. | some systems) or clone the git repository, as laid out above. | ||||||
| If you modify the code, you can run it by executing the | If you modify the code, you can run it by executing the | ||||||
| \f[C]__main__.py\f[] file. | \f[C]__main__.py\f[] file. | ||||||
| To recompile the executable, run \f[C]make\ youtube-dl\f[]. | To recompile the executable, run \f[C]make\ youtube-dl\f[]. | ||||||
|   | |||||||
| @@ -3,7 +3,7 @@ __youtube-dl() | |||||||
|     local cur prev opts |     local cur prev opts | ||||||
|     COMPREPLY=() |     COMPREPLY=() | ||||||
|     cur="${COMP_WORDS[COMP_CWORD]}" |     cur="${COMP_WORDS[COMP_CWORD]}" | ||||||
|     opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" |     opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" | ||||||
|  |  | ||||||
|     if [[ ${cur} == * ]] ; then |     if [[ ${cur} == * ]] ; then | ||||||
|         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) |         COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) | ||||||
|   | |||||||
| @@ -44,37 +44,38 @@ class FileDownloader(object): | |||||||
|  |  | ||||||
| 	Available options: | 	Available options: | ||||||
|  |  | ||||||
| 	username:         Username for authentication purposes. | 	username:          Username for authentication purposes. | ||||||
| 	password:         Password for authentication purposes. | 	password:          Password for authentication purposes. | ||||||
| 	usenetrc:         Use netrc for authentication instead. | 	usenetrc:          Use netrc for authentication instead. | ||||||
| 	quiet:            Do not print messages to stdout. | 	quiet:             Do not print messages to stdout. | ||||||
| 	forceurl:         Force printing final URL. | 	forceurl:          Force printing final URL. | ||||||
| 	forcetitle:       Force printing title. | 	forcetitle:        Force printing title. | ||||||
| 	forcethumbnail:   Force printing thumbnail URL. | 	forcethumbnail:    Force printing thumbnail URL. | ||||||
| 	forcedescription: Force printing description. | 	forcedescription:  Force printing description. | ||||||
| 	forcefilename:    Force printing final filename. | 	forcefilename:     Force printing final filename. | ||||||
| 	simulate:         Do not download the video files. | 	simulate:          Do not download the video files. | ||||||
| 	format:           Video format code. | 	format:            Video format code. | ||||||
| 	format_limit:     Highest quality format to try. | 	format_limit:      Highest quality format to try. | ||||||
| 	outtmpl:          Template for output names. | 	outtmpl:           Template for output names. | ||||||
| 	ignoreerrors:     Do not stop on download errors. | 	restrictfilenames: Do not allow "&" and spaces in file names | ||||||
| 	ratelimit:        Download speed limit, in bytes/sec. | 	ignoreerrors:      Do not stop on download errors. | ||||||
| 	nooverwrites:     Prevent overwriting files. | 	ratelimit:         Download speed limit, in bytes/sec. | ||||||
| 	retries:          Number of times to retry for HTTP error 5xx | 	nooverwrites:      Prevent overwriting files. | ||||||
| 	continuedl:       Try to continue downloads if possible. | 	retries:           Number of times to retry for HTTP error 5xx | ||||||
| 	noprogress:       Do not print the progress bar. | 	continuedl:        Try to continue downloads if possible. | ||||||
| 	playliststart:    Playlist item to start at. | 	noprogress:        Do not print the progress bar. | ||||||
| 	playlistend:      Playlist item to end at. | 	playliststart:     Playlist item to start at. | ||||||
| 	matchtitle:       Download only matching titles. | 	playlistend:       Playlist item to end at. | ||||||
| 	rejecttitle:      Reject downloads for matching titles. | 	matchtitle:        Download only matching titles. | ||||||
| 	logtostderr:      Log messages to stderr instead of stdout. | 	rejecttitle:       Reject downloads for matching titles. | ||||||
| 	consoletitle:     Display progress in console window's titlebar. | 	logtostderr:       Log messages to stderr instead of stdout. | ||||||
| 	nopart:           Do not use temporary .part files. | 	consoletitle:      Display progress in console window's titlebar. | ||||||
| 	updatetime:       Use the Last-modified header to set output file timestamps. | 	nopart:            Do not use temporary .part files. | ||||||
| 	writedescription: Write the video description to a .description file | 	updatetime:        Use the Last-modified header to set output file timestamps. | ||||||
| 	writeinfojson:    Write the video description to a .info.json file | 	writedescription:  Write the video description to a .description file | ||||||
| 	writesubtitles:   Write the video subtitles to a .srt file | 	writeinfojson:     Write the video description to a .info.json file | ||||||
| 	subtitleslang:    Language of the subtitles to download | 	writesubtitles:    Write the video subtitles to a .srt file | ||||||
|  | 	subtitleslang:     Language of the subtitles to download | ||||||
| 	""" | 	""" | ||||||
|  |  | ||||||
| 	params = None | 	params = None | ||||||
| @@ -349,7 +350,7 @@ class FileDownloader(object): | |||||||
| 	def process_info(self, info_dict): | 	def process_info(self, info_dict): | ||||||
| 		"""Process a single dictionary returned by an InfoExtractor.""" | 		"""Process a single dictionary returned by an InfoExtractor.""" | ||||||
|  |  | ||||||
| 		info_dict['stitle'] = sanitize_filename(info_dict['title']) | 		info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames')) | ||||||
|  |  | ||||||
| 		reason = self._match_entry(info_dict) | 		reason = self._match_entry(info_dict) | ||||||
| 		if reason is not None: | 		if reason is not None: | ||||||
|   | |||||||
| @@ -272,6 +272,9 @@ def parseOpts(): | |||||||
| 			help='number downloaded files starting from 00000', default=False) | 			help='number downloaded files starting from 00000', default=False) | ||||||
| 	filesystem.add_option('-o', '--output', | 	filesystem.add_option('-o', '--output', | ||||||
| 			dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') | 			dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') | ||||||
|  | 	filesystem.add_option('--restrict-filenames', | ||||||
|  | 			action='store_true', dest='restrictfilenames', | ||||||
|  | 			help='Avoid some characters such as "&" and spaces in filenames', default=False) | ||||||
| 	filesystem.add_option('-a', '--batch-file', | 	filesystem.add_option('-a', '--batch-file', | ||||||
| 			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') | 			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') | ||||||
| 	filesystem.add_option('-w', '--no-overwrites', | 	filesystem.add_option('-w', '--no-overwrites', | ||||||
| @@ -485,6 +488,7 @@ def _real_main(): | |||||||
| 			or (opts.useid and u'%(id)s.%(ext)s') | 			or (opts.useid and u'%(id)s.%(ext)s') | ||||||
| 			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') | 			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') | ||||||
| 			or u'%(id)s.%(ext)s'), | 			or u'%(id)s.%(ext)s'), | ||||||
|  | 		'restrictfilenames': opts.restrictfilenames, | ||||||
| 		'ignoreerrors': opts.ignoreerrors, | 		'ignoreerrors': opts.ignoreerrors, | ||||||
| 		'ratelimit': opts.ratelimit, | 		'ratelimit': opts.ratelimit, | ||||||
| 		'nooverwrites': opts.nooverwrites, | 		'nooverwrites': opts.nooverwrites, | ||||||
|   | |||||||
| @@ -194,18 +194,22 @@ def timeconvert(timestr): | |||||||
| 	if timetuple is not None: | 	if timetuple is not None: | ||||||
| 		timestamp = email.utils.mktime_tz(timetuple) | 		timestamp = email.utils.mktime_tz(timetuple) | ||||||
| 	return timestamp | 	return timestamp | ||||||
| 	 |  | ||||||
| def sanitize_filename(s): | def sanitize_filename(s, restricted=False): | ||||||
| 	"""Sanitizes a string so it could be used as part of a filename.""" | 	"""Sanitizes a string so it could be used as part of a filename. | ||||||
|  | 	If restricted is set, use a stricter subset of allowed characters. | ||||||
|  | 	""" | ||||||
| 	def replace_insane(char): | 	def replace_insane(char): | ||||||
| 		if char == '?' or ord(char) < 32 or ord(char) == 127: | 		if char == '?' or ord(char) < 32 or ord(char) == 127: | ||||||
| 			return '' | 			return '' | ||||||
| 		elif char == '"': | 		elif char == '"': | ||||||
| 			return '\'' | 			return '' if restricted else 'FOO\'' | ||||||
| 		elif char == ':': | 		elif char == ':': | ||||||
| 			return ' -' | 			return '_-' if restricted else ' -' | ||||||
| 		elif char in '\\/|*<>': | 		elif char in '\\/|*<>': | ||||||
| 			return '-' | 			return '-' | ||||||
|  | 		if restricted and (char in '&\'' or char.isspace()): | ||||||
|  | 			return '_' | ||||||
| 		return char | 		return char | ||||||
|  |  | ||||||
| 	result = u''.join(map(replace_insane, s)) | 	result = u''.join(map(replace_insane, s)) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Philipp Hagemeister
					Philipp Hagemeister