diff --git a/README.md b/README.md index e267760d62..14acddbd00 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like. %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. + --restrict-filenames Avoid some characters such as "&" and spaces in + filenames -a, --batch-file FILE file containing URLs to download ('-' for stdin) -w, --no-overwrites do not overwrite files -c, --continue resume partially downloaded files diff --git a/test/test_utils.py b/test/test_utils.py index e7c6d5b3d7..0a435ddc54 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -30,11 +30,34 @@ class TestUtil(unittest.TestCase): self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) self.assertEqual(u'this - that', sanitize_filename(u'this: that')) + self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T') self.assertEqual(sanitize_filename(u'ä'), u'ä') self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') - for forbidden in u'"\0\\/': - self.assertTrue(forbidden not in sanitize_filename(forbidden)) + forbidden = u'"\0\\/' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc)) + + def test_sanitize_filename_restricted(self): + self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc') + self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e') + + self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') + + self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True)) + self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) + + self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) + self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True)) + self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True)) + self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) + + forbidden = u'"\0\\/&: \'\t\n' + for fc in forbidden: + print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True))) + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) def test_ordered_set(self): self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) diff --git a/youtube-dl.1 b/youtube-dl.1 index cfaefd0c8d..64120a8d24 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -59,6 +59,8 @@ redistribute it or use it however you like. \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. +--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files @@ -210,7 +212,7 @@ Please note that Python 2.5 is not supported anymore. .PP Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on -some systems) or clone the git repo to see the code. +some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the \f[C]__main__.py\f[] file. To recompile the executable, run \f[C]make\ youtube-dl\f[]. diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion index 76451a2b27..dee191cd41 100644 --- a/youtube-dl.bash-completion +++ b/youtube-dl.bash-completion @@ -3,7 +3,7 @@ __youtube-dl() local cur prev opts COMPREPLY=() cur="${COMP_WORDS[COMP_CWORD]}" - opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" + opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" if [[ ${cur} == * ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 37a842cdd1..4c79be4325 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -44,37 +44,38 @@ class FileDownloader(object): Available options: - username: Username for authentication purposes. - password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. - quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - forcethumbnail: Force printing thumbnail URL. - forcedescription: Force printing description. - forcefilename: Force printing final filename. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - ignoreerrors: Do not stop on download errors. - ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. - retries: Number of times to retry for HTTP error 5xx - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - playliststart: Playlist item to start at. - playlistend: Playlist item to end at. - matchtitle: Download only matching titles. - rejecttitle: Reject downloads for matching titles. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - writedescription: Write the video description to a .description file - writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a .srt file - subtitleslang: Language of the subtitles to download + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + forcethumbnail: Force printing thumbnail URL. + forcedescription: Force printing description. + forcefilename: Force printing final filename. + simulate: Do not download the video files. + format: Video format code. + format_limit: Highest quality format to try. + outtmpl: Template for output names. + restrictfilenames: Do not allow "&" and spaces in file names + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 5xx + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + playliststart: Playlist item to start at. + playlistend: Playlist item to end at. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file + writesubtitles: Write the video subtitles to a .srt file + subtitleslang: Language of the subtitles to download """ params = None @@ -349,7 +350,7 @@ class FileDownloader(object): def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - info_dict['stitle'] = sanitize_filename(info_dict['title']) + info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames')) reason = self._match_entry(info_dict) if reason is not None: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index bf4b55f481..1109e05cd4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -272,6 +272,9 @@ def parseOpts(): help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') + filesystem.add_option('--restrict-filenames', + action='store_true', dest='restrictfilenames', + help='Avoid some characters such as "&" and spaces in filenames', default=False) filesystem.add_option('-a', '--batch-file', dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', @@ -485,6 +488,7 @@ def _real_main(): or (opts.useid and u'%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), + 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 658fd2686b..55f2fe02c0 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -194,18 +194,22 @@ def timeconvert(timestr): if timetuple is not None: timestamp = email.utils.mktime_tz(timetuple) return timestamp - -def sanitize_filename(s): - """Sanitizes a string so it could be used as part of a filename.""" + +def sanitize_filename(s, restricted=False): + """Sanitizes a string so it could be used as part of a filename. + If restricted is set, use a stricter subset of allowed characters. + """ def replace_insane(char): if char == '?' or ord(char) < 32 or ord(char) == 127: return '' elif char == '"': - return '\'' + return '' if restricted else 'FOO\'' elif char == ':': - return ' -' + return '_-' if restricted else ' -' elif char in '\\/|*<>': return '-' + if restricted and (char in '&\'' or char.isspace()): + return '_' return char result = u''.join(map(replace_insane, s))