mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-31 14:45:14 +00:00 
			
		
		
		
	Merge branch 'master' into subtitles_rework
This commit is contained in:
		| @@ -32,12 +32,15 @@ tests = [ | |||||||
|     # 83 |     # 83 | ||||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", |     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", | ||||||
|      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), |      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"), | ||||||
|     # 82 |     # 82 - vflZK4ZYR 2013/08/23 | ||||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", |     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", | ||||||
|      "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"), |      "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"), | ||||||
|     # 81 - vflLC8JvQ 2013/07/25 |     # 81 - vflLC8JvQ 2013/07/25 | ||||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", |     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", | ||||||
|      "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), |      "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), | ||||||
|  |     # 80 - vflZK4ZYR 2013/08/23 (sporadic) | ||||||
|  |     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>", | ||||||
|  |      "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"), | ||||||
|     # 79 - vflLC8JvQ 2013/07/25 (sporadic) |     # 79 - vflLC8JvQ 2013/07/25 (sporadic) | ||||||
|     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/", |     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/", | ||||||
|      "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), |      "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), | ||||||
|   | |||||||
| @@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor): | |||||||
|         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] |         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] | ||||||
|         return dict((program, executable(program)) for program in programs) |         return dict((program, executable(program)) for program in programs) | ||||||
|  |  | ||||||
|     def run_ffmpeg(self, path, out_path, opts): |     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): | ||||||
|         if not self._exes['ffmpeg'] and not self._exes['avconv']: |         if not self._exes['ffmpeg'] and not self._exes['avconv']: | ||||||
|             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') |             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') | ||||||
|         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)] |  | ||||||
|  |         files_cmd = [] | ||||||
|  |         for path in input_paths: | ||||||
|  |             files_cmd.extend(['-i', encodeFilename(path)]) | ||||||
|  |         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd | ||||||
|                + opts + |                + opts + | ||||||
|                [encodeFilename(self._ffmpeg_filename_argument(out_path))]) |                [encodeFilename(self._ffmpeg_filename_argument(out_path))]) | ||||||
|  |  | ||||||
|         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||||
|         stdout,stderr = p.communicate() |         stdout,stderr = p.communicate() | ||||||
|         if p.returncode != 0: |         if p.returncode != 0: | ||||||
| @@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor): | |||||||
|             msg = stderr.strip().split('\n')[-1] |             msg = stderr.strip().split('\n')[-1] | ||||||
|             raise FFmpegPostProcessorError(msg) |             raise FFmpegPostProcessorError(msg) | ||||||
|  |  | ||||||
|  |     def run_ffmpeg(self, path, out_path, opts): | ||||||
|  |         self.run_ffmpeg_multiple_files([path], out_path, opts) | ||||||
|  |  | ||||||
|     def _ffmpeg_filename_argument(self, fn): |     def _ffmpeg_filename_argument(self, fn): | ||||||
|         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details |         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details | ||||||
|         if fn.startswith(u'-'): |         if fn.startswith(u'-'): | ||||||
| @@ -232,3 +240,226 @@ class FFmpegVideoConvertor(FFmpegPostProcessor): | |||||||
|         information['format'] = self._preferedformat |         information['format'] = self._preferedformat | ||||||
|         information['ext'] = self._preferedformat |         information['ext'] = self._preferedformat | ||||||
|         return False,information |         return False,information | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): | ||||||
|  |     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt | ||||||
|  |     _lang_map = { | ||||||
|  |         'aa': 'aar', | ||||||
|  |         'ab': 'abk', | ||||||
|  |         'ae': 'ave', | ||||||
|  |         'af': 'afr', | ||||||
|  |         'ak': 'aka', | ||||||
|  |         'am': 'amh', | ||||||
|  |         'an': 'arg', | ||||||
|  |         'ar': 'ara', | ||||||
|  |         'as': 'asm', | ||||||
|  |         'av': 'ava', | ||||||
|  |         'ay': 'aym', | ||||||
|  |         'az': 'aze', | ||||||
|  |         'ba': 'bak', | ||||||
|  |         'be': 'bel', | ||||||
|  |         'bg': 'bul', | ||||||
|  |         'bh': 'bih', | ||||||
|  |         'bi': 'bis', | ||||||
|  |         'bm': 'bam', | ||||||
|  |         'bn': 'ben', | ||||||
|  |         'bo': 'bod', | ||||||
|  |         'br': 'bre', | ||||||
|  |         'bs': 'bos', | ||||||
|  |         'ca': 'cat', | ||||||
|  |         'ce': 'che', | ||||||
|  |         'ch': 'cha', | ||||||
|  |         'co': 'cos', | ||||||
|  |         'cr': 'cre', | ||||||
|  |         'cs': 'ces', | ||||||
|  |         'cu': 'chu', | ||||||
|  |         'cv': 'chv', | ||||||
|  |         'cy': 'cym', | ||||||
|  |         'da': 'dan', | ||||||
|  |         'de': 'deu', | ||||||
|  |         'dv': 'div', | ||||||
|  |         'dz': 'dzo', | ||||||
|  |         'ee': 'ewe', | ||||||
|  |         'el': 'ell', | ||||||
|  |         'en': 'eng', | ||||||
|  |         'eo': 'epo', | ||||||
|  |         'es': 'spa', | ||||||
|  |         'et': 'est', | ||||||
|  |         'eu': 'eus', | ||||||
|  |         'fa': 'fas', | ||||||
|  |         'ff': 'ful', | ||||||
|  |         'fi': 'fin', | ||||||
|  |         'fj': 'fij', | ||||||
|  |         'fo': 'fao', | ||||||
|  |         'fr': 'fra', | ||||||
|  |         'fy': 'fry', | ||||||
|  |         'ga': 'gle', | ||||||
|  |         'gd': 'gla', | ||||||
|  |         'gl': 'glg', | ||||||
|  |         'gn': 'grn', | ||||||
|  |         'gu': 'guj', | ||||||
|  |         'gv': 'glv', | ||||||
|  |         'ha': 'hau', | ||||||
|  |         'he': 'heb', | ||||||
|  |         'hi': 'hin', | ||||||
|  |         'ho': 'hmo', | ||||||
|  |         'hr': 'hrv', | ||||||
|  |         'ht': 'hat', | ||||||
|  |         'hu': 'hun', | ||||||
|  |         'hy': 'hye', | ||||||
|  |         'hz': 'her', | ||||||
|  |         'ia': 'ina', | ||||||
|  |         'id': 'ind', | ||||||
|  |         'ie': 'ile', | ||||||
|  |         'ig': 'ibo', | ||||||
|  |         'ii': 'iii', | ||||||
|  |         'ik': 'ipk', | ||||||
|  |         'io': 'ido', | ||||||
|  |         'is': 'isl', | ||||||
|  |         'it': 'ita', | ||||||
|  |         'iu': 'iku', | ||||||
|  |         'ja': 'jpn', | ||||||
|  |         'jv': 'jav', | ||||||
|  |         'ka': 'kat', | ||||||
|  |         'kg': 'kon', | ||||||
|  |         'ki': 'kik', | ||||||
|  |         'kj': 'kua', | ||||||
|  |         'kk': 'kaz', | ||||||
|  |         'kl': 'kal', | ||||||
|  |         'km': 'khm', | ||||||
|  |         'kn': 'kan', | ||||||
|  |         'ko': 'kor', | ||||||
|  |         'kr': 'kau', | ||||||
|  |         'ks': 'kas', | ||||||
|  |         'ku': 'kur', | ||||||
|  |         'kv': 'kom', | ||||||
|  |         'kw': 'cor', | ||||||
|  |         'ky': 'kir', | ||||||
|  |         'la': 'lat', | ||||||
|  |         'lb': 'ltz', | ||||||
|  |         'lg': 'lug', | ||||||
|  |         'li': 'lim', | ||||||
|  |         'ln': 'lin', | ||||||
|  |         'lo': 'lao', | ||||||
|  |         'lt': 'lit', | ||||||
|  |         'lu': 'lub', | ||||||
|  |         'lv': 'lav', | ||||||
|  |         'mg': 'mlg', | ||||||
|  |         'mh': 'mah', | ||||||
|  |         'mi': 'mri', | ||||||
|  |         'mk': 'mkd', | ||||||
|  |         'ml': 'mal', | ||||||
|  |         'mn': 'mon', | ||||||
|  |         'mr': 'mar', | ||||||
|  |         'ms': 'msa', | ||||||
|  |         'mt': 'mlt', | ||||||
|  |         'my': 'mya', | ||||||
|  |         'na': 'nau', | ||||||
|  |         'nb': 'nob', | ||||||
|  |         'nd': 'nde', | ||||||
|  |         'ne': 'nep', | ||||||
|  |         'ng': 'ndo', | ||||||
|  |         'nl': 'nld', | ||||||
|  |         'nn': 'nno', | ||||||
|  |         'no': 'nor', | ||||||
|  |         'nr': 'nbl', | ||||||
|  |         'nv': 'nav', | ||||||
|  |         'ny': 'nya', | ||||||
|  |         'oc': 'oci', | ||||||
|  |         'oj': 'oji', | ||||||
|  |         'om': 'orm', | ||||||
|  |         'or': 'ori', | ||||||
|  |         'os': 'oss', | ||||||
|  |         'pa': 'pan', | ||||||
|  |         'pi': 'pli', | ||||||
|  |         'pl': 'pol', | ||||||
|  |         'ps': 'pus', | ||||||
|  |         'pt': 'por', | ||||||
|  |         'qu': 'que', | ||||||
|  |         'rm': 'roh', | ||||||
|  |         'rn': 'run', | ||||||
|  |         'ro': 'ron', | ||||||
|  |         'ru': 'rus', | ||||||
|  |         'rw': 'kin', | ||||||
|  |         'sa': 'san', | ||||||
|  |         'sc': 'srd', | ||||||
|  |         'sd': 'snd', | ||||||
|  |         'se': 'sme', | ||||||
|  |         'sg': 'sag', | ||||||
|  |         'si': 'sin', | ||||||
|  |         'sk': 'slk', | ||||||
|  |         'sl': 'slv', | ||||||
|  |         'sm': 'smo', | ||||||
|  |         'sn': 'sna', | ||||||
|  |         'so': 'som', | ||||||
|  |         'sq': 'sqi', | ||||||
|  |         'sr': 'srp', | ||||||
|  |         'ss': 'ssw', | ||||||
|  |         'st': 'sot', | ||||||
|  |         'su': 'sun', | ||||||
|  |         'sv': 'swe', | ||||||
|  |         'sw': 'swa', | ||||||
|  |         'ta': 'tam', | ||||||
|  |         'te': 'tel', | ||||||
|  |         'tg': 'tgk', | ||||||
|  |         'th': 'tha', | ||||||
|  |         'ti': 'tir', | ||||||
|  |         'tk': 'tuk', | ||||||
|  |         'tl': 'tgl', | ||||||
|  |         'tn': 'tsn', | ||||||
|  |         'to': 'ton', | ||||||
|  |         'tr': 'tur', | ||||||
|  |         'ts': 'tso', | ||||||
|  |         'tt': 'tat', | ||||||
|  |         'tw': 'twi', | ||||||
|  |         'ty': 'tah', | ||||||
|  |         'ug': 'uig', | ||||||
|  |         'uk': 'ukr', | ||||||
|  |         'ur': 'urd', | ||||||
|  |         'uz': 'uzb', | ||||||
|  |         've': 'ven', | ||||||
|  |         'vi': 'vie', | ||||||
|  |         'vo': 'vol', | ||||||
|  |         'wa': 'wln', | ||||||
|  |         'wo': 'wol', | ||||||
|  |         'xh': 'xho', | ||||||
|  |         'yi': 'yid', | ||||||
|  |         'yo': 'yor', | ||||||
|  |         'za': 'zha', | ||||||
|  |         'zh': 'zho', | ||||||
|  |         'zu': 'zul', | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     def __init__(self, downloader=None, subtitlesformat='srt'): | ||||||
|  |         super(FFmpegEmbedSubtitlePP, self).__init__(downloader) | ||||||
|  |         self._subformat = subtitlesformat | ||||||
|  |  | ||||||
|  |     @classmethod | ||||||
|  |     def _conver_lang_code(cls, code): | ||||||
|  |         """Convert language code from ISO 639-1 to ISO 639-2/T""" | ||||||
|  |         return cls._lang_map.get(code[:2]) | ||||||
|  |  | ||||||
|  |     def run(self, information): | ||||||
|  |         if information['ext'] != u'mp4': | ||||||
|  |             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files') | ||||||
|  |             return True, information | ||||||
|  |         sub_langs = [key for key in information['subtitles']] | ||||||
|  |  | ||||||
|  |         filename = information['filepath'] | ||||||
|  |         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs] | ||||||
|  |  | ||||||
|  |         opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy'] | ||||||
|  |         for (i, lang) in enumerate(sub_langs): | ||||||
|  |             opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text']) | ||||||
|  |             lang_code = self._conver_lang_code(lang) | ||||||
|  |             if lang_code is not None: | ||||||
|  |                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) | ||||||
|  |         opts.extend(['-f', 'mp4']) | ||||||
|  |  | ||||||
|  |         temp_filename = filename + u'.temp' | ||||||
|  |         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) | ||||||
|  |         os.remove(encodeFilename(filename)) | ||||||
|  |         os.rename(encodeFilename(temp_filename), encodeFilename(filename)) | ||||||
|  |  | ||||||
|  |         return True, information | ||||||
|   | |||||||
| @@ -221,16 +221,19 @@ class YoutubeDL(object): | |||||||
|  |  | ||||||
|     def report_writesubtitles(self, sub_filename): |     def report_writesubtitles(self, sub_filename): | ||||||
|         """ Report that the subtitles file is being written """ |         """ Report that the subtitles file is being written """ | ||||||
|         self.to_screen(u'[info] Writing subtitle: ' + sub_filename) |         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename) | ||||||
|  |  | ||||||
|     def report_existingsubtitles(self, sub_filename): |  | ||||||
|         """ Report that the subtitles file has been already written """ |  | ||||||
|         self.to_screen(u'[info] Skipping existing subtitle: ' + sub_filename) |  | ||||||
|  |  | ||||||
|     def report_writeinfojson(self, infofn): |     def report_writeinfojson(self, infofn): | ||||||
|         """ Report that the metadata file has been written """ |         """ Report that the metadata file has been written """ | ||||||
|         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) |         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) | ||||||
|  |  | ||||||
|  |     def report_file_already_downloaded(self, file_name): | ||||||
|  |         """Report file has already been fully downloaded.""" | ||||||
|  |         try: | ||||||
|  |             self.to_screen(u'[download] %s has already been downloaded' % file_name) | ||||||
|  |         except (UnicodeEncodeError) as err: | ||||||
|  |             self.to_screen(u'[download] The file has already been downloaded') | ||||||
|  |  | ||||||
|     def increment_downloads(self): |     def increment_downloads(self): | ||||||
|         """Increment the ordinal that assigns a number to each file.""" |         """Increment the ordinal that assigns a number to each file.""" | ||||||
|         self._num_downloads += 1 |         self._num_downloads += 1 | ||||||
| @@ -489,16 +492,12 @@ class YoutubeDL(object): | |||||||
|             # that way it will silently go on when used with unsupporting IE |             # that way it will silently go on when used with unsupporting IE | ||||||
|             subtitles = info_dict['subtitles'] |             subtitles = info_dict['subtitles'] | ||||||
|             sub_format = self.params.get('subtitlesformat') |             sub_format = self.params.get('subtitlesformat') | ||||||
|  |  | ||||||
|             for sub_lang in subtitles.keys(): |             for sub_lang in subtitles.keys(): | ||||||
|                 sub = subtitles[sub_lang] |                 sub = subtitles[sub_lang] | ||||||
|                 if sub is None: |                 if sub is None: | ||||||
|                     continue |                     continue | ||||||
|                 try: |                 try: | ||||||
|                     sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format |                     sub_filename = subtitles_filename(filename, sub_lang, sub_format) | ||||||
|                     if os.path.isfile(encodeFilename(sub_filename)): |  | ||||||
|                         self.report_existingsubtitles(sub_filename) |  | ||||||
|                         continue |  | ||||||
|                     self.report_writesubtitles(sub_filename) |                     self.report_writesubtitles(sub_filename) | ||||||
|                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: |                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||||
|                             subfile.write(sub) |                             subfile.write(sub) | ||||||
|   | |||||||
| @@ -320,6 +320,8 @@ def parseOpts(overrideArguments=None): | |||||||
|             help='keeps the video file on disk after the post-processing; the video is erased by default') |             help='keeps the video file on disk after the post-processing; the video is erased by default') | ||||||
|     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, |     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, | ||||||
|             help='do not overwrite post-processed files; the post-processed files are overwritten by default') |             help='do not overwrite post-processed files; the post-processed files are overwritten by default') | ||||||
|  |     postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, | ||||||
|  |             help='embed subtitles in the video (only for mp4 videos)') | ||||||
|  |  | ||||||
|  |  | ||||||
|     parser.add_option_group(general) |     parser.add_option_group(general) | ||||||
| @@ -608,6 +610,8 @@ def _real_main(argv=None): | |||||||
|         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) |         ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) | ||||||
|     if opts.recodevideo: |     if opts.recodevideo: | ||||||
|         ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) |         ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) | ||||||
|  |     if opts.embedsubtitles: | ||||||
|  |         ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) | ||||||
|  |  | ||||||
|     # Update version |     # Update version | ||||||
|     if opts.update_self: |     if opts.update_self: | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| import re | import re | ||||||
| import json | import json | ||||||
|  | import itertools | ||||||
| import socket | import socket | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| @@ -33,7 +34,7 @@ class DailyMotionSubtitlesIE(NoAutoSubtitlesIE): | |||||||
|         self._downloader.report_warning(u'video doesn\'t have subtitles') |         self._downloader.report_warning(u'video doesn\'t have subtitles') | ||||||
|         return {} |         return {} | ||||||
|  |  | ||||||
| class DailymotionIE(DailyMotionSubtitlesIE): | class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor): | ||||||
|     """Information Extractor for Dailymotion""" |     """Information Extractor for Dailymotion""" | ||||||
|  |  | ||||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' |     _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' | ||||||
| @@ -43,7 +44,7 @@ class DailymotionIE(DailyMotionSubtitlesIE): | |||||||
|         u'file': u'x33vw9.mp4', |         u'file': u'x33vw9.mp4', | ||||||
|         u'md5': u'392c4b85a60a90dc4792da41ce3144eb', |         u'md5': u'392c4b85a60a90dc4792da41ce3144eb', | ||||||
|         u'info_dict': { |         u'info_dict': { | ||||||
|             u"uploader": u"Alex and Van .", |             u"uploader": u"Alex and Van .",  | ||||||
|             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" |             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -85,9 +86,9 @@ class DailymotionIE(DailyMotionSubtitlesIE): | |||||||
|         for key in ['stream_h264_hd1080_url','stream_h264_hd_url', |         for key in ['stream_h264_hd1080_url','stream_h264_hd_url', | ||||||
|                     'stream_h264_hq_url','stream_h264_url', |                     'stream_h264_hq_url','stream_h264_url', | ||||||
|                     'stream_h264_ld_url']: |                     'stream_h264_ld_url']: | ||||||
|             if info.get(key):  # key in info and info[key]: |             if info.get(key):#key in info and info[key]: | ||||||
|                 max_quality = key |                 max_quality = key | ||||||
|                 self.to_screen(u'%s: Using %s' % (video_id, key)) |                 self.to_screen(u'Using %s' % key) | ||||||
|                 break |                 break | ||||||
|         else: |         else: | ||||||
|             raise ExtractorError(u'Unable to extract video URL') |             raise ExtractorError(u'Unable to extract video URL') | ||||||
|   | |||||||
| @@ -24,6 +24,112 @@ from ..utils import ( | |||||||
|     orderedSet, |     orderedSet, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | class YoutubeBaseInfoExtractor(InfoExtractor): | ||||||
|  |     """Provide base functions for Youtube extractors""" | ||||||
|  |     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' | ||||||
|  |     _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' | ||||||
|  |     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' | ||||||
|  |     _NETRC_MACHINE = 'youtube' | ||||||
|  |     # If True it will raise an error if no login info is provided | ||||||
|  |     _LOGIN_REQUIRED = False | ||||||
|  |  | ||||||
|  |     def report_lang(self): | ||||||
|  |         """Report attempt to set language.""" | ||||||
|  |         self.to_screen(u'Setting language') | ||||||
|  |  | ||||||
|  |     def _set_language(self): | ||||||
|  |         request = compat_urllib_request.Request(self._LANG_URL) | ||||||
|  |         try: | ||||||
|  |             self.report_lang() | ||||||
|  |             compat_urllib_request.urlopen(request).read() | ||||||
|  |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|  |             self._downloader.report_warning(u'unable to set language: %s' % compat_str(err)) | ||||||
|  |             return False | ||||||
|  |         return True | ||||||
|  |  | ||||||
|  |     def _login(self): | ||||||
|  |         (username, password) = self._get_login_info() | ||||||
|  |         # No authentication to be performed | ||||||
|  |         if username is None: | ||||||
|  |             if self._LOGIN_REQUIRED: | ||||||
|  |                 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||||
|  |             return False | ||||||
|  |  | ||||||
|  |         request = compat_urllib_request.Request(self._LOGIN_URL) | ||||||
|  |         try: | ||||||
|  |             login_page = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||||
|  |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|  |             self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err)) | ||||||
|  |             return False | ||||||
|  |  | ||||||
|  |         galx = None | ||||||
|  |         dsh = None | ||||||
|  |         match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page) | ||||||
|  |         if match: | ||||||
|  |           galx = match.group(1) | ||||||
|  |         match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page) | ||||||
|  |         if match: | ||||||
|  |           dsh = match.group(1) | ||||||
|  |  | ||||||
|  |         # Log in | ||||||
|  |         login_form_strs = { | ||||||
|  |                 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1', | ||||||
|  |                 u'Email': username, | ||||||
|  |                 u'GALX': galx, | ||||||
|  |                 u'Passwd': password, | ||||||
|  |                 u'PersistentCookie': u'yes', | ||||||
|  |                 u'_utf8': u'霱', | ||||||
|  |                 u'bgresponse': u'js_disabled', | ||||||
|  |                 u'checkConnection': u'', | ||||||
|  |                 u'checkedDomains': u'youtube', | ||||||
|  |                 u'dnConn': u'', | ||||||
|  |                 u'dsh': dsh, | ||||||
|  |                 u'pstMsg': u'0', | ||||||
|  |                 u'rmShown': u'1', | ||||||
|  |                 u'secTok': u'', | ||||||
|  |                 u'signIn': u'Sign in', | ||||||
|  |                 u'timeStmp': u'', | ||||||
|  |                 u'service': u'youtube', | ||||||
|  |                 u'uilel': u'3', | ||||||
|  |                 u'hl': u'en_US', | ||||||
|  |         } | ||||||
|  |         # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode | ||||||
|  |         # chokes on unicode | ||||||
|  |         login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items()) | ||||||
|  |         login_data = compat_urllib_parse.urlencode(login_form).encode('ascii') | ||||||
|  |         request = compat_urllib_request.Request(self._LOGIN_URL, login_data) | ||||||
|  |         try: | ||||||
|  |             self.report_login() | ||||||
|  |             login_results = compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||||
|  |             if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None: | ||||||
|  |                 self._downloader.report_warning(u'unable to log in: bad username or password') | ||||||
|  |                 return False | ||||||
|  |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|  |             self._downloader.report_warning(u'unable to log in: %s' % compat_str(err)) | ||||||
|  |             return False | ||||||
|  |         return True | ||||||
|  |  | ||||||
|  |     def _confirm_age(self): | ||||||
|  |         age_form = { | ||||||
|  |                 'next_url':     '/', | ||||||
|  |                 'action_confirm':   'Confirm', | ||||||
|  |                 } | ||||||
|  |         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) | ||||||
|  |         try: | ||||||
|  |             self.report_age_confirmation() | ||||||
|  |             compat_urllib_request.urlopen(request).read().decode('utf-8') | ||||||
|  |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|  |             raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err)) | ||||||
|  |         return True | ||||||
|  |  | ||||||
|  |     def _real_initialize(self): | ||||||
|  |         if self._downloader is None: | ||||||
|  |             return | ||||||
|  |         if not self._set_language(): | ||||||
|  |             return | ||||||
|  |         if not self._login(): | ||||||
|  |             return | ||||||
|  |         self._confirm_age() | ||||||
|  |  | ||||||
| class YoutubeSubtitlesIE(SubtitlesIE): | class YoutubeSubtitlesIE(SubtitlesIE): | ||||||
|  |  | ||||||
| @@ -83,8 +189,7 @@ class YoutubeSubtitlesIE(SubtitlesIE): | |||||||
|             self._downloader.report_warning(err_msg) |             self._downloader.report_warning(err_msg) | ||||||
|             return {} |             return {} | ||||||
|  |  | ||||||
|  | class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor): | ||||||
| class YoutubeIE(YoutubeSubtitlesIE): |  | ||||||
|     IE_DESC = u'YouTube.com' |     IE_DESC = u'YouTube.com' | ||||||
|     _VALID_URL = r"""^ |     _VALID_URL = r"""^ | ||||||
|                      ( |                      ( | ||||||
| @@ -95,7 +200,7 @@ class YoutubeIE(YoutubeSubtitlesIE): | |||||||
|                          (?:                                                  # the various things that can precede the ID: |                          (?:                                                  # the various things that can precede the ID: | ||||||
|                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ |                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ | ||||||
|                              |(?:                                             # or the v= param in all its forms |                              |(?:                                             # or the v= param in all its forms | ||||||
|                                  (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx) |                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | ||||||
|                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! |                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! | ||||||
|                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) |                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) | ||||||
|                                  v= |                                  v= | ||||||
| @@ -375,6 +480,8 @@ class YoutubeIE(YoutubeSubtitlesIE): | |||||||
|             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] |             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82] | ||||||
|         elif len(s) == 81: |         elif len(s) == 81: | ||||||
|             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] |             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] | ||||||
|  |         elif len(s) == 80: | ||||||
|  |             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] | ||||||
|         elif len(s) == 79: |         elif len(s) == 79: | ||||||
|             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] |             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] | ||||||
|  |  | ||||||
| @@ -390,105 +497,6 @@ class YoutubeIE(YoutubeSubtitlesIE): | |||||||
|             # Fallback to the other algorithms |             # Fallback to the other algorithms | ||||||
|             return self._decrypt_signature(s) |             return self._decrypt_signature(s) | ||||||
|  |  | ||||||
|  |  | ||||||
|     def _get_available_subtitles(self, video_id): |  | ||||||
|         self.report_video_subtitles_download(video_id) |  | ||||||
|         request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) |  | ||||||
|         try: |  | ||||||
|             sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8') |  | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |  | ||||||
|             return (u'unable to download video subtitles: %s' % compat_str(err), None) |  | ||||||
|         sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list) |  | ||||||
|         sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list) |  | ||||||
|         if not sub_lang_list: |  | ||||||
|             return (u'video doesn\'t have subtitles', None) |  | ||||||
|         return sub_lang_list |  | ||||||
|  |  | ||||||
|     def _list_available_subtitles(self, video_id): |  | ||||||
|         sub_lang_list = self._get_available_subtitles(video_id) |  | ||||||
|         self.report_video_subtitles_available(video_id, sub_lang_list) |  | ||||||
|  |  | ||||||
|     def _request_subtitle(self, sub_lang, sub_name, video_id, format): |  | ||||||
|         """ |  | ||||||
|         Return tuple: |  | ||||||
|         (error_message, sub_lang, sub) |  | ||||||
|         """ |  | ||||||
|         self.report_video_subtitles_request(video_id, sub_lang, format) |  | ||||||
|         params = compat_urllib_parse.urlencode({ |  | ||||||
|             'lang': sub_lang, |  | ||||||
|             'name': sub_name, |  | ||||||
|             'v': video_id, |  | ||||||
|             'fmt': format, |  | ||||||
|         }) |  | ||||||
|         url = 'http://www.youtube.com/api/timedtext?' + params |  | ||||||
|         try: |  | ||||||
|             sub = compat_urllib_request.urlopen(url).read().decode('utf-8') |  | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |  | ||||||
|             return (u'unable to download video subtitles: %s' % compat_str(err), None, None) |  | ||||||
|         if not sub: |  | ||||||
|             return (u'Did not fetch video subtitles', None, None) |  | ||||||
|         return (None, sub_lang, sub) |  | ||||||
|  |  | ||||||
|     def _request_automatic_caption(self, video_id, webpage): |  | ||||||
|         """We need the webpage for getting the captions url, pass it as an |  | ||||||
|            argument to speed up the process.""" |  | ||||||
|         sub_lang = self._downloader.params.get('subtitleslang') or 'en' |  | ||||||
|         sub_format = self._downloader.params.get('subtitlesformat') |  | ||||||
|         self.to_screen(u'%s: Looking for automatic captions' % video_id) |  | ||||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) |  | ||||||
|         err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang |  | ||||||
|         if mobj is None: |  | ||||||
|             return [(err_msg, None, None)] |  | ||||||
|         player_config = json.loads(mobj.group(1)) |  | ||||||
|         try: |  | ||||||
|             args = player_config[u'args'] |  | ||||||
|             caption_url = args[u'ttsurl'] |  | ||||||
|             timestamp = args[u'timestamp'] |  | ||||||
|             params = compat_urllib_parse.urlencode({ |  | ||||||
|                 'lang': 'en', |  | ||||||
|                 'tlang': sub_lang, |  | ||||||
|                 'fmt': sub_format, |  | ||||||
|                 'ts': timestamp, |  | ||||||
|                 'kind': 'asr', |  | ||||||
|             }) |  | ||||||
|             subtitles_url = caption_url + '&' + params |  | ||||||
|             sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') |  | ||||||
|             return [(None, sub_lang, sub)] |  | ||||||
|         except KeyError: |  | ||||||
|             return [(err_msg, None, None)] |  | ||||||
|  |  | ||||||
|     def _extract_subtitle(self, video_id): |  | ||||||
|         """ |  | ||||||
|         Return a list with a tuple: |  | ||||||
|         [(error_message, sub_lang, sub)] |  | ||||||
|         """ |  | ||||||
|         sub_lang_list = self._get_available_subtitles(video_id) |  | ||||||
|         sub_format = self._downloader.params.get('subtitlesformat') |  | ||||||
|         if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles |  | ||||||
|             return [(sub_lang_list[0], None, None)] |  | ||||||
|         if self._downloader.params.get('subtitleslang', False): |  | ||||||
|             sub_lang = self._downloader.params.get('subtitleslang') |  | ||||||
|         elif 'en' in sub_lang_list: |  | ||||||
|             sub_lang = 'en' |  | ||||||
|         else: |  | ||||||
|             sub_lang = list(sub_lang_list.keys())[0] |  | ||||||
|         if not sub_lang in sub_lang_list: |  | ||||||
|             return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)] |  | ||||||
|  |  | ||||||
|         subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) |  | ||||||
|         return [subtitle] |  | ||||||
|  |  | ||||||
|     def _extract_all_subtitles(self, video_id): |  | ||||||
|         sub_lang_list = self._get_available_subtitles(video_id) |  | ||||||
|         sub_format = self._downloader.params.get('subtitlesformat') |  | ||||||
|         if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles |  | ||||||
|             return [(sub_lang_list[0], None, None)] |  | ||||||
|         subtitles = [] |  | ||||||
|         for sub_lang in sub_lang_list: |  | ||||||
|             subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format) |  | ||||||
|             subtitles.append(subtitle) |  | ||||||
|         return subtitles |  | ||||||
|  |  | ||||||
|     def _print_formats(self, formats): |     def _print_formats(self, formats): | ||||||
|         print('Available formats:') |         print('Available formats:') | ||||||
|         for x in formats: |         for x in formats: | ||||||
|   | |||||||
| @@ -657,6 +657,9 @@ def determine_ext(url, default_ext=u'unknown_video'): | |||||||
|     else: |     else: | ||||||
|         return default_ext |         return default_ext | ||||||
|  |  | ||||||
|  | def subtitles_filename(filename, sub_lang, sub_format): | ||||||
|  |     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format | ||||||
|  |  | ||||||
| def date_from_str(date_str): | def date_from_str(date_str): | ||||||
|     """ |     """ | ||||||
|     Return a datetime object from a string in the format YYYYMMDD or |     Return a datetime object from a string in the format YYYYMMDD or | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Ismael Mejia
					Ismael Mejia