	Merge branch 'master' into subtitles_rework
@@ -32,12 +32,15 @@ tests = [
    # 83
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
     ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
    # 82
    # 82 - vflZK4ZYR 2013/08/23
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
    # 81 - vflLC8JvQ 2013/07/25
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
     "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
    # 80 - vflZK4ZYR 2013/08/23 (sporadic)
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
     "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
    # 79 - vflLC8JvQ 2013/07/25 (sporadic)
    ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
     "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),

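Each tuple above pairs a scrambled signature with its expected decryption, keyed by length. A minimal harness sketch (an assumption for illustration; the repository's real test driver is not shown in this diff) would simply feed every pair through the decryption routine:

# Minimal sketch; `decrypt_signature` is a stand-in for the real
# length-dispatching routine in the YouTube extractor.
def check_signatures(decrypt_signature, tests):
    for scrambled, expected in tests:
        got = decrypt_signature(scrambled)
        assert got == expected, 'len %d failed: %r' % (len(scrambled), got)
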
@@ -71,12 +71,17 @@ class FFmpegPostProcessor(PostProcessor):
        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
        return dict((program, executable(program)) for program in programs)

    def run_ffmpeg(self, path, out_path, opts):
    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
        cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)]

        files_cmd = []
        for path in input_paths:
            files_cmd.extend(['-i', encodeFilename(path)])
        cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
               + opts +
               [encodeFilename(self._ffmpeg_filename_argument(out_path))])

        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout,stderr = p.communicate()
        if p.returncode != 0:
@@ -84,6 +89,9 @@ class FFmpegPostProcessor(PostProcessor):
            msg = stderr.strip().split('\n')[-1]
            raise FFmpegPostProcessorError(msg)

    def run_ffmpeg(self, path, out_path, opts):
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
        if fn.startswith(u'-'):
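For orientation, this is the command list the new run_ffmpeg_multiple_files assembles for two inputs; the paths and opts are made up, and ffmpeg is assumed to have been detected instead of avconv:

# Illustration only, not part of the diff.
input_paths = ['video.mp4', 'video.en.srt']
opts = ['-c', 'copy']
# cmd becomes:
# ['ffmpeg', '-y', '-i', 'video.mp4', '-i', 'video.en.srt',
#  '-c', 'copy', 'out.mp4']
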
@@ -232,3 +240,226 @@ class FFmpegVideoConvertor(FFmpegPostProcessor):
        information['format'] = self._preferedformat
        information['ext'] = self._preferedformat
        return False,information


class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    def __init__(self, downloader=None, subtitlesformat='srt'):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        self._subformat = subtitlesformat

    @classmethod
    def _conver_lang_code(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        return cls._lang_map.get(code[:2])

    def run(self, information):
        if information['ext'] != u'mp4':
            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
            return True, information
        sub_langs = [key for key in information['subtitles']]

        filename = information['filepath']
        input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]

        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
        for (i, lang) in enumerate(sub_langs):
            opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
            lang_code = self._conver_lang_code(lang)
            if lang_code is not None:
                opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
        opts.extend(['-f', 'mp4'])

        temp_filename = filename + u'.temp'
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, information

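To make the embedding step concrete: for an mp4 with a single English subtitle written as video.en.srt (hypothetical filenames), run() above hands run_ffmpeg_multiple_files roughly the following argument list, then swaps the temporary output back over the original file:

# Illustration only; assumes ffmpeg rather than avconv was found.
# ['ffmpeg', '-y', '-i', 'video.mp4', '-i', 'video.en.srt',
#  '-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy',
#  '-map', '1:0', '-c:s:0', 'mov_text',
#  '-metadata:s:s:0', 'language=eng',
#  '-f', 'mp4', 'video.mp4.temp']
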
@@ -221,16 +221,19 @@ class YoutubeDL(object):

    def report_writesubtitles(self, sub_filename):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing subtitle: ' + sub_filename)

    def report_existingsubtitles(self, sub_filename):
        """ Report that the subtitles file has been already written """
        self.to_screen(u'[info] Skipping existing subtitle: ' + sub_filename)
        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file has been written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except (UnicodeEncodeError) as err:
            self.to_screen(u'[download] The file has already been downloaded')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1
@@ -489,16 +492,12 @@ class YoutubeDL(object):
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['subtitles']
            sub_format = self.params.get('subtitlesformat')

            for sub_lang in subtitles.keys():
                sub = subtitles[sub_lang]
                if sub is None:
                    continue
                try:
                    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
                    if os.path.isfile(encodeFilename(sub_filename)):
                        self.report_existingsubtitles(sub_filename)
                        continue
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    self.report_writesubtitles(sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub)

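A sketch of the structure the subtitle-writing loop expects; the payload text is an assumption for illustration, only the mapping of language code to subtitle text (or None) follows from the code above:

# Languages mapped to None are skipped by the loop.
info_dict = {
    u'subtitles': {
        u'en': u'1\n00:00:00,000 --> 00:00:02,000\nHello world\n',
        u'fr': None,
    },
}
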
@@ -320,6 +320,8 @@ def parseOpts(overrideArguments=None):
            help='keeps the video file on disk after the post-processing; the video is erased by default')
    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
            help='do not overwrite post-processed files; the post-processed files are overwritten by default')
    postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
            help='embed subtitles in the video (only for mp4 videos)')


    parser.add_option_group(general)
@@ -608,6 +610,8 @@ def _real_main(argv=None):
        ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
    if opts.recodevideo:
        ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
    if opts.embedsubtitles:
        ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))

    # Update version
    if opts.update_self:

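The same wiring can be sketched outside the command line; the import paths, the 'writesubtitles' parameter and the URL below are assumptions for illustration, not taken from this diff:

# Hypothetical programmatic use of the new post-processor.
from youtube_dl.YoutubeDL import YoutubeDL
from youtube_dl.PostProcessor import FFmpegEmbedSubtitlePP

ydl = YoutubeDL({'writesubtitles': True, 'subtitlesformat': 'srt'})
ydl.add_default_info_extractors()
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat='srt'))
ydl.download(['http://www.youtube.com/watch?v=EXAMPLE_ID'])
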
@@ -1,5 +1,6 @@
import re
import json
import itertools
import socket

from .common import InfoExtractor
@@ -33,7 +34,7 @@ class DailyMotionSubtitlesIE(NoAutoSubtitlesIE):
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}

class DailymotionIE(DailyMotionSubtitlesIE):
class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
    """Information Extractor for Dailymotion"""

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
@@ -85,9 +86,9 @@ class DailymotionIE(DailyMotionSubtitlesIE):
        for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
                    'stream_h264_hq_url','stream_h264_url',
                    'stream_h264_ld_url']:
            if info.get(key):  # key in info and info[key]:
            if info.get(key):#key in info and info[key]:
                max_quality = key
                self.to_screen(u'%s: Using %s' % (video_id, key))
                self.to_screen(u'Using %s' % key)
                break
        else:
            raise ExtractorError(u'Unable to extract video URL')

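One detail in the Dailymotion hunk that is easy to misread: the else belongs to the for loop, not to the if, so it only runs when the loop finishes without a break, i.e. when none of the stream keys is present. A standalone illustration (generic values, not the extractor's own data):

# Python's for/else: the else clause fires only if no break occurred.
for key in ['stream_a', 'stream_b']:
    if key == 'stream_c':
        break
else:
    print('no matching key; this is the error path')
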
@@ -24,6 +24,112 @@ from ..utils import (
    orderedSet,
)

class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')

    def _set_language(self):
        request = compat_urllib_request.Request(self._LANG_URL)
        try:
            self.report_lang()
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
            return False
        return True

    def _login(self):
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return False

        request = compat_urllib_request.Request(self._LOGIN_URL)
        try:
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
            return False

        galx = None
        dsh = None
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
          galx = match.group(1)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
        if match:
          dsh = match.group(1)

        # Log in
        login_form_strs = {
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'Email': username,
                u'GALX': galx,
                u'Passwd': password,
                u'PersistentCookie': u'yes',
                u'_utf8': u'霱',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'dnConn': u'',
                u'dsh': dsh,
                u'pstMsg': u'0',
                u'rmShown': u'1',
                u'secTok': u'',
                u'signIn': u'Sign in',
                u'timeStmp': u'',
                u'service': u'youtube',
                u'uilel': u'3',
                u'hl': u'en_US',
        }
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        # chokes on unicode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        try:
            self.report_login()
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
                return False
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
            return False
        return True

    def _confirm_age(self):
        age_form = {
                'next_url':     '/',
                'action_confirm':   'Confirm',
                }
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
        try:
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
        return True

    def _real_initialize(self):
        if self._downloader is None:
            return
        if not self._set_language():
            return
        if not self._login():
            return
        self._confirm_age()

class YoutubeSubtitlesIE(SubtitlesIE):

@@ -83,8 +189,7 @@ class YoutubeSubtitlesIE(SubtitlesIE):
            self._downloader.report_warning(err_msg)
            return {}


class YoutubeIE(YoutubeSubtitlesIE):
class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com'
    _VALID_URL = r"""^
                     (
@@ -95,7 +200,7 @@ class YoutubeIE(YoutubeSubtitlesIE):
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                 v=
@@ -375,6 +480,8 @@ class YoutubeIE(YoutubeSubtitlesIE):
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
        elif len(s) == 81:
            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        elif len(s) == 80:
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
        elif len(s) == 79:
            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]

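The new len(s) == 80 branch can be checked directly against the test vector added in the first hunk of this commit; a quick self-contained check (not part of the diff):

# Verifies the 80-character permutation against the table entry above.
s = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>"
assert len(s) == 80
assert (s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] ==
        "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>")
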
@@ -390,105 +497,6 @@ class YoutubeIE(YoutubeSubtitlesIE):
            # Fallback to the other algorithms
            return self._decrypt_signature(s)


    def _get_available_subtitles(self, video_id):
        self.report_video_subtitles_download(video_id)
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
        try:
            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            return (u'unable to download video subtitles: %s' % compat_str(err), None)
        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
        if not sub_lang_list:
            return (u'video doesn\'t have subtitles', None)
        return sub_lang_list

    def _list_available_subtitles(self, video_id):
        sub_lang_list = self._get_available_subtitles(video_id)
        self.report_video_subtitles_available(video_id, sub_lang_list)

    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        """
        Return tuple:
        (error_message, sub_lang, sub)
        """
        self.report_video_subtitles_request(video_id, sub_lang, format)
        params = compat_urllib_parse.urlencode({
            'lang': sub_lang,
            'name': sub_name,
            'v': video_id,
            'fmt': format,
        })
        url = 'http://www.youtube.com/api/timedtext?' + params
        try:
            sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
        if not sub:
            return (u'Did not fetch video subtitles', None, None)
        return (None, sub_lang, sub)

    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
           argument to speed up the process."""
        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
        if mobj is None:
            return [(err_msg, None, None)]
        player_config = json.loads(mobj.group(1))
        try:
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            params = compat_urllib_parse.urlencode({
                'lang': 'en',
                'tlang': sub_lang,
                'fmt': sub_format,
                'ts': timestamp,
                'kind': 'asr',
            })
            subtitles_url = caption_url + '&' + params
            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
            return [(None, sub_lang, sub)]
        except KeyError:
            return [(err_msg, None, None)]

    def _extract_subtitle(self, video_id):
        """
        Return a list with a tuple:
        [(error_message, sub_lang, sub)]
        """
        sub_lang_list = self._get_available_subtitles(video_id)
        sub_format = self._downloader.params.get('subtitlesformat')
        if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
            return [(sub_lang_list[0], None, None)]
        if self._downloader.params.get('subtitleslang', False):
            sub_lang = self._downloader.params.get('subtitleslang')
        elif 'en' in sub_lang_list:
            sub_lang = 'en'
        else:
            sub_lang = list(sub_lang_list.keys())[0]
        if not sub_lang in sub_lang_list:
            return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]

        subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
        return [subtitle]

    def _extract_all_subtitles(self, video_id):
        sub_lang_list = self._get_available_subtitles(video_id)
        sub_format = self._downloader.params.get('subtitlesformat')
        if  isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
            return [(sub_lang_list[0], None, None)]
        subtitles = []
        for sub_lang in sub_lang_list:
            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
            subtitles.append(subtitle)
        return subtitles

    def _print_formats(self, formats):
        print('Available formats:')
        for x in formats:

@@ -657,6 +657,9 @@ def determine_ext(url, default_ext=u'unknown_video'):
    else:
        return default_ext

def subtitles_filename(filename, sub_lang, sub_format):
    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format

def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or

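The new helper centralizes the naming convention that both the YoutubeDL subtitle-writing code and FFmpegEmbedSubtitlePP rely on; for example (hypothetical filename):

# With the definition above:
#   subtitles_filename(u'My Video.mp4', 'en', 'srt') -> u'My Video.en.srt'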