mirror of
				https://github.com/yt-dlp/yt-dlp.git
				synced 2025-10-30 22:25:19 +00:00 
			
		
		
		
	[skip travis] adding automerge support
Copies the content of the youtube_dl folder into youtube_dlc, and excludes the youtube_dl folder when compiling.
This commit is contained in:
		| @@ -35,4 +35,7 @@ jobs: | ||||
|     - env: JYTHON=true; YTDL_TEST_SET=core | ||||
| before_install: | ||||
|   - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi | ||||
| before_script: | ||||
|   - rm -rf /youtube_dlc/* | ||||
|   - cp /youtube_dl/* /youtube_dlc | ||||
| script: ./devscripts/run_tests.sh | ||||
|   | ||||
							
								
								
									
										2
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								setup.py
									
									
									
									
									
								
							| @@ -67,7 +67,7 @@ setup( | ||||
|     long_description=LONG_DESCRIPTION, | ||||
|     # long_description_content_type="text/markdown", | ||||
|     url="https://github.com/blackjack4494/youtube-dlc", | ||||
|     packages=find_packages(), | ||||
|     packages=find_packages(exclude=("youtube_dl",)), | ||||
| 	#packages=[ | ||||
|     #    'youtube_dlc', | ||||
|     #    'youtube_dlc.extractor', 'youtube_dlc.downloader', | ||||
|   | ||||
							
								
								
									
										33
									
								
								youtube-dlc.spec
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube-dlc.spec
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build specification for the youtube-dlc executable.
# Consumed by `pyinstaller youtube-dlc.spec`; the Analysis/PYZ/EXE names are
# injected into the namespace by PyInstaller when it executes this file.

block_cipher = None


# Collect the application's modules starting from the package entry point.
# NOTE(review): pathex is a machine-specific absolute Windows path
# ('D:\gitkraken\youtube-dl') — presumably the original author's checkout;
# confirm whether it should be made relative for reproducible builds.
a = Analysis(['youtube_dlc\\__main__.py'],
             pathex=['D:\\gitkraken\\youtube-dl'],
             binaries=[],
             datas=[],
             hiddenimports=[],
             hookspath=[],
             runtime_hooks=[],
             excludes=[],
             win_no_prefer_redirects=False,
             win_private_assemblies=False,
             cipher=block_cipher,
             noarchive=False)
# Bundle the collected pure-Python modules into a PYZ archive.
pyz = PYZ(a.pure, a.zipped_data,
             cipher=block_cipher)
# Produce a single-file console executable named 'youtube-dlc',
# UPX-compressed, unpacking to a temporary dir at runtime.
exe = EXE(pyz,
          a.scripts,
          a.binaries,
          a.zipfiles,
          a.datas,
          [],
          name='youtube-dlc',
          debug=False,
          bootloader_ignore_signals=False,
          strip=False,
          upx=True,
          upx_exclude=[],
          runtime_tmpdir=None,
          console=True )
							
								
								
									
										2417
									
								
								youtube_dl/YoutubeDL.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2417
									
								
								youtube_dl/YoutubeDL.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										483
									
								
								youtube_dl/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										483
									
								
								youtube_dl/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,483 @@ | ||||
| #!/usr/bin/env python | ||||
| # coding: utf-8 | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|  | ||||
| import codecs | ||||
| import io | ||||
| import os | ||||
| import random | ||||
| import sys | ||||
|  | ||||
|  | ||||
| from .options import ( | ||||
|     parseOpts, | ||||
| ) | ||||
| from .compat import ( | ||||
|     compat_getpass, | ||||
|     compat_shlex_split, | ||||
|     workaround_optparse_bug9161, | ||||
| ) | ||||
| from .utils import ( | ||||
|     DateRange, | ||||
|     decodeOption, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     DownloadError, | ||||
|     expand_path, | ||||
|     match_filter_func, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
|     read_batch_urls, | ||||
|     SameFileError, | ||||
|     setproctitle, | ||||
|     std_headers, | ||||
|     write_string, | ||||
|     render_table, | ||||
| ) | ||||
| from .update import update_self | ||||
| from .downloader import ( | ||||
|     FileDownloader, | ||||
| ) | ||||
| from .extractor import gen_extractors, list_extractors | ||||
| from .extractor.adobepass import MSO_INFO | ||||
| from .YoutubeDL import YoutubeDL | ||||
|  | ||||
|  | ||||
def _real_main(argv=None):
    """Parse command-line options, validate them, build the YoutubeDL
    options dict and run the requested downloads.

    Every code path ends in sys.exit(); callers that need to continue
    afterwards must catch SystemExit (see main()).

    @param argv  argument list to parse instead of sys.argv[1:], or None
    """
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/ytdl-org/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    workaround_optparse_bug9161()

    # Show 'youtube-dlc' instead of 'python' in process listings (ps, top).
    setproctitle('youtube-dlc')

    parser, opts, args = parseOpts(argv)

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Set referer
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

    # Custom HTTP headers
    if opts.headers is not None:
        for h in opts.headers:
            if ':' not in h:
                parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
            # Split only on the first ':' so header values may contain colons.
            key, value = h.split(':', 1)
            if opts.verbose:
                write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
            std_headers[key] = value

    # Dump user agent
    if opts.dump_user_agent:
        write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
        sys.exit(0)

    # Batch file verification
    batch_urls = []
    if opts.batchfile is not None:
        try:
            # '-' means read the URL list from stdin.
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = io.open(
                    expand_path(opts.batchfile),
                    'r', encoding='utf-8', errors='ignore')
            batch_urls = read_batch_urls(batchfd)
            if opts.verbose:
                write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
        except IOError:
            sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
    all_urls = batch_urls + [url.strip() for url in args]  # batch_urls are already stripped in read_batch_urls
    # Decode byte URLs (can happen on Python 2) using the locale encoding.
    _enc = preferredencoding()
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]

    # --list-extractors: print every extractor name plus the given URLs it matches.
    if opts.list_extractors:
        for ie in list_extractors(opts.age_limit):
            write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                write_string('  ' + mu + '\n', out=sys.stdout)
        sys.exit(0)
    if opts.list_extractor_descriptions:
        for ie in list_extractors(opts.age_limit):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            if desc is False:
                continue
            # For search extractors, show a randomised example query.
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
                _COUNTS = ('', '5', '10', 'all')
                desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            write_string(desc + '\n', out=sys.stdout)
        sys.exit(0)
    if opts.ap_list_mso:
        table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
        write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error('using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error('account username missing\n')
    if opts.ap_password is not None and opts.ap_username is None:
        parser.error('TV Provider account username missing\n')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error('using output template conflicts with using title, video ID or auto number')
    if opts.autonumber_size is not None:
        if opts.autonumber_size <= 0:
            parser.error('auto number size must be positive')
    if opts.autonumber_start is not None:
        if opts.autonumber_start < 0:
            parser.error('auto number start must be positive or 0')
    if opts.usetitle and opts.useid:
        parser.error('using title conflicts with using video ID')
    # Prompt interactively for passwords that were not supplied.
    if opts.username is not None and opts.password is None:
        opts.password = compat_getpass('Type account password and press [Return]: ')
    if opts.ap_username is not None and opts.ap_password is None:
        opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
    # Normalise human-readable byte sizes (e.g. '50K', '4.2M') to integers.
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error('invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.min_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
        if numeric_limit is None:
            parser.error('invalid min_filesize specified')
        opts.min_filesize = numeric_limit
    if opts.max_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
        if numeric_limit is None:
            parser.error('invalid max_filesize specified')
        opts.max_filesize = numeric_limit
    if opts.sleep_interval is not None:
        if opts.sleep_interval < 0:
            parser.error('sleep interval must be positive or 0')
    if opts.max_sleep_interval is not None:
        if opts.max_sleep_interval < 0:
            parser.error('max sleep interval must be positive or 0')
        if opts.sleep_interval is None:
            parser.error('min sleep interval must be specified, use --min-sleep-interval')
        if opts.max_sleep_interval < opts.sleep_interval:
            parser.error('max sleep interval must be greater than or equal to min sleep interval')
    else:
        # No max given: sleep a fixed (non-random) interval.
        opts.max_sleep_interval = opts.sleep_interval
    if opts.ap_mso and opts.ap_mso not in MSO_INFO:
        parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')

    def parse_retries(retries):
        # 'inf'/'infinite' -> retry forever; anything else must be an int.
        if retries in ('inf', 'infinite'):
            parsed_retries = float('inf')
        else:
            try:
                parsed_retries = int(retries)
            except (TypeError, ValueError):
                parser.error('invalid retry count specified')
        return parsed_retries
    if opts.retries is not None:
        opts.retries = parse_retries(opts.retries)
    if opts.fragment_retries is not None:
        opts.fragment_retries = parse_retries(opts.fragment_retries)
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error('invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    if opts.http_chunk_size is not None:
        numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
        if not numeric_chunksize:
            parser.error('invalid http chunk size specified')
        opts.http_chunk_size = numeric_chunksize
    if opts.playliststart <= 0:
        raise ValueError('Playlist start must be positive')
    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
        raise ValueError('Playlist end must be greater than playlist start')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error('invalid audio format specified')
    if opts.audioquality:
        # Accept values like '128K'; keep only the digits.
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error('invalid audio quality specified')
    if opts.recodevideo is not None:
        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
            parser.error('invalid video recode format specified')
    if opts.convertsubtitles is not None:
        if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
            parser.error('invalid subtitle format specified')

    # --date wins over --dateafter/--datebefore.
    if opts.date is not None:
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)

    # Do not download videos when there are audio-only formats
    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = 'bestaudio/best'

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    if opts.allsubtitles and not opts.writeautomaticsub:
        opts.writesubtitles = True

    # Pick the output template: explicit --output wins, then the legacy
    # title/id/autonumber shortcut flags, then the default template.
    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
               or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
               or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
               or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
               or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
               or (opts.useid and '%(id)s.%(ext)s')
               or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
               or DEFAULT_OUTTMPL)
    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error('Cannot download a video and extract audio into the same'
                     ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                     ' template'.format(outtmpl))

    # Any --get-* style option implies simulate/quiet mode below.
    any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
    any_printing = opts.print_json
    download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive

    # PostProcessors
    # Built in execution order; ordering constraints are noted inline.
    postprocessors = []
    if opts.metafromtitle:
        postprocessors.append({
            'key': 'MetadataFromTitle',
            'titleformat': opts.metafromtitle
        })
    if opts.extractaudio:
        postprocessors.append({
            'key': 'FFmpegExtractAudio',
            'preferredcodec': opts.audioformat,
            'preferredquality': opts.audioquality,
            'nopostoverwrites': opts.nopostoverwrites,
        })
    if opts.recodevideo:
        postprocessors.append({
            'key': 'FFmpegVideoConvertor',
            'preferedformat': opts.recodevideo,
        })
    # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
    # FFmpegExtractAudioPP as containers before conversion may not support
    # metadata (3gp, webm, etc.)
    # And this post-processor should be placed before other metadata
    # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
    # extra metadata. By default ffmpeg preserves metadata applicable for both
    # source and target containers. From this point the container won't change,
    # so metadata can be added here.
    if opts.addmetadata:
        postprocessors.append({'key': 'FFmpegMetadata'})
    if opts.convertsubtitles:
        postprocessors.append({
            'key': 'FFmpegSubtitlesConvertor',
            'format': opts.convertsubtitles,
        })
    if opts.embedsubtitles:
        postprocessors.append({
            'key': 'FFmpegEmbedSubtitle',
        })
    if opts.embedthumbnail:
        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
        postprocessors.append({
            'key': 'EmbedThumbnail',
            'already_have_thumbnail': already_have_thumbnail
        })
        # Embedding needs the thumbnail on disk; force writing it, the
        # post-processor deletes it afterwards when it was not requested.
        if not already_have_thumbnail:
            opts.writethumbnail = True
    # XAttrMetadataPP should be run after post-processors that may change file
    # contents
    if opts.xattrs:
        postprocessors.append({'key': 'XAttrMetadata'})
    # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
    # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
    if opts.exec_cmd:
        postprocessors.append({
            'key': 'ExecAfterDownload',
            'exec_cmd': opts.exec_cmd,
        })
    external_downloader_args = None
    if opts.external_downloader_args:
        external_downloader_args = compat_shlex_split(opts.external_downloader_args)
    postprocessor_args = None
    if opts.postprocessor_args:
        postprocessor_args = compat_shlex_split(opts.postprocessor_args)
    match_filter = (
        None if opts.match_filter is None
        else match_filter_func(opts.match_filter))

    # Translate the parsed/normalised options into the option names that
    # YoutubeDL expects.
    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'ap_mso': opts.ap_mso,
        'ap_username': opts.ap_username,
        'ap_password': opts.ap_password,
        'quiet': (opts.quiet or any_getting or any_printing),
        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson or opts.print_json,
        'dump_single_json': opts.dump_single_json,
        'simulate': opts.simulate or any_getting,
        'skip_download': opts.skip_download,
        'format': opts.format,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'autonumber_size': opts.autonumber_size,
        'autonumber_start': opts.autonumber_start,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'force_generic_extractor': opts.force_generic_extractor,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'fragment_retries': opts.fragment_retries,
        'skip_unavailable_fragments': opts.skip_unavailable_fragments,
        'keep_fragments': opts.keep_fragments,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'http_chunk_size': opts.http_chunk_size,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'progress_with_newline': opts.progress_with_newline,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'playlistreverse': opts.playlist_reverse,
        'playlistrandom': opts.playlist_random,
        'noplaylist': opts.noplaylist,
        # Output template '-' means "write media to stdout", so logging must
        # go to stderr instead.
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeannotations': opts.writeannotations,
        'writeinfojson': opts.writeinfojson,
        'writethumbnail': opts.writethumbnail,
        'write_all_thumbnails': opts.write_all_thumbnails,
        'writesubtitles': opts.writesubtitles,
        'writeautomaticsub': opts.writeautomaticsub,
        'allsubtitles': opts.allsubtitles,
        'listsubtitles': opts.listsubtitles,
        'subtitlesformat': opts.subtitlesformat,
        'subtitleslangs': opts.subtitleslangs,
        'matchtitle': decodeOption(opts.matchtitle),
        'rejecttitle': decodeOption(opts.rejecttitle),
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'dump_intermediate_pages': opts.dump_intermediate_pages,
        'write_pages': opts.write_pages,
        'test': opts.test,
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
        'min_views': opts.min_views,
        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': download_archive_fn,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'extract_flat': opts.extract_flat,
        'mark_watched': opts.mark_watched,
        'merge_output_format': opts.merge_output_format,
        'postprocessors': postprocessors,
        'fixup': opts.fixup,
        'source_address': opts.source_address,
        'call_home': opts.call_home,
        'sleep_interval': opts.sleep_interval,
        'max_sleep_interval': opts.max_sleep_interval,
        'external_downloader': opts.external_downloader,
        'list_thumbnails': opts.list_thumbnails,
        'playlist_items': opts.playlist_items,
        'xattr_set_filesize': opts.xattr_set_filesize,
        'match_filter': match_filter,
        'no_color': opts.no_color,
        'ffmpeg_location': opts.ffmpeg_location,
        'hls_prefer_native': opts.hls_prefer_native,
        'hls_use_mpegts': opts.hls_use_mpegts,
        'external_downloader_args': external_downloader_args,
        'postprocessor_args': postprocessor_args,
        'cn_verification_proxy': opts.cn_verification_proxy,
        'geo_verification_proxy': opts.geo_verification_proxy,
        'config_location': opts.config_location,
        'geo_bypass': opts.geo_bypass,
        'geo_bypass_country': opts.geo_bypass_country,
        'geo_bypass_ip_block': opts.geo_bypass_ip_block,
        # just for deprecation check
        'autonumber': opts.autonumber if opts.autonumber is True else None,
        'usetitle': opts.usetitle if opts.usetitle is True else None,
    }

    with YoutubeDL(ydl_opts) as ydl:
        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose, ydl._opener)

        # Remove cache dir
        if opts.rm_cachedir:
            ydl.cache.remove()

        # Maybe do nothing
        if (len(all_urls) < 1) and (opts.load_info_filename is None):
            # --update / --rm-cache-dir without URLs are complete actions.
            if opts.update_self or opts.rm_cachedir:
                sys.exit()

            ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
            parser.error(
                'You must provide at least one URL.\n'
                'Type youtube-dlc --help to see a list of all options.')

        try:
            if opts.load_info_filename is not None:
                retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
            else:
                retcode = ydl.download(all_urls)
        except MaxDownloadsReached:
            ydl.to_screen('--max-download limit reached, aborting.')
            # 101 signals "stopped by --max-downloads" to the caller.
            retcode = 101

    sys.exit(retcode)
|  | ||||
|  | ||||
def main(argv=None):
    """Public entry point: run _real_main and translate known errors into
    process exit codes/messages.

    @param argv  argument list to parse instead of sys.argv[1:], or None
    """
    try:
        _real_main(argv)
    except DownloadError:
        # Already reported by YoutubeDL; just signal failure.
        sys.exit(1)
    except SameFileError:
        sys.exit('ERROR: fixed output name but more than one file to download')
    except KeyboardInterrupt:
        sys.exit('\nERROR: Interrupted by user')
|  | ||||
|  | ||||
| __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] | ||||
							
								
								
									
										19
									
								
								youtube_dl/__main__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								youtube_dl/__main__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
#!/usr/bin/env python
from __future__ import unicode_literals

# Execute with
# $ python youtube_dlc/__main__.py (2.6+)
# $ python -m youtube_dlc          (2.7+)

import sys

if __package__ is None and not hasattr(sys, 'frozen'):
    # direct call of __main__.py
    # Put the repository root on sys.path so the package is importable
    # when this file is run as a plain script (not frozen, no package).
    import os.path
    path = os.path.realpath(os.path.abspath(__file__))
    sys.path.insert(0, os.path.dirname(os.path.dirname(path)))

# NOTE(review): this file lives under youtube_dl/ but imports youtube_dlc —
# presumably intentional during the dl->dlc rename; confirm the package name.
import youtube_dlc

if __name__ == '__main__':
    youtube_dlc.main()
							
								
								
									
										361
									
								
								youtube_dl/aes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										361
									
								
								youtube_dl/aes.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,361 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from math import ceil | ||||
|  | ||||
| from .compat import compat_b64decode | ||||
| from .utils import bytes_to_intlist, intlist_to_bytes | ||||
|  | ||||
| BLOCK_SIZE_BYTES = 16 | ||||
|  | ||||
|  | ||||
def aes_ctr_decrypt(data, key, counter):
    """
    Decrypt AES-CTR ciphertext.

    @param {int[]} data        cipher bytes (as a list of ints)
    @param {int[]} key         16/24/32-Byte cipher key
    @param {instance} counter  object whose next_value() returns the next
                               16-Byte counter block as {int[]}
    @returns {int[]}           decrypted data, same length as *data*
    """
    schedule = key_expansion(key)
    n_blocks = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))

    plaintext = []
    for idx in range(n_blocks):
        ctr_block = counter.next_value()
        chunk = data[idx * BLOCK_SIZE_BYTES: (idx + 1) * BLOCK_SIZE_BYTES]
        # Zero-pad a short trailing chunk up to one full block.
        chunk = chunk + [0] * (BLOCK_SIZE_BYTES - len(chunk))

        # CTR mode: keystream = AES-encrypt(counter); plaintext = chunk XOR keystream.
        keystream = aes_encrypt(ctr_block, schedule)
        plaintext.extend(xor(chunk, keystream))

    # Trim the padding introduced for the final partial block.
    return plaintext[:len(data)]
|  | ||||
|  | ||||
def aes_cbc_decrypt(data, key, iv):
    """
    Decrypt AES-CBC ciphertext.

    @param {int[]} data        cipher bytes (as a list of ints)
    @param {int[]} key         16/24/32-Byte cipher key
    @param {int[]} iv          16-Byte IV
    @returns {int[]}           decrypted data, same length as *data*
    """
    schedule = key_expansion(key)
    total = len(data)
    n_blocks = int(ceil(float(total) / BLOCK_SIZE_BYTES))

    plaintext = []
    prev_cipher = iv
    for idx in range(n_blocks):
        chunk = data[idx * BLOCK_SIZE_BYTES: (idx + 1) * BLOCK_SIZE_BYTES]
        # Zero-pad a short trailing chunk up to one full block.
        chunk = chunk + [0] * (BLOCK_SIZE_BYTES - len(chunk))

        # CBC mode: plaintext = AES-decrypt(chunk) XOR previous cipher block.
        plaintext.extend(xor(aes_decrypt(chunk, schedule), prev_cipher))
        prev_cipher = chunk

    # Trim the padding introduced for the final partial block.
    return plaintext[:total]
|  | ||||
|  | ||||
def aes_cbc_encrypt(data, key, iv):
    """
    Encrypt with aes in CBC mode. Using PKCS#7 padding

    @param {int[]} data        cleartext
    @param {int[]} key         16/24/32-Byte cipher key
    @param {int[]} iv          16-Byte IV
    @returns {int[]}           encrypted data
    """
    schedule = key_expansion(key)
    total_blocks = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))

    ciphertext = []
    chained = iv
    for block_index in range(total_blocks):
        block = data[block_index * BLOCK_SIZE_BYTES: (block_index + 1) * BLOCK_SIZE_BYTES]
        pad_len = BLOCK_SIZE_BYTES - len(block)
        # NOTE(review): a cleartext whose length is an exact multiple of the
        # block size gets no padding here, which is not strict PKCS#7 (that
        # would append a whole extra pad block) -- confirm peers accept this.
        block = block + [pad_len] * pad_len

        chained = aes_encrypt(xor(block, chained), schedule)
        ciphertext += chained

    return ciphertext
|  | ||||
|  | ||||
def key_expansion(data):
    """
    Generate key schedule

    @param {int[]} data  16/24/32-Byte cipher key
    @returns {int[]}     176/208/240-Byte expanded key
    """
    data = data[:]  # copy
    rcon_iteration = 1
    key_size_bytes = len(data)
    # Schedule length: (Nk + 7) blocks, i.e. 11/13/15 round keys of 16 bytes.
    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES

    # Each loop pass appends one key-length's worth of schedule material,
    # four bytes (one word) at a time, each new word XORed against the word
    # key_size_bytes earlier.
    while len(data) < expanded_key_size_bytes:
        # First word of the group uses the rotate/sub/rcon core transform.
        temp = data[-4:]
        temp = key_schedule_core(temp, rcon_iteration)
        rcon_iteration += 1
        data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])

        # Next three words are plain XOR extensions.
        for _ in range(3):
            temp = data[-4:]
            data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])

        # 256-bit keys apply an extra S-box substitution mid-group.
        if key_size_bytes == 32:
            temp = data[-4:]
            temp = sub_bytes(temp)
            data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])

        # Remaining plain words per group: 3 for 256-bit, 2 for 192-bit, 0 for 128-bit.
        for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
            temp = data[-4:]
            data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
    # The last pass can overshoot; truncate to the exact schedule size.
    data = data[:expanded_key_size_bytes]

    return data
|  | ||||
|  | ||||
def aes_encrypt(data, expanded_key):
    """
    Encrypt one block with aes

    @param {int[]} data          16-Byte state
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte cipher
    """
    # Round count follows from the schedule length: 10/12/14 rounds.
    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    # Initial AddRoundKey, then per round SubBytes, ShiftRows,
    # MixColumns (skipped in the final round) and AddRoundKey.
    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
    for i in range(1, rounds + 1):
        data = sub_bytes(data)
        data = shift_rows(data)
        if i != rounds:
            data = mix_columns(data)
        data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])

    return data
|  | ||||
|  | ||||
def aes_decrypt(data, expanded_key):
    """
    Decrypt one block with aes

    @param {int[]} data          16-Byte cipher
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte state
    """
    # Round count follows from the schedule length: 10/12/14 rounds.
    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    # Inverse of aes_encrypt: round keys consumed in reverse order,
    # each step replaced by its inverse, InvMixColumns skipped where
    # the forward path skipped MixColumns (the last forward round).
    for i in range(rounds, 0, -1):
        data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
        if i != rounds:
            data = mix_columns_inv(data)
        data = shift_rows_inv(data)
        data = sub_bytes_inv(data)
    # Final AddRoundKey with the first round key.
    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])

    return data
|  | ||||
|  | ||||
def aes_decrypt_text(data, password, key_size_bytes):
    """
    Decrypt text
    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
    - Mode of operation is 'counter'

    @param {str} data                    Base64 encoded string
    @param {str,unicode} password        Password (will be encoded with utf-8)
    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
    @returns {str}                       Decrypted data
    """
    NONCE_LENGTH_BYTES = 8

    data = bytes_to_intlist(compat_b64decode(data))
    password = bytes_to_intlist(password.encode('utf-8'))

    # Derive the key: zero-pad the password to key_size_bytes, then encrypt
    # its first block under itself and repeat that block to key length.
    key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
    key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)

    # Payload layout: 8-byte nonce, then the cipher text.
    nonce = data[:NONCE_LENGTH_BYTES]
    cipher = data[NONCE_LENGTH_BYTES:]

    class Counter(object):
        # Counter block = nonce followed by a zero-initialized 8-byte tail.
        # NOTE(review): next_value returns the current block and then
        # increments the WHOLE 16-byte block (inc carries across the nonce
        # bytes too if the tail overflows) -- confirm that matches the peer.
        __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)

        def next_value(self):
            temp = self.__value
            self.__value = inc(self.__value)
            return temp

    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
    plaintext = intlist_to_bytes(decrypted_data)

    return plaintext
|  | ||||
|  | ||||
# Round constants for the key schedule (index 0 is unused; iteration starts at 1).
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
# Rijndael forward S-box (byte substitution table).
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
# Inverse S-box (used by aes_decrypt via sub_bytes_inv).
SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
            0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
            0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
            0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
            0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
            0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
            0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
            0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
            0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
            0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
            0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
            0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
            0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
            0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
            0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
            0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d)
# MixColumns coefficient matrix (encryption direction), entries in GF(2^8).
MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1),
                     (0x1, 0x2, 0x3, 0x1),
                     (0x1, 0x1, 0x2, 0x3),
                     (0x3, 0x1, 0x1, 0x2))
# Inverse MixColumns coefficient matrix (decryption direction).
MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9),
                         (0x9, 0xE, 0xB, 0xD),
                         (0xD, 0x9, 0xE, 0xB),
                         (0xB, 0xD, 0x9, 0xE))
# Exp/log tables for GF(2^8) multiplication (see rijndael_mul);
# entry 1 of the exp table is 0x03, the generator these tables are built on.
RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
                      0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
                      0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
                      0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
                      0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
                      0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
                      0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
                      0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
                      0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
                      0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
                      0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
                      0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
                      0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
                      0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
                      0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
                      0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01)
RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
                      0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
                      0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
                      0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
                      0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
                      0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
                      0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
                      0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
                      0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
                      0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
                      0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
                      0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
                      0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
                      0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
                      0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
                      0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07)
|  | ||||
|  | ||||
def sub_bytes(data):
    """Apply the forward Rijndael S-box to every byte of *data*."""
    return list(map(SBOX.__getitem__, data))
|  | ||||
|  | ||||
def sub_bytes_inv(data):
    """Apply the inverse Rijndael S-box to every byte of *data*."""
    return list(map(SBOX_INV.__getitem__, data))
|  | ||||
|  | ||||
def rotate(data):
    """Rotate a word left by one byte: [a, b, c, d] -> [b, c, d, a]."""
    head = data[0]
    return data[1:] + [head]
|  | ||||
|  | ||||
def key_schedule_core(data, rcon_iteration):
    # One key-schedule word transform: rotate left a byte, substitute
    # through the S-box, then XOR the first byte with the round constant.
    data = rotate(data)
    data = sub_bytes(data)
    data[0] = data[0] ^ RCON[rcon_iteration]

    return data
|  | ||||
|  | ||||
def xor(data1, data2):
    """Element-wise XOR of two int lists, truncated to the shorter input."""
    result = []
    for left, right in zip(data1, data2):
        result.append(left ^ right)
    return result
|  | ||||
|  | ||||
def rijndael_mul(a, b):
    """Multiply a and b in GF(2^8) using the Rijndael log/exp tables."""
    if a == 0 or b == 0:
        return 0
    log_sum = RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]
    # Logs are modulo the multiplicative group order, 255.
    return RIJNDAEL_EXP_TABLE[log_sum % 0xFF]
|  | ||||
|  | ||||
def mix_column(data, matrix):
    # Multiply one 4-byte column by a MixColumns coefficient matrix over
    # GF(2^8); addition in that field is XOR.
    data_mixed = []
    for row in range(4):
        mixed = 0
        for column in range(4):
            # xor is (+) and (-)
            mixed ^= rijndael_mul(data[column], matrix[row][column])
        data_mixed.append(mixed)
    return data_mixed
|  | ||||
|  | ||||
def mix_columns(data, matrix=MIX_COLUMN_MATRIX):
    # Apply mix_column to each of the four columns of the 16-byte state.
    data_mixed = []
    for i in range(4):
        column = data[i * 4: (i + 1) * 4]
        data_mixed += mix_column(column, matrix)
    return data_mixed
|  | ||||
|  | ||||
def mix_columns_inv(data):
    """Inverse MixColumns: mix the state with the inverse coefficient matrix."""
    return mix_columns(data, matrix=MIX_COLUMN_MATRIX_INV)
|  | ||||
|  | ||||
def shift_rows(data):
    """ShiftRows on the column-major 4x4 state: row r rotates left by r."""
    return [data[((col + row) & 0b11) * 4 + row]
            for col in range(4)
            for row in range(4)]
|  | ||||
|  | ||||
def shift_rows_inv(data):
    """Inverse ShiftRows: row r of the 4x4 state rotates right by r."""
    return [data[((col - row) & 0b11) * 4 + row]
            for col in range(4)
            for row in range(4)]
|  | ||||
|  | ||||
def inc(data):
    """Return a copy of the big-endian byte list *data* incremented by one.

    Carries propagate toward index 0; an all-0xFF input wraps to all zeros.
    """
    incremented = list(data)
    position = len(incremented) - 1
    while position >= 0:
        if incremented[position] != 255:
            incremented[position] += 1
            break
        incremented[position] = 0
        position -= 1
    return incremented
|  | ||||
|  | ||||
| __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] | ||||
							
								
								
									
										96
									
								
								youtube_dl/cache.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								youtube_dl/cache.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,96 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import errno | ||||
| import io | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| import shutil | ||||
| import traceback | ||||
|  | ||||
| from .compat import compat_getenv | ||||
| from .utils import ( | ||||
|     expand_path, | ||||
|     write_json_file, | ||||
| ) | ||||
|  | ||||
|  | ||||
class Cache(object):
    """Simple on-disk JSON cache, rooted at --cache-dir or XDG_CACHE_HOME/youtube-dlc."""

    def __init__(self, ydl):
        self._ydl = ydl

    def _get_root_dir(self):
        # An explicit --cache-dir wins; otherwise fall back to the XDG location.
        root = self._ydl.params.get('cachedir')
        if root is None:
            root = os.path.join(compat_getenv('XDG_CACHE_HOME', '~/.cache'), 'youtube-dlc')
        return expand_path(root)

    def _get_cache_fn(self, section, key, dtype):
        # Restrict names so they are always safe as path components.
        for label, value in (('section', section), ('key', key)):
            assert re.match(r'^[a-zA-Z0-9_.-]+$', value), 'invalid %s %r' % (label, value)
        return os.path.join(self._get_root_dir(), section, '%s.%s' % (key, dtype))

    @property
    def enabled(self):
        # Caching is on unless the user set cachedir to False (--no-cache-dir).
        return self._ydl.params.get('cachedir') is not False

    def store(self, section, key, data, dtype='json'):
        """Persist *data* as JSON under section/key; failures become warnings."""
        assert dtype in ('json',)

        if not self.enabled:
            return

        fn = self._get_cache_fn(section, key, dtype)
        try:
            try:
                os.makedirs(os.path.dirname(fn))
            except OSError as ose:
                # The directory already existing is fine; anything else is not.
                if ose.errno != errno.EEXIST:
                    raise
            write_json_file(data, fn)
        except Exception:
            self._ydl.report_warning(
                'Writing cache to %r failed: %s' % (fn, traceback.format_exc()))

    def load(self, section, key, dtype='json', default=None):
        """Read a cached JSON value, returning *default* when absent or unreadable."""
        assert dtype in ('json',)

        if not self.enabled:
            return default

        cache_fn = self._get_cache_fn(section, key, dtype)
        try:
            try:
                with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
                    return json.load(cachef)
            except ValueError:
                # Corrupt JSON: report the file size (or the stat error) to help debugging.
                try:
                    file_size = os.path.getsize(cache_fn)
                except (OSError, IOError) as oe:
                    file_size = str(oe)
                self._ydl.report_warning(
                    'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
        except IOError:
            pass  # no cache file available

        return default

    def remove(self):
        """Delete the whole cache directory (--rm-cache-dir)."""
        if not self.enabled:
            self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
            return

        cachedir = self._get_root_dir()
        # Safety valve: refuse to recursively delete anything that does not look cache-like.
        if not any(term in cachedir for term in ('cache', 'tmp')):
            raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)

        self._ydl.to_screen(
            'Removing cache dir %s .' % cachedir, skip_eol=True)
        if os.path.exists(cachedir):
            self._ydl.to_screen('.', skip_eol=True)
            shutil.rmtree(cachedir)
        self._ydl.to_screen('.')
							
								
								
									
										3050
									
								
								youtube_dl/compat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3050
									
								
								youtube_dl/compat.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										63
									
								
								youtube_dl/downloader/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								youtube_dl/downloader/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from .f4m import F4mFD | ||||
| from .hls import HlsFD | ||||
| from .http import HttpFD | ||||
| from .rtmp import RtmpFD | ||||
| from .dash import DashSegmentsFD | ||||
| from .rtsp import RtspFD | ||||
| from .ism import IsmFD | ||||
| from .youtube_live_chat import YoutubeLiveChatReplayFD | ||||
| from .external import ( | ||||
|     get_external_downloader, | ||||
|     FFmpegFD, | ||||
| ) | ||||
|  | ||||
| from ..utils import ( | ||||
|     determine_protocol, | ||||
| ) | ||||
|  | ||||
# Default downloader class per protocol; get_suitable_downloader may still
# override these choices (external downloaders, live HLS, user preference).
PROTOCOL_MAP = {
    'rtmp': RtmpFD,
    'm3u8_native': HlsFD,
    'm3u8': FFmpegFD,
    'mms': RtspFD,
    'rtsp': RtspFD,
    'f4m': F4mFD,
    'http_dash_segments': DashSegmentsFD,
    'ism': IsmFD,
    'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
}
|  | ||||
|  | ||||
def get_suitable_downloader(info_dict, params=None):
    """Get the downloader class that can handle the info dict.

    Resolution order: a user-requested external downloader that can handle
    the dict, FFmpeg for live HLS, the native/FFmpeg HLS downloader per the
    'hls_prefer_native' preference, then the protocol map (HTTP by default).

    Side effect: annotates info_dict['protocol'] with the detected protocol.
    """
    # Fix: avoid a shared mutable default argument (was `params={}`);
    # passing no params behaves exactly as before.
    if params is None:
        params = {}

    protocol = determine_protocol(info_dict)
    info_dict['protocol'] = protocol

    # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
    #     return FFmpegFD

    external_downloader = params.get('external_downloader')
    if external_downloader is not None:
        ed = get_external_downloader(external_downloader)
        if ed.can_download(info_dict):
            return ed

    # Live HLS streams need FFmpeg regardless of other preferences.
    if protocol.startswith('m3u8') and info_dict.get('is_live'):
        return FFmpegFD

    if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
        return HlsFD

    if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
        return FFmpegFD

    return PROTOCOL_MAP.get(protocol, HttpFD)
|  | ||||
|  | ||||
| __all__ = [ | ||||
|     'get_suitable_downloader', | ||||
|     'FileDownloader', | ||||
| ] | ||||
							
								
								
									
										391
									
								
								youtube_dl/downloader/common.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										391
									
								
								youtube_dl/downloader/common.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,391 @@ | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import sys | ||||
| import time | ||||
| import random | ||||
|  | ||||
| from ..compat import compat_os_name | ||||
| from ..utils import ( | ||||
|     decodeArgument, | ||||
|     encodeFilename, | ||||
|     error_to_compat_str, | ||||
|     format_bytes, | ||||
|     shell_quote, | ||||
|     timeconvert, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FileDownloader(object): | ||||
|     """File Downloader class. | ||||
|  | ||||
|     File downloader objects are the ones responsible of downloading the | ||||
|     actual video file and writing it to disk. | ||||
|  | ||||
|     File downloaders accept a lot of parameters. In order not to saturate | ||||
|     the object constructor with arguments, it receives a dictionary of | ||||
|     options instead. | ||||
|  | ||||
|     Available options: | ||||
|  | ||||
|     verbose:            Print additional info to stdout. | ||||
|     quiet:              Do not print messages to stdout. | ||||
|     ratelimit:          Download speed limit, in bytes/sec. | ||||
|     retries:            Number of times to retry for HTTP error 5xx | ||||
|     buffersize:         Size of download buffer in bytes. | ||||
|     noresizebuffer:     Do not automatically resize the download buffer. | ||||
|     continuedl:         Try to continue downloads if possible. | ||||
|     noprogress:         Do not print the progress bar. | ||||
|     logtostderr:        Log messages to stderr instead of stdout. | ||||
|     consoletitle:       Display progress in console window's titlebar. | ||||
|     nopart:             Do not use temporary .part files. | ||||
|     updatetime:         Use the Last-modified header to set output file timestamps. | ||||
|     test:               Download only first bytes to test the downloader. | ||||
|     min_filesize:       Skip files smaller than this size | ||||
|     max_filesize:       Skip files larger than this size | ||||
|     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. | ||||
|     external_downloader_args:  A list of additional command-line arguments for the | ||||
|                         external downloader. | ||||
|     hls_use_mpegts:     Use the mpegts container for HLS videos. | ||||
|     http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be | ||||
|                         useful for bypassing bandwidth throttling imposed by | ||||
|                         a webserver (experimental) | ||||
|  | ||||
|     Subclasses of this one must re-define the real_download method. | ||||
|     """ | ||||
|  | ||||
    # Byte count fetched when the 'test' option is set -- see class docstring.
    _TEST_FILE_SIZE = 10241
    # Class-level default; instances assign their own params dict in __init__.
    params = None
|  | ||||
|     def __init__(self, ydl, params): | ||||
|         """Create a FileDownloader object with the given options.""" | ||||
|         self.ydl = ydl | ||||
|         self._progress_hooks = [] | ||||
|         self.params = params | ||||
|         self.add_progress_hook(self.report_progress) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_seconds(seconds): | ||||
|         (mins, secs) = divmod(seconds, 60) | ||||
|         (hours, mins) = divmod(mins, 60) | ||||
|         if hours > 99: | ||||
|             return '--:--:--' | ||||
|         if hours == 0: | ||||
|             return '%02d:%02d' % (mins, secs) | ||||
|         else: | ||||
|             return '%02d:%02d:%02d' % (hours, mins, secs) | ||||
|  | ||||
|     @staticmethod | ||||
|     def calc_percent(byte_counter, data_len): | ||||
|         if data_len is None: | ||||
|             return None | ||||
|         return float(byte_counter) / float(data_len) * 100.0 | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_percent(percent): | ||||
|         if percent is None: | ||||
|             return '---.-%' | ||||
|         return '%6s' % ('%3.1f%%' % percent) | ||||
|  | ||||
|     @staticmethod | ||||
|     def calc_eta(start, now, total, current): | ||||
|         if total is None: | ||||
|             return None | ||||
|         if now is None: | ||||
|             now = time.time() | ||||
|         dif = now - start | ||||
|         if current == 0 or dif < 0.001:  # One millisecond | ||||
|             return None | ||||
|         rate = float(current) / dif | ||||
|         return int((float(total) - float(current)) / rate) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_eta(eta): | ||||
|         if eta is None: | ||||
|             return '--:--' | ||||
|         return FileDownloader.format_seconds(eta) | ||||
|  | ||||
|     @staticmethod | ||||
|     def calc_speed(start, now, bytes): | ||||
|         dif = now - start | ||||
|         if bytes == 0 or dif < 0.001:  # One millisecond | ||||
|             return None | ||||
|         return float(bytes) / dif | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_speed(speed): | ||||
|         if speed is None: | ||||
|             return '%10s' % '---b/s' | ||||
|         return '%10s' % ('%s/s' % format_bytes(speed)) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_retries(retries): | ||||
|         return 'inf' if retries == float('inf') else '%.0f' % retries | ||||
|  | ||||
|     @staticmethod | ||||
|     def best_block_size(elapsed_time, bytes): | ||||
|         new_min = max(bytes / 2.0, 1.0) | ||||
|         new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB | ||||
|         if elapsed_time < 0.001: | ||||
|             return int(new_max) | ||||
|         rate = bytes / elapsed_time | ||||
|         if rate > new_max: | ||||
|             return int(new_max) | ||||
|         if rate < new_min: | ||||
|             return int(new_min) | ||||
|         return int(rate) | ||||
|  | ||||
|     @staticmethod | ||||
|     def parse_bytes(bytestr): | ||||
|         """Parse a string indicating a byte quantity into an integer.""" | ||||
|         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) | ||||
|         if matchobj is None: | ||||
|             return None | ||||
|         number = float(matchobj.group(1)) | ||||
|         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) | ||||
|         return int(round(number * multiplier)) | ||||
|  | ||||
|     def to_screen(self, *args, **kargs): | ||||
|         self.ydl.to_screen(*args, **kargs) | ||||
|  | ||||
    def to_stderr(self, message):
        # NOTE(review): despite its name this delegates to ydl.to_screen(),
        # not a stderr writer — confirm against YoutubeDL before relying
        # on the output stream.
        self.ydl.to_screen(message)
|  | ||||
    def to_console_title(self, message):
        """Proxy to the owning YoutubeDL instance's to_console_title()."""
        self.ydl.to_console_title(message)
|  | ||||
    def trouble(self, *args, **kargs):
        """Proxy to the owning YoutubeDL instance's trouble()."""
        self.ydl.trouble(*args, **kargs)
|  | ||||
    def report_warning(self, *args, **kargs):
        """Proxy to the owning YoutubeDL instance's report_warning()."""
        self.ydl.report_warning(*args, **kargs)
|  | ||||
    def report_error(self, *args, **kargs):
        """Proxy to the owning YoutubeDL instance's report_error()."""
        self.ydl.report_error(*args, **kargs)
|  | ||||
|     def slow_down(self, start_time, now, byte_counter): | ||||
|         """Sleep if the download speed is over the rate limit.""" | ||||
|         rate_limit = self.params.get('ratelimit') | ||||
|         if rate_limit is None or byte_counter == 0: | ||||
|             return | ||||
|         if now is None: | ||||
|             now = time.time() | ||||
|         elapsed = now - start_time | ||||
|         if elapsed <= 0.0: | ||||
|             return | ||||
|         speed = float(byte_counter) / elapsed | ||||
|         if speed > rate_limit: | ||||
|             sleep_time = float(byte_counter) / rate_limit - elapsed | ||||
|             if sleep_time > 0: | ||||
|                 time.sleep(sleep_time) | ||||
|  | ||||
|     def temp_name(self, filename): | ||||
|         """Returns a temporary filename for the given filename.""" | ||||
|         if self.params.get('nopart', False) or filename == '-' or \ | ||||
|                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): | ||||
|             return filename | ||||
|         return filename + '.part' | ||||
|  | ||||
|     def undo_temp_name(self, filename): | ||||
|         if filename.endswith('.part'): | ||||
|             return filename[:-len('.part')] | ||||
|         return filename | ||||
|  | ||||
|     def ytdl_filename(self, filename): | ||||
|         return filename + '.ytdl' | ||||
|  | ||||
    def try_rename(self, old_filename, new_filename):
        """Rename *old_filename* to *new_filename*, reporting (not raising)
        any OS-level failure.  A no-op when both names are equal."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError) as err:
            self.report_error('unable to rename file: %s' % error_to_compat_str(err))
|  | ||||
|     def try_utime(self, filename, last_modified_hdr): | ||||
|         """Try to set the last-modified time of the given file.""" | ||||
|         if last_modified_hdr is None: | ||||
|             return | ||||
|         if not os.path.isfile(encodeFilename(filename)): | ||||
|             return | ||||
|         timestr = last_modified_hdr | ||||
|         if timestr is None: | ||||
|             return | ||||
|         filetime = timeconvert(timestr) | ||||
|         if filetime is None: | ||||
|             return filetime | ||||
|         # Ignore obviously invalid dates | ||||
|         if filetime == 0: | ||||
|             return | ||||
|         try: | ||||
|             os.utime(filename, (time.time(), filetime)) | ||||
|         except Exception: | ||||
|             pass | ||||
|         return filetime | ||||
|  | ||||
|     def report_destination(self, filename): | ||||
|         """Report destination filename.""" | ||||
|         self.to_screen('[download] Destination: ' + filename) | ||||
|  | ||||
    def _report_progress_status(self, msg, is_last_line=False):
        """Write one progress line, reusing the terminal line when possible.

        With 'progress_with_newline' each update is its own line; otherwise
        the previous line is overwritten in place.  The message is also
        mirrored into the console title.
        """
        fullmsg = '[download] ' + msg
        if self.params.get('progress_with_newline', False):
            self.to_screen(fullmsg)
        else:
            if compat_os_name == 'nt':
                # Windows consoles get no ANSI erase sequence, so pad the new
                # message with spaces to fully overwrite a longer previous one.
                prev_len = getattr(self, '_report_progress_prev_line_length',
                                   0)
                if prev_len > len(fullmsg):
                    fullmsg += ' ' * (prev_len - len(fullmsg))
                self._report_progress_prev_line_length = len(fullmsg)
                clear_line = '\r'
            else:
                # ANSI "CR + erase to end of line" when attached to a tty,
                # plain carriage return when output is redirected.
                clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
            self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
        self.to_console_title('youtube-dlc ' + msg)
|  | ||||
|     def report_progress(self, s): | ||||
|         if s['status'] == 'finished': | ||||
|             if self.params.get('noprogress', False): | ||||
|                 self.to_screen('[download] Download completed') | ||||
|             else: | ||||
|                 msg_template = '100%%' | ||||
|                 if s.get('total_bytes') is not None: | ||||
|                     s['_total_bytes_str'] = format_bytes(s['total_bytes']) | ||||
|                     msg_template += ' of %(_total_bytes_str)s' | ||||
|                 if s.get('elapsed') is not None: | ||||
|                     s['_elapsed_str'] = self.format_seconds(s['elapsed']) | ||||
|                     msg_template += ' in %(_elapsed_str)s' | ||||
|                 self._report_progress_status( | ||||
|                     msg_template % s, is_last_line=True) | ||||
|  | ||||
|         if self.params.get('noprogress'): | ||||
|             return | ||||
|  | ||||
|         if s['status'] != 'downloading': | ||||
|             return | ||||
|  | ||||
|         if s.get('eta') is not None: | ||||
|             s['_eta_str'] = self.format_eta(s['eta']) | ||||
|         else: | ||||
|             s['_eta_str'] = 'Unknown ETA' | ||||
|  | ||||
|         if s.get('total_bytes') and s.get('downloaded_bytes') is not None: | ||||
|             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) | ||||
|         elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: | ||||
|             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) | ||||
|         else: | ||||
|             if s.get('downloaded_bytes') == 0: | ||||
|                 s['_percent_str'] = self.format_percent(0) | ||||
|             else: | ||||
|                 s['_percent_str'] = 'Unknown %' | ||||
|  | ||||
|         if s.get('speed') is not None: | ||||
|             s['_speed_str'] = self.format_speed(s['speed']) | ||||
|         else: | ||||
|             s['_speed_str'] = 'Unknown speed' | ||||
|  | ||||
|         if s.get('total_bytes') is not None: | ||||
|             s['_total_bytes_str'] = format_bytes(s['total_bytes']) | ||||
|             msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|         elif s.get('total_bytes_estimate') is not None: | ||||
|             s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) | ||||
|             msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|         else: | ||||
|             if s.get('downloaded_bytes') is not None: | ||||
|                 s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) | ||||
|                 if s.get('elapsed'): | ||||
|                     s['_elapsed_str'] = self.format_seconds(s['elapsed']) | ||||
|                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' | ||||
|                 else: | ||||
|                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' | ||||
|             else: | ||||
|                 msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' | ||||
|  | ||||
|         self._report_progress_status(msg_template % s) | ||||
|  | ||||
|     def report_resuming_byte(self, resume_len): | ||||
|         """Report attempt to resume at given byte.""" | ||||
|         self.to_screen('[download] Resuming download at byte %s' % resume_len) | ||||
|  | ||||
    def report_retry(self, err, count, retries):
        """Report retry in case of HTTP error 5xx"""
        # *retries* may be float('inf'); format_retries renders that as 'inf'.
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
            % (error_to_compat_str(err), count, self.format_retries(retries)))
|  | ||||
|     def report_file_already_downloaded(self, file_name): | ||||
|         """Report file has already been fully downloaded.""" | ||||
|         try: | ||||
|             self.to_screen('[download] %s has already been downloaded' % file_name) | ||||
|         except UnicodeEncodeError: | ||||
|             self.to_screen('[download] The file has already been downloaded') | ||||
|  | ||||
    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen('[download] Unable to resume')
|  | ||||
    def download(self, filename, info_dict):
        """Download to a filename using the info from info_dict
        Return True on success and False otherwise
        """

        nooverwrites_and_exists = (
            self.params.get('nooverwrites', False)
            and os.path.exists(encodeFilename(filename))
        )

        # *filename* may also be a writable file-like object; the existence
        # short-circuit below only applies to real path names.
        if not hasattr(filename, 'write'):
            continuedl_and_exists = (
                self.params.get('continuedl', True)
                and os.path.isfile(encodeFilename(filename))
                and not self.params.get('nopart', False)
            )

            # Check file already present
            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
                self.report_file_already_downloaded(filename)
                # Still fire the progress hooks so callers see a terminal state.
                self._hook_progress({
                    'filename': filename,
                    'status': 'finished',
                    'total_bytes': os.path.getsize(encodeFilename(filename)),
                })
                return True

        # Optional randomized pause between downloads (rate-limit friendliness).
        min_sleep_interval = self.params.get('sleep_interval')
        if min_sleep_interval:
            max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
            sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
            self.to_screen(
                '[download] Sleeping %s seconds...' % (
                    int(sleep_interval) if sleep_interval.is_integer()
                    else '%.2f' % sleep_interval))
            time.sleep(sleep_interval)

        return self.real_download(filename, info_dict)
|  | ||||
    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        # Abstract hook: download() delegates here after its common checks.
        raise NotImplementedError('This method must be implemented by subclasses')
|  | ||||
|     def _hook_progress(self, status): | ||||
|         for ph in self._progress_hooks: | ||||
|             ph(status) | ||||
|  | ||||
    def add_progress_hook(self, ph):
        """Register *ph* to be called with a status dict on every update."""
        # See YoutubeDl.py (search for progress_hooks) for a description of
        # this interface
        self._progress_hooks.append(ph)
|  | ||||
|     def _debug_cmd(self, args, exe=None): | ||||
|         if not self.params.get('verbose', False): | ||||
|             return | ||||
|  | ||||
|         str_args = [decodeArgument(a) for a in args] | ||||
|  | ||||
|         if exe is None: | ||||
|             exe = os.path.basename(str_args[0]) | ||||
|  | ||||
|         self.to_screen('[debug] %s command line: %s' % ( | ||||
|             exe, shell_quote(str_args))) | ||||
							
								
								
									
										80
									
								
								youtube_dl/downloader/dash.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								youtube_dl/downloader/dash.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .fragment import FragmentFD | ||||
| from ..compat import compat_urllib_error | ||||
| from ..utils import ( | ||||
|     DownloadError, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
class DashSegmentsFD(FragmentFD):
    """
    Download segments in a DASH manifest
    """

    FD_NAME = 'dashsegments'

    def real_download(self, filename, info_dict):
        # Base URL used to resolve fragments that only carry a relative 'path'.
        fragment_base_url = info_dict.get('fragment_base_url')
        # In test mode only the first fragment is fetched.
        fragments = info_dict['fragments'][:1] if self.params.get(
            'test', False) else info_dict['fragments']

        ctx = {
            'filename': filename,
            'total_frags': len(fragments),
        }

        # Populates ctx (including 'fragment_index' for resume support).
        self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

        frag_index = 0
        for i, fragment in enumerate(fragments):
            frag_index += 1
            # Skip fragments already written by a previous (resumed) run.
            if frag_index <= ctx['fragment_index']:
                continue
            # In DASH, the first segment contains necessary headers to
            # generate a valid MP4 file, so always abort for the first segment
            fatal = i == 0 or not skip_unavailable_fragments
            count = 0
            while count <= fragment_retries:
                try:
                    fragment_url = fragment.get('url')
                    if not fragment_url:
                        assert fragment_base_url
                        fragment_url = urljoin(fragment_base_url, fragment['path'])
                    success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
                    if not success:
                        return False
                    self._append_fragment(ctx, frag_content)
                    break
                except compat_urllib_error.HTTPError as err:
                    # YouTube may often return 404 HTTP error for a fragment causing the
                    # whole download to fail. However if the same fragment is immediately
                    # retried with the same request data this usually succeeds (1-2 attempts
                    # is usually enough) thus allowing to download the whole file successfully.
                    # To be future-proof we will retry all fragments that fail with any
                    # HTTP error.
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
                except DownloadError:
                    # Don't retry fragment if error occurred during HTTP downloading
                    # itself since it has own retry settings
                    if not fatal:
                        self.report_skip_fragment(frag_index)
                        break
                    raise

            # Retries exhausted: skip the fragment if allowed, else give up.
            if count > fragment_retries:
                if not fatal:
                    self.report_skip_fragment(frag_index)
                    continue
                self.report_error('giving up after %s fragment retries' % fragment_retries)
                return False

        self._finish_frag_download(ctx)

        return True
							
								
								
									
										371
									
								
								youtube_dl/downloader/external.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										371
									
								
								youtube_dl/downloader/external.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,371 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os.path | ||||
| import re | ||||
| import subprocess | ||||
| import sys | ||||
| import time | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..compat import ( | ||||
|     compat_setenv, | ||||
|     compat_str, | ||||
| ) | ||||
| from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS | ||||
| from ..utils import ( | ||||
|     cli_option, | ||||
|     cli_valueless_option, | ||||
|     cli_bool_option, | ||||
|     cli_configuration_args, | ||||
|     encodeFilename, | ||||
|     encodeArgument, | ||||
|     handle_youtubedl_headers, | ||||
|     check_executable, | ||||
|     is_outdated_version, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ExternalFD(FileDownloader):
    """Base class for downloaders that delegate to an external program.

    Subclasses provide _make_cmd() (or override _call_downloader()) plus
    availability metadata (AVAILABLE_OPT / available()).
    """

    def real_download(self, filename, info_dict):
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        try:
            started = time.time()
            retval = self._call_downloader(tmpfilename, info_dict)
        except KeyboardInterrupt:
            if not info_dict.get('is_live'):
                raise
            # Live stream downloading cancellation should be considered as
            # correct and expected termination thus all postprocessing
            # should take place
            retval = 0
            self.to_screen('[%s] Interrupted by user' % self.get_basename())

        if retval == 0:
            status = {
                'filename': filename,
                'status': 'finished',
                'elapsed': time.time() - started,
            }
            if filename != '-':
                # For stdout ('-') there is no file to measure or rename.
                fsize = os.path.getsize(encodeFilename(tmpfilename))
                self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
                self.try_rename(tmpfilename, filename)
                status.update({
                    'downloaded_bytes': fsize,
                    'total_bytes': fsize,
                })
            self._hook_progress(status)
            return True
        else:
            self.to_stderr('\n')
            self.report_error('%s exited with code %d' % (
                self.get_basename(), retval))
            return False

    @classmethod
    def get_basename(cls):
        # 'CurlFD' -> 'curl': drop the trailing 'FD' and lowercase.
        return cls.__name__[:-2].lower()

    @property
    def exe(self):
        # The executable name/path as supplied via --external-downloader.
        return self.params.get('external_downloader')

    @classmethod
    def available(cls):
        return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])

    @classmethod
    def supports(cls, info_dict):
        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

    @classmethod
    def can_download(cls, info_dict):
        return cls.available() and cls.supports(info_dict)

    def _option(self, command_option, param):
        return cli_option(self.params, command_option, param)

    def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
        return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)

    def _valueless_option(self, command_option, param, expected_value=True):
        return cli_valueless_option(self.params, command_option, param, expected_value)

    def _configuration_args(self, default=[]):
        # NOTE(review): mutable default argument — appears safe only as long
        # as cli_configuration_args never mutates it; confirm before changing.
        return cli_configuration_args(self.params, 'external_downloader_args', default)

    def _call_downloader(self, tmpfilename, info_dict):
        """ Either overwrite this or implement _make_cmd """
        cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]

        self._debug_cmd(cmd)

        # Capture stderr so it can be surfaced only on failure.
        p = subprocess.Popen(
            cmd, stderr=subprocess.PIPE)
        _, stderr = p.communicate()
        if p.returncode != 0:
            self.to_stderr(stderr.decode('utf-8', 'replace'))
        return p.returncode
|  | ||||
|  | ||||
class CurlFD(ExternalFD):
    """Downloader backed by the curl(1) command-line tool."""

    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        command = [self.exe, '--location', '-o', tmpfilename]
        headers = info_dict['http_headers']
        for name in headers:
            command.extend(['--header', '%s: %s' % (name, headers[name])])
        command.extend(self._bool_option('--continue-at', 'continuedl', '-', '0'))
        command.extend(self._valueless_option('--silent', 'noprogress'))
        command.extend(self._valueless_option('--verbose', 'verbose'))
        command.extend(self._option('--limit-rate', 'ratelimit'))
        retry_args = self._option('--retry', 'retries')
        if len(retry_args) == 2:
            # curl has no 'infinite' keyword; use the maximum retry count.
            if retry_args[1] in ('inf', 'infinite'):
                retry_args[1] = '2147483647'
            command.extend(retry_args)
        command.extend(self._option('--max-filesize', 'max_filesize'))
        command.extend(self._option('--interface', 'source_address'))
        command.extend(self._option('--proxy', 'proxy'))
        command.extend(self._valueless_option('--insecure', 'nocheckcertificate'))
        command.extend(self._configuration_args())
        command.extend(['--', info_dict['url']])
        return command

    def _call_downloader(self, tmpfilename, info_dict):
        command = [encodeArgument(arg) for arg in self._make_cmd(tmpfilename, info_dict)]

        self._debug_cmd(command)

        # curl writes the progress to stderr so don't capture it.
        proc = subprocess.Popen(command)
        proc.communicate()
        return proc.returncode
|  | ||||
|  | ||||
class AxelFD(ExternalFD):
    """Downloader backed by the axel(1) accelerator."""

    AVAILABLE_OPT = '-V'

    def _make_cmd(self, tmpfilename, info_dict):
        command = [self.exe, '-o', tmpfilename]
        for name, value in info_dict['http_headers'].items():
            command.extend(['-H', '%s: %s' % (name, value)])
        command.extend(self._configuration_args())
        command.extend(['--', info_dict['url']])
        return command
|  | ||||
|  | ||||
class WgetFD(ExternalFD):
    """Downloader backed by wget(1)."""

    AVAILABLE_OPT = '--version'

    def _make_cmd(self, tmpfilename, info_dict):
        command = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
        headers = info_dict['http_headers']
        for name in headers:
            command.extend(['--header', '%s: %s' % (name, headers[name])])
        command.extend(self._option('--limit-rate', 'ratelimit'))
        retry_args = self._option('--tries', 'retries')
        if len(retry_args) == 2:
            # wget spells "retry forever" as 0 tries.
            if retry_args[1] in ('inf', 'infinite'):
                retry_args[1] = '0'
            command.extend(retry_args)
        command.extend(self._option('--bind-address', 'source_address'))
        command.extend(self._option('--proxy', 'proxy'))
        command.extend(self._valueless_option('--no-check-certificate', 'nocheckcertificate'))
        command.extend(self._configuration_args())
        command.extend(['--', info_dict['url']])
        return command
|  | ||||
|  | ||||
class Aria2cFD(ExternalFD):
    """Downloader backed by aria2c; '-c' resumes partial downloads."""

    AVAILABLE_OPT = '-v'

    def _make_cmd(self, tmpfilename, info_dict):
        command = [self.exe, '-c']
        command.extend(self._configuration_args([
            '--min-split-size', '1M', '--max-connection-per-server', '4']))
        # aria2c takes the output directory and the file name separately.
        directory = os.path.dirname(tmpfilename)
        if directory:
            command.extend(['--dir', directory])
        command.extend(['--out', os.path.basename(tmpfilename)])
        headers = info_dict['http_headers']
        for name in headers:
            command.extend(['--header', '%s: %s' % (name, headers[name])])
        command.extend(self._option('--interface', 'source_address'))
        command.extend(self._option('--all-proxy', 'proxy'))
        command.extend(self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '='))
        command.extend(self._bool_option('--remote-time', 'updatetime', 'true', 'false', '='))
        command.extend(['--', info_dict['url']])
        return command
|  | ||||
|  | ||||
class HttpieFD(ExternalFD):
    """Downloader backed by HTTPie's 'http' client."""

    @classmethod
    def available(cls):
        # The executable is 'http', not the class basename 'httpie'.
        return check_executable('http', ['--version'])

    def _make_cmd(self, tmpfilename, info_dict):
        command = ['http', '--download', '--output', tmpfilename, info_dict['url']]
        for name, value in info_dict['http_headers'].items():
            command.append('%s:%s' % (name, value))
        return command
|  | ||||
|  | ||||
class FFmpegFD(ExternalFD):
    """Downloader that drives ffmpeg/avconv directly (HLS, RTMP, MMS, ...)."""

    @classmethod
    def supports(cls, info_dict):
        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')

    @classmethod
    def available(cls):
        return FFmpegPostProcessor().available

    def _call_downloader(self, tmpfilename, info_dict):
        url = info_dict['url']
        ffpp = FFmpegPostProcessor(downloader=self)
        if not ffpp.available:
            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
            return False
        ffpp.check_version()

        # '-y': overwrite the output file without asking.
        args = [ffpp.executable, '-y']

        for log_level in ('quiet', 'verbose'):
            if self.params.get(log_level, False):
                args += ['-loglevel', log_level]
                break

        seekable = info_dict.get('_seekable')
        if seekable is not None:
            # setting -seekable prevents ffmpeg from guessing if the server
            # supports seeking(by adding the header `Range: bytes=0-`), which
            # can cause problems in some cases
            # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
            # http://trac.ffmpeg.org/ticket/6125#comment:10
            args += ['-seekable', '1' if seekable else '0']

        args += self._configuration_args()

        # start_time = info_dict.get('start_time') or 0
        # if start_time:
        #     args += ['-ss', compat_str(start_time)]
        # end_time = info_dict.get('end_time')
        # if end_time:
        #     args += ['-t', compat_str(end_time - start_time)]

        if info_dict['http_headers'] and re.match(r'^https?://', url):
            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
            headers = handle_youtubedl_headers(info_dict['http_headers'])
            args += [
                '-headers',
                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]

        env = None
        proxy = self.params.get('proxy')
        if proxy:
            if not re.match(r'^[\da-zA-Z]+://', proxy):
                proxy = 'http://%s' % proxy

            if proxy.startswith('socks'):
                self.report_warning(
                    '%s does not support SOCKS proxies. Downloading is likely to fail. '
                    'Consider adding --hls-prefer-native to your command.' % self.get_basename())

            # Since December 2015 ffmpeg supports -http_proxy option (see
            # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
            # We could switch to the following code if we are able to detect version properly
            # args += ['-http_proxy', proxy]
            env = os.environ.copy()
            compat_setenv('HTTP_PROXY', proxy, env=env)
            compat_setenv('http_proxy', proxy, env=env)

        protocol = info_dict.get('protocol')

        if protocol == 'rtmp':
            # Map each rtmp_* metadata field onto the matching ffmpeg option.
            player_url = info_dict.get('player_url')
            page_url = info_dict.get('page_url')
            app = info_dict.get('app')
            play_path = info_dict.get('play_path')
            tc_url = info_dict.get('tc_url')
            flash_version = info_dict.get('flash_version')
            live = info_dict.get('rtmp_live', False)
            conn = info_dict.get('rtmp_conn')
            if player_url is not None:
                args += ['-rtmp_swfverify', player_url]
            if page_url is not None:
                args += ['-rtmp_pageurl', page_url]
            if app is not None:
                args += ['-rtmp_app', app]
            if play_path is not None:
                args += ['-rtmp_playpath', play_path]
            if tc_url is not None:
                args += ['-rtmp_tcurl', tc_url]
            if flash_version is not None:
                args += ['-rtmp_flashver', flash_version]
            if live:
                args += ['-rtmp_live', 'live']
            if isinstance(conn, list):
                for entry in conn:
                    args += ['-rtmp_conn', entry]
            elif isinstance(conn, compat_str):
                args += ['-rtmp_conn', conn]

        # Stream copy: no re-encoding, just remux into the output container.
        args += ['-i', url, '-c', 'copy']

        if self.params.get('test', False):
            args += ['-fs', compat_str(self._TEST_FILE_SIZE)]

        if protocol in ('m3u8', 'm3u8_native'):
            if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
                args += ['-f', 'mpegts']
            else:
                args += ['-f', 'mp4']
                if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
                    args += ['-bsf:a', 'aac_adtstoasc']
        elif protocol == 'rtmp':
            args += ['-f', 'flv']
        else:
            args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]

        args = [encodeArgument(opt) for opt in args]
        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))

        self._debug_cmd(args)

        # stdin pipe lets us send 'q' on Ctrl-C so ffmpeg finalizes the file.
        proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
        try:
            retval = proc.wait()
        except KeyboardInterrupt:
            # subprocces.run would send the SIGKILL signal to ffmpeg and the
            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
            # produces a file that is playable (this is mostly useful for live
            # streams). Note that Windows is not affected and produces playable
            # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
            if sys.platform != 'win32':
                proc.communicate(b'q')
            raise
        return retval
|  | ||||
|  | ||||
class AVconvFD(FFmpegFD):
    # Behaves exactly like FFmpegFD; a separate class is defined only so that
    # the name-based _BY_NAME registry below (all classes ending in 'FD')
    # picks it up as a selectable external downloader in its own right.
    pass
|  | ||||
|  | ||||
# Registry of downloader classes defined in this module, keyed by their
# executable basename.  Picks up every class whose name ends in 'FD',
# excluding the abstract ExternalFD base.
_BY_NAME = {
    klass.get_basename(): klass
    for name, klass in globals().items()
    if name.endswith('FD') and name != 'ExternalFD'
}
|  | ||||
|  | ||||
def list_external_downloaders():
    """Return the names of all supported external downloaders, sorted."""
    return sorted(_BY_NAME)
|  | ||||
|  | ||||
def get_external_downloader(external_downloader):
    """ Given the name of the executable, see whether we support the given
        downloader . """
    # Drop .exe extension on Windows
    executable = os.path.basename(external_downloader)
    basename, _ = os.path.splitext(executable)
    # KeyError here means the downloader is unsupported.
    return _BY_NAME[basename]
							
								
								
									
										438
									
								
								youtube_dl/downloader/f4m.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										438
									
								
								youtube_dl/downloader/f4m.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,438 @@ | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import io | ||||
| import itertools | ||||
| import time | ||||
|  | ||||
| from .fragment import FragmentFD | ||||
| from ..compat import ( | ||||
|     compat_b64decode, | ||||
|     compat_etree_fromstring, | ||||
|     compat_urlparse, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_struct_pack, | ||||
|     compat_struct_unpack, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     fix_xml_ampersands, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
class DataTruncatedError(Exception):
    """Raised by FlvReader.read_bytes when fewer bytes are available than requested."""
    pass
|  | ||||
|  | ||||
class FlvReader(io.BytesIO):
    """
    Reader for Flv files
    The file format is documented in https://www.adobe.com/devnet/f4v.html

    All multi-byte integers are big-endian ('!' struct format).
    """

    def read_bytes(self, n):
        # Strict read: unlike io.BytesIO.read, a short read raises so the box
        # parsers below never silently operate on truncated data.
        data = self.read(n)
        if len(data) < n:
            raise DataTruncatedError(
                'FlvReader error: need %d bytes while only %d bytes got' % (
                    n, len(data)))
        return data

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        # 64-bit big-endian unsigned integer
        return compat_struct_unpack('!Q', self.read_bytes(8))[0]

    def read_unsigned_int(self):
        # 32-bit big-endian unsigned integer
        return compat_struct_unpack('!I', self.read_bytes(4))[0]

    def read_unsigned_char(self):
        # 8-bit unsigned integer
        return compat_struct_unpack('!B', self.read_bytes(1))[0]

    def read_string(self):
        # NUL-terminated byte string; the terminator is consumed but not
        # included in the returned value.
        res = b''
        while True:
            char = self.read_bytes(1)
            if char == b'\x00':
                break
            res += char
        return res

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read_bytes(4)
        header_end = 8
        # A 32-bit size of 1 signals that an extended 64-bit size field
        # follows the box type.
        if size == 1:
            real_size = self.read_unsigned_long_long()
            header_end = 16
        return real_size, box_type, self.read_bytes(real_size - header_end)

    def read_asrt(self):
        """Parse a Segment Run Table ('asrt') box body."""
        # version
        self.read_unsigned_char()
        # flags
        self.read_bytes(3)
        quality_entry_count = self.read_unsigned_char()
        # QualityEntryCount
        for i in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        # Each entry: (first segment number, fragments per segment)
        for i in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """Parse a Fragment Run Table ('afrt') box body."""
        # version
        self.read_unsigned_char()
        # flags
        self.read_bytes(3)
        # time scale
        self.read_unsigned_int()

        quality_entry_count = self.read_unsigned_char()
        # QualitySegmentUrlModifiers
        for i in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for i in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            # A zero duration marks a discontinuity entry carrying an extra
            # indicator byte instead of a real fragment run.
            if duration == 0:
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """Parse a Bootstrap Info ('abst') box body.

        Returns a dict with 'segments' (asrt tables), 'fragments' (afrt
        tables) and the 'live' flag.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read_bytes(3)

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
        flags = self.read_unsigned_char()
        # Bit 0x20 is the Live flag of the packed Profile/Live/Update byte.
        live = flags & 0x20 != 0
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()

        self.read_string()  # MovieIdentifier
        server_count = self.read_unsigned_char()
        # ServerEntryTable
        for i in range(server_count):
            self.read_string()
        quality_count = self.read_unsigned_char()
        # QualityEntryTable
        for i in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        # Embedded 'asrt' boxes, each parsed by a nested reader over its body.
        for i in range(segments_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segment = FlvReader(box_data).read_asrt()
            segments.append(segment)
        fragments_run_count = self.read_unsigned_char()
        fragments = []
        # Embedded 'afrt' boxes.
        for i in range(fragments_run_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
            'live': live,
        }

    def read_bootstrap_info(self):
        # The bootstrap blob must consist of a single top-level 'abst' box.
        total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
|  | ||||
|  | ||||
def read_bootstrap_info(bootstrap_bytes):
    """Parse a raw bootstrap ('abst') blob and return its info dict."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
|  | ||||
|  | ||||
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video """
    # Fragment numbers are global across segments and increase monotonically
    # starting from the first entry of the first fragment run table.
    frag_numbers = itertools.count(
        boot_info['fragments'][0]['fragments'][0]['first'])
    pairs = []
    for segment, fragments_count in boot_info['segments'][0]['segment_run']:
        # In some live HDS streams (for example Rai), `fragments_count` is
        # abnormal and causing out-of-memory errors. It's OK to change the
        # number of fragments for live streams as they are updated periodically
        if fragments_count == 4294967295 and boot_info['live']:
            fragments_count = 2
        for _ in range(fragments_count):
            pairs.append((segment, next(frag_numbers)))

    # For live streams only the most recent couple of fragments matter.
    return pairs[-2:] if boot_info['live'] else pairs
|  | ||||
|  | ||||
def write_unsigned_int(stream, val):
    """Write *val* to *stream* as a 4-byte big-endian unsigned integer."""
    packed = compat_struct_pack('!I', val)
    stream.write(packed)
|  | ||||
|  | ||||
def write_unsigned_int_24(stream, val):
    """Write *val* to *stream* as a 3-byte big-endian unsigned integer."""
    # Pack as 32 bits and drop the most significant byte.
    packed = compat_struct_pack('!I', val)
    stream.write(packed[1:])
|  | ||||
|  | ||||
def write_flv_header(stream):
    """Writes the FLV header to stream"""
    # 'FLV' signature + version 1, flags byte 0x05, 9-byte header length,
    # then the PreviousTagSize0 field (always zero).
    for chunk in (b'FLV\x01', b'\x05', b'\x00\x00\x00\x09', b'\x00\x00\x00\x00'):
        stream.write(chunk)
|  | ||||
|  | ||||
def write_metadata_tag(stream, metadata):
    """Writes optional metadata tag to stream"""
    SCRIPT_TAG = b'\x12'
    FLV_TAG_HEADER_LEN = 11

    # A missing/empty metadata blob simply produces no tag at all.
    if not metadata:
        return
    stream.write(SCRIPT_TAG)
    # 24-bit payload size, then zeroed timestamp and stream-id fields.
    write_unsigned_int_24(stream, len(metadata))
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Trailing PreviousTagSize: header plus payload length.
    write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata))
|  | ||||
|  | ||||
def remove_encrypted_media(media):
    """Return only the media entries that carry no DRM header attributes."""
    return [e for e in media
            if 'drmAdditionalHeaderId' not in e.attrib
            and 'drmAdditionalHeaderSetId' not in e.attrib]
|  | ||||
|  | ||||
| def _add_ns(prop, ver=1): | ||||
|     return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) | ||||
|  | ||||
|  | ||||
def get_base_url(manifest):
    """Return the manifest-level <baseURL> (v1 or v2 namespace), stripped,
    or None/empty when absent."""
    url = xpath_text(
        manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
        'base URL', default=None)
    return url.strip() if url else url
|  | ||||
|  | ||||
class F4mFD(FragmentFD):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    FD_NAME = 'f4m'

    def _get_unencrypted_media(self, doc):
        # Return the DRM-free <media> nodes of the manifest, erroring out
        # when nothing playable remains.
        media = doc.findall(_add_ns('media'))
        if not media:
            self.report_error('No media found')
        for e in (doc.findall(_add_ns('drmAdditionalHeader'))
                  + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
            # If id attribute is missing it's valid for all media nodes
            # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
            if 'id' not in e.attrib:
                self.report_error('Missing ID in f4m DRM')
        media = remove_encrypted_media(media)
        if not media:
            self.report_error('Unsupported DRM')
        return media

    def _get_bootstrap_from_url(self, bootstrap_url):
        # Fetch an external bootstrap ('abst') blob and parse it.
        bootstrap = self.ydl.urlopen(bootstrap_url).read()
        return read_bootstrap_info(bootstrap)

    def _update_live_fragments(self, bootstrap_url, latest_fragment):
        # Re-poll the bootstrap of a live stream until fragments newer than
        # *latest_fragment* appear; give up after 30 attempts 5s apart.
        fragments_list = []
        retries = 30
        while (not fragments_list) and (retries > 0):
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
            fragments_list = build_fragments_list(boot_info)
            fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
            if not fragments_list:
                # Retry after a while
                time.sleep(5.0)
                retries -= 1

        if not fragments_list:
            self.report_error('Failed to update fragments')

        return fragments_list

    def _parse_bootstrap_node(self, node, base_url):
        # Returns (boot_info, bootstrap_url); bootstrap_url is None when the
        # bootstrap info was inline (and thus cannot be re-polled for live).
        # Sometimes non empty inline bootstrap info can be specified along
        # with bootstrap url attribute (e.g. dummy inline bootstrap info
        # contains whitespace characters in [1]). We will prefer bootstrap
        # url over inline bootstrap info when present.
        # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
        bootstrap_url = node.get('url')
        if bootstrap_url:
            bootstrap_url = compat_urlparse.urljoin(
                base_url, bootstrap_url)
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
        else:
            bootstrap_url = None
            bootstrap = compat_b64decode(node.text)
            boot_info = read_bootstrap_info(bootstrap)
        return boot_info, bootstrap_url

    def real_download(self, filename, info_dict):
        # Download an AdobeHDS stream: parse the f4m manifest, pick a media
        # rendition, then fetch fragments and append their 'mdat' payloads
        # to a single FLV output file.
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
        # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
        # and https://github.com/ytdl-org/youtube-dl/issues/7823)
        manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()

        doc = compat_etree_fromstring(manifest)
        # Pair each DRM-free rendition with its bitrate (-1 when unspecified).
        formats = [(int(f.attrib.get('bitrate', -1)), f)
                   for f in self._get_unencrypted_media(doc)]
        if requested_bitrate is None or len(formats) == 1:
            # get the best format
            formats = sorted(formats, key=lambda f: f[0])
            rate, media = formats[-1]
        else:
            rate, media = list(filter(
                lambda f: int(f[0]) == requested_bitrate, formats))[0]

        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
        man_base_url = get_base_url(doc) or man_url

        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
        boot_info, bootstrap_url = self._parse_bootstrap_node(
            bootstrap_node, man_base_url)
        live = boot_info['live']
        metadata_node = media.find(_add_ns('metadata'))
        if metadata_node is not None:
            metadata = compat_b64decode(metadata_node.text)
        else:
            metadata = None

        fragments_list = build_fragments_list(boot_info)
        test = self.params.get('test', False)
        if test:
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)
        # For some akamai manifests we'll need to add a query to the fragment url
        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

        ctx = {
            'filename': filename,
            'total_frags': total_frags,
            'live': live,
        }

        self._prepare_frag_download(ctx)

        dest_stream = ctx['dest_stream']

        # Only write the FLV header/metadata when starting fresh; on resume
        # the output file already contains them.
        if ctx['complete_frags_downloaded_bytes'] == 0:
            write_flv_header(dest_stream)
            if not live:
                write_metadata_tag(dest_stream, metadata)

        base_url_parsed = compat_urllib_parse_urlparse(base_url)

        self._start_frag_download(ctx)

        frag_index = 0
        while fragments_list:
            seg_i, frag_i = fragments_list.pop(0)
            frag_index += 1
            # Skip fragments already downloaded in a previous (resumed) run.
            if frag_index <= ctx['fragment_index']:
                continue
            name = 'Seg%d-Frag%d' % (seg_i, frag_i)
            query = []
            if base_url_parsed.query:
                query.append(base_url_parsed.query)
            if akamai_pv:
                query.append(akamai_pv.strip(';'))
            if info_dict.get('extra_param_to_segment_url'):
                query.append(info_dict['extra_param_to_segment_url'])
            url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
            try:
                success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
                if not success:
                    return False
                reader = FlvReader(down_data)
                while True:
                    try:
                        _, box_type, box_data = reader.read_box_info()
                    except DataTruncatedError:
                        if test:
                            # In tests, segments may be truncated, and thus
                            # FlvReader may not be able to parse the whole
                            # chunk. If so, write the segment as is
                            # See https://github.com/ytdl-org/youtube-dl/issues/9214
                            dest_stream.write(down_data)
                            break
                        raise
                    # Only the 'mdat' box carries the actual media payload.
                    if box_type == b'mdat':
                        self._append_fragment(ctx, box_data)
                        break
            except (compat_urllib_error.HTTPError, ) as err:
                if live and (err.code == 404 or err.code == 410):
                    # We didn't keep up with the live window. Continue
                    # with the next available fragment.
                    msg = 'Fragment %d unavailable' % frag_i
                    self.report_warning(msg)
                    fragments_list = []
                else:
                    raise
            # Live streams: once the known fragments are exhausted, poll the
            # bootstrap for newly published ones and keep going.
            if not fragments_list and not test and live and bootstrap_url:
                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
                total_frags += len(fragments_list)
                if fragments_list and (fragments_list[0][1] > frag_i + 1):
                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
                    self.report_warning(msg)

        self._finish_frag_download(ctx)

        return True
							
								
								
									
										269
									
								
								youtube_dl/downloader/fragment.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										269
									
								
								youtube_dl/downloader/fragment.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,269 @@ | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import os | ||||
| import time | ||||
| import json | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from .http import HttpFD | ||||
| from ..utils import ( | ||||
|     error_to_compat_str, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     sanitized_Request, | ||||
| ) | ||||
|  | ||||
|  | ||||
class HttpQuietDownloader(HttpFD):
    """An HttpFD variant that silences all screen output.

    Used by FragmentFD as the inner per-fragment downloader so individual
    fragments do not produce their own progress/status lines.
    """
    def to_screen(self, *args, **kargs):
        # Deliberately swallow all output requests.
        pass
|  | ||||
|  | ||||
| class FragmentFD(FileDownloader): | ||||
|     """ | ||||
|     A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). | ||||
|  | ||||
|     Available options: | ||||
|  | ||||
|     fragment_retries:   Number of times to retry a fragment for HTTP error (DASH | ||||
|                         and hlsnative only) | ||||
|     skip_unavailable_fragments: | ||||
|                         Skip unavailable fragments (DASH and hlsnative only) | ||||
|     keep_fragments:     Keep downloaded fragments on disk after downloading is | ||||
|                         finished | ||||
|  | ||||
|     For each incomplete fragment download youtube-dlc keeps on disk a special | ||||
|     bookkeeping file with download state and metadata (in future such files will | ||||
|     be used for any incomplete download handled by youtube-dlc). This file is | ||||
|     used to properly handle resuming, check download file consistency and detect | ||||
|     potential errors. The file has a .ytdl extension and represents a standard | ||||
|     JSON file of the following format: | ||||
|  | ||||
|     extractor: | ||||
|         Dictionary of extractor related data. TBD. | ||||
|  | ||||
|     downloader: | ||||
|         Dictionary of downloader related data. May contain following data: | ||||
|             current_fragment: | ||||
|                 Dictionary with current (being downloaded) fragment data: | ||||
|                 index:  0-based index of current fragment among all fragments | ||||
|             fragment_count: | ||||
|                 Total count of fragments | ||||
|  | ||||
|     This feature is experimental and file format may change in future. | ||||
|     """ | ||||
|  | ||||
|     def report_retry_fragment(self, err, frag_index, count, retries): | ||||
|         self.to_screen( | ||||
|             '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...' | ||||
|             % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) | ||||
|  | ||||
|     def report_skip_fragment(self, frag_index): | ||||
|         self.to_screen('[download] Skipping fragment %d...' % frag_index) | ||||
|  | ||||
|     def _prepare_url(self, info_dict, url): | ||||
|         headers = info_dict.get('http_headers') | ||||
|         return sanitized_Request(url, None, headers) if headers else url | ||||
|  | ||||
    def _prepare_and_start_frag_download(self, ctx):
        # Convenience wrapper: set up the destination/resume state, then
        # install the aggregated progress hook.
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx)
|  | ||||
|     @staticmethod | ||||
|     def __do_ytdl_file(ctx): | ||||
|         return not ctx['live'] and not ctx['tmpfilename'] == '-' | ||||
|  | ||||
|     def _read_ytdl_file(self, ctx): | ||||
|         assert 'ytdl_corrupt' not in ctx | ||||
|         stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') | ||||
|         try: | ||||
|             ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] | ||||
|         except Exception: | ||||
|             ctx['ytdl_corrupt'] = True | ||||
|         finally: | ||||
|             stream.close() | ||||
|  | ||||
|     def _write_ytdl_file(self, ctx): | ||||
|         frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') | ||||
|         downloader = { | ||||
|             'current_fragment': { | ||||
|                 'index': ctx['fragment_index'], | ||||
|             }, | ||||
|         } | ||||
|         if ctx.get('fragment_count') is not None: | ||||
|             downloader['fragment_count'] = ctx['fragment_count'] | ||||
|         frag_index_stream.write(json.dumps({'downloader': downloader})) | ||||
|         frag_index_stream.close() | ||||
|  | ||||
|     def _download_fragment(self, ctx, frag_url, info_dict, headers=None): | ||||
|         fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) | ||||
|         success = ctx['dl'].download(fragment_filename, { | ||||
|             'url': frag_url, | ||||
|             'http_headers': headers or info_dict.get('http_headers'), | ||||
|         }) | ||||
|         if not success: | ||||
|             return False, None | ||||
|         down, frag_sanitized = sanitize_open(fragment_filename, 'rb') | ||||
|         ctx['fragment_filename_sanitized'] = frag_sanitized | ||||
|         frag_content = down.read() | ||||
|         down.close() | ||||
|         return True, frag_content | ||||
|  | ||||
    def _append_fragment(self, ctx, frag_content):
        # Append the fragment payload to the output file; the bookkeeping and
        # temp-file cleanup in `finally` run even if the write fails.
        try:
            ctx['dest_stream'].write(frag_content)
            ctx['dest_stream'].flush()
        finally:
            # Record the resume point first, then drop the per-fragment temp
            # file unless the user asked to keep fragments.
            if self.__do_ytdl_file(ctx):
                self._write_ytdl_file(ctx)
            if not self.params.get('keep_fragments', False):
                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
            del ctx['fragment_filename_sanitized']
|  | ||||
    def _prepare_frag_download(self, ctx):
        """Set up the destination file, resume state and the inner HTTP downloader.

        Populates ctx with 'dl', 'dest_stream', 'tmpfilename',
        'fragment_index' and 'complete_frags_downloaded_bytes'.
        """
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
                total_frags_str += ' (not including %d ad)' % ad_frags
        else:
            total_frags_str = 'unknown (live)'
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
        # Quiet inner downloader: per-fragment output would be noise; progress
        # is aggregated by this class's own hook instead.
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),
                'retries': self.params.get('retries', 0),
                'nopart': self.params.get('nopart', False),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
        resume_len = 0

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            open_mode = 'ab'
            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
            'tmpfilename': tmpfilename,
            'fragment_index': 0,
        })

        if self.__do_ytdl_file(ctx):
            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                # A positive resume index with an empty part file means the
                # two on-disk states disagree; restart rather than corrupt.
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
                if is_corrupt or is_inconsistent:
                    message = (
                        '.ytdl file is corrupt' if is_corrupt else
                        'Inconsistent state of incomplete fragment download')
                    self.report_warning(
                        '%s. Restarting from the beginning...' % message)
                    ctx['fragment_index'] = resume_len = 0
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
            else:
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
            # Total complete fragments downloaded so far in bytes
            'complete_frags_downloaded_bytes': resume_len,
        })
|  | ||||
    def _start_frag_download(self, ctx):
        """Install a hook on the inner downloader that folds per-fragment
        progress events into one aggregate progress report.

        Returns the download start timestamp.
        """
        resume_len = ctx['complete_frags_downloaded_bytes']
        total_frags = ctx['total_frags']
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': resume_len,
            'fragment_index': ctx['fragment_index'],
            'fragment_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }

        start = time.time()
        ctx.update({
            'started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
        })

        def frag_progress_hook(s):
            if s['status'] not in ('downloading', 'finished'):
                return

            time_now = time.time()
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
            if not ctx['live']:
                # Extrapolate total size from the average size of the
                # fragments seen so far.
                estimated_size = (
                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
                    / (state['fragment_index'] + 1) * total_frags)
                state['total_bytes_estimate'] = estimated_size

            if s['status'] == 'finished':
                state['fragment_index'] += 1
                ctx['fragment_index'] = state['fragment_index']
                # Add only the bytes not already reported by earlier
                # 'downloading' events of this fragment.
                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                ctx['prev_frag_downloaded_bytes'] = 0
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                if not ctx['live']:
                    state['eta'] = self.calc_eta(
                        start, time_now, estimated_size - resume_len,
                        state['downloaded_bytes'] - resume_len)
                state['speed'] = s.get('speed') or ctx.get('speed')
                ctx['speed'] = state['speed']
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start
|  | ||||
|     def _finish_frag_download(self, ctx): | ||||
|         ctx['dest_stream'].close() | ||||
|         if self.__do_ytdl_file(ctx): | ||||
|             ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) | ||||
|             if os.path.isfile(ytdl_filename): | ||||
|                 os.remove(ytdl_filename) | ||||
|         elapsed = time.time() - ctx['started'] | ||||
|  | ||||
|         if ctx['tmpfilename'] == '-': | ||||
|             downloaded_bytes = ctx['complete_frags_downloaded_bytes'] | ||||
|         else: | ||||
|             self.try_rename(ctx['tmpfilename'], ctx['filename']) | ||||
|             downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) | ||||
|  | ||||
|         self._hook_progress({ | ||||
|             'downloaded_bytes': downloaded_bytes, | ||||
|             'total_bytes': downloaded_bytes, | ||||
|             'filename': ctx['filename'], | ||||
|             'status': 'finished', | ||||
|             'elapsed': elapsed, | ||||
|         }) | ||||
							
								
								
									
										210
									
								
								youtube_dl/downloader/hls.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										210
									
								
								youtube_dl/downloader/hls.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,210 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import binascii | ||||
| try: | ||||
|     from Crypto.Cipher import AES | ||||
|     can_decrypt_frag = True | ||||
| except ImportError: | ||||
|     can_decrypt_frag = False | ||||
|  | ||||
| from .fragment import FragmentFD | ||||
| from .external import FFmpegFD | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_urllib_error, | ||||
|     compat_urlparse, | ||||
|     compat_struct_pack, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     parse_m3u8_attributes, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
class HlsFD(FragmentFD):
    """ A limited implementation that does not require ffmpeg """

    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict):
        # Decide whether the native downloader can handle this m3u8 manifest;
        # if not, real_download() delegates the job to ffmpeg.
        UNSUPPORTED_FEATURES = (
            r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]

            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
        )
        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
        is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
        # AES-128 decryption needs pycrypto (see can_decrypt_frag at module top)
        check_results.append(can_decrypt_frag or not is_aes128_enc)
        # AES-128 combined with byte-range fragments is not supported natively
        check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
        check_results.append(not info_dict.get('is_live'))
        return all(check_results)

    def real_download(self, filename, info_dict):
        # Download an HLS stream fragment by fragment, with optional AES-128
        # decryption, ad-fragment skipping and resume support.
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        # Follow redirects so relative fragment URLs resolve correctly
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')

        if not self.can_download(s, info_dict):
            if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
                self.report_error('pycrypto not found. Please install it.')
                return False
            # Fall back to ffmpeg, forwarding our progress hooks to it
            self.report_warning(
                'hlsnative has detected features it does not support, '
                'extraction will be delegated to ffmpeg')
            fd = FFmpegFD(self.ydl, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        # Anvato/Uplynk mark ad segments with vendor-specific tag lines
        def is_ad_fragment_start(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        def is_ad_fragment_end(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
                    or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))

        # First pass over the playlist: count media vs ad fragments so
        # progress reporting knows the total up front.
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False
                continue
            if ad_frag_next:
                ad_frags += 1
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
        test = self.params.get('test', False)

        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_query:
            pass  # (placeholder removed)
        if extra_param_to_segment_url:
            extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
        i = 0
        media_sequence = 0
        # Decryption state for the current run of fragments; METHOD 'NONE'
        # means fragments are appended as-is.
        decrypt_info = {'METHOD': 'NONE'}
        byte_range = {}
        frag_index = 0
        ad_frag_next = False
        # Second pass: actually fetch the fragments
        for line in s.splitlines():
            line = line.strip()
            if line:
                if not line.startswith('#'):
                    if ad_frag_next:
                        continue
                    frag_index += 1
                    # Skip fragments already downloaded in a previous run
                    if frag_index <= ctx['fragment_index']:
                        continue
                    frag_url = (
                        line
                        if re.match(r'^https?://', line)
                        else compat_urlparse.urljoin(man_url, line))
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)
                    count = 0
                    headers = info_dict.get('http_headers', {})
                    if byte_range:
                        headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
                    # Retry loop for transient HTTP errors on this fragment
                    while count <= fragment_retries:
                        try:
                            success, frag_content = self._download_fragment(
                                ctx, frag_url, info_dict, headers)
                            if not success:
                                return False
                            break
                        except compat_urllib_error.HTTPError as err:
                            # Unavailable (possibly temporary) fragments may be served.
                            # First we try to retry then either skip or abort.
                            # See https://github.com/ytdl-org/youtube-dl/issues/10165,
                            # https://github.com/ytdl-org/youtube-dl/issues/10448).
                            count += 1
                            if count <= fragment_retries:
                                self.report_retry_fragment(err, frag_index, count, fragment_retries)
                    if count > fragment_retries:
                        if skip_unavailable_fragments:
                            i += 1
                            media_sequence += 1
                            self.report_skip_fragment(frag_index)
                            continue
                        self.report_error(
                            'giving up after %s fragment retries' % fragment_retries)
                        return False
                    if decrypt_info['METHOD'] == 'AES-128':
                        # Default IV per the HLS spec: the media sequence
                        # number as a big-endian 128-bit integer.
                        iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
                        # Fetch (and cache) the key lazily on first use
                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
                            self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
                        frag_content = AES.new(
                            decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
                    self._append_fragment(ctx, frag_content)
                    # We only download the first fragment during the test
                    if test:
                        break
                    i += 1
                    media_sequence += 1
                elif line.startswith('#EXT-X-KEY'):
                    # New key tag: parse attributes and invalidate the cached
                    # key if the key URI changed.
                    decrypt_url = decrypt_info.get('URI')
                    decrypt_info = parse_m3u8_attributes(line[11:])
                    if decrypt_info['METHOD'] == 'AES-128':
                        if 'IV' in decrypt_info:
                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                        if not re.match(r'^https?://', decrypt_info['URI']):
                            decrypt_info['URI'] = compat_urlparse.urljoin(
                                man_url, decrypt_info['URI'])
                        if extra_query:
                            decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                        if decrypt_url != decrypt_info['URI']:
                            decrypt_info['KEY'] = None
                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                    media_sequence = int(line[22:])
                elif line.startswith('#EXT-X-BYTERANGE'):
                    # Format: <length>[@<offset>]; without an offset the range
                    # continues where the previous one ended.
                    splitted_byte_range = line[17:].split('@')
                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                    byte_range = {
                        'start': sub_range_start,
                        'end': sub_range_start + int(splitted_byte_range[0]),
                    }
                elif is_ad_fragment_start(line):
                    ad_frag_next = True
                elif is_ad_fragment_end(line):
                    ad_frag_next = False

        self._finish_frag_download(ctx)

        return True
							
								
								
									
										354
									
								
								youtube_dl/downloader/http.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										354
									
								
								youtube_dl/downloader/http.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,354 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import errno | ||||
| import os | ||||
| import socket | ||||
| import time | ||||
| import random | ||||
| import re | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ContentTooShortError, | ||||
|     encodeFilename, | ||||
|     int_or_none, | ||||
|     sanitize_open, | ||||
|     sanitized_Request, | ||||
|     write_xattr, | ||||
|     XAttrMetadataError, | ||||
|     XAttrUnavailableError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class HttpFD(FileDownloader): | ||||
|     def real_download(self, filename, info_dict): | ||||
|         url = info_dict['url'] | ||||
|  | ||||
|         class DownloadContext(dict): | ||||
|             __getattr__ = dict.get | ||||
|             __setattr__ = dict.__setitem__ | ||||
|             __delattr__ = dict.__delitem__ | ||||
|  | ||||
|         ctx = DownloadContext() | ||||
|         ctx.filename = filename | ||||
|         ctx.tmpfilename = self.temp_name(filename) | ||||
|         ctx.stream = None | ||||
|  | ||||
|         # Do not include the Accept-Encoding header | ||||
|         headers = {'Youtubedl-no-compression': 'True'} | ||||
|         add_headers = info_dict.get('http_headers') | ||||
|         if add_headers: | ||||
|             headers.update(add_headers) | ||||
|  | ||||
|         is_test = self.params.get('test', False) | ||||
|         chunk_size = self._TEST_FILE_SIZE if is_test else ( | ||||
|             info_dict.get('downloader_options', {}).get('http_chunk_size') | ||||
|             or self.params.get('http_chunk_size') or 0) | ||||
|  | ||||
|         ctx.open_mode = 'wb' | ||||
|         ctx.resume_len = 0 | ||||
|         ctx.data_len = None | ||||
|         ctx.block_size = self.params.get('buffersize', 1024) | ||||
|         ctx.start_time = time.time() | ||||
|         ctx.chunk_size = None | ||||
|  | ||||
|         if self.params.get('continuedl', True): | ||||
|             # Establish possible resume length | ||||
|             if os.path.isfile(encodeFilename(ctx.tmpfilename)): | ||||
|                 ctx.resume_len = os.path.getsize( | ||||
|                     encodeFilename(ctx.tmpfilename)) | ||||
|  | ||||
|         ctx.is_resume = ctx.resume_len > 0 | ||||
|  | ||||
|         count = 0 | ||||
|         retries = self.params.get('retries', 0) | ||||
|  | ||||
|         class SucceedDownload(Exception): | ||||
|             pass | ||||
|  | ||||
|         class RetryDownload(Exception): | ||||
|             def __init__(self, source_error): | ||||
|                 self.source_error = source_error | ||||
|  | ||||
|         class NextFragment(Exception): | ||||
|             pass | ||||
|  | ||||
|         def set_range(req, start, end): | ||||
|             range_header = 'bytes=%d-' % start | ||||
|             if end: | ||||
|                 range_header += compat_str(end) | ||||
|             req.add_header('Range', range_header) | ||||
|  | ||||
|         def establish_connection(): | ||||
|             ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) | ||||
|                               if not is_test and chunk_size else chunk_size) | ||||
|             if ctx.resume_len > 0: | ||||
|                 range_start = ctx.resume_len | ||||
|                 if ctx.is_resume: | ||||
|                     self.report_resuming_byte(ctx.resume_len) | ||||
|                 ctx.open_mode = 'ab' | ||||
|             elif ctx.chunk_size > 0: | ||||
|                 range_start = 0 | ||||
|             else: | ||||
|                 range_start = None | ||||
|             ctx.is_resume = False | ||||
|             range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None | ||||
|             if range_end and ctx.data_len is not None and range_end >= ctx.data_len: | ||||
|                 range_end = ctx.data_len - 1 | ||||
|             has_range = range_start is not None | ||||
|             ctx.has_range = has_range | ||||
|             request = sanitized_Request(url, None, headers) | ||||
|             if has_range: | ||||
|                 set_range(request, range_start, range_end) | ||||
|             # Establish connection | ||||
|             try: | ||||
|                 ctx.data = self.ydl.urlopen(request) | ||||
|                 # When trying to resume, Content-Range HTTP header of response has to be checked | ||||
|                 # to match the value of requested Range HTTP header. This is due to a webservers | ||||
|                 # that don't support resuming and serve a whole file with no Content-Range | ||||
|                 # set in response despite of requested Range (see | ||||
|                 # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) | ||||
|                 if has_range: | ||||
|                     content_range = ctx.data.headers.get('Content-Range') | ||||
|                     if content_range: | ||||
|                         content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) | ||||
|                         # Content-Range is present and matches requested Range, resume is possible | ||||
|                         if content_range_m: | ||||
|                             if range_start == int(content_range_m.group(1)): | ||||
|                                 content_range_end = int_or_none(content_range_m.group(2)) | ||||
|                                 content_len = int_or_none(content_range_m.group(3)) | ||||
|                                 accept_content_len = ( | ||||
|                                     # Non-chunked download | ||||
|                                     not ctx.chunk_size | ||||
|                                     # Chunked download and requested piece or | ||||
|                                     # its part is promised to be served | ||||
|                                     or content_range_end == range_end | ||||
|                                     or content_len < range_end) | ||||
|                                 if accept_content_len: | ||||
|                                     ctx.data_len = content_len | ||||
|                                     return | ||||
|                     # Content-Range is either not present or invalid. Assuming remote webserver is | ||||
|                     # trying to send the whole file, resume is not possible, so wiping the local file | ||||
|                     # and performing entire redownload | ||||
|                     self.report_unable_to_resume() | ||||
|                     ctx.resume_len = 0 | ||||
|                     ctx.open_mode = 'wb' | ||||
|                 ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) | ||||
|                 return | ||||
|             except (compat_urllib_error.HTTPError, ) as err: | ||||
|                 if err.code == 416: | ||||
|                     # Unable to resume (requested range not satisfiable) | ||||
|                     try: | ||||
|                         # Open the connection again without the range header | ||||
|                         ctx.data = self.ydl.urlopen( | ||||
|                             sanitized_Request(url, None, headers)) | ||||
|                         content_length = ctx.data.info()['Content-Length'] | ||||
|                     except (compat_urllib_error.HTTPError, ) as err: | ||||
|                         if err.code < 500 or err.code >= 600: | ||||
|                             raise | ||||
|                     else: | ||||
|                         # Examine the reported length | ||||
|                         if (content_length is not None | ||||
|                                 and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): | ||||
|                             # The file had already been fully downloaded. | ||||
|                             # Explanation to the above condition: in issue #175 it was revealed that | ||||
|                             # YouTube sometimes adds or removes a few bytes from the end of the file, | ||||
|                             # changing the file size slightly and causing problems for some users. So | ||||
|                             # I decided to implement a suggested change and consider the file | ||||
|                             # completely downloaded if the file size differs less than 100 bytes from | ||||
|                             # the one in the hard drive. | ||||
|                             self.report_file_already_downloaded(ctx.filename) | ||||
|                             self.try_rename(ctx.tmpfilename, ctx.filename) | ||||
|                             self._hook_progress({ | ||||
|                                 'filename': ctx.filename, | ||||
|                                 'status': 'finished', | ||||
|                                 'downloaded_bytes': ctx.resume_len, | ||||
|                                 'total_bytes': ctx.resume_len, | ||||
|                             }) | ||||
|                             raise SucceedDownload() | ||||
|                         else: | ||||
|                             # The length does not match, we start the download over | ||||
|                             self.report_unable_to_resume() | ||||
|                             ctx.resume_len = 0 | ||||
|                             ctx.open_mode = 'wb' | ||||
|                             return | ||||
|                 elif err.code < 500 or err.code >= 600: | ||||
|                     # Unexpected HTTP error | ||||
|                     raise | ||||
|                 raise RetryDownload(err) | ||||
|             except socket.error as err: | ||||
|                 if err.errno != errno.ECONNRESET: | ||||
|                     # Connection reset is no problem, just retry | ||||
|                     raise | ||||
|                 raise RetryDownload(err) | ||||
|  | ||||
|         def download(): | ||||
|             data_len = ctx.data.info().get('Content-length', None) | ||||
|  | ||||
|             # Range HTTP header may be ignored/unsupported by a webserver | ||||
|             # (e.g. extractor/scivee.py, extractor/bambuser.py). | ||||
|             # However, for a test we still would like to download just a piece of a file. | ||||
|             # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control | ||||
|             # block size when downloading a file. | ||||
|             if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): | ||||
|                 data_len = self._TEST_FILE_SIZE | ||||
|  | ||||
|             if data_len is not None: | ||||
|                 data_len = int(data_len) + ctx.resume_len | ||||
|                 min_data_len = self.params.get('min_filesize') | ||||
|                 max_data_len = self.params.get('max_filesize') | ||||
|                 if min_data_len is not None and data_len < min_data_len: | ||||
|                     self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) | ||||
|                     return False | ||||
|                 if max_data_len is not None and data_len > max_data_len: | ||||
|                     self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) | ||||
|                     return False | ||||
|  | ||||
|             byte_counter = 0 + ctx.resume_len | ||||
|             block_size = ctx.block_size | ||||
|             start = time.time() | ||||
|  | ||||
|             # measure time over whole while-loop, so slow_down() and best_block_size() work together properly | ||||
|             now = None  # needed for slow_down() in the first loop run | ||||
|             before = start  # start measuring | ||||
|  | ||||
|             def retry(e): | ||||
|                 to_stdout = ctx.tmpfilename == '-' | ||||
|                 if not to_stdout: | ||||
|                     ctx.stream.close() | ||||
|                 ctx.stream = None | ||||
|                 ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename)) | ||||
|                 raise RetryDownload(e) | ||||
|  | ||||
|             while True: | ||||
|                 try: | ||||
|                     # Download and write | ||||
|                     data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter)) | ||||
|                 # socket.timeout is a subclass of socket.error but may not have | ||||
|                 # errno set | ||||
|                 except socket.timeout as e: | ||||
|                     retry(e) | ||||
|                 except socket.error as e: | ||||
|                     if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT): | ||||
|                         raise | ||||
|                     retry(e) | ||||
|  | ||||
|                 byte_counter += len(data_block) | ||||
|  | ||||
|                 # exit loop when download is finished | ||||
|                 if len(data_block) == 0: | ||||
|                     break | ||||
|  | ||||
|                 # Open destination file just in time | ||||
|                 if ctx.stream is None: | ||||
|                     try: | ||||
|                         ctx.stream, ctx.tmpfilename = sanitize_open( | ||||
|                             ctx.tmpfilename, ctx.open_mode) | ||||
|                         assert ctx.stream is not None | ||||
|                         ctx.filename = self.undo_temp_name(ctx.tmpfilename) | ||||
|                         self.report_destination(ctx.filename) | ||||
|                     except (OSError, IOError) as err: | ||||
|                         self.report_error('unable to open for writing: %s' % str(err)) | ||||
|                         return False | ||||
|  | ||||
|                     if self.params.get('xattr_set_filesize', False) and data_len is not None: | ||||
|                         try: | ||||
|                             write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) | ||||
|                         except (XAttrUnavailableError, XAttrMetadataError) as err: | ||||
|                             self.report_error('unable to set filesize xattr: %s' % str(err)) | ||||
|  | ||||
|                 try: | ||||
|                     ctx.stream.write(data_block) | ||||
|                 except (IOError, OSError) as err: | ||||
|                     self.to_stderr('\n') | ||||
|                     self.report_error('unable to write data: %s' % str(err)) | ||||
|                     return False | ||||
|  | ||||
|                 # Apply rate limit | ||||
|                 self.slow_down(start, now, byte_counter - ctx.resume_len) | ||||
|  | ||||
|                 # end measuring of one loop run | ||||
|                 now = time.time() | ||||
|                 after = now | ||||
|  | ||||
|                 # Adjust block size | ||||
|                 if not self.params.get('noresizebuffer', False): | ||||
|                     block_size = self.best_block_size(after - before, len(data_block)) | ||||
|  | ||||
|                 before = after | ||||
|  | ||||
|                 # Progress message | ||||
|                 speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) | ||||
|                 if ctx.data_len is None: | ||||
|                     eta = None | ||||
|                 else: | ||||
|                     eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len) | ||||
|  | ||||
|                 self._hook_progress({ | ||||
|                     'status': 'downloading', | ||||
|                     'downloaded_bytes': byte_counter, | ||||
|                     'total_bytes': ctx.data_len, | ||||
|                     'tmpfilename': ctx.tmpfilename, | ||||
|                     'filename': ctx.filename, | ||||
|                     'eta': eta, | ||||
|                     'speed': speed, | ||||
|                     'elapsed': now - ctx.start_time, | ||||
|                 }) | ||||
|  | ||||
|                 if data_len is not None and byte_counter == data_len: | ||||
|                     break | ||||
|  | ||||
|             if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: | ||||
|                 ctx.resume_len = byte_counter | ||||
|                 # ctx.block_size = block_size | ||||
|                 raise NextFragment() | ||||
|  | ||||
|             if ctx.stream is None: | ||||
|                 self.to_stderr('\n') | ||||
|                 self.report_error('Did not get any data blocks') | ||||
|                 return False | ||||
|             if ctx.tmpfilename != '-': | ||||
|                 ctx.stream.close() | ||||
|  | ||||
|             if data_len is not None and byte_counter != data_len: | ||||
|                 err = ContentTooShortError(byte_counter, int(data_len)) | ||||
|                 if count <= retries: | ||||
|                     retry(err) | ||||
|                 raise err | ||||
|  | ||||
|             self.try_rename(ctx.tmpfilename, ctx.filename) | ||||
|  | ||||
|             # Update file modification time | ||||
|             if self.params.get('updatetime', True): | ||||
|                 info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) | ||||
|  | ||||
|             self._hook_progress({ | ||||
|                 'downloaded_bytes': byte_counter, | ||||
|                 'total_bytes': byte_counter, | ||||
|                 'filename': ctx.filename, | ||||
|                 'status': 'finished', | ||||
|                 'elapsed': time.time() - ctx.start_time, | ||||
|             }) | ||||
|  | ||||
|             return True | ||||
|  | ||||
|         while count <= retries: | ||||
|             try: | ||||
|                 establish_connection() | ||||
|                 return download() | ||||
|             except RetryDownload as e: | ||||
|                 count += 1 | ||||
|                 if count <= retries: | ||||
|                     self.report_retry(e.source_error, count, retries) | ||||
|                 continue | ||||
|             except NextFragment: | ||||
|                 continue | ||||
|             except SucceedDownload: | ||||
|                 return True | ||||
|  | ||||
|         self.report_error('giving up after %s retries' % retries) | ||||
|         return False | ||||
							
								
								
									
										259
									
								
								youtube_dl/downloader/ism.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										259
									
								
								youtube_dl/downloader/ism.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,259 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import time | ||||
| import binascii | ||||
| import io | ||||
|  | ||||
| from .fragment import FragmentFD | ||||
| from ..compat import ( | ||||
|     compat_Struct, | ||||
|     compat_urllib_error, | ||||
| ) | ||||
|  | ||||
|  | ||||
| u8 = compat_Struct('>B') | ||||
| u88 = compat_Struct('>Bx') | ||||
| u16 = compat_Struct('>H') | ||||
| u1616 = compat_Struct('>Hxx') | ||||
| u32 = compat_Struct('>I') | ||||
| u64 = compat_Struct('>Q') | ||||
|  | ||||
| s88 = compat_Struct('>bx') | ||||
| s16 = compat_Struct('>h') | ||||
| s1616 = compat_Struct('>hxx') | ||||
| s32 = compat_Struct('>i') | ||||
|  | ||||
| unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) | ||||
|  | ||||
| TRACK_ENABLED = 0x1 | ||||
| TRACK_IN_MOVIE = 0x2 | ||||
| TRACK_IN_PREVIEW = 0x4 | ||||
|  | ||||
| SELF_CONTAINED = 0x1 | ||||
|  | ||||
|  | ||||
def box(box_type, payload):
    """Serialize an ISO BMFF box: 32-bit big-endian total size, 4-byte type, payload."""
    total_size = 8 + len(payload)  # the 4-byte size and 4-byte type fields count too
    return u32.pack(total_size) + box_type + payload
|  | ||||
|  | ||||
def full_box(box_type, version, flags, payload):
    """Serialize a "full" box: one version byte plus a 24-bit flags field precede the payload."""
    # u32.pack(flags)[1:] keeps only the low three bytes, i.e. a 24-bit flags field
    version_and_flags = u8.pack(version) + u32.pack(flags)[1:]
    return box(box_type, version_and_flags + payload)
|  | ||||
|  | ||||
def write_piff_header(stream, params):
    """Write a PIFF/ISMV initialization segment (ftyp + moov boxes) to *stream*.

    The header describes a single audio or video track so that the raw
    moof/mdat fragments appended afterwards form a playable fragmented MP4.
    A track is treated as audio when both width and height are 0.

    params keys:
        track_id, fourcc, duration               -- required
        timescale (default 10000000), language (default 'und'),
        width, height (default 0),
        channels, bits_per_sample, sampling_rate -- audio
        codec_private_data, nal_unit_length_field -- video (H264/AVC1)

    Raises:
        IOError: if *fourcc* is a codec this writer cannot describe
            (previously this surfaced as an unbound-variable NameError).
    """
    track_id = params['track_id']
    fourcc = params['fourcc']
    duration = params['duration']
    timescale = params.get('timescale', 10000000)
    language = params.get('language', 'und')
    height = params.get('height', 0)
    width = params.get('width', 0)
    is_audio = width == 0 and height == 0
    creation_time = modification_time = int(time.time())

    ftyp_payload = b'isml'  # major brand
    ftyp_payload += u32.pack(1)  # minor version
    ftyp_payload += b'piff' + b'iso2'  # compatible brands
    stream.write(box(b'ftyp', ftyp_payload))  # File Type Box

    mvhd_payload = u64.pack(creation_time)
    mvhd_payload += u64.pack(modification_time)
    mvhd_payload += u32.pack(timescale)
    mvhd_payload += u64.pack(duration)
    mvhd_payload += s1616.pack(1)  # rate
    mvhd_payload += s88.pack(1)  # volume
    mvhd_payload += u16.pack(0)  # reserved
    mvhd_payload += u32.pack(0) * 2  # reserved
    mvhd_payload += unity_matrix
    mvhd_payload += u32.pack(0) * 6  # pre defined
    mvhd_payload += u32.pack(0xffffffff)  # next track id
    moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload)  # Movie Header Box

    tkhd_payload = u64.pack(creation_time)
    tkhd_payload += u64.pack(modification_time)
    tkhd_payload += u32.pack(track_id)  # track id
    tkhd_payload += u32.pack(0)  # reserved
    tkhd_payload += u64.pack(duration)
    tkhd_payload += u32.pack(0) * 2  # reserved
    tkhd_payload += s16.pack(0)  # layer
    tkhd_payload += s16.pack(0)  # alternate group
    tkhd_payload += s88.pack(1 if is_audio else 0)  # volume
    tkhd_payload += u16.pack(0)  # reserved
    tkhd_payload += unity_matrix
    tkhd_payload += u1616.pack(width)
    tkhd_payload += u1616.pack(height)
    trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload)  # Track Header Box

    mdhd_payload = u64.pack(creation_time)
    mdhd_payload += u64.pack(modification_time)
    mdhd_payload += u32.pack(timescale)
    mdhd_payload += u64.pack(duration)
    # ISO 639-2/T language packed as three 5-bit values (each letter minus 0x60)
    mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
    mdhd_payload += u16.pack(0)  # pre defined
    mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload)  # Media Header Box

    hdlr_payload = u32.pack(0)  # pre defined
    hdlr_payload += b'soun' if is_audio else b'vide'  # handler type
    hdlr_payload += u32.pack(0) * 3  # reserved
    hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0'  # name
    mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload)  # Handler Reference Box

    if is_audio:
        smhd_payload = s88.pack(0)  # balance
        smhd_payload += u16.pack(0)  # reserved
        media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
    else:
        vmhd_payload = u16.pack(0)  # graphics mode
        vmhd_payload += u16.pack(0) * 3  # opcolor
        media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
    minf_payload = media_header_box

    dref_payload = u32.pack(1)  # entry count
    dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'')  # Data Entry URL Box
    dinf_payload = full_box(b'dref', 0, 0, dref_payload)  # Data Reference Box
    minf_payload += box(b'dinf', dinf_payload)  # Data Information Box

    stsd_payload = u32.pack(1)  # entry count

    sample_entry_payload = u8.pack(0) * 6  # reserved
    sample_entry_payload += u16.pack(1)  # data reference index
    if is_audio:
        sample_entry_payload += u32.pack(0) * 2  # reserved
        sample_entry_payload += u16.pack(params.get('channels', 2))
        sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
        sample_entry_payload += u16.pack(0)  # pre defined
        sample_entry_payload += u16.pack(0)  # reserved
        sample_entry_payload += u1616.pack(params['sampling_rate'])

        if fourcc == 'AACL':
            sample_entry_box = box(b'mp4a', sample_entry_payload)
        else:
            # fail loudly instead of the NameError the fall-through used to raise
            raise IOError('Unsupported audio fourcc: %s' % fourcc)
    else:
        sample_entry_payload += u16.pack(0)  # pre defined
        sample_entry_payload += u16.pack(0)  # reserved
        sample_entry_payload += u32.pack(0) * 3  # pre defined
        sample_entry_payload += u16.pack(width)
        sample_entry_payload += u16.pack(height)
        sample_entry_payload += u1616.pack(0x48)  # horiz resolution 72 dpi
        sample_entry_payload += u1616.pack(0x48)  # vert resolution 72 dpi
        sample_entry_payload += u32.pack(0)  # reserved
        sample_entry_payload += u16.pack(1)  # frame count
        sample_entry_payload += u8.pack(0) * 32  # compressor name
        sample_entry_payload += u16.pack(0x18)  # depth
        sample_entry_payload += s16.pack(-1)  # pre defined

        codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
        if fourcc in ('H264', 'AVC1'):
            # codec_private_data is "<start code>SPS<start code>PPS"; split on the
            # 4-byte Annex B start code (0x00000001)
            sps, pps = codec_private_data.split(u32.pack(1))[1:]
            avcc_payload = u8.pack(1)  # configuration version
            avcc_payload += sps[1:4]  # avc profile indication + profile compatibility + avc level indication
            avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1))  # complete representation (1) + reserved (11111) + length size minus one
            avcc_payload += u8.pack(1)  # reserved (0) + number of sps (0000001)
            avcc_payload += u16.pack(len(sps))
            avcc_payload += sps
            avcc_payload += u8.pack(1)  # number of pps
            avcc_payload += u16.pack(len(pps))
            avcc_payload += pps
            sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
            sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
        else:
            # fail loudly instead of the NameError the fall-through used to raise
            raise IOError('Unsupported video fourcc: %s' % fourcc)
    stsd_payload += sample_entry_box

    stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box

    stts_payload = u32.pack(0)  # entry count
    stbl_payload += full_box(b'stts', 0, 0, stts_payload)  # Decoding Time to Sample Box

    stsc_payload = u32.pack(0)  # entry count
    stbl_payload += full_box(b'stsc', 0, 0, stsc_payload)  # Sample To Chunk Box

    stco_payload = u32.pack(0)  # entry count
    stbl_payload += full_box(b'stco', 0, 0, stco_payload)  # Chunk Offset Box

    minf_payload += box(b'stbl', stbl_payload)  # Sample Table Box

    mdia_payload += box(b'minf', minf_payload)  # Media Information Box

    trak_payload += box(b'mdia', mdia_payload)  # Media Box

    moov_payload += box(b'trak', trak_payload)  # Track Box

    mehd_payload = u64.pack(duration)
    mvex_payload = full_box(b'mehd', 1, 0, mehd_payload)  # Movie Extends Header Box

    trex_payload = u32.pack(track_id)  # track id
    trex_payload += u32.pack(1)  # default sample description index
    trex_payload += u32.pack(0)  # default sample duration
    trex_payload += u32.pack(0)  # default sample size
    trex_payload += u32.pack(0)  # default sample flags
    mvex_payload += full_box(b'trex', 0, 0, trex_payload)  # Track Extends Box

    moov_payload += box(b'mvex', mvex_payload)  # Movie Extends Box
    stream.write(box(b'moov', moov_payload))  # Movie Box
|  | ||||
|  | ||||
def extract_box_data(data, box_sequence):
    """Walk the ISO BMFF boxes in *data*, descending through the nested box
    types named in *box_sequence*, and return the payload of the last one."""
    reader = io.BytesIO(data)
    wanted_type = box_sequence[0]
    while True:
        box_size = u32.unpack(reader.read(4))[0]
        current_type = reader.read(4)
        if current_type != wanted_type:
            # not the box we want: skip over its payload (size includes the
            # 8-byte header we already consumed) and keep scanning siblings
            reader.seek(box_size - 8, 1)
            continue
        payload = reader.read(box_size - 8)
        if len(box_sequence) == 1:
            return payload
        # recurse into the matched box for the remaining path components
        return extract_box_data(payload, box_sequence[1:])
|  | ||||
|  | ||||
class IsmFD(FragmentFD):
    """
    Download segments in a ISM manifest
    """

    FD_NAME = 'ism'

    def real_download(self, filename, info_dict):
        """Download all fragments of an ISM stream into *filename*.

        The first successfully fetched fragment is used to recover the
        track id (from its moof/traf/tfhd box) so a PIFF initialization
        header can be written before any fragment data.  Returns True on
        success, False on a fatal error.
        """
        # In test mode only the first fragment is downloaded.
        segments = info_dict['fragments'][:1] if self.params.get(
            'test', False) else info_dict['fragments']

        ctx = {
            'filename': filename,
            'total_frags': len(segments),
        }

        # Sets up ctx (dest_stream, fragment_index, ...) and the progress state.
        self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)

        # True once the PIFF header has been emitted (done only for the
        # first fragment written).
        track_written = False
        frag_index = 0
        for i, segment in enumerate(segments):
            frag_index += 1
            # Skip fragments already downloaded in a previous (resumed) run.
            if frag_index <= ctx['fragment_index']:
                continue
            count = 0
            # Retry loop: each HTTP error consumes one retry; success breaks out.
            while count <= fragment_retries:
                try:
                    success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
                    if not success:
                        return False
                    if not track_written:
                        # tfhd payload: 4 bytes version/flags, then the 32-bit track id.
                        tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
                        info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
                        write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
                        track_written = True
                    self._append_fragment(ctx, frag_content)
                    break
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            if count > fragment_retries:
                # All retries exhausted for this fragment.
                if skip_unavailable_fragments:
                    self.report_skip_fragment(frag_index)
                    continue
                self.report_error('giving up after %s fragment retries' % fragment_retries)
                return False

        self._finish_frag_download(ctx)

        return True
							
								
								
									
										214
									
								
								youtube_dl/downloader/rtmp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										214
									
								
								youtube_dl/downloader/rtmp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,214 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| import time | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     check_executable, | ||||
|     encodeFilename, | ||||
|     encodeArgument, | ||||
|     get_exe_version, | ||||
| ) | ||||
|  | ||||
|  | ||||
def rtmpdump_version():
    """Return the installed rtmpdump version string, or None if it cannot be run."""
    version_pattern = r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)'
    return get_exe_version('rtmpdump', ['--help'], version_pattern)
|  | ||||
|  | ||||
class RtmpFD(FileDownloader):
    """Download RTMP streams by shelling out to the external rtmpdump binary."""

    def real_download(self, filename, info_dict):
        """Download *info_dict*'s RTMP stream into *filename* via rtmpdump,
        re-invoking it with --resume while the download stays incomplete.
        Returns True on success, False on a fatal error.
        """
        def run_rtmpdump(args):
            """Run one rtmpdump invocation, parsing its stderr progress lines
            into progress hooks; return the process exit code."""
            start = time.time()
            # Progress printed by rtmpdump is cumulative; remember where this
            # invocation started so speed/ETA cover only the new data.
            resume_percent = None
            resume_downloaded_data_len = None
            proc = subprocess.Popen(args, stderr=subprocess.PIPE)
            cursor_in_new_line = True
            proc_stderr_closed = False
            try:
                while not proc_stderr_closed:
                    # read line from stderr
                    # rtmpdump updates progress with '\r', so read byte by byte
                    # and treat both '\r' and '\n' as line terminators
                    line = ''
                    while True:
                        char = proc.stderr.read(1)
                        if not char:
                            proc_stderr_closed = True
                            break
                        if char in [b'\r', b'\n']:
                            break
                        line += char.decode('ascii', 'replace')
                    if not line:
                        # proc_stderr_closed is True
                        continue
                    # e.g. "123.456 kB / 12.34 sec (5.6%)" -- non-live progress
                    mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
                    if mobj:
                        downloaded_data_len = int(float(mobj.group(1)) * 1024)
                        percent = float(mobj.group(2))
                        if not resume_percent:
                            resume_percent = percent
                            resume_downloaded_data_len = downloaded_data_len
                        time_now = time.time()
                        eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
                        speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
                        data_len = None
                        if percent > 0:
                            # extrapolate total size from bytes-so-far and percent
                            data_len = int(downloaded_data_len * 100 / percent)
                        self._hook_progress({
                            'status': 'downloading',
                            'downloaded_bytes': downloaded_data_len,
                            'total_bytes_estimate': data_len,
                            'tmpfilename': tmpfilename,
                            'filename': filename,
                            'eta': eta,
                            'elapsed': time_now - start,
                            'speed': speed,
                        })
                        cursor_in_new_line = False
                    else:
                        # no percent for live streams
                        mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
                        if mobj:
                            downloaded_data_len = int(float(mobj.group(1)) * 1024)
                            time_now = time.time()
                            speed = self.calc_speed(start, time_now, downloaded_data_len)
                            self._hook_progress({
                                'downloaded_bytes': downloaded_data_len,
                                'tmpfilename': tmpfilename,
                                'filename': filename,
                                'status': 'downloading',
                                'elapsed': time_now - start,
                                'speed': speed,
                            })
                            cursor_in_new_line = False
                        elif self.params.get('verbose', False):
                            # any other rtmpdump output, echoed only in verbose mode
                            if not cursor_in_new_line:
                                self.to_screen('')
                            cursor_in_new_line = True
                            self.to_screen('[rtmpdump] ' + line)
            finally:
                proc.wait()
            if not cursor_in_new_line:
                self.to_screen('')
            return proc.returncode

        url = info_dict['url']
        player_url = info_dict.get('player_url')
        page_url = info_dict.get('page_url')
        app = info_dict.get('app')
        play_path = info_dict.get('play_path')
        tc_url = info_dict.get('tc_url')
        flash_version = info_dict.get('flash_version')
        live = info_dict.get('rtmp_live', False)
        conn = info_dict.get('rtmp_conn')
        protocol = info_dict.get('rtmp_protocol')
        real_time = info_dict.get('rtmp_real_time', False)
        no_resume = info_dict.get('no_resume', False)
        continue_dl = self.params.get('continuedl', True)

        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        test = self.params.get('test', False)

        # Check for rtmpdump first
        if not check_executable('rtmpdump', ['-h']):
            self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = [
            'rtmpdump', '--verbose', '-r', url,
            '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        if app is not None:
            basic_args += ['--app', app]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            basic_args += ['--tcUrl', tc_url]
        if test:
            # --stop 1 makes rtmpdump fetch only ~1 second of the stream
            basic_args += ['--stop', '1']
        if flash_version is not None:
            basic_args += ['--flashVer', flash_version]
        if live:
            basic_args += ['--live']
        if isinstance(conn, list):
            for entry in conn:
                basic_args += ['--conn', entry]
        elif isinstance(conn, compat_str):
            basic_args += ['--conn', conn]
        if protocol is not None:
            basic_args += ['--protocol', protocol]
        if real_time:
            basic_args += ['--realtime']

        args = basic_args
        if not no_resume and continue_dl and not live:
            args += ['--resume']
        if not live and continue_dl:
            args += ['--skip', '1']

        args = [encodeArgument(a) for a in args]

        self._debug_cmd(args, exe='rtmpdump')

        # rtmpdump exit codes (see its source/man page)
        RD_SUCCESS = 0
        RD_FAILED = 1
        RD_INCOMPLETE = 2
        RD_NO_CONNECT = 3

        started = time.time()

        try:
            retval = run_rtmpdump(args)
        except KeyboardInterrupt:
            # for live streams, stopping with Ctrl-C is the normal way to finish
            if not info_dict.get('is_live'):
                raise
            retval = RD_SUCCESS
            self.to_screen('\n[rtmpdump] Interrupted by user')

        if retval == RD_NO_CONNECT:
            self.report_error('[rtmpdump] Could not connect to RTMP server.')
            return False

        # Keep resuming while rtmpdump reports an interrupted/failed download
        # and the output file keeps growing.
        while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
            time.sleep(5.0)  # This seems to be needed
            args = basic_args + ['--resume']
            if retval == RD_FAILED:
                args += ['--skip', '1']
            args = [encodeArgument(a) for a in args]
            retval = run_rtmpdump(args)
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == RD_FAILED:
                # no progress was made: give up
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
                self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = RD_SUCCESS
                break
        if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
                'elapsed': time.time() - started,
            })
            return True
        else:
            self.to_stderr('\n')
            self.report_error('rtmpdump exited with code %d' % retval)
            return False
							
								
								
									
										47
									
								
								youtube_dl/downloader/rtsp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								youtube_dl/downloader/rtsp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import subprocess | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..utils import ( | ||||
|     check_executable, | ||||
|     encodeFilename, | ||||
| ) | ||||
|  | ||||
|  | ||||
class RtspFD(FileDownloader):
    """Download RTSP/MMS streams by delegating to mplayer or mpv."""

    def real_download(self, filename, info_dict):
        """Dump the stream from info_dict['url'] into *filename*.

        Tries mplayer first, then mpv; returns True on success,
        False when no player is available or the dump fails.
        """
        url = info_dict['url']
        self.report_destination(filename)
        tmp_name = self.temp_name(filename)

        # Probe for a usable player (each probe actually runs the binary).
        if check_executable('mplayer', ['-h']):
            cmd = [
                'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
                '-dumpstream', '-dumpfile', tmp_name, url]
        elif check_executable('mpv', ['-h']):
            cmd = [
                'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmp_name, url]
        else:
            self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
            return False

        self._debug_cmd(cmd)

        retcode = subprocess.call(cmd)
        if retcode != 0:
            self.to_stderr('\n')
            self.report_error('%s exited with code %d' % (cmd[0], retcode))
            return False

        fsize = os.path.getsize(encodeFilename(tmp_name))
        self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize))
        self.try_rename(tmp_name, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
							
								
								
									
										94
									
								
								youtube_dl/downloader/youtube_live_chat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								youtube_dl/downloader/youtube_live_chat.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .fragment import FragmentFD | ||||
|  | ||||
|  | ||||
| class YoutubeLiveChatReplayFD(FragmentFD): | ||||
|     """ Downloads YouTube live chat replays fragment by fragment """ | ||||
|  | ||||
|     FD_NAME = 'youtube_live_chat_replay' | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         video_id = info_dict['video_id'] | ||||
|         self.to_screen('[%s] Downloading live chat' % self.FD_NAME) | ||||
|  | ||||
|         test = self.params.get('test', False) | ||||
|  | ||||
|         ctx = { | ||||
|             'filename': filename, | ||||
|             'live': True, | ||||
|             'total_frags': None, | ||||
|         } | ||||
|  | ||||
|         def dl_fragment(url): | ||||
|             headers = info_dict.get('http_headers', {}) | ||||
|             return self._download_fragment(ctx, url, info_dict, headers) | ||||
|  | ||||
|         def parse_yt_initial_data(data): | ||||
|             window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});' | ||||
|             var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});' | ||||
|             for patt in window_patt, var_patt: | ||||
|                 try: | ||||
|                     raw_json = re.search(patt, data).group(1) | ||||
|                     return json.loads(raw_json) | ||||
|                 except AttributeError: | ||||
|                     continue | ||||
|  | ||||
|         self._prepare_and_start_frag_download(ctx) | ||||
|  | ||||
|         success, raw_fragment = dl_fragment( | ||||
|             'https://www.youtube.com/watch?v={}'.format(video_id)) | ||||
|         if not success: | ||||
|             return False | ||||
|         data = parse_yt_initial_data(raw_fragment) | ||||
|         continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] | ||||
|         # no data yet but required to call _append_fragment | ||||
|         self._append_fragment(ctx, b'') | ||||
|  | ||||
|         first = True | ||||
|         offset = None | ||||
|         while continuation_id is not None: | ||||
|             data = None | ||||
|             if first: | ||||
|                 url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id) | ||||
|                 success, raw_fragment = dl_fragment(url) | ||||
|                 if not success: | ||||
|                     return False | ||||
|                 data = parse_yt_initial_data(raw_fragment) | ||||
|             else: | ||||
|                 url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay' | ||||
|                        + '?continuation={}'.format(continuation_id) | ||||
|                        + '&playerOffsetMs={}'.format(offset - 5000) | ||||
|                        + '&hidden=false' | ||||
|                        + '&pbj=1') | ||||
|                 success, raw_fragment = dl_fragment(url) | ||||
|                 if not success: | ||||
|                     return False | ||||
|                 data = json.loads(raw_fragment)['response'] | ||||
|  | ||||
|             first = False | ||||
|             continuation_id = None | ||||
|  | ||||
|             live_chat_continuation = data['continuationContents']['liveChatContinuation'] | ||||
|             offset = None | ||||
|             processed_fragment = bytearray() | ||||
|             if 'actions' in live_chat_continuation: | ||||
|                 for action in live_chat_continuation['actions']: | ||||
|                     if 'replayChatItemAction' in action: | ||||
|                         replay_chat_item_action = action['replayChatItemAction'] | ||||
|                         offset = int(replay_chat_item_action['videoOffsetTimeMsec']) | ||||
|                     processed_fragment.extend( | ||||
|                         json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') | ||||
|                 continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation'] | ||||
|  | ||||
|             self._append_fragment(ctx, processed_fragment) | ||||
|  | ||||
|             if test or offset is None: | ||||
|                 break | ||||
|  | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|         return True | ||||
							
								
								
									
										46
									
								
								youtube_dl/extractor/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								youtube_dl/extractor/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
# Prefer the pre-generated lazy extractor stubs (written at build time) so
# importing this package does not load every extractor module up front.
try:
    from .lazy_extractors import *
    from .lazy_extractors import _ALL_CLASSES
    _LAZY_LOADER = True
except ImportError:
    # No lazy stubs available: fall back to eagerly importing everything.
    _LAZY_LOADER = False
    from .extractors import *

    # Collect every extractor class (conventionally named *IE) from this
    # module's namespace.  GenericIE is appended last on purpose: it is the
    # catch-all and must only match when no specialised extractor does.
    _ALL_CLASSES = [
        klass
        for name, klass in globals().items()
        if name.endswith('IE') and name != 'GenericIE'
    ]
    _ALL_CLASSES.append(GenericIE)
|  | ||||
|  | ||||
def gen_extractor_classes():
    """List every supported extractor class.

    Order is significant: a URL is handled by the first class that matches.
    """
    return _ALL_CLASSES
|  | ||||
|  | ||||
def gen_extractors():
    """Instantiate every supported extractor, preserving matching order."""
    return [extractor_cls() for extractor_cls in gen_extractor_classes()]
|  | ||||
|  | ||||
def list_extractors(age_limit):
    """Return the extractor instances suitable for *age_limit*.

    The result is sorted case-insensitively by extractor ID (IE_NAME).
    """
    suitable = [ie for ie in gen_extractors() if ie.is_suitable(age_limit)]
    suitable.sort(key=lambda ie: ie.IE_NAME.lower())
    return suitable
|  | ||||
|  | ||||
def get_info_extractor(ie_name):
    """Return the info extractor class registered under ``ie_name`` + 'IE'."""
    return globals()['%sIE' % ie_name]
							
								
								
									
										193
									
								
								youtube_dl/extractor/abc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										193
									
								
								youtube_dl/extractor/abc.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,193 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import hashlib | ||||
| import hmac | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     js_to_json, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ABCIE(InfoExtractor):
    # Extracts inline video/audio (or delegates embedded YouTube players)
    # from news articles on abc.net.au (Australian Broadcasting Corporation).
    IE_NAME = 'abc.net.au'
    _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
        'md5': 'cb3dd03b18455a661071ee1e28344d9f',
        'info_dict': {
            'id': '5868334',
            'ext': 'mp4',
            'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
            'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
        },
        'skip': 'this video has expired',
    }, {
        'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
        'md5': 'db2a5369238b51f9811ad815b69dc086',
        'info_dict': {
            'id': 'NvqvPeNZsHU',
            'ext': 'mp4',
            'upload_date': '20150816',
            'uploader': 'ABC News (Australia)',
            'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
            'uploader_id': 'NewsOnABC',
            'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
        },
        'add_ie': ['Youtube'],
        'skip': 'Not accessible from Travis CI server',
    }, {
        'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
        'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
        'info_dict': {
            'id': '6880080',
            'ext': 'mp3',
            'title': 'NAB lifts interest rates, following Westpac and CBA',
            'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
        },
    }, {
        'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the article's media, or delegate YouTube embeds."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The article page pushes its player payload via
        # inline{Video,Audio,YouTube}Data.push(<json>); capture that JSON
        # plus which player kind it belongs to.
        mobj = re.search(
            r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
            webpage)
        if mobj is None:
            # No inline player found: surface the site's own expiry notice
            # as an "expected" error if one is present on the page.
            expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
            if expired:
                raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
            raise ExtractorError('Unable to extract video urls')

        # The payload is JavaScript-flavoured JSON; normalise before parsing.
        urls_info = self._parse_json(
            mobj.group('json_data'), video_id, transform_source=js_to_json)

        # A single embed arrives as a dict; normalise to a list of embeds.
        if not isinstance(urls_info, list):
            urls_info = [urls_info]

        # YouTube embeds are handed off to the YouTube extractor.
        if mobj.group('type') == 'YouTube':
            return self.playlist_result([
                self.url_result(url_info['url']) for url_info in urls_info])

        # Audio embeds get vcodec 'none' so they are treated as audio-only.
        formats = [{
            'url': url_info['url'],
            'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none',
            'width': int_or_none(url_info.get('width')),
            'height': int_or_none(url_info.get('height')),
            'tbr': int_or_none(url_info.get('bitrate')),
            'filesize': int_or_none(url_info.get('filesize')),
        } for url_info in urls_info]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
|  | ||||
|  | ||||
class ABCIViewIE(InfoExtractor):
    """Extractor for ABC iview (Australian catch-up TV) programs."""
    IE_NAME = 'abc.net.au:iview'
    _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
    _GEO_COUNTRIES = ['AU']

    # ABC iview programs are normally available for 14 days only.
    _TESTS = [{
        'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
        'md5': '67715ce3c78426b11ba167d875ac6abf',
        'info_dict': {
            'id': 'LE1927H001S00',
            'ext': 'mp4',
            'title': "Series 11 Ep 1",
            'series': "Gruen",
            'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
            'upload_date': '20190925',
            'uploader_id': 'abc1',
            'timestamp': 1569445289,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Fetch program metadata and signed HLS formats for an iview video."""
        video_id = self._match_id(url)
        video_params = self._download_json(
            'https://iview.abc.net.au/api/programs/' + video_id, video_id)
        title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
        stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))

        # Sign the HLS-token request the same way the Android tablet client
        # does (HMAC-SHA256 over the request path with a fixed app key).
        house_number = video_params.get('episodeHouseNumber') or video_id
        path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
            int(time.time()), house_number)
        sig = hmac.new(
            b'android.content.res.Resources',
            path.encode('utf-8'), hashlib.sha256).hexdigest()
        token = self._download_webpage(
            'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)

        def tokenize_url(url, token):
            # Attach the auth token as the "hdnea" query parameter.
            return update_url_query(url, {
                'hdnea': token,
            })

        # Try stream qualities from best to worst and keep the first variant
        # that yields any formats.  Initialised to [] so that a playlist
        # entry carrying none of these qualities no longer raises NameError
        # at the _sort_formats() call below.
        formats = []
        for sd in ('720', 'sd', 'sd-low'):
            sd_url = try_get(
                stream, lambda x: x['streams']['hls'][sd], compat_str)
            if not sd_url:
                continue
            formats = self._extract_m3u8_formats(
                tokenize_url(sd_url, token), video_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
            if formats:
                break
        self._sort_formats(formats)

        subtitles = {}
        src_vtt = stream.get('captions', {}).get('src-vtt')
        if src_vtt:
            subtitles['en'] = [{
                'url': src_vtt,
                'ext': 'vtt',
            }]

        is_live = video_params.get('livestream') == '1'
        if is_live:
            title = self._live_title(title)

        return {
            'id': video_id,
            'title': title,
            'description': video_params.get('description'),
            'thumbnail': video_params.get('thumbnail'),
            'duration': int_or_none(video_params.get('eventDuration')),
            'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
            'series': unescapeHTML(video_params.get('seriesTitle')),
            'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
            'season_number': int_or_none(self._search_regex(
                r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
            'episode_number': int_or_none(self._search_regex(
                r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
            'episode_id': house_number,
            'uploader_id': video_params.get('channel'),
            'formats': formats,
            'subtitles': subtitles,
            'is_live': is_live,
        }
							
								
								
									
										148
									
								
								youtube_dl/extractor/abcnews.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										148
									
								
								youtube_dl/extractor/abcnews.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,148 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import calendar | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .amp import AMPIE | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from ..compat import compat_urlparse | ||||
|  | ||||
|  | ||||
class AbcNewsVideoIE(AMPIE):
    """Extractor for individual ABC News / FiveThirtyEight video players."""
    IE_NAME = 'abcnews:video'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            abcnews\.go\.com/
                            (?:
                                [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
                                video/embed\?.*?\bid=
                            )|
                            fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'http://abcnews.go.com/ThisWeek/video/week-exclusive-irans-foreign-minister-zarif-20411932',
        'info_dict': {
            'id': '20411932',
            'ext': 'mp4',
            'display_id': 'week-exclusive-irans-foreign-minister-zarif',
            'title': '\'This Week\' Exclusive: Iran\'s Foreign Minister Zarif',
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abcnews.go.com/video/embed?id=46979033',
        'only_matching': True,
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the numeric video id and pull metadata from the AMP feed."""
        match = re.match(self._VALID_URL, url)
        # Feed the numeric id into the AMP item feed handled by the base class.
        info = self._extract_feed_info(
            'http://abcnews.go.com/video/itemfeed?id=%s' % match.group('id'))
        info.update({
            'id': match.group('id'),
            'display_id': match.group('display_id'),
        })
        return info
|  | ||||
|  | ||||
class AbcNewsIE(InfoExtractor):
    """Extractor for abcnews.go.com story pages (video + optional YouTube embed)."""
    IE_NAME = 'abcnews'
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
        'info_dict': {
            'id': '10505354',
            'ext': 'flv',
            'display_id': 'dramatic-video-rare-death-job-america',
            'title': 'Occupational Hazards',
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20100428',
            'timestamp': 1272412800,
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '38897857',
            'ext': 'mp4',
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
            'upload_date': '20160515',
            'timestamp': 1463329500,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
            # The embedded YouTube video is blocked due to copyright issues
            'playlist_items': '1',
        },
        'add_ie': ['AbcNewsVideo'],
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the story's player video and any embedded YouTube video."""
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_url = self._search_regex(
            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
        full_video_url = compat_urlparse.urljoin(url, video_url)

        youtube_url = YoutubeIE._extract_url(webpage)

        # Parse the human-readable timestamp shown on the page.
        timestamp = None
        date_str = self._html_search_regex(
            r'<span[^>]+class="timestamp">([^<]+)</span>',
            webpage, 'timestamp', fatal=False)
        if date_str:
            tz_offset = 0
            if date_str.endswith(' ET'):  # Eastern Time
                # NOTE(review): a fixed UTC-5 offset ignores daylight saving
                # (EDT is UTC-4) -- confirm whether exact timestamps matter.
                tz_offset = -5
                date_str = date_str[:-3]
            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
            for date_format in date_formats:
                try:
                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
                    break  # first format that parses wins; skip the rest
                except ValueError:
                    continue
            if timestamp is not None:
                timestamp -= tz_offset * 3600

        # url_transparent: metadata comes from here, actual formats are
        # resolved by AbcNewsVideoIE for full_video_url.
        entry = {
            '_type': 'url_transparent',
            'ie_key': AbcNewsVideoIE.ie_key(),
            'url': full_video_url,
            'id': video_id,
            'display_id': display_id,
            'timestamp': timestamp,
        }

        if youtube_url:
            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
            return self.playlist_result(entries)

        return entry
							
								
								
									
										137
									
								
								youtube_dl/extractor/abcotvs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								youtube_dl/extractor/abcotvs.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,137 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     dict_get, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ABCOTVSIE(InfoExtractor):
    """Extractor for story videos on ABC owned-television-station sites."""
    IE_NAME = 'abcotvs'
    IE_DESC = 'ABC Owned Television Stations'
    _VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
            'info_dict': {
                'id': '472548',
                'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
                'ext': 'mp4',
                'title': 'East Bay museum celebrates synthesized music',
                'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1421118520,
                'upload_date': '20150113',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://abc7news.com/472581',
            'only_matching': True,
        },
        {
            'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
            'only_matching': True,
        },
    ]
    # Maps the site's domain prefix to the station call sign the API expects.
    _SITE_MAP = {
        '6abc': 'wpvi',
        'abc11': 'wtvd',
        'abc13': 'ktrk',
        'abc30': 'kfsn',
        'abc7': 'kabc',
        'abc7chicago': 'wls',
        'abc7news': 'kgo',
        'abc7ny': 'wabc',
    }

    def _real_extract(self, url):
        """Extract a station-site story video via the abcotvs content API."""
        site, display_id, video_id = re.match(self._VALID_URL, url).groups()
        display_id = display_id or video_id
        station = self._SITE_MAP[site]

        data = self._download_json(
            'https://api.abcotvs.com/v2/content', display_id, query={
                'id': video_id,
                'key': 'otv.web.%s.story' % station,
                'station': station,
            })['data']
        # Stories nest the clip under featuredMedia.video; fall back to the
        # top-level object when that path is missing.
        video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
        video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
        title = video.get('title') or video['linkText']

        formats = []
        m3u8_url = video.get('m3u8')
        if m3u8_url:
            # Reuse m3u8_url (previously re-read via video['m3u8']) and drop
            # the query string before requesting the master playlist.
            formats = self._extract_m3u8_formats(
                m3u8_url.split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
        mp4_url = video.get('mp4')
        if mp4_url:
            # Progressive MP4 fallback variant.
            formats.append({
                'abr': 128,
                'format_id': 'https',
                'height': 360,
                'url': mp4_url,
                'width': 640,
            })
        self._sort_formats(formats)

        image = video.get('image') or {}

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
            'thumbnail': dict_get(image, ('source', 'dynamicSource')),
            'timestamp': int_or_none(video.get('date')),
            'duration': int_or_none(video.get('length')),
            'formats': formats,
        }
|  | ||||
|  | ||||
class ABCOTVSClipsIE(InfoExtractor):
    """Extractor for clips hosted on clips.abcotvs.com."""
    IE_NAME = 'abcotvs:clips'
    _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://clips.abcotvs.com/kabc/video/214814',
        'info_dict': {
            'id': '214814',
            'ext': 'mp4',
            'title': 'SpaceX launch pad explosion destroys rocket, satellite',
            'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
            'upload_date': '20160901',
            'timestamp': 1472756695,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Fetch clip metadata from the vogo API and extract HLS formats."""
        clip_id = self._match_id(url)
        clip = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + clip_id, clip_id)['results'][0]
        title = clip['title']
        # Strip the query string before requesting the master playlist.
        formats = self._extract_m3u8_formats(
            clip['videoURL'].split('?')[0], clip_id, 'mp4')
        self._sort_formats(formats)

        return {
            'id': clip_id,
            'title': title,
            'description': clip.get('description'),
            'thumbnail': clip.get('thumbnailURL'),
            'duration': int_or_none(clip.get('duration')),
            'timestamp': int_or_none(clip.get('pubDate')),
            'formats': formats,
        }
							
								
								
									
										41
									
								
								youtube_dl/extractor/academicearth.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								youtube_dl/extractor/academicearth.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class AcademicEarthCourseIE(InfoExtractor):
    """Extractor for Academic Earth course (lecture playlist) pages."""
    _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)'
    IE_NAME = 'AcademicEarth:Course'
    _TEST = {
        'url': 'http://academicearth.org/playlists/laws-of-nature/',
        'info_dict': {
            'id': 'laws-of-nature',
            'title': 'Laws of Nature',
            'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.',
        },
        'playlist_count': 3,
    }

    def _real_extract(self, url):
        """Build a playlist from the lecture links on a course page."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        playlist_title = self._html_search_regex(
            r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', page, 'title')
        playlist_description = self._html_search_regex(
            r'<p class="excerpt"[^>]*?>(.*?)</p>',
            page, 'description', fatal=False)
        lecture_urls = re.findall(
            r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">',
            page)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'description': playlist_description,
            'entries': [self.url_result(lecture_url) for lecture_url in lecture_urls],
        }
							
								
								
									
										135
									
								
								youtube_dl/extractor/acast.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								youtube_dl/extractor/acast.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import functools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     OnDemandPagedList, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ACastIE(InfoExtractor):
    """Extract a single podcast episode from acast.com / play.acast.com."""
    IE_NAME = 'acast'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:embed|www)\.)?acast\.com/|
                            play\.acast\.com/s/
                        )
                        (?P<channel>[^/]+)/(?P<id>[^/#?]+)
                    '''
    _TESTS = [{
        'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
        'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
        'info_dict': {
            'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
            'ext': 'mp3',
            'title': '2. Raggarmordet - Röster ur det förflutna',
            'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
            'timestamp': 1477346700,
            'upload_date': '20161024',
            'duration': 2766.602563,
            'creator': 'Anton Berg & Martin Johnson',
            'series': 'Spår',
            'episode': '2. Raggarmordet - Röster ur det förflutna',
        }
    }, {
        'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
        'only_matching': True,
    }, {
        'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
        'only_matching': True,
    }, {
        'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel, display_id = re.match(self._VALID_URL, url).groups()
        # Episode metadata (including the media URL) from the feeder API.
        s = self._download_json(
            'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
            display_id)
        media_url = s['url']
        # If the URL used an episode UUID rather than a slug, resolve the
        # human-readable slug (and possibly a different channel) first, since
        # the splash API below is addressed by slug.
        if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
            episode_url = s.get('episodeUrl')
            if episode_url:
                display_id = episode_url
            else:
                channel, display_id = re.match(self._VALID_URL, s['link']).groups()
        # Richer episode/show metadata from the play-api splash endpoint.
        cast_data = self._download_json(
            'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
            display_id)['result']
        e = cast_data['episode']
        # Prefer the splash title, fall back to the feeder title (required).
        title = e.get('name') or s['title']
        return {
            'id': compat_str(e['id']),
            'display_id': display_id,
            'url': media_url,
            'title': title,
            'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
            'thumbnail': e.get('image'),
            'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
            'duration': float_or_none(e.get('duration') or s.get('duration')),
            'filesize': int_or_none(e.get('contentLength')),
            'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
            'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
            'season_number': int_or_none(e.get('seasonNumber')),
            'episode': title,
            'episode_number': int_or_none(e.get('episodeNumber')),
        }
|  | ||||
|  | ||||
class ACastChannelIE(InfoExtractor):
    """Playlist extractor for whole acast.com podcast channels."""
    IE_NAME = 'acast:channel'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?acast\.com/|
                            play\.acast\.com/s/
                        )
                        (?P<id>[^/#?]+)
                    '''
    _TESTS = [{
        'url': 'https://www.acast.com/todayinfocus',
        'info_dict': {
            'id': '4efc5294-5385-4847-98bd-519799ce5786',
            'title': 'Today in Focus',
            'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
        },
        'playlist_mincount': 35,
    }, {
        'url': 'http://play.acast.com/s/ft-banking-weekly',
        'only_matching': True,
    }]
    _API_BASE_URL = 'https://play.acast.com/api/'
    _PAGE_SIZE = 10

    @classmethod
    def suitable(cls, url):
        # Episode URLs are handled by ACastIE; only claim bare channel pages.
        if ACastIE.suitable(url):
            return False
        return super(ACastChannelIE, cls).suitable(url)

    def _fetch_page(self, channel_slug, page):
        # One page of the channel's episode list from the play API.
        episodes = self._download_json(
            self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
            channel_slug, note='Download page %d of channel data' % page)
        for episode in episodes:
            episode_url = 'https://play.acast.com/s/%s/%s' % (channel_slug, episode['url'])
            yield self.url_result(episode_url, 'ACast', episode['id'])

    def _real_extract(self, url):
        slug = self._match_id(url)
        channel = self._download_json(
            self._API_BASE_URL + 'channels/%s' % slug, slug)
        # Lazily page through the episodes instead of fetching them all.
        fetcher = functools.partial(self._fetch_page, slug)
        entries = OnDemandPagedList(fetcher, self._PAGE_SIZE)
        return self.playlist_result(
            entries, compat_str(channel['id']), channel['name'],
            channel.get('description'))
							
								
								
									
										207
									
								
								youtube_dl/extractor/adn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										207
									
								
								youtube_dl/extractor/adn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,207 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import binascii | ||||
| import json | ||||
| import os | ||||
| import random | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..aes import aes_cbc_decrypt | ||||
| from ..compat import ( | ||||
|     compat_b64decode, | ||||
|     compat_ord, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     bytes_to_intlist, | ||||
|     bytes_to_long, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     intlist_to_bytes, | ||||
|     long_to_bytes, | ||||
|     pkcs1pad, | ||||
|     strip_or_none, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ADNIE(InfoExtractor):
    """Extractor for Anime Digital Network (animedigitalnetwork.fr)."""
    IE_DESC = 'Anime Digital Network'
    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
        'md5': 'e497370d847fd79d9d4c74be55575c7a',
        'info_dict': {
            'id': '7778',
            'ext': 'mp4',
            'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
        }
    }
    _BASE_URL = 'http://animedigitalnetwork.fr'
    # RSA public key (modulus, exponent) used below to encrypt the token
    # payload sent in the Authorization header of the links request.
    _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
    # Cue position/line alignment values -> additive SSA {\a} alignment codes
    # (2, the default contribution, is used when a key is absent).
    _POS_ALIGN_MAP = {
        'start': 1,
        'end': 3,
    }
    _LINE_ALIGN_MAP = {
        'middle': 8,
        'end': 4,
    }

    @staticmethod
    def _ass_subtitles_timecode(seconds):
        # SSA timecodes use centiseconds: H:MM:SS.CC
        return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)

    def _get_subtitles(self, sub_path, video_id):
        """Download, decrypt and convert subtitles; return None on failure.

        Relies on ``self._K`` (random key half set in ``_real_extract``
        before ``extract_subtitles`` is called) to derive the AES key.
        """
        if not sub_path:
            return None

        enc_subtitles = self._download_webpage(
            urljoin(self._BASE_URL, sub_path),
            video_id, 'Downloading subtitles location', fatal=False) or '{}'
        # The first response may just be a JSON pointer to the real data.
        subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
        if subtitle_location:
            enc_subtitles = self._download_webpage(
                urljoin(self._BASE_URL, subtitle_location),
                video_id, 'Downloading subtitles data', fatal=False,
                headers={'Origin': 'https://animedigitalnetwork.fr'})
        if not enc_subtitles:
            return None

        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        # AES-CBC: the first 24 base64 characters carry the IV; the key is
        # the random half self._K plus a fixed hex suffix.
        dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
            bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
            bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
        ))
        # Strip block-cipher padding (the last byte gives the pad length).
        subtitles_json = self._parse_json(
            dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(),
            None, fatal=False)
        if not subtitles_json:
            return None

        subtitles = {}
        for sub_lang, sub in subtitles_json.items():
            # Build an SSA script alongside the raw JSON cues.
            ssa = '''[Script Info]
ScriptType:V4.00
[V4 Styles]
Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding
Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0
[Events]
Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
            for current in sub:
                start, end, text, line_align, position_align = (
                    float_or_none(current.get('startTime')),
                    float_or_none(current.get('endTime')),
                    current.get('text'), current.get('lineAlign'),
                    current.get('positionAlign'))
                if start is None or end is None or text is None:
                    continue
                # Combine both alignment axes; 2 is the style's default, so
                # only emit an override tag for non-default alignments.
                alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
                ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
                    self._ass_subtitles_timecode(start),
                    self._ass_subtitles_timecode(end),
                    '{\\a%d}' % alignment if alignment != 2 else '',
                    text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}'))

            if sub_lang == 'vostf':
                sub_lang = 'fr'
            subtitles.setdefault(sub_lang, []).extend([{
                'ext': 'json',
                'data': json.dumps(sub),
            }, {
                'ext': 'ssa',
                'data': ssa,
            }])
        return subtitles

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        player_config = self._parse_json(self._search_regex(
            r'playerConfig\s*=\s*({.+});', webpage,
            'player config', default='{}'), video_id, fatal=False)
        if not player_config:
            # Fallback: the page exposes a config URL on the player element.
            config_url = urljoin(self._BASE_URL, self._search_regex(
                r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
                webpage, 'config url'))
            player_config = self._download_json(
                config_url, video_id,
                'Downloading player config JSON metadata')['player']

        video_info = {}
        video_info_str = self._search_regex(
            r'videoInfo\s*=\s*({.+});', webpage,
            'video info', fatal=False)
        if video_info_str:
            video_info = self._parse_json(
                video_info_str, video_id, fatal=False) or {}

        options = player_config.get('options') or {}
        metas = options.get('metas') or {}
        links = player_config.get('links') or {}
        sub_path = player_config.get('subtitles')
        error = None
        if not links:
            links_url = player_config.get('linksurl') or options['videoUrl']
            token = options['token']
            # Random key half; the server encrypts subtitles with
            # self._K + a fixed suffix (see _get_subtitles).
            self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
            message = bytes_to_intlist(json.dumps({
                'k': self._K,
                'e': 60,
                't': token,
            }))
            # PKCS#1-pad the payload, then RSA-encrypt it with the site key
            # to build the Bearer token for the links request.
            padded_message = intlist_to_bytes(pkcs1pad(message, 128))
            n, e = self._RSA_KEY
            encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
            authorization = base64.b64encode(encrypted_message).decode()
            links_data = self._download_json(
                urljoin(self._BASE_URL, links_url), video_id,
                'Downloading links JSON metadata', headers={
                    'Authorization': 'Bearer ' + authorization,
                })
            links = links_data.get('links') or {}
            metas = metas or links_data.get('meta') or {}
            sub_path = sub_path or links_data.get('subtitles') or \
                'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
            sub_path += '&token=' + token
            error = links_data.get('error')
        title = metas.get('title') or video_info['title']

        formats = []
        for format_id, qualities in links.items():
            if not isinstance(qualities, dict):
                continue
            for quality, load_balancer_url in qualities.items():
                # Each quality resolves to an m3u8 via a load-balancer hop.
                load_balancer_data = self._download_json(
                    load_balancer_url, video_id,
                    'Downloading %s %s JSON metadata' % (format_id, quality),
                    fatal=False) or {}
                m3u8_url = load_balancer_data.get('location')
                if not m3u8_url:
                    continue
                m3u8_formats = self._extract_m3u8_formats(
                    m3u8_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False)
                if format_id == 'vf':
                    # 'vf' formats are tagged French (presumably the dub).
                    for f in m3u8_formats:
                        f['language'] = 'fr'
                formats.extend(m3u8_formats)
        if not error:
            error = options.get('error')
        if not formats and error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
            'thumbnail': video_info.get('image'),
            'formats': formats,
            'subtitles': self.extract_subtitles(sub_path, video_id),
            'episode': metas.get('subtitle') or video_info.get('videoTitle'),
            'series': video_info.get('playlistTitle'),
        }
							
								
								
									
										37
									
								
								youtube_dl/extractor/adobeconnect.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/adobeconnect.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AdobeConnectIE(InfoExtractor):
    """Extractor for Adobe Connect meeting recordings/streams."""
    _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
        # Connection parameters live in the query string of the player SWF URL.
        swf_url = self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url')
        qs = compat_parse_qs(swf_url.split('?')[1])
        is_live = qs.get('isLive', ['false'])[0] == 'true'

        formats = []
        # One RTMP format per advertised connection string.
        for conn in qs['conStrings'][0].split(','):
            scheme = conn.split('://')[0]
            app = compat_urlparse.quote(
                '?' + conn.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0])
            formats.append({
                'format_id': scheme,
                'app': app,
                'ext': 'flv',
                'play_path': 'mp4:' + qs['streamName'][0],
                'rtmp_conn': 'S:' + qs['ticket'][0],
                'rtmp_live': is_live,
                'url': conn,
            })

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'is_live': is_live,
        }
							
								
								
									
										1572
									
								
								youtube_dl/extractor/adobepass.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1572
									
								
								youtube_dl/extractor/adobepass.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										288
									
								
								youtube_dl/extractor/adobetv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										288
									
								
								youtube_dl/extractor/adobetv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,288 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import functools | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     ISO639Utils, | ||||
|     OnDemandPagedList, | ||||
|     parse_duration, | ||||
|     str_or_none, | ||||
|     str_to_int, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AdobeTVBaseIE(InfoExtractor):
    """Shared helpers for the tv.adobe.com extractors."""

    def _call_api(self, path, video_id, query, note=None):
        # All v4 API responses wrap their payload in a 'data' key.
        return self._download_json(
            'http://tv.adobe.com/api/v4/' + path,
            video_id, note, query=query)['data']

    def _parse_subtitles(self, video_data, url_key):
        """Build a subtitles dict from the 'translations' entries.

        url_key names the field of each translation that holds the WebVTT
        URL ('vtt' for API data, 'vttPath' for embedded player data).
        """
        subtitles = {}
        for translation in video_data.get('translations', []):
            vtt_path = translation.get(url_key)
            if not vtt_path:
                continue
            # Prefer the W3C code; otherwise map the long language name.
            lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
            subtitles.setdefault(lang, []).append({
                'ext': 'vtt',
                'url': vtt_path,
            })
        return subtitles

    def _parse_video_data(self, video_data):
        """Convert an API episode object into an info dict."""
        video_id = compat_str(video_data['id'])
        title = video_data['title']

        # Emit at most one 'original' s3 format even if several sources
        # reference s3 files.
        s3_extracted = False
        formats = []
        for source in video_data.get('videos', []):
            source_url = source.get('url')
            if not source_url:
                continue
            f = {
                'format_id': source.get('quality_level'),
                'fps': int_or_none(source.get('frame_rate')),
                'height': int_or_none(source.get('height')),
                'tbr': int_or_none(source.get('video_data_rate')),
                'width': int_or_none(source.get('width')),
                'url': source_url,
            }
            original_filename = source.get('original_filename')
            if original_filename:
                # Fall back to dimensions encoded in the filename (_WxH).
                if not (f.get('height') and f.get('width')):
                    mobj = re.search(r'_(\d+)x(\d+)', original_filename)
                    if mobj:
                        f.update({
                            'height': int(mobj.group(2)),
                            'width': int(mobj.group(1)),
                        })
                if original_filename.startswith('s3://') and not s3_extracted:
                    formats.append({
                        'format_id': 'original',
                        'preference': 1,  # rank the original above transcodes
                        'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
                    })
                    s3_extracted = True
            formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'upload_date': unified_strdate(video_data.get('start_date')),
            'duration': parse_duration(video_data.get('duration')),
            'view_count': str_to_int(video_data.get('playcount')),
            'formats': formats,
            'subtitles': self._parse_subtitles(video_data, 'vtt'),
        }
|  | ||||
|  | ||||
class AdobeTVEmbedIE(AdobeTVBaseIE):
    """Extractor for tv.adobe.com embedded players (numeric episode ids)."""
    IE_NAME = 'adobetv:embed'
    _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
    _TEST = {
        'url': 'https://tv.adobe.com/embed/22/4153',
        'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
        'info_dict': {
            'id': '4153',
            'ext': 'flv',
            'title': 'Creating Graphics Optimized for BlackBerry',
            'description': 'md5:eac6e8dced38bdaae51cd94447927459',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20091109',
            'duration': 377,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Embeds can be looked up directly by episode id.
        episode = self._call_api(
            'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
        return self._parse_video_data(episode)
|  | ||||
|  | ||||
class AdobeTVIE(AdobeTVBaseIE):
    """Extractor for watch pages on tv.adobe.com (slug-addressed)."""
    IE_NAME = 'adobetv'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': '10981',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
        # URLs without a language prefix are English.
        query = {
            'disclosure': 'standard',
            'language': language or 'en',
            'show_urlname': show_urlname,
            'urlname': urlname,
        }
        episode = self._call_api('episode/get', urlname, query)[0]
        return self._parse_video_data(episode)
|  | ||||
|  | ||||
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
    """Base for paged playlists; subclasses set _RESOURCE and _process_data."""
    _PAGE_SIZE = 25

    def _fetch_page(self, display_id, query, page):
        # The API pages are 1-based while OnDemandPagedList is 0-based.
        page_number = page + 1
        query['page'] = page_number
        items = self._call_api(
            self._RESOURCE, display_id, query, 'Download Page %d' % page_number)
        for item in items:
            yield self._process_data(item)

    def _extract_playlist_entries(self, display_id, query):
        fetcher = functools.partial(self._fetch_page, display_id, query)
        return OnDemandPagedList(fetcher, self._PAGE_SIZE)
|  | ||||
|  | ||||
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
    """Playlist extractor for a whole tv.adobe.com show."""
    IE_NAME = 'adobetv:show'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
        'info_dict': {
            'id': '36',
            'title': 'The Complete Picture with Julieanne Kost',
            'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
        },
        'playlist_mincount': 136,
    }
    _RESOURCE = 'episode'
    # Each paged item is an episode; reuse the base parser directly.
    _process_data = AdobeTVBaseIE._parse_video_data

    def _real_extract(self, url):
        language, show_urlname = re.match(self._VALID_URL, url).groups()
        query = {
            'disclosure': 'standard',
            'language': language or 'en',
            'show_urlname': show_urlname,
        }

        show_data = self._call_api('show/get', show_urlname, query)[0]
        entries = self._extract_playlist_entries(show_urlname, query)
        return self.playlist_result(
            entries,
            str_or_none(show_data.get('id')),
            show_data.get('show_name'),
            show_data.get('show_description'))
|  | ||||
|  | ||||
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
    """Playlist extractor for tv.adobe.com channels (lists of shows)."""
    IE_NAME = 'adobetv:channel'
    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'

    _TEST = {
        'url': 'http://tv.adobe.com/channel/development',
        'info_dict': {
            'id': 'development',
        },
        'playlist_mincount': 96,
    }
    _RESOURCE = 'show'

    def _process_data(self, show_data):
        # Each paged item is a show; hand it off to the show extractor.
        return self.url_result(
            show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))

    def _real_extract(self, url):
        language, channel_urlname, category_urlname = re.match(
            self._VALID_URL, url).groups()
        query = {
            'channel_urlname': channel_urlname,
            'language': language or 'en',
        }
        if category_urlname:
            query['category_urlname'] = category_urlname

        entries = self._extract_playlist_entries(channel_urlname, query)
        return self.playlist_result(entries, channel_urlname)
|  | ||||
|  | ||||
class AdobeTVVideoIE(AdobeTVBaseIE):
    """Extractor for standalone video.tv.adobe.com embeds."""
    IE_NAME = 'adobetv:video'
    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'

    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Player metadata is embedded as a JS object literal assigned to
        # 'bridge'.
        video_data = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
        title = video_data['title']

        formats = []
        sources = video_data.get('sources') or []
        for source in sources:
            source_src = source.get('src')
            if not source_src:
                continue
            formats.append({
                'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
                'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
                'height': int_or_none(source.get('height') or None),
                'tbr': int_or_none(source.get('bitrate') or None),
                'width': int_or_none(source.get('width') or None),
                'url': source_src,
            })
        self._sort_formats(formats)

        # For both metadata and downloaded files the duration varies among
        # formats, so pick the max one. Guard against the empty case: the
        # previous bare max() raised ValueError when no source carried a
        # usable duration (or there were no sources at all).
        durations = [d for d in (
            float_or_none(source.get('duration'), scale=1000)
            for source in sources) if d]
        duration = max(durations) if durations else None

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('video', {}).get('poster'),
            'duration': duration,
            'subtitles': self._parse_subtitles(video_data, 'vttPath'),
        }
							
								
								
									
										202
									
								
								youtube_dl/extractor/adultswim.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								youtube_dl/extractor/adultswim.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,202 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .turner import TurnerBaseIE | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     mimetype2ext, | ||||
|     parse_age_limit, | ||||
|     parse_iso8601, | ||||
|     strip_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AdultSwimIE(TurnerBaseIE):
    """Extractor for adultswim.com episodes, live streams and show pages.

    An episode URL resolves to a single video (NGTV media plus, for
    unauthenticated videos, additional direct assets); a bare show URL
    resolves to a playlist of url_result entries back into this extractor.
    """
    _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'

    _TESTS = [{
        'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
        'info_dict': {
            'id': 'rQxZvXQ4ROaSOqq-or2Mow',
            'ext': 'mp4',
            'title': 'Rick and Morty - Pilot',
            'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
            'timestamp': 1543294800,
            'upload_date': '20181127',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
        'info_dict': {
            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
            'ext': 'mp4',
            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
            'upload_date': '20080124',
            'timestamp': 1201150800,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': '404 Not Found',
    }, {
        'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
        'info_dict': {
            'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
            'ext': 'mp4',
            'title': 'Decker - Inside Decker: A New Hero',
            'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
            'timestamp': 1469480460,
            'upload_date': '20160725',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.adultswim.com/videos/attack-on-titan',
        'info_dict': {
            'id': 'attack-on-titan',
            'title': 'Attack on Titan',
            'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://www.adultswim.com/videos/streams/williams-stream',
        'info_dict': {
            'id': 'd8DEBj7QRfetLsRgFnGEyg',
            'ext': 'mp4',
            'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'description': 'original programming',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': '404 Not Found',
    }]

    def _real_extract(self, url):
        show_path, episode_path = re.match(self._VALID_URL, url).groups()
        display_id = episode_path or show_path
        # GraphQL query against the site search API; the inner selection set
        # depends on whether we want one episode or a whole-show listing.
        query = '''query {
  getShowBySlug(slug:"%s") {
    %%s
  }
}''' % show_path
        if episode_path:
            query = query % '''title
    getVideoBySlug(slug:"%s") {
      _id
      auth
      description
      duration
      episodeNumber
      launchDate
      mediaID
      seasonNumber
      poster
      title
      tvRating
    }''' % episode_path
            # NOTE: a stray no-op expression statement `['getVideoBySlug']`
            # (a discarded list literal left from a refactor) was removed here.
        else:
            query = query % '''metaDescription
    title
    videos(first:1000,sort:["episode_number"]) {
      edges {
        node {
           _id
           slug
        }
      }
    }'''
        show_data = self._download_json(
            'https://www.adultswim.com/api/search', display_id,
            data=json.dumps({'query': query}).encode(),
            headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
        if episode_path:
            video_data = show_data['getVideoBySlug']
            video_id = video_data['_id']
            episode_title = title = video_data['title']
            series = show_data.get('title')
            if series:
                title = '%s - %s' % (series, title)
            info = {
                'id': video_id,
                'title': title,
                'description': strip_or_none(video_data.get('description')),
                'duration': float_or_none(video_data.get('duration')),
                'formats': [],
                'subtitles': {},
                'age_limit': parse_age_limit(video_data.get('tvRating')),
                'thumbnail': video_data.get('poster'),
                'timestamp': parse_iso8601(video_data.get('launchDate')),
                'series': series,
                'season_number': int_or_none(video_data.get('seasonNumber')),
                'episode': episode_title,
                'episode_number': int_or_none(video_data.get('episodeNumber')),
            }

            auth = video_data.get('auth')
            media_id = video_data.get('mediaID')
            if media_id:
                info.update(self._extract_ngtv_info(media_id, {
                    # CDN_TOKEN_APP_ID from:
                    # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
                    'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
                }, {
                    'url': url,
                    'site_name': 'AdultSwim',
                    'auth_required': auth,
                }))

            # Videos that do not require TV-provider auth expose extra direct
            # stream assets through a separate API endpoint.
            if not auth:
                extract_data = self._download_json(
                    'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
                    video_id, query={'fields': 'stream'}, fatal=False) or {}
                assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
                for asset in assets:
                    asset_url = asset.get('url')
                    if not asset_url:
                        continue
                    ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
                    if ext == 'm3u8':
                        info['formats'].extend(self._extract_m3u8_formats(
                            asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
                    elif ext == 'f4m':
                        # f4m manifests are deliberately skipped here.
                        continue
                        # info['formats'].extend(self._extract_f4m_formats(
                        #     asset_url, video_id, f4m_id='hds', fatal=False))
                    elif ext in ('scc', 'ttml', 'vtt'):
                        info['subtitles'].setdefault('en', []).append({
                            'url': asset_url,
                        })
            self._sort_formats(info['formats'])

            return info
        else:
            # Show page: build a playlist of episode URLs pointing back here.
            entries = []
            for edge in show_data.get('videos', {}).get('edges', []):
                video = edge.get('node') or {}
                slug = video.get('slug')
                if not slug:
                    continue
                entries.append(self.url_result(
                    'http://adultswim.com/videos/%s/%s' % (show_path, slug),
                    'AdultSwim', video.get('_id')))
            return self.playlist_result(
                entries, show_path, show_data.get('title'),
                strip_or_none(show_data.get('metaDescription')))
							
								
								
									
										247
									
								
								youtube_dl/extractor/aenetworks.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										247
									
								
								youtube_dl/extractor/aenetworks.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,247 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
| ) | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AENetworksBaseIE(ThePlatformIE):
    """Shared ThePlatform SMIL extraction for the A+E Networks sites."""
    _THEPLATFORM_KEY = 'crazyjava'
    _THEPLATFORM_SECRET = 's3cr3t'

    def _extract_aen_smil(self, smil_url, video_id, auth=None):
        """Try each known assetTypes/switch combination against the SMIL
        endpoint, merging formats and subtitles from every attempt that
        succeeds; re-raise the last error only if nothing worked."""
        base_query = {'mbr': 'true'}
        if auth:
            base_query['auth'] = auth
        attempts = [{
            'assetTypes': 'high_video_ak',
            'switch': 'hls_high_ak'
        }, {
            'assetTypes': 'high_video_s3'
        }, {
            'assetTypes': 'high_video_s3',
            'switch': 'hls_ingest_fastly'
        }]
        formats, subtitles = [], {}
        last_error = None
        for attempt in attempts:
            attempt.update(base_query)
            signed_url = self._sign_url(
                update_url_query(smil_url, attempt),
                self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
            note = 'Downloading %s SMIL data' % (attempt.get('switch') or attempt['assetTypes'])
            try:
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    signed_url, video_id, note)
            except ExtractorError as e:
                last_error = e
            else:
                formats.extend(tp_formats)
                subtitles = self._merge_subtitles(subtitles, tp_subtitles)
        if last_error and not formats:
            raise last_error
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
|  | ||||
|  | ||||
class AENetworksIE(AENetworksBaseIE):
    """Extractor for A+E Networks properties: single episodes, movies,
    specials and collection items, plus whole-show and season playlists.
    Videos flagged as behind a wall go through MVPD (TV provider) auth.
    """
    IE_NAME = 'aenetworks'
    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?P<domain>
                            (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
                            fyi\.tv
                        )/
                        (?:
                            shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
                            movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
                            specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
                            collections/[^/]+/(?P<collection_display_id>[^/]+)
                        )
                    '''
    _TESTS = [{
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
        'info_dict': {
            'id': '22253814',
            'ext': 'mp4',
            'title': 'Winter is Coming',
            'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
            'timestamp': 1338306241,
            'upload_date': '20120529',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.history.com/shows/ancient-aliens/season-1',
        'info_dict': {
            'id': '71889446852',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
        'info_dict': {
            'id': 'SERIES4317',
            'title': 'Atlanta Plastic',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'only_matching': True
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True
    }, {
        'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
        'only_matching': True
    }, {
        'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
        'only_matching': True
    }, {
        'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
        'only_matching': True
    }, {
        'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
        'only_matching': True
    }, {
        'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
        'only_matching': True
    }, {
        'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
        'only_matching': True
    }]
    # Maps each site domain to the requestor id used for MVPD auth.
    _DOMAIN_TO_REQUESTOR_ID = {
        'history.com': 'HISTORY',
        'aetv.com': 'AETV',
        'mylifetime.com': 'LIFETIME',
        'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
        'fyi.tv': 'FYI',
    }

    def _real_extract(self, url):
        domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
        # Exactly one of the four path groups matched; any of them serves
        # as the display id for logging/progress purposes.
        display_id = show_path or movie_display_id or special_display_id or collection_display_id
        webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
        if show_path:
            url_parts = show_path.split('/')
            url_parts_len = len(url_parts)
            if url_parts_len == 1:
                # Bare show URL: collect its season pages into a playlist.
                entries = []
                for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
                    entries.append(self.url_result(
                        compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
                if entries:
                    return self.playlist_result(
                        entries, self._html_search_meta('aetn:SeriesId', webpage),
                        self._html_search_meta('aetn:SeriesTitle', webpage))
                else:
                    # single season
                    url_parts_len = 2
            if url_parts_len == 2:
                # Season URL (or single-season show): playlist of episodes.
                entries = []
                for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
                    episode_attributes = extract_attributes(episode_item)
                    episode_url = compat_urlparse.urljoin(
                        url, episode_attributes['data-canonical'])
                    entries.append(self.url_result(
                        episode_url, 'AENetworks',
                        episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
                return self.playlist_result(
                    entries, self._html_search_meta('aetn:SeasonId', webpage))

        # Single video (episode with 3 path parts, movie, special, or
        # collection item): pull the media URL out of the page.
        video_id = self._html_search_meta('aetn:VideoID', webpage)
        media_url = self._search_regex(
            [r"media_url\s*=\s*'(?P<url>[^']+)'",
             r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
             r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
            webpage, 'video url', group='url')
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        auth = None
        if theplatform_metadata.get('AETN$isBehindWall'):
            # Walled content: obtain an MVPD auth token for this domain.
            requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
            resource = self._get_mvpd_resource(
                requestor_id, theplatform_metadata['title'],
                theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
                theplatform_metadata['ratings'][0]['rating'])
            auth = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)
        info.update(self._search_json_ld(webpage, video_id, fatal=False))
        info.update(self._extract_aen_smil(media_url, video_id, auth))
        return info
|  | ||||
|  | ||||
class HistoryTopicIE(AENetworksBaseIE):
    """Extractor for History.com topic videos."""
    IE_NAME = 'history:topic'
    IE_DESC = 'History.com Topic'
    _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
    _TESTS = [{
        'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
        'info_dict': {
            'id': '40700995724',
            'ext': 'mp4',
            'title': "History of Valentine’s Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
            'timestamp': 1375819729,
            'upload_date': '20130806',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def theplatform_url_result(self, theplatform_url, video_id, query):
        """Build a url_transparent result that hands off to ThePlatform,
        smuggling the signing key/secret alongside the URL."""
        smuggled_url = smuggle_url(
            update_url_query(theplatform_url, query),
            {
                'sig': {
                    'key': self._THEPLATFORM_KEY,
                    'secret': self._THEPLATFORM_SECRET,
                },
                'force_smil_url': True
            })
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': smuggled_url,
            'ie_key': 'ThePlatform',
        }

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The numeric tpid embedded in the <phoenix-iframe> is the real id.
        video_id = self._search_regex(
            r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
        result = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/history/videos',
            video_id, query={'filter[id]': video_id})['results'][0]
        title = result['title']
        info = self._extract_aen_smil(result['publicUrl'], video_id)
        info.update({
            'title': title,
            'description': result.get('description'),
            'duration': int_or_none(result.get('duration')),
            'timestamp': int_or_none(result.get('added'), 1000),
        })
        return info
							
								
								
									
										367
									
								
								youtube_dl/extractor/afreecatv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										367
									
								
								youtube_dl/extractor/afreecatv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,367 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_xpath | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     url_or_none, | ||||
|     urlencode_postdata, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AfreecaTVIE(InfoExtractor): | ||||
|     IE_NAME = 'afreecatv' | ||||
|     IE_DESC = 'afreecatv.com' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?: | ||||
|                             (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? | ||||
|                             (?: | ||||
|                                 /app/(?:index|read_ucc_bbs)\.cgi| | ||||
|                                 /player/[Pp]layer\.(?:swf|html) | ||||
|                             )\?.*?\bnTitleNo=| | ||||
|                             vod\.afreecatv\.com/PLAYER/STATION/ | ||||
|                         ) | ||||
|                         (?P<id>\d+) | ||||
|                     ''' | ||||
|     _NETRC_MACHINE = 'afreecatv' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', | ||||
|         'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', | ||||
|         'info_dict': { | ||||
|             'id': '36164052', | ||||
|             'ext': 'mp4', | ||||
|             'title': '데일리 에이프릴 요정들의 시상식!', | ||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', | ||||
|             'uploader': 'dailyapril', | ||||
|             'uploader_id': 'dailyapril', | ||||
|             'upload_date': '20160503', | ||||
|         }, | ||||
|         'skip': 'Video is gone', | ||||
|     }, { | ||||
|         'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', | ||||
|         'info_dict': { | ||||
|             'id': '36153164', | ||||
|             'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", | ||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', | ||||
|             'uploader': 'dailyapril', | ||||
|             'uploader_id': 'dailyapril', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|         'playlist': [{ | ||||
|             'md5': 'd8b7c174568da61d774ef0203159bf97', | ||||
|             'info_dict': { | ||||
|                 'id': '36153164_1', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", | ||||
|                 'upload_date': '20160502', | ||||
|             }, | ||||
|         }, { | ||||
|             'md5': '58f2ce7f6044e34439ab2d50612ab02b', | ||||
|             'info_dict': { | ||||
|                 'id': '36153164_2', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", | ||||
|                 'upload_date': '20160502', | ||||
|             }, | ||||
|         }], | ||||
|         'skip': 'Video is gone', | ||||
|     }, { | ||||
|         'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793', | ||||
|         'info_dict': { | ||||
|             'id': '18650793', | ||||
|             'ext': 'mp4', | ||||
|             'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!', | ||||
|             'thumbnail': r're:^https?://.*\.jpg$', | ||||
|             'uploader': '윈아디', | ||||
|             'uploader_id': 'badkids', | ||||
|             'duration': 107, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652', | ||||
|         'info_dict': { | ||||
|             'id': '10481652', | ||||
|             'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", | ||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', | ||||
|             'uploader': 'dailyapril', | ||||
|             'uploader_id': 'dailyapril', | ||||
|             'duration': 6492, | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|         'playlist': [{ | ||||
|             'md5': 'd8b7c174568da61d774ef0203159bf97', | ||||
|             'info_dict': { | ||||
|                 'id': '20160502_c4c62b9d_174361386_1', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)", | ||||
|                 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', | ||||
|                 'uploader': 'dailyapril', | ||||
|                 'uploader_id': 'dailyapril', | ||||
|                 'upload_date': '20160502', | ||||
|                 'duration': 3601, | ||||
|             }, | ||||
|         }, { | ||||
|             'md5': '58f2ce7f6044e34439ab2d50612ab02b', | ||||
|             'info_dict': { | ||||
|                 'id': '20160502_39e739bb_174361386_2', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 2)", | ||||
|                 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', | ||||
|                 'uploader': 'dailyapril', | ||||
|                 'uploader_id': 'dailyapril', | ||||
|                 'upload_date': '20160502', | ||||
|                 'duration': 2891, | ||||
|             }, | ||||
|         }], | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # non standard key | ||||
|         'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605', | ||||
|         'info_dict': { | ||||
|             'id': '20170411_BE689A0E_190960999_1_2_h', | ||||
|             'ext': 'mp4', | ||||
|             'title': '혼자사는여자집', | ||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', | ||||
|             'uploader': '♥이슬이', | ||||
|             'uploader_id': 'dasl8121', | ||||
|             'upload_date': '20170411', | ||||
|             'duration': 213, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # PARTIAL_ADULT | ||||
|         'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439', | ||||
|         'info_dict': { | ||||
|             'id': '20180327_27901457_202289533_1', | ||||
|             'ext': 'mp4', | ||||
|             'title': '[생]빨개요♥ (part 1)', | ||||
|             'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', | ||||
|             'uploader': '[SA]서아', | ||||
|             'uploader_id': 'bjdyrksu', | ||||
|             'upload_date': '20180327', | ||||
|             'duration': 3601, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['adult content'], | ||||
|     }, { | ||||
|         'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def parse_video_key(key): | ||||
|         video_key = {} | ||||
|         m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key) | ||||
|         if m: | ||||
|             video_key['upload_date'] = m.group('upload_date') | ||||
|             video_key['part'] = int(m.group('part')) | ||||
|         return video_key | ||||
|  | ||||
    def _real_initialize(self):
        # Log in up front (no-op when no credentials were supplied).
        self._login()
|  | ||||
|     def _login(self): | ||||
|         username, password = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_form = { | ||||
|             'szWork': 'login', | ||||
|             'szType': 'json', | ||||
|             'szUid': username, | ||||
|             'szPassword': password, | ||||
|             'isSaveId': 'false', | ||||
|             'szScriptVar': 'oLoginRet', | ||||
|             'szAction': '', | ||||
|         } | ||||
|  | ||||
|         response = self._download_json( | ||||
|             'https://login.afreecatv.com/app/LoginAction.php', None, | ||||
|             'Logging in', data=urlencode_postdata(login_form)) | ||||
|  | ||||
|         _ERRORS = { | ||||
|             -4: 'Your account has been suspended due to a violation of our terms and policies.', | ||||
|             -5: 'https://member.afreecatv.com/app/user_delete_progress.php', | ||||
|             -6: 'https://login.afreecatv.com/membership/changeMember.php', | ||||
|             -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", | ||||
|             -9: 'https://member.afreecatv.com/app/pop_login_block.php', | ||||
|             -11: 'https://login.afreecatv.com/afreeca/second_login.php', | ||||
|             -12: 'https://member.afreecatv.com/app/user_security.php', | ||||
|             0: 'The username does not exist or you have entered the wrong password.', | ||||
|             -1: 'The username does not exist or you have entered the wrong password.', | ||||
|             -3: 'You have entered your username/password incorrectly.', | ||||
|             -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', | ||||
|             -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', | ||||
|             -32008: 'You have failed to log in. Please contact our Help Center.', | ||||
|         } | ||||
|  | ||||
|         result = int_or_none(response.get('RESULT')) | ||||
|         if result != 1: | ||||
|             error = _ERRORS.get(result, 'You have failed to log in.') | ||||
|             raise ExtractorError( | ||||
|                 'Unable to login: %s said: %s' % (self.IE_NAME, error), | ||||
|                 expected=True) | ||||
|  | ||||
    def _real_extract(self, url):
        """Extract one AfreecaTV VOD, which may be split into several parts.

        Single-file videos are returned as a plain info dict; multi-file
        videos are returned as a ``multi_video`` result with one entry per
        part.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        if re.search(r'alert\(["\']This video has been deleted', webpage):
            raise ExtractorError(
                'Video %s has been deleted' % video_id, expected=True)

        station_id = self._search_regex(
            r'nStationNo\s*=\s*(\d+)', webpage, 'station')
        bbs_id = self._search_regex(
            r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs')
        # The page-level title number is the canonical id; fall back to the
        # id taken from the URL when it is absent.
        video_id = self._search_regex(
            r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id)

        # First request normally; if the API flags partial adult content for
        # this session, retry once asking only for the all-ages portion
        # (partialView=SKIP_ADULT).
        partial_view = False
        for _ in range(2):
            query = {
                'nTitleNo': video_id,
                'nStationNo': station_id,
                'nBbsNo': bbs_id,
            }
            if partial_view:
                query['partialView'] = 'SKIP_ADULT'
            # NOTE(review): the second positional video_id fills the errnote
            # parameter of _download_xml -- looks unintended; confirm.
            video_xml = self._download_xml(
                'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
                video_id, 'Downloading video info XML%s'
                % (' (skipping adult)' if partial_view else ''),
                video_id, headers={
                    'Referer': url,
                }, query=query)

            flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
            if flag and flag == 'SUCCEED':
                break
            if flag == 'PARTIAL_ADULT':
                self._downloader.report_warning(
                    'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
                    'Only content suitable for all ages will be downloaded. '
                    'Provide account credentials if you wish to download restricted content.')
                partial_view = True
                continue
            elif flag == 'ADULT':
                error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
            else:
                error = flag
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        else:
            # Loop exhausted without a break: both attempts came back
            # PARTIAL_ADULT, so no usable info was ever returned.
            raise ExtractorError('Unable to download video info')

        # The last <video> element under <track> carries the (possibly
        # empty) top-level media URL.
        video_element = video_xml.findall(compat_xpath('./track/video'))[-1]
        if video_element is None or video_element.text is None:
            raise ExtractorError(
                'Video %s video does not exist' % video_id, expected=True)

        video_url = video_element.text.strip()

        title = xpath_text(video_xml, './track/title', 'title', fatal=True)

        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
        duration = int_or_none(xpath_text(
            video_xml, './track/duration', 'duration'))
        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')

        # Metadata shared by the top-level result and every per-part entry.
        common_entry = {
            'uploader': uploader,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
        }

        info = common_entry.copy()
        info.update({
            'id': video_id,
            'title': title,
            'duration': duration,
        })

        # An empty top-level URL means the VOD is split into <file> parts;
        # expose those as a multi_video result.
        if not video_url:
            entries = []
            file_elements = video_element.findall(compat_xpath('./file'))
            one = len(file_elements) == 1
            for file_num, file_element in enumerate(file_elements, start=1):
                file_url = url_or_none(file_element.text)
                if not file_url:
                    continue
                # Part keys look like YYYYMMDD_<hash>_<part>; the leading
                # digits double as the upload date.
                key = file_element.get('key', '')
                upload_date = self._search_regex(
                    r'^(\d{8})_', key, 'upload date', default=None)
                file_duration = int_or_none(file_element.get('duration'))
                format_id = key if key else '%s_%s' % (video_id, file_num)
                if determine_ext(file_url) == 'm3u8':
                    formats = self._extract_m3u8_formats(
                        file_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id='hls',
                        note='Downloading part %d m3u8 information' % file_num)
                else:
                    formats = [{
                        'url': file_url,
                        'format_id': 'http',
                    }]
                if not formats:
                    continue
                self._sort_formats(formats)
                file_info = common_entry.copy()
                file_info.update({
                    'id': format_id,
                    # Only annotate the part number when there are several.
                    'title': title if one else '%s (part %d)' % (title, file_num),
                    'upload_date': upload_date,
                    'duration': file_duration,
                    'formats': formats,
                })
                entries.append(file_info)
            entries_info = info.copy()
            entries_info.update({
                '_type': 'multi_video',
                'entries': entries,
            })
            return entries_info

        info = {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'thumbnail': thumbnail,
        }

        if determine_ext(video_url) == 'm3u8':
            info['formats'] = self._extract_m3u8_formats(
                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls')
        else:
            # Non-HLS URLs are RTMP of the form <app>mp4:<playpath>.
            app, playpath = video_url.split('mp4:')
            info.update({
                'url': app,
                'ext': 'flv',
                'play_path': 'mp4:' + playpath,
                'rtmp_live': True,  # downloading won't end without this
            })

        return info
							
								
								
									
										66
									
								
								youtube_dl/extractor/airmozilla.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								youtube_dl/extractor/airmozilla.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AirMozillaIE(InfoExtractor):
    """Extractor for air.mozilla.org event recordings (vid.ly embeds)."""

    _VALID_URL = r'https?://air\.mozilla\.org/(?P<id>[0-9a-z-]+)/?'
    _TEST = {
        'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/',
        'md5': '8d02f53ee39cf006009180e21df1f3ba',
        'info_dict': {
            'id': '6x4q2w',
            'ext': 'mp4',
            'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco',
            'thumbnail': r're:https?://.*/poster\.jpg',
            'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...',
            'timestamp': 1422487800,
            'upload_date': '20150128',
            'location': 'SFO Commons',
            'duration': 3780,
            'view_count': int,
            'categories': ['Main', 'Privacy'],
        }
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The event page embeds its media through a vid.ly player.
        video_id = self._html_search_regex(
            r'//vid\.ly/(.*?)/embed', webpage, 'id')

        embed_script = self._download_webpage(
            'https://vid.ly/{0}/embed'.format(video_id), video_id)
        player_config = self._parse_json(self._search_regex(
            r'initCallback\((.*)\);', embed_script, 'metadata'),
            video_id)['config']

        info = self._parse_jwplayer_data(player_config, video_id)
        # Event metadata (time, place, stats) lives on the air.mozilla.org
        # page itself rather than in the player config.
        info.update({
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._og_search_url(webpage),
            'display_id': display_id,
            'description': self._og_search_description(webpage),
            'timestamp': parse_iso8601(self._html_search_regex(
                r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False)),
            'location': self._html_search_regex(
                r'Location: (.*)', webpage, 'location', default=None),
            'duration': parse_duration(self._search_regex(
                r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)',
                webpage, 'duration', fatal=False)),
            'view_count': int_or_none(self._html_search_regex(
                r'Views since archived: ([0-9]+)',
                webpage, 'view count', fatal=False)),
            'categories': re.findall(
                r'<a href=".*?" class="channel">(.*?)</a>', webpage),
        })

        return info
							
								
								
									
										53
									
								
								youtube_dl/extractor/aliexpress.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								youtube_dl/extractor/aliexpress.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AliExpressLiveIE(InfoExtractor):
    """Extractor for recorded AliExpress live-stream pages."""

    _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)'
    _TEST = {
        'url': 'https://live.aliexpress.com/live/2800002704436634',
        'md5': 'e729e25d47c5e557f2630eaf99b740a5',
        'info_dict': {
            'id': '2800002704436634',
            'ext': 'mp4',
            'title': 'CASIMA7.22',
            'thumbnail': r're:http://.*\.jpg',
            'uploader': 'CASIMA Official Store',
            'timestamp': 1500717600,
            'upload_date': '20170722',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # All page state is embedded in a JS assignment:
        #   runParams = {...}; var ...
        run_params = self._parse_json(
            self._search_regex(
                r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var',
                webpage, 'runParams'),
            video_id)

        return {
            'id': video_id,
            'title': run_params['title'],
            'thumbnail': run_params.get('coverUrl'),
            'uploader': try_get(
                run_params, lambda x: x['followBar']['name'], compat_str),
            # startTimeLong is in milliseconds.
            'timestamp': float_or_none(run_params.get('startTimeLong'), scale=1000),
            'formats': self._extract_m3u8_formats(
                run_params['replyStreamUrl'], video_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls'),
        }
							
								
								
									
										33
									
								
								youtube_dl/extractor/aljazeera.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube_dl/extractor/aljazeera.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class AlJazeeraIE(InfoExtractor):
    """Extractor for aljazeera.com videos, which are hosted on Brightcove."""

    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'

    _TESTS = [{
        'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
        'info_dict': {
            'id': '3792260579001',
            'ext': 'mp4',
            'title': 'The Slum - Episode 1: Deliverance',
            'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
            'uploader_id': '665003303001',
            'timestamp': 1411116829,
            'upload_date': '20140919',
        },
        'add_ie': ['BrightcoveNew'],
        'skip': 'Not accessible from Travis CI server',
    }, {
        'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
        'only_matching': True,
    }]
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The page hands the Brightcove video id to its player bootstrap.
        brightcove_id = self._search_regex(
            r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
        # Delegate the actual extraction to the BrightcoveNew extractor.
        return self.url_result(
            self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
            'BrightcoveNew', brightcove_id)
							
								
								
									
										132
									
								
								youtube_dl/extractor/allocine.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								youtube_dl/extractor/allocine.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,132 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     remove_end, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AllocineIE(InfoExtractor):
    """Extractor for AlloCiné (allocine.fr) trailers and videos.

    Handles both the newer player pages (JSON embedded in a ``data-model``
    attribute) and the legacy AcVisiondata API used by older pages.
    """
    _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P<id>[0-9]+)(?:\.html)?'

    _TESTS = [{
        'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html',
        'md5': '0c9fcf59a841f65635fa300ac43d8269',
        'info_dict': {
            'id': '19546517',
            'display_id': '18635087',
            'ext': 'mp4',
            'title': 'Astérix - Le Domaine des Dieux Teaser VF',
            'description': 'md5:4a754271d9c6f16c72629a8a993ee884',
            'thumbnail': r're:http://.*\.jpg',
            'duration': 39,
            'timestamp': 1404273600,
            'upload_date': '20140702',
            'view_count': int,
        },
    }, {
        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html',
        'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0',
        'info_dict': {
            'id': '19540403',
            'display_id': '19540403',
            'ext': 'mp4',
            'title': 'Planes 2 Bande-annonce VF',
            'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). Planes 2, un film de Roberts Gannaway',
            'thumbnail': r're:http://.*\.jpg',
            'duration': 69,
            'timestamp': 1385659800,
            'upload_date': '20131128',
            'view_count': int,
        },
    }, {
        'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html',
        'md5': '101250fb127ef9ca3d73186ff22a47ce',
        'info_dict': {
            'id': '19544709',
            'display_id': '19544709',
            'ext': 'mp4',
            'title': 'Dragons 2 - Bande annonce finale VF',
            'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a',
            'thumbnail': r're:http://.*\.jpg',
            'duration': 144,
            'timestamp': 1397589900,
            'upload_date': '20140415',
            'view_count': int,
        },
    }, {
        'url': 'http://www.allocine.fr/video/video-19550147/',
        'md5': '3566c0668c0235e2d224fd8edb389f67',
        'info_dict': {
            'id': '19550147',
            'ext': 'mp4',
            'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger',
            'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354',
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        formats = []
        # Quality labels used by the site, passed to formats for ranking.
        quality = qualities(['ld', 'md', 'hd'])

        model = self._html_search_regex(
            r'data-model="([^"]+)"', webpage, 'data model', default=None)
        if model:
            # Modern pages: metadata is embedded as JSON on the player tag.
            model_data = self._parse_json(model, display_id)
            video = model_data['videos'][0]
            title = video['title']
            for video_url in video['sources'].values():
                # Media basenames look like <video_id>_<format_id>_... .
                video_id, format_id = url_basename(video_url).split('_')[:2]
                formats.append({
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'url': video_url,
                })
            duration = int_or_none(video.get('duration'))
            view_count = int_or_none(video.get('view_count'))
            timestamp = unified_timestamp(try_get(
                video, lambda x: x['added_at']['date'], compat_str))
        else:
            # Legacy pages: ask the AcVisiondata API using the page id.
            video_id = display_id
            media_data = self._download_json(
                'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
            title = remove_end(
                self._html_search_regex(
                    r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
                ' - AlloCiné')
            # Format URLs are exposed under keys such as ldPath/mdPath/hdPath.
            for key, value in media_data['video'].items():
                if not key.endswith('Path'):
                    continue
                format_id = key[:-len('Path')]
                formats.append({
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'url': value,
                })
            # The legacy API does not expose these fields.
            duration, view_count, timestamp = [None] * 3

        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': duration,
            'timestamp': timestamp,
            'view_count': view_count,
            'formats': formats,
        }
							
								
								
									
										77
									
								
								youtube_dl/extractor/alphaporno.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								youtube_dl/extractor/alphaporno.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,77 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     parse_duration, | ||||
|     parse_filesize, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AlphaPornoIE(InfoExtractor):
    """Extractor for alphaporno.com videos (metadata scraped from the page)."""

    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
        'info_dict': {
            'id': '258807',
            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
            'ext': 'mp4',
            'title': 'Sensual striptease porn with Samantha Alexandra',
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1418694611,
            'upload_date': '20141216',
            'duration': 387,
            'filesize_approx': 54120000,
            'tbr': 1145,
            'categories': list,
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')
        # encodingFormat is stored with a leading dot (e.g. '.mp4'); drop it.
        ext = self._html_search_meta(
            'encodingFormat', webpage, 'ext', default='.mp4')[1:]

        # Everything else comes from schema.org <meta> tags on the page.
        return {
            'id': self._search_regex(
                r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None),
            'display_id': display_id,
            'url': video_url,
            'ext': ext,
            'title': self._search_regex(
                [r'<meta content="([^"]+)" itemprop="description">',
                 r'class="title" itemprop="name">([^<]+)<'],
                webpage, 'title'),
            'thumbnail': self._html_search_meta('thumbnail', webpage, 'thumbnail'),
            'timestamp': parse_iso8601(self._html_search_meta(
                'uploadDate', webpage, 'upload date')),
            'duration': parse_duration(self._html_search_meta(
                'duration', webpage, 'duration')),
            'filesize_approx': parse_filesize(self._html_search_meta(
                'contentSize', webpage, 'file size')),
            'tbr': int_or_none(self._html_search_meta(
                'bitrate', webpage, 'bitrate')),
            'categories': self._html_search_meta(
                'keywords', webpage, 'categories', default='').split(','),
            'age_limit': self._rta_search(webpage),
        }
							
								
								
									
										118
									
								
								youtube_dl/extractor/amcnetworks.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										118
									
								
								youtube_dl/extractor/amcnetworks.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,118 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     try_get, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AMCNetworksIE(ThePlatformIE):
    """Extractor for AMC Networks sites (AMC, BBC America, IFC, WE tv,
    SundanceTV), which deliver media through thePlatform, optionally behind
    TV-provider (MVPD) authentication.
    """
    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
        'md5': '',
        'info_dict': {
            'id': 's3MX01Nl4vPH',
            'ext': 'mp4',
            'title': 'Maron - Season 4 - Step 1',
            'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
            'age_limit': 17,
            'upload_date': '20160505',
            'timestamp': 1462468831,
            'uploader': 'AMCN',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Requires TV provider accounts',
    }, {
        'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
        'only_matching': True,
    }, {
        'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot',
        'only_matching': True,
    }, {
        'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal',
        'only_matching': True,
    }, {
        'url': 'http://www.ifc.com/movies/chaos',
        'only_matching': True,
    }, {
        'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
        'only_matching': True,
    }, {
        'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention',
        'only_matching': True,
    }, {
        'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3',
        'only_matching': True,
    }, {
        'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
        # The page exposes the thePlatform media link in a JS global.
        media_url = self._search_regex(
            r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
            webpage, 'media url')
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'link\.theplatform\.com/s/([^?]+)',
            media_url, 'theplatform_path'), display_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        video_id = theplatform_metadata['pid']
        title = theplatform_metadata['title']
        rating = try_get(
            theplatform_metadata, lambda x: x['ratings'][0]['rating'])
        # Some videos require TV-provider (MVPD) authentication; the page
        # signals this via another JS global.
        auth_required = self._search_regex(
            r'window\.authRequired\s*=\s*(true|false);',
            webpage, 'auth required')
        if auth_required == 'true':
            requestor_id = self._search_regex(
                r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
                webpage, 'requestor id')
            resource = self._get_mvpd_resource(
                requestor_id, title, video_id, rating)
            query['auth'] = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)
        media_url = update_url_query(media_url, query)
        formats, subtitles = self._extract_theplatform_smil(
            media_url, video_id)
        self._sort_formats(formats)
        info.update({
            'id': video_id,
            'subtitles': subtitles,
            'formats': formats,
            # NOTE(review): parse_age_limit is applied twice here; the inner
            # call looks redundant (the outer call alone should suffice) --
            # confirm before simplifying.
            'age_limit': parse_age_limit(parse_age_limit(rating)),
        })
        # Series/season/episode metadata lives under a namespaced key set;
        # pick the first declared namespace prefix.
        ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
        if ns_keys:
            ns = list(ns_keys)[0]
            series = theplatform_metadata.get(ns + '$show')
            season_number = int_or_none(
                theplatform_metadata.get(ns + '$season'))
            episode = theplatform_metadata.get(ns + '$episodeTitle')
            episode_number = int_or_none(
                theplatform_metadata.get(ns + '$episode'))
            # Build a "<series> - Season <n> - <title>" style display title.
            if season_number:
                title = 'Season %d - %s' % (season_number, title)
            if series:
                title = '%s - %s' % (series, title)
            info.update({
                'title': title,
                'series': series,
                'season_number': season_number,
                'episode': episode,
                'episode_number': episode_number,
            })
        return info
							
								
								
									
										82
									
								
								youtube_dl/extractor/americastestkitchen.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								youtube_dl/extractor/americastestkitchen.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     try_get, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AmericasTestKitchenIE(InfoExtractor):
    """Resolve America's Test Kitchen pages to their Zype player embed.

    The page ships its data in ``window.__INITIAL_STATE__``; the actual
    playback is delegated to the Zype extractor via ``url_transparent``.
    """
    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
        'md5': 'b861c3e365ac38ad319cfd509c30577f',
        'info_dict': {
            'id': '5b400b9ee338f922cb06450c',
            'title': 'Weeknight Japanese Suppers',
            'ext': 'mp4',
            'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
            'thumbnail': r're:^https?://',
            'timestamp': 1523664000,
            'upload_date': '20180414',
            'release_date': '20180414',
            'series': "America's Test Kitchen",
            'season_number': 18,
            'episode': 'Weeknight Japanese Suppers',
            'episode_number': 15,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Site state is embedded as a JS object literal; normalize it
        # with js_to_json before parsing.
        initial_state = self._parse_json(
            self._search_regex(
                r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
                webpage, 'initial context'),
            video_id, js_to_json)

        # Episode pages and plain video pages nest the payload differently.
        episode = try_get(
            initial_state,
            (lambda x: x['episodeDetail']['content']['data'],
             lambda x: x['videoDetail']['content']['data']), dict)
        full_video = episode.get('full_video', {})

        # Hard failure (KeyError) is intentional when no zype id exists.
        zype_id = episode.get('zype_id') or full_video['zype_id']

        return {
            '_type': 'url_transparent',
            'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
            'ie_key': 'Zype',
            'title': episode.get('title') or full_video.get('title'),
            'description': clean_html(
                full_video.get('episode_description')
                or episode.get('description')
                or full_video.get('description')),
            'thumbnail': try_get(full_video, lambda x: x['photo']['image_url']),
            'release_date': unified_strdate(episode.get('aired_at')),
            'series': "America's Test Kitchen",
            'season_number': int_or_none(full_video.get('season_number')),
            'episode': full_video.get('title'),
            'episode_number': int_or_none(full_video.get('episode_number')),
        }
							
								
								
									
										102
									
								
								youtube_dl/extractor/amp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								youtube_dl/extractor/amp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,102 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     mimetype2ext, | ||||
|     parse_iso8601, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AMPIE(InfoExtractor):
    """Shared helper extractor for Akamai Adaptive Media Player feeds."""

    def _extract_feed_info(self, url):
        """Download an AMP JSON feed and build an info dict for its item."""
        feed = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed')
        item = feed.get('channel', {}).get('item')
        if not item:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))

        video_id = item['guid']

        def get_media_node(name, default=None):
            # A node may live inside media-group, directly on the item
            # under a 'media-' prefixed key, or under its bare name.
            prefixed = 'media-%s' % name
            container = item.get('media-group') or item
            return container.get(prefixed) or item.get(prefixed) or item.get(name, default)

        def listify(node):
            # Single children are decoded as a dict rather than a 1-item list.
            return [node] if isinstance(node, dict) else node

        thumbnails = []
        for thumb_node in listify(get_media_node('thumbnail')) or []:
            attrs = thumb_node.get('@attributes', {})
            thumb_url = url_or_none(attrs.get('url'))
            if not thumb_url:
                continue
            thumbnails.append({
                'url': self._proto_relative_url(thumb_url, 'http:'),
                'width': int_or_none(attrs.get('width')),
                'height': int_or_none(attrs.get('height')),
            })

        subtitles = {}
        for sub_node in listify(get_media_node('subTitle')) or []:
            attrs = sub_node.get('@attributes', {})
            href = url_or_none(attrs.get('href'))
            if not href:
                continue
            subtitles.setdefault(attrs.get('lang') or 'en', []).append({
                'url': href,
                'ext': mimetype2ext(attrs.get('type')) or determine_ext(href),
            })

        # NOTE(review): a feed with no content node raises TypeError here,
        # matching long-standing behavior.
        media_content = listify(get_media_node('content'))
        formats = []
        for content_node in media_content:
            attrs = content_node.get('@attributes', {})
            media_url = url_or_none(attrs.get('url'))
            if not media_url:
                continue
            ext = mimetype2ext(attrs.get('type')) or determine_ext(media_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'format_id': content_node.get('media-category', {}).get('@attributes', {}).get('label'),
                    'url': media_url,
                    'tbr': int_or_none(attrs.get('bitrate')),
                    'filesize': int_or_none(attrs.get('fileSize')),
                    'ext': ext,
                })

        self._sort_formats(formats)

        timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))

        return {
            'id': video_id,
            'title': get_media_node('title'),
            'description': get_media_node('description'),
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),
            'subtitles': subtitles,
            'formats': formats,
        }
							
								
								
									
										293
									
								
								youtube_dl/extractor/animeondemand.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										293
									
								
								youtube_dl/extractor/animeondemand.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,293 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     extract_attributes, | ||||
|     ExtractorError, | ||||
|     url_or_none, | ||||
|     urlencode_postdata, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AnimeOnDemandIE(InfoExtractor):
    """Extractor for anime-on-demand.de (geo-restricted, login-capable)."""
    _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P<id>\d+)'
    _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
    _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
    _NETRC_MACHINE = 'animeondemand'
    # German-speaking countries of Europe
    _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 'LU']
    _TESTS = [{
        # jap, OmU
        'url': 'https://www.anime-on-demand.de/anime/161',
        'info_dict': {
            'id': '161',
            'title': 'Grimgar, Ashes and Illusions (OmU)',
            'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
        },
        'playlist_mincount': 4,
    }, {
        # Film wording is used instead of Episode, ger/jap, Dub/OmU
        'url': 'https://www.anime-on-demand.de/anime/39',
        'only_matching': True,
    }, {
        # Episodes without titles, jap, OmU
        'url': 'https://www.anime-on-demand.de/anime/162',
        'only_matching': True,
    }, {
        # ger/jap, Dub/OmU, account required
        'url': 'https://www.anime-on-demand.de/anime/169',
        'only_matching': True,
    }, {
        # Full length film, non-series, ger/jap, Dub/OmU, account required
        'url': 'https://www.anime-on-demand.de/anime/185',
        'only_matching': True,
    }, {
        # Flash videos
        'url': 'https://www.anime-on-demand.de/anime/12',
        'only_matching': True,
    }]

    def _login(self):
        """Log in with netrc/CLI credentials, if any were provided."""
        username, password = self._get_login_info()
        if username is None:
            # No credentials configured; proceed anonymously.
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # The login page itself reveals geo blocking before any POST.
        if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
            self.raise_geo_restricted(
                '%s is only available in German-speaking countries of Europe' % self.IE_NAME)

        login_form = self._form_hidden_inputs('new_user', login_page)

        login_form.update({
            'user[login]': username,
            'user[password]': password,
        })

        post_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
            'post url', default=self._LOGIN_URL, group='url')

        if not post_url.startswith('http'):
            post_url = urljoin(self._LOGIN_URL, post_url)

        response = self._download_webpage(
            post_url, None, 'Logging in',
            data=urlencode_postdata(login_form), headers={
                'Referer': self._LOGIN_URL,
            })

        # Success is detected by the presence of a logout link.
        if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')):
            error = self._search_regex(
                r'<p[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</p>',
                response, 'error', default=None, group='error')
            if error:
                raise ExtractorError('Unable to login: %s' % error, expected=True)
            raise ExtractorError('Unable to log in')

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Extract an anime page as a playlist of episodes (or one film)."""
        anime_id = self._match_id(url)

        webpage = self._download_webpage(url, anime_id)

        # Accounts may need the HTML5 beta enabled before playlists appear.
        if 'data-playlist=' not in webpage:
            self._download_webpage(
                self._APPLY_HTML5_URL, anime_id,
                'Activating HTML5 beta', 'Unable to apply HTML5 beta')
            webpage = self._download_webpage(url, anime_id)

        # The playlist AJAX endpoints require the page's CSRF token.
        csrf_token = self._html_search_meta(
            'csrf-token', webpage, 'csrf token', fatal=True)

        anime_title = self._html_search_regex(
            r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>',
            webpage, 'anime name')
        anime_description = self._html_search_regex(
            r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
            webpage, 'anime description', default=None)

        entries = []

        def extract_info(html, video_id, num=None):
            # Collect title/description/formats from the streamstarter
            # inputs of one episode (or film) fragment.
            title, description = [None] * 2
            formats = []

            for input_ in re.findall(
                    r'<input[^>]+class=["\'].*?streamstarter[^>]+>', html):
                attributes = extract_attributes(input_)
                title = attributes.get('data-dialog-header')
                playlist_urls = []
                for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'):
                    playlist_url = attributes.get(playlist_key)
                    if isinstance(playlist_url, compat_str) and re.match(
                            r'/?[\da-zA-Z]+', playlist_url):
                        playlist_urls.append(attributes[playlist_key])
                if not playlist_urls:
                    continue

                lang = attributes.get('data-lang')
                lang_note = attributes.get('value')

                for playlist_url in playlist_urls:
                    # kind is e.g. the media variant encoded in the URL path.
                    kind = self._search_regex(
                        r'videomaterialurl/\d+/([^/]+)/',
                        playlist_url, 'media kind', default=None)
                    format_id_list = []
                    if lang:
                        format_id_list.append(lang)
                    if kind:
                        format_id_list.append(kind)
                    if not format_id_list and num is not None:
                        format_id_list.append(compat_str(num))
                    format_id = '-'.join(format_id_list)
                    format_note = ', '.join(filter(None, (kind, lang_note)))
                    item_id_list = []
                    if format_id:
                        item_id_list.append(format_id)
                    item_id_list.append('videomaterial')
                    playlist = self._download_json(
                        urljoin(url, playlist_url), video_id,
                        'Downloading %s JSON' % ' '.join(item_id_list),
                        headers={
                            'X-Requested-With': 'XMLHttpRequest',
                            'X-CSRF-Token': csrf_token,
                            'Referer': url,
                            'Accept': 'application/json, text/javascript, */*; q=0.01',
                        }, fatal=False)
                    if not playlist:
                        continue
                    stream_url = url_or_none(playlist.get('streamurl'))
                    if stream_url:
                        # Legacy RTMP streams get a single flv format.
                        rtmp = re.search(
                            r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+/))(?P<playpath>mp[34]:.+)',
                            stream_url)
                        if rtmp:
                            formats.append({
                                'url': rtmp.group('url'),
                                'app': rtmp.group('app'),
                                'play_path': rtmp.group('playpath'),
                                'page_url': url,
                                'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf',
                                'rtmp_real_time': True,
                                'format_id': 'rtmp',
                                'ext': 'flv',
                            })
                            continue
                    start_video = playlist.get('startvideo', 0)
                    playlist = playlist.get('playlist')
                    if not playlist or not isinstance(playlist, list):
                        continue
                    playlist = playlist[start_video]
                    title = playlist.get('title')
                    if not title:
                        continue
                    description = playlist.get('description')
                    for source in playlist.get('sources', []):
                        file_ = source.get('file')
                        if not file_:
                            continue
                        ext = determine_ext(file_)
                        format_id_list = [lang, kind]
                        if ext == 'm3u8':
                            format_id_list.append('hls')
                        elif source.get('type') == 'video/dash' or ext == 'mpd':
                            format_id_list.append('dash')
                        format_id = '-'.join(filter(None, format_id_list))
                        if ext == 'm3u8':
                            file_formats = self._extract_m3u8_formats(
                                file_, video_id, 'mp4',
                                entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
                        elif source.get('type') == 'video/dash' or ext == 'mpd':
                            # NOTE(review): the continue below deliberately
                            # skips DASH extraction — the following call is
                            # unreachable dead code kept for reference.
                            continue
                            file_formats = self._extract_mpd_formats(
                                file_, video_id, mpd_id=format_id, fatal=False)
                        else:
                            continue
                        for f in file_formats:
                            f.update({
                                'language': lang,
                                'format_note': format_note,
                            })
                        formats.extend(file_formats)

            return {
                'title': title,
                'description': description,
                'formats': formats,
            }

        def extract_entries(html, video_id, common_info, num=None):
            # Append a full entry when formats exist, else a teaser/trailer.
            info = extract_info(html, video_id, num)

            if info['formats']:
                self._sort_formats(info['formats'])
                f = common_info.copy()
                f.update(info)
                entries.append(f)

            # Extract teaser/trailer only when full episode is not available
            if not info['formats']:
                m = re.search(
                    r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<',
                    html)
                if m:
                    f = common_info.copy()
                    f.update({
                        'id': '%s-%s' % (f['id'], m.group('kind').lower()),
                        'title': m.group('title'),
                        'url': urljoin(url, m.group('href')),
                    })
                    entries.append(f)

        def extract_episodes(html):
            # One entry per episode box on the page.
            for num, episode_html in enumerate(re.findall(
                    r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1):
                episodebox_title = self._search_regex(
                    (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
                     r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
                    episode_html, 'episodebox title', default=None, group='title')
                if not episodebox_title:
                    continue

                episode_number = int(self._search_regex(
                    r'(?:Episode|Film)\s*(\d+)',
                    episodebox_title, 'episode number', default=num))
                episode_title = self._search_regex(
                    r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
                    episodebox_title, 'episode title', default=None)

                video_id = 'episode-%d' % episode_number

                common_info = {
                    'id': video_id,
                    'series': anime_title,
                    'episode': episode_title,
                    'episode_number': episode_number,
                }

                extract_entries(episode_html, video_id, common_info)

        def extract_film(html, video_id):
            # Non-series pages: treat the whole page as a single film.
            common_info = {
                'id': anime_id,
                'title': anime_title,
                'description': anime_description,
            }
            extract_entries(html, video_id, common_info)

        extract_episodes(webpage)

        if not entries:
            extract_film(webpage, anime_id)

        return self.playlist_result(entries, anime_id, anime_title, anime_description)
							
								
								
									
										314
									
								
								youtube_dl/extractor/anvato.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										314
									
								
								youtube_dl/extractor/anvato.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,314 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import hashlib | ||||
| import json | ||||
| import random | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..aes import aes_encrypt | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     bytes_to_intlist, | ||||
|     determine_ext, | ||||
|     intlist_to_bytes, | ||||
|     int_or_none, | ||||
|     strip_jsonp, | ||||
|     unescapeHTML, | ||||
|     unsmuggle_url, | ||||
| ) | ||||
|  | ||||
|  | ||||
def md5_text(s):
    """Return the hex MD5 digest of *s*, coercing non-string values first."""
    text = s if isinstance(s, compat_str) else compat_str(s)
    return hashlib.md5(text.encode('utf-8')).hexdigest()
|  | ||||
|  | ||||
| class AnvatoIE(InfoExtractor): | ||||
|     _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)' | ||||
|  | ||||
|     # Copied from anvplayer.min.js | ||||
|     _ANVACK_TABLE = { | ||||
|         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', | ||||
|         'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA', | ||||
|         'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP', | ||||
|         'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv', | ||||
|         'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7', | ||||
|         'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR', | ||||
|         'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg', | ||||
|         'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto', | ||||
|         'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY', | ||||
|         'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh', | ||||
|         'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK', | ||||
|         'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D', | ||||
|         'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad', | ||||
|         'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp', | ||||
|         'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih', | ||||
|         'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR', | ||||
|         'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW', | ||||
|         'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su', | ||||
|         'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q', | ||||
|         'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5', | ||||
|         'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3', | ||||
|         'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI', | ||||
|         'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s', | ||||
|         'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz', | ||||
|         'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg', | ||||
|         'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x', | ||||
|         'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH', | ||||
|         'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX', | ||||
|         'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc', | ||||
|         'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK', | ||||
|         'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7', | ||||
|         'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C', | ||||
|         'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e', | ||||
|         'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1', | ||||
|         'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re', | ||||
|         'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51', | ||||
|         'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho', | ||||
|         'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9', | ||||
|         'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH', | ||||
|         'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F', | ||||
|         'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo', | ||||
|         'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR', | ||||
|         'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa', | ||||
|         'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk', | ||||
|         'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ', | ||||
|         'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ', | ||||
|         'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m', | ||||
|         'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b', | ||||
|         'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3', | ||||
|         'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK', | ||||
|         'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', | ||||
|         'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', | ||||
|         'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F', | ||||
|         'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx', | ||||
|         'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ', | ||||
|         'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH', | ||||
|         'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm', | ||||
|         'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt', | ||||
|         'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl', | ||||
|         'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b', | ||||
|         'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV', | ||||
|         'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg', | ||||
|         'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk', | ||||
|         'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT', | ||||
|         'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa', | ||||
|         'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv', | ||||
|         'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k', | ||||
|         'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI', | ||||
|         'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr', | ||||
|         'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw', | ||||
|         'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K', | ||||
|         'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH', | ||||
|         'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK', | ||||
|         'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu', | ||||
|         'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', | ||||
|         'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', | ||||
|         'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK', | ||||
|         'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n', | ||||
|         'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD', | ||||
|         'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk', | ||||
|         'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn', | ||||
|         'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W', | ||||
|         'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ', | ||||
|         'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ' | ||||
|     } | ||||
|  | ||||
    # Maps short MCP (Multi-Channel Platform) aliases — as they appear in the
    # 'mcp' field of embedded player configs — to full Anvato access keys.
    # Used by _extract_urls and _real_extract to resolve aliases.
    _MCP_TO_ACCESS_KEY_TABLE = {
        'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922',
        'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749',
        # NOTE(review): 'univison' (sic) is kept alongside 'uni' — presumably
        # some embeds carry the misspelling; confirm before "fixing" the key.
        'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
        'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa',
        'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a',
        'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
        'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336',
        'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3',
        'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
        'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
        'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
        'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
    }

    # Fallback secret used for the 'anvstk' token when the access key has no
    # entry in _ANVACK_TABLE (see _get_video_json).
    _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'

    # Matches <script ... data-anvp="..."> player embeds; the 'anvp' group
    # captures the (HTML-escaped) JSON player configuration.
    _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
    # 8-byte key material fed to aes_encrypt when deriving the
    # X-Anvato-Adst-Auth request parameter (see _get_video_json).
    _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'

    _TESTS = [{
        # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
        'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
        'info_dict': {
            'id': '4465496',
            'ext': 'mp4',
            'title': 'VIDEO: Humpback whale breaches right next to NH boat',
            'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
            'duration': 22,
            'timestamp': 1534855680,
            'upload_date': '20180821',
            'uploader': 'ANV',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
        'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
        'only_matching': True,
    }]
|  | ||||
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and the per-instance server-time cache."""
        super(AnvatoIE, self).__init__(*args, **kwargs)
        # Lazily populated by _server_time on first use; None means "not fetched yet".
        self.__server_time = None
|  | ||||
|     def _server_time(self, access_key, video_id): | ||||
|         if self.__server_time is not None: | ||||
|             return self.__server_time | ||||
|  | ||||
|         self.__server_time = int(self._download_json( | ||||
|             self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id, | ||||
|             note='Fetching server time')['server_time']) | ||||
|  | ||||
|         return self.__server_time | ||||
|  | ||||
|     def _api_prefix(self, access_key): | ||||
|         return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage') | ||||
|  | ||||
    def _get_video_json(self, access_key, video_id):
        """Call the Anvato 'mcp/video' endpoint and return its JSON (JSONP-stripped).

        Reproduces the web player's signed request: an AES-derived
        X-Anvato-Adst-Auth URL parameter plus an 'api' payload carrying a
        random request id (anvrid), a token (anvstk) and the server time.
        """
        # See et() in anvplayer.min.js, which is an alias of getVideoJSON()
        video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
        server_time = self._server_time(access_key, video_id)
        # Auth input: "<server_time>~<md5(url)>~<md5(server_time)>",
        # truncated to 64 characters before encryption below.
        input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))

        auth_secret = intlist_to_bytes(aes_encrypt(
            bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))

        video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
        # Random 30-character request id, presumably mirroring what the
        # player generates client-side — confirm against anvplayer.min.js.
        anvrid = md5_text(time.time() * 1000 * random.random())[:30]
        payload = {
            'api': {
                'anvrid': anvrid,
                # Token over "<access_key>|<anvrid>|<server_time>|<secret>",
                # where the secret is the per-key _ANVACK_TABLE entry or the
                # shared _API_KEY fallback.
                'anvstk': md5_text('%s|%s|%d|%s' % (
                    access_key, anvrid, server_time,
                    self._ANVACK_TABLE.get(access_key, self._API_KEY))),
                'anvts': server_time,
            },
        }

        return self._download_json(
            video_data_url, video_id, transform_source=strip_jsonp,
            data=json.dumps(payload).encode('utf-8'))
|  | ||||
|     def _get_anvato_videos(self, access_key, video_id): | ||||
|         video_data = self._get_video_json(access_key, video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for published_url in video_data['published_urls']: | ||||
|             video_url = published_url['embed_url'] | ||||
|             media_format = published_url.get('format') | ||||
|             ext = determine_ext(video_url) | ||||
|  | ||||
|             if ext == 'smil' or media_format == 'smil': | ||||
|                 formats.extend(self._extract_smil_formats(video_url, video_id)) | ||||
|                 continue | ||||
|  | ||||
|             tbr = int_or_none(published_url.get('kbps')) | ||||
|             a_format = { | ||||
|                 'url': video_url, | ||||
|                 'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(), | ||||
|                 'tbr': tbr if tbr != 0 else None, | ||||
|             } | ||||
|  | ||||
|             if media_format == 'm3u8' and tbr is not None: | ||||
|                 a_format.update({ | ||||
|                     'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), | ||||
|                     'ext': 'mp4', | ||||
|                 }) | ||||
|             elif media_format == 'm3u8-variant' or ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     video_url, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|                 continue | ||||
|             elif ext == 'mp3' or media_format == 'mp3': | ||||
|                 a_format['vcodec'] = 'none' | ||||
|             else: | ||||
|                 a_format.update({ | ||||
|                     'width': int_or_none(published_url.get('width')), | ||||
|                     'height': int_or_none(published_url.get('height')), | ||||
|                 }) | ||||
|             formats.append(a_format) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for caption in video_data.get('captions', []): | ||||
|             a_caption = { | ||||
|                 'url': caption['url'], | ||||
|                 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None | ||||
|             } | ||||
|             subtitles.setdefault(caption['language'], []).append(a_caption) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'title': video_data.get('def_title'), | ||||
|             'description': video_data.get('def_description'), | ||||
|             'tags': video_data.get('def_tags', '').split(','), | ||||
|             'categories': video_data.get('categories'), | ||||
|             'thumbnail': video_data.get('thumbnail'), | ||||
|             'timestamp': int_or_none(video_data.get( | ||||
|                 'ts_published') or video_data.get('ts_added')), | ||||
|             'uploader': video_data.get('mcp_id'), | ||||
|             'duration': int_or_none(video_data.get('duration')), | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_urls(ie, webpage, video_id): | ||||
|         entries = [] | ||||
|         for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage): | ||||
|             anvplayer_data = ie._parse_json( | ||||
|                 mobj.group('anvp'), video_id, transform_source=unescapeHTML, | ||||
|                 fatal=False) | ||||
|             if not anvplayer_data: | ||||
|                 continue | ||||
|             video = anvplayer_data.get('video') | ||||
|             if not isinstance(video, compat_str) or not video.isdigit(): | ||||
|                 continue | ||||
|             access_key = anvplayer_data.get('accessKey') | ||||
|             if not access_key: | ||||
|                 mcp = anvplayer_data.get('mcp') | ||||
|                 if mcp: | ||||
|                     access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get( | ||||
|                         mcp.lower()) | ||||
|             if not access_key: | ||||
|                 continue | ||||
|             entries.append(ie.url_result( | ||||
|                 'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(), | ||||
|                 video_id=video)) | ||||
|         return entries | ||||
|  | ||||
|     def _extract_anvato_videos(self, webpage, video_id): | ||||
|         anvplayer_data = self._parse_json( | ||||
|             self._html_search_regex( | ||||
|                 self._ANVP_RE, webpage, 'Anvato player data', group='anvp'), | ||||
|             video_id) | ||||
|         return self._get_anvato_videos( | ||||
|             anvplayer_data['accessKey'], anvplayer_data['video']) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         self._initialize_geo_bypass({ | ||||
|             'countries': smuggled_data.get('geo_countries'), | ||||
|         }) | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         access_key, video_id = mobj.group('access_key_or_mcp', 'id') | ||||
|         if access_key not in self._ANVACK_TABLE: | ||||
|             access_key = self._MCP_TO_ACCESS_KEY_TABLE.get( | ||||
|                 access_key) or access_key | ||||
|         return self._get_anvato_videos(access_key, video_id) | ||||
							
								
								
									
										133
									
								
								youtube_dl/extractor/aol.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								youtube_dl/extractor/aol.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AolIE(InfoExtractor):
    """Extractor for video pages on aol.com and its regional variants."""
    IE_NAME = 'aol.com'
    _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'

    _TESTS = [{
        # video with 5min ID
        'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/',
        'md5': '18ef68f48740e86ae94b98da815eec42',
        'info_dict': {
            'id': '518167793',
            'ext': 'mp4',
            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
            'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.',
            'timestamp': 1395405060,
            'upload_date': '20140321',
            'uploader': 'Newsy Studio',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        # video with vidible ID
        'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
        'info_dict': {
            'id': '5707d6b8e4b090497b04f706',
            'ext': 'mp4',
            'title': 'Netflix is Raising Rates',
            'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. Veuer’s Carly Figueroa has more.',
            'upload_date': '20160408',
            'timestamp': 1460123280,
            'uploader': 'Veuer',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
        'only_matching': True,
    }, {
        'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/',
        'only_matching': True,
    }, {
        'url': 'aol-video:5707d6b8e4b090497b04f706',
        'only_matching': True,
    }, {
        'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/',
        'only_matching': True,
    }, {
        'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/',
        'only_matching': True,
    }, {
        'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/',
        'only_matching': True,
    }, {
        'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/',
        'only_matching': True,
    }, {
        'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The feed API wraps everything in a 'response' envelope with a
        # status string; anything other than 'Ok' is a hard failure.
        response = self._download_json(
            'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
            video_id)['response']
        if response['statusText'] != 'Ok':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, response['statusText']),
                expected=True)

        video_data = response['data']
        formats = []
        master_playlist = url_or_none(video_data.get('videoMasterPlaylist'))
        if master_playlist:
            formats.extend(self._extract_m3u8_formats(
                master_playlist, video_id, 'mp4', m3u8_id='hls', fatal=False))
        for rendition in video_data.get('renditions', []):
            rendition_url = url_or_none(rendition.get('url'))
            if not rendition_url:
                continue
            if rendition.get('format') == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    rendition_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
                continue
            f = {
                'url': rendition_url,
                'format_id': rendition.get('quality'),
            }
            # Resolution is encoded either directly in the URL path
            # (e.g. .../640x360/...) or as w=/h= query parameters.
            m = re.search(r'(\d+)x(\d+)', rendition_url)
            if m:
                f['width'] = int(m.group(1))
                f['height'] = int(m.group(2))
            else:
                qs = compat_parse_qs(compat_urllib_parse_urlparse(rendition_url).query)
                f['width'] = int_or_none(qs.get('w', [None])[0])
                f['height'] = int_or_none(qs.get('h', [None])[0])
            formats.append(f)
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        return {
            'id': video_id,
            'title': video_data['title'],
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(video_data.get('publishDate')),
            'view_count': int_or_none(video_data.get('views')),
            'description': video_data.get('description'),
            'uploader': video_data.get('videoOwner'),
            'formats': formats,
        }
							
								
								
									
										94
									
								
								youtube_dl/extractor/apa.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								youtube_dl/extractor/apa.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,94 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     js_to_json, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class APAIE(InfoExtractor):
    """Extractor for APA (Austria Presse Agentur) video embeds (*.apa.at)."""
    _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
        'md5': '2b12292faeb0a7d930c778c7a5b4759b',
        'info_dict': {
            'id': 'jjv85FdZ',
            'ext': 'mp4',
            'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 254,
            'timestamp': 1519211149,
            'upload_date': '20180221',
        },
    }, {
        'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
        'only_matching': True,
    }, {
        'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76',
        'only_matching': True,
    }, {
        'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src attribute of every embedded APA player iframe."""
        urls = []
        for mobj in re.finditer(
                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1',
                webpage):
            urls.append(mobj.group('url'))
        return urls

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Some embeds just reference a JW Platform media id; delegate when present.
        jwplatform_id = self._search_regex(
            r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
            'jwplatform id', default=None)
        if jwplatform_id:
            return self.url_result(
                'jwplatform:' + jwplatform_id, ie='JWPlatform',
                video_id=video_id)

        # Otherwise parse the JS 'sources' array out of the page.
        sources = self._parse_json(
            self._search_regex(
                r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
            video_id, transform_source=js_to_json)

        formats = []
        for source in sources:
            if not isinstance(source, dict):
                continue
            source_url = url_or_none(source.get('file'))
            if not source_url:
                continue
            if determine_ext(source_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({'url': source_url})
        self._sort_formats(formats)

        thumbnail = self._search_regex(
            r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'thumbnail', fatal=False, group='url')

        return {
            'id': video_id,
            # The embed page exposes no usable title; fall back to the id.
            'title': video_id,
            'thumbnail': thumbnail,
            'formats': formats,
        }
							
								
								
									
										95
									
								
								youtube_dl/extractor/aparat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								youtube_dl/extractor/aparat.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,95 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     merge_dicts, | ||||
|     mimetype2ext, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AparatIE(InfoExtractor):
    """Extractor for aparat.com video pages and embeds."""
    _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'

    _TESTS = [{
        'url': 'http://www.aparat.com/v/wP8On',
        'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
        'info_dict': {
            'id': 'wP8On',
            'ext': 'mp4',
            'title': 'تیم گلکسی 11 - زومیت',
            'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
            'duration': 231,
            'timestamp': 1387394859,
            'upload_date': '20131218',
            'view_count': int,
        },
    }, {
        # multiple formats
        'url': 'https://www.aparat.com/v/8dflw/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Parse the embedded player options JSON and build the info dict."""
        video_id = self._match_id(url)

        # Provides more metadata
        webpage = self._download_webpage(url, video_id, fatal=False)

        if not webpage:
            # Note: There is an easier-to-parse configuration at
            # http://www.aparat.com/video/video/config/videohash/%video_id
            # but the URL in there does not work
            webpage = self._download_webpage(
                'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
                video_id)

        # The player config is embedded as options = JSON.parse("...").
        options = self._parse_json(
            self._search_regex(
                r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
                webpage, 'options', group='value'),
            video_id)

        player = options['plugins']['sabaPlayerPlugin']

        formats = []
        # multiSRC is a list of source groups, each a list of source dicts.
        for sources in player['multiSRC']:
            for item in sources:
                if not isinstance(item, dict):
                    continue
                file_url = url_or_none(item.get('src'))
                if not file_url:
                    continue
                item_type = item.get('type')
                if item_type == 'application/vnd.apple.mpegurl':
                    formats.extend(self._extract_m3u8_formats(
                        file_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id='hls',
                        fatal=False))
                else:
                    # FIX(idiom): reuse item_type instead of re-reading
                    # item['type'] a second time as the original did.
                    ext = mimetype2ext(item_type)
                    label = item.get('label')
                    formats.append({
                        'url': file_url,
                        'ext': ext,
                        'format_id': 'http-%s' % (label or ext),
                        # Height is only derivable from labels like '720p'.
                        'height': int_or_none(self._search_regex(
                            r'(\d+)[pP]', label or '', 'height',
                            default=None)),
                    })
        self._sort_formats(
            formats, field_preference=('height', 'width', 'tbr', 'format_id'))

        # Prefer JSON-LD metadata when present; fall back to the player title.
        info = self._search_json_ld(webpage, video_id, default={})

        if not info.get('title'):
            info['title'] = player['title']

        return merge_dicts(info, {
            'id': video_id,
            'thumbnail': url_or_none(options.get('poster')),
            'duration': int_or_none(player.get('duration')),
            'formats': formats,
        })
							
								
								
									
										50
									
								
								youtube_dl/extractor/appleconnect.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								youtube_dl/extractor/appleconnect.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     str_to_int, | ||||
|     ExtractorError | ||||
| ) | ||||
|  | ||||
|  | ||||
class AppleConnectIE(InfoExtractor):
    """Extractor for video posts on Apple Music Connect
    (itunes.apple.com/<country>/post/idsa.<id>)."""
    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
    _TEST = {
        'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
        'md5': 'e7c38568a01ea45402570e6029206723',
        'info_dict': {
            'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
            'ext': 'm4v',
            'title': 'Energy',
            'uploader': 'Drake',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150710',
            'timestamp': 1436545535,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        try:
            video_json = self._html_search_regex(
                r'class="auc-video-data">(\{.*?\})', webpage, 'json')
        except ExtractorError:
            # No video JSON on the page means this is a non-video post
            raise ExtractorError('This post doesn\'t contain a video', expected=True)

        video_data = self._parse_json(video_json, video_id)
        # Page furniture (timestamp, love count) may be missing or change
        # markup; treat it as optional instead of aborting extraction.
        # str_to_int passes None through unchanged.
        timestamp = str_to_int(self._html_search_regex(
            r'data-timestamp="(\d+)"', webpage, 'timestamp', default=None))
        like_count = str_to_int(self._html_search_regex(
            r'(\d+) Loves', webpage, 'like count', default=None))

        return {
            'id': video_id,
            # sslSrc and title are required; missing them is a real failure
            'url': video_data['sslSrc'],
            'title': video_data['title'],
            'description': video_data.get('description'),
            'uploader': video_data.get('artistName'),
            'thumbnail': video_data.get('artworkUrl'),
            'timestamp': timestamp,
            'like_count': like_count,
        }
							
								
								
									
										283
									
								
								youtube_dl/extractor/appletrailers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										283
									
								
								youtube_dl/extractor/appletrailers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,283 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AppleTrailersIE(InfoExtractor):
    """Extract all clips (trailers, teasers, ...) of a movie page on
    trailers.apple.com as a playlist.

    Two data sources are tried in order:
      1. the JSON feed at /trailers/feeds/data/<film_id>.json
      2. (fallback) the legacy iTunes playlist HTML include plus one
         settings JSON per clip
    """
    IE_NAME = 'appletrailers'
    _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TESTS = [{
        'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
        'info_dict': {
            'id': '5111',
            'title': 'Man of Steel',
        },
        'playlist': [
            {
                'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
                'info_dict': {
                    'id': 'manofsteel-trailer4',
                    'ext': 'mov',
                    'duration': 111,
                    'title': 'Trailer 4',
                    'upload_date': '20130523',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
                'info_dict': {
                    'id': 'manofsteel-trailer3',
                    'ext': 'mov',
                    'duration': 182,
                    'title': 'Trailer 3',
                    'upload_date': '20130417',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': 'd0f1e1150989b9924679b441f3404d48',
                'info_dict': {
                    'id': 'manofsteel-trailer',
                    'ext': 'mov',
                    'duration': 148,
                    'title': 'Trailer',
                    'upload_date': '20121212',
                    'uploader_id': 'wb',
                },
            },
            {
                'md5': '5fe08795b943eb2e757fa95cb6def1cb',
                'info_dict': {
                    'id': 'manofsteel-teaser',
                    'ext': 'mov',
                    'duration': 93,
                    'title': 'Teaser',
                    'upload_date': '20120721',
                    'uploader_id': 'wb',
                },
            },
        ]
    }, {
        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
        'info_dict': {
            'id': '4489',
            'title': 'Blackthorn',
        },
        'playlist_mincount': 2,
        'expected_warnings': ['Unable to download JSON metadata'],
    }, {
        # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
        'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
        'info_dict': {
            'id': '15881',
            'title': 'Kung Fu Panda 3',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
        'only_matching': True,
    }, {
        'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
        'only_matching': True,
    }]

    # Matches the JSON argument of iTunes.playURL(...) in onClick handlers
    _JSON_RE = r'iTunes.playURL\((.*?)\);'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        webpage = self._download_webpage(url, movie)
        film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
        # Preferred source: JSON feed (not available for every movie,
        # hence fatal=False and the scraping fallback below)
        film_data = self._download_json(
            'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
            film_id, fatal=False)

        if film_data:
            entries = []
            for clip in film_data.get('clips', []):
                clip_title = clip['title']

                formats = []
                for version, version_data in clip.get('versions', {}).items():
                    for size, size_data in version_data.get('sizes', {}).items():
                        src = size_data.get('src')
                        if not src:
                            continue
                        formats.append({
                            'format_id': '%s-%s' % (version, size),
                            # rewrite ..._NNNp.mov -> ..._hNNNp.mov (same
                            # rewrite as in the legacy path below)
                            'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
                            'width': int_or_none(size_data.get('width')),
                            'height': int_or_none(size_data.get('height')),
                            # version keys start with a 2-letter language code
                            'language': version[:2],
                        })
                self._sort_formats(formats)

                entries.append({
                    'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
                    'formats': formats,
                    'title': clip_title,
                    'thumbnail': clip.get('screen') or clip.get('thumb'),
                    'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
                    'upload_date': unified_strdate(clip.get('posted')),
                    'uploader_id': uploader_id,
                })

            page_data = film_data.get('page', {})
            return self.playlist_result(entries, film_id, page_data.get('movie_title'))

        # Fallback: scrape the legacy iTunes playlist include
        playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')

        def fix_html(s):
            # Massage the HTML fragment until it is parseable as XML
            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
            # The ' in the onClick attributes are not escaped, it couldn't be parsed
            # like: http://trailers.apple.com/trailers/wb/gravity/

            def _clean_json(m):
                return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
            s = re.sub(self._JSON_RE, _clean_json, s)
            s = '<html>%s</html>' % s
            return s
        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)

        playlist = []
        for li in doc.findall('./div/ul/li'):
            on_click = li.find('.//a').attrib['onClick']
            trailer_info_json = self._search_regex(self._JSON_RE,
                                                   on_click, 'trailer info')
            trailer_info = json.loads(trailer_info_json)
            first_url = trailer_info.get('url')
            if not first_url:
                # entries without a URL are not videos; skip them
                continue
            title = trailer_info['title']
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']
            upload_date = trailer_info['posted'].replace('-', '')

            # runtime is given as "M:SS"; convert to seconds
            runtime = trailer_info['runtime']
            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            # derive the per-clip settings JSON name from the first URL
            trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
            settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
            settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')

            formats = []
            for format in settings['metadata']['sizes']:
                # The src is a file pointing to the real video file
                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
                formats.append({
                    'url': format_url,
                    'format': format['type'],
                    'width': int_or_none(format['width']),
                    'height': int_or_none(format['height']),
                })

            self._sort_formats(formats)

            playlist.append({
                '_type': 'video',
                'id': video_id,
                'formats': formats,
                'title': title,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'http_headers': {
                    # NOTE(review): presumably the CDN expects a QuickTime
                    # user agent for these .mov downloads — confirm
                    'User-Agent': 'QuickTime compatible (youtube-dlc)',
                },
            })

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
|  | ||||
|  | ||||
class AppleTrailersSectionIE(InfoExtractor):
    """Extract a trailers.apple.com front-page section (e.g. "Just Added")
    as a playlist of individual movie pages."""
    IE_NAME = 'appletrailers:section'
    # Maps the URL fragment name to the JSON feed path and display title
    _SECTIONS = {
        'justadded': {
            'feed_path': 'just_added',
            'title': 'Just Added',
        },
        'exclusive': {
            'feed_path': 'exclusive',
            'title': 'Exclusive',
        },
        'justhd': {
            'feed_path': 'just_hd',
            'title': 'Just HD',
        },
        'mostpopular': {
            'feed_path': 'most_pop',
            'title': 'Most Popular',
        },
        'moviestudios': {
            'feed_path': 'studios',
            'title': 'Movie Studios',
        },
    }
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
    _TESTS = [{
        'url': 'http://trailers.apple.com/#section=justadded',
        'info_dict': {
            'title': 'Just Added',
            'id': 'justadded',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=exclusive',
        'info_dict': {
            'title': 'Exclusive',
            'id': 'exclusive',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=justhd',
        'info_dict': {
            'title': 'Just HD',
            'id': 'justhd',
        },
        'playlist_mincount': 80,
    }, {
        'url': 'http://trailers.apple.com/#section=mostpopular',
        'info_dict': {
            'title': 'Most Popular',
            'id': 'mostpopular',
        },
        'playlist_mincount': 30,
    }, {
        'url': 'http://trailers.apple.com/#section=moviestudios',
        'info_dict': {
            'title': 'Movie Studios',
            'id': 'moviestudios',
        },
        'playlist_mincount': 80,
    }]

    def _real_extract(self, url):
        section_id = self._match_id(url)
        section_cfg = self._SECTIONS[section_id]
        # Each section has its own JSON feed listing movie page locations
        feed = self._download_json(
            'http://trailers.apple.com/trailers/home/feeds/%s.json'
            % section_cfg['feed_path'], section_id)
        entries = []
        for item in feed:
            entries.append(
                self.url_result('http://trailers.apple.com' + item['location']))
        return self.playlist_result(entries, section_id, section_cfg['title'])
							
								
								
									
										65
									
								
								youtube_dl/extractor/archiveorg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/archiveorg.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ArchiveOrgIE(InfoExtractor):
    """Extractor for archive.org item pages (details/embed URLs)."""
    IE_NAME = 'archive.org'
    IE_DESC = 'archive.org videos'
    _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
    _TESTS = [{
        'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
        'md5': '8af1d4cf447933ed3c7f4871162602db',
        'info_dict': {
            'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
            'ext': 'ogg',
            'title': '1968 Demo - FJCC Conference Presentation Reel #1',
            'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
            'upload_date': '19681210',
            'uploader': 'SRI International'
        }
    }, {
        'url': 'https://archive.org/details/Cops1922',
        'md5': '0869000b4ce265e8ca62738b336b268a',
        'info_dict': {
            'id': 'Cops1922',
            'ext': 'mp4',
            'title': 'Buster Keaton\'s "Cops" (1922)',
            'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
        }
    }, {
        'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Always use the embed page: it carries the jwplayer setup call
        webpage = self._download_webpage(
            'http://archive.org/embed/' + video_id, video_id)

        playlist_json = self._search_regex(
            r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
            webpage, 'jwplayer playlist')
        info = self._parse_jwplayer_data(
            {'playlist': self._parse_json(playlist_json, video_id)},
            video_id, base_url=url)

        # The details page with output=json provides item metadata
        metadata = self._download_json(
            'http://archive.org/details/' + video_id, video_id,
            query={'output': 'json'})['metadata']

        def first_of(field):
            # metadata values are lists; take the first entry if present
            return metadata.get(field, [None])[0]

        info['title'] = first_of('title') or info.get('title')
        info['description'] = clean_html(first_of('description'))
        if info.get('_type') != 'playlist':
            info['uploader'] = first_of('creator')
            info['upload_date'] = unified_strdate(first_of('date'))
        return info
							
								
								
									
										422
									
								
								youtube_dl/extractor/ard.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										422
									
								
								youtube_dl/extractor/ard.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,422 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .generic import GenericIE | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     qualities, | ||||
|     str_or_none, | ||||
|     try_get, | ||||
|     unified_strdate, | ||||
|     unified_timestamp, | ||||
|     update_url_query, | ||||
|     url_or_none, | ||||
|     xpath_text, | ||||
| ) | ||||
| from ..compat import compat_etree_fromstring | ||||
|  | ||||
|  | ||||
class ARDMediathekBaseIE(InfoExtractor):
    """Base class with shared helpers for the ARD Mediathek extractors:
    fetching and parsing ARD's "media info" JSON into formats/subtitles."""
    _GEO_COUNTRIES = ['DE']

    def _extract_media_info(self, media_info_url, webpage, video_id):
        # The webpage is only consulted for the '"fsk"' marker (age
        # restriction); everything else comes from the media-info JSON.
        media_info = self._download_json(
            media_info_url, video_id, 'Downloading media JSON')
        return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)

    def _parse_media_info(self, media_info, video_id, fsk):
        """Turn a media-info dict into an info dict (formats + subtitles).

        An empty format list is treated as an error condition: either the
        video is FSK-restricted (only available after 20:00) or geoblocked.
        """
        formats = self._extract_formats(media_info, video_id)

        if not formats:
            if fsk:
                raise ExtractorError(
                    'This video is only available after 20:00', expected=True)
            elif media_info.get('_geoblocked'):
                self.raise_geo_restricted(
                    'This video is not available due to geoblocking',
                    countries=self._GEO_COUNTRIES)

        self._sort_formats(formats)

        subtitles = {}
        subtitle_url = media_info.get('_subtitleUrl')
        if subtitle_url:
            # subtitles are always registered as German TTML
            subtitles['de'] = [{
                'ext': 'ttml',
                'url': subtitle_url,
            }]

        return {
            'id': video_id,
            'duration': int_or_none(media_info.get('_duration')),
            'thumbnail': media_info.get('_previewImage'),
            'is_live': media_info.get('_isLive') is True,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _extract_formats(self, media_info, video_id):
        """Collect formats from media_info['_mediaArray'].

        Each media entry carries a '_mediaStreamArray'; stream URLs may be a
        single string or a list. HDS (f4m) and HLS (m3u8) manifests are
        expanded into their variants; other URLs become direct HTTP (or
        RTMP) formats.
        """
        type_ = media_info.get('_type')
        media_array = media_info.get('_mediaArray', [])
        formats = []
        for num, media in enumerate(media_array):
            for stream in media.get('_mediaStreamArray', []):
                stream_urls = stream.get('_stream')
                if not stream_urls:
                    continue
                if not isinstance(stream_urls, list):
                    # normalize single URL to a one-element list
                    stream_urls = [stream_urls]
                quality = stream.get('_quality')
                server = stream.get('_server')
                for stream_url in stream_urls:
                    if not url_or_none(stream_url):
                        continue
                    ext = determine_ext(stream_url)
                    # manifests are only expanded for the 'auto' quality
                    # entry (presumably to avoid duplicate variants)
                    if quality != 'auto' and ext in ('f4m', 'm3u8'):
                        continue
                    if ext == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            update_url_query(stream_url, {
                                'hdcore': '3.1.1',
                                'plugin': 'aasp-3.1.1.69.124'
                            }), video_id, f4m_id='hds', fatal=False))
                    elif ext == 'm3u8':
                        formats.extend(self._extract_m3u8_formats(
                            stream_url, video_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False))
                    else:
                        if server and server.startswith('rtmp'):
                            # RTMP: the stream URL is a play path on the server
                            f = {
                                'url': server,
                                'play_path': stream_url,
                                'format_id': 'a%s-rtmp-%s' % (num, quality),
                            }
                        else:
                            f = {
                                'url': stream_url,
                                'format_id': 'a%s-%s-%s' % (num, ext, quality)
                            }
                        # resolution is sometimes encoded in the file name
                        # as ..._WIDTHxHEIGHT.mp4
                        m = re.search(
                            r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
                            stream_url)
                        if m:
                            f.update({
                                'width': int(m.group('width')),
                                'height': int(m.group('height')),
                            })
                        if type_ == 'audio':
                            f['vcodec'] = 'none'
                        formats.append(f)
        return formats
|  | ||||
|  | ||||
class ARDMediathekIE(ARDMediathekBaseIE):
    """Extractor for classic ARD Mediathek pages (ardmediathek.de,
    mediathek.daserste.de, mediathek.rbb-online.de, one.ard.de)."""
    IE_NAME = 'ARD:mediathek'
    _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'

    _TESTS = [{
        # available till 26.07.2022
        'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
        'info_dict': {
            'id': '44726822',
            'ext': 'mp4',
            'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
            'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
            'duration': 1740,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
        'only_matching': True,
    }, {
        'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
        'only_matching': True,
    }, {
        # audio
        'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
        'only_matching': True,
    }, {
        'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the newer beta-Mediathek extractor when it matches the URL
        return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)

    def _real_extract(self, url):
        # determine video id from url
        m = re.match(self._VALID_URL, url)

        document_id = None

        # prefer the numeric documentId query parameter over the URL slug
        numid = re.search(r'documentId=([0-9]+)', url)
        if numid:
            document_id = video_id = numid.group(1)
        else:
            video_id = m.group('video_id')

        webpage = self._download_webpage(url, video_id)

        # known error markers: outage notice / video no longer available
        ERRORS = (
            ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
            ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
             'Video %s is no longer available'),
        )

        for pattern, message in ERRORS:
            if pattern in webpage:
                raise ExtractorError(message % video_id, expected=True)

        # RSS feed URLs are delegated to the generic RSS extractor
        if re.search(r'[\?&]rss($|[=&])', url):
            doc = compat_etree_fromstring(webpage.encode('utf-8'))
            if doc.tag == 'rss':
                return GenericIE()._extract_rss(url, video_id, doc)

        # title/description: try several page layouts in order
        title = self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<meta name="dcterms\.title" content="(.*?)"/>',
             r'<h4 class="headline">(.*?)</h4>',
             r'<title[^>]*>(.*?)</title>'],
            webpage, 'title')
        description = self._html_search_meta(
            'dcterms.abstract', webpage, 'description', default=None)
        if description is None:
            description = self._html_search_meta(
                'description', webpage, 'meta description', default=None)
        if description is None:
            description = self._html_search_regex(
                r'<p\s+class="teasertext">(.+?)</p>',
                webpage, 'teaser text', default=None)

        # Thumbnail is sometimes not present.
        # It is in the mobile version, but that seems to use a different URL
        # structure altogether.
        thumbnail = self._og_search_thumbnail(webpage, default=None)

        # old-style pages embed stream URLs directly via
        # mediaCollection.addMediaStream(...) calls
        media_streams = re.findall(r'''(?x)
            mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
            "([^"]+)"''', webpage)

        if media_streams:
            QUALITIES = qualities(['lo', 'hi', 'hq'])
            formats = []
            for furl in set(media_streams):
                if furl.endswith('.f4m'):
                    fid = 'f4m'
                else:
                    # quality id is the second-to-last dotted URL component
                    fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
                    fid = fid_m.group(1) if fid_m else None
                formats.append({
                    'quality': QUALITIES(fid),
                    'format_id': fid,
                    'url': furl,
                })
            self._sort_formats(formats)
            info = {
                'formats': formats,
            }
        else:  # request JSON file
            if not document_id:
                # numeric media id needed for the play/media JSON endpoint
                video_id = self._search_regex(
                    r'/play/(?:config|media)/(\d+)', webpage, 'media id')
            info = self._extract_media_info(
                'http://www.ardmediathek.de/play/media/%s' % video_id,
                webpage, video_id)

        info.update({
            'id': video_id,
            'title': self._live_title(title) if info.get('is_live') else title,
            'description': description,
            'thumbnail': thumbnail,
        })

        return info
|  | ||||
|  | ||||
class ARDIE(InfoExtractor):
    _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
    _TESTS = [{
        # available till 14.02.2019
        'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
        'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
        'info_dict': {
            'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
            'id': '102',
            'ext': 'mp4',
            'duration': 4435.0,
            'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
            'upload_date': '20180214',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
        'only_matching': True,
    }, {
        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a daserste.de video from its XML player manifest."""
        url_match = re.match(self._VALID_URL, url)
        display_id = url_match.group('display_id')

        # The player manifest sits next to the HTML page under a fixed suffix.
        doc = self._download_xml(
            url_match.group('mainurl') + '~playerXml.xml', display_id)
        video_node = doc.find('./video')

        upload_date = unified_strdate(xpath_text(
            video_node, './broadcastDate'))
        thumbnail = xpath_text(video_node, './/teaserImage//variant/url')

        formats = []
        for asset in video_node.findall('.//asset'):
            fmt = {
                'format_id': asset.attrib['type'],
                'width': int_or_none(asset.find('./frameWidth').text),
                'height': int_or_none(asset.find('./frameHeight').text),
                'vbr': int_or_none(asset.find('./bitrateVideo').text),
                'abr': int_or_none(asset.find('./bitrateAudio').text),
                'vcodec': asset.find('./codecVideo').text,
                'tbr': int_or_none(asset.find('./totalBitrate').text),
            }
            server_prefix = asset.find('./serverPrefix').text
            if server_prefix:
                # RTMP-style delivery: server URL plus separate play path.
                fmt['url'] = server_prefix
                fmt['playpath'] = asset.find('./fileName').text
            else:
                # Plain progressive download URL.
                fmt['url'] = asset.find('./fileName').text
            formats.append(fmt)
        self._sort_formats(formats)

        return {
            'id': url_match.group('id'),
            'formats': formats,
            'display_id': display_id,
            'title': video_node.find('./title').text,
            'duration': parse_duration(video_node.find('./duration').text),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
        }
|  | ||||
|  | ||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
    # Extractor for the current ("beta") ardmediathek.de frontend, which
    # serves player data through a JSON/GraphQL gateway instead of the
    # classic HTML pages handled by the older ARD extractors.
    _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
        'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
        'info_dict': {
            'display_id': 'die-robuste-roswita',
            'id': '70153354',
            'title': 'Die robuste Roswita',
            'description': r're:^Der Mord.*trüber ist als die Ilm.',
            'duration': 5316,
            'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
            'timestamp': 1577047500,
            'upload_date': '20191222',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
        'only_matching': True,
    }, {
        'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
        'only_matching': True,
    }, {
        'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
        'only_matching': True,
    }, {
        'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
        'only_matching': True,
    }, {
        'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch player metadata via the public-gateway GraphQL endpoint
        and return an info dict built by _parse_media_info.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('video_id')
        display_id = mobj.group('display_id')
        # The display_id group may capture a trailing '/'-separated path;
        # strip the trailing slash, and fall back to the video id when the
        # URL carries no human-readable slug at all.
        if display_id:
            display_id = display_id.rstrip('/')
        if not display_id:
            display_id = video_id

        # POST a GraphQL query; the '%s' placeholders are filled with the
        # site client (e.g. 'ard', 'swr') and the clip id from the URL.
        player_page = self._download_json(
            'https://api.ardmediathek.de/public-gateway',
            display_id, data=json.dumps({
                'query': '''{
  playerPage(client:"%s", clipId: "%s") {
    blockedByFsk
    broadcastedOn
    maturityContentRating
    mediaCollection {
      _duration
      _geoblocked
      _isLive
      _mediaArray {
        _mediaStreamArray {
          _quality
          _server
          _stream
        }
      }
      _previewImage
      _subtitleUrl
      _type
    }
    show {
      title
    }
    synopsis
    title
    tracking {
      atiCustomVars {
        contentId
      }
    }
  }
}''' % (mobj.group('client'), video_id),
            }).encode(), headers={
                'Content-Type': 'application/json'
            })['data']['playerPage']
        title = player_page['title']
        # Numeric media id used by the legacy /play/media/ endpoint below.
        content_id = str_or_none(try_get(
            player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
        media_collection = player_page.get('mediaCollection') or {}
        # GraphQL sometimes omits the media collection; fall back to the
        # legacy JSON endpoint when we at least know the content id.
        if not media_collection and content_id:
            media_collection = self._download_json(
                'https://www.ardmediathek.de/play/media/' + content_id,
                content_id, fatal=False) or {}
        info = self._parse_media_info(
            media_collection, content_id or video_id,
            player_page.get('blockedByFsk'))
        age_limit = None
        description = player_page.get('synopsis')
        maturity_content_rating = player_page.get('maturityContentRating')
        if maturity_content_rating:
            # Rating looks like 'FSK16' -> 16 (lstrip drops the letters).
            age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
        if not age_limit and description:
            # Fallback: some descriptions end with e.g. '(FSK 16)'.
            age_limit = int_or_none(self._search_regex(
                r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
        info.update({
            'age_limit': age_limit,
            'display_id': display_id,
            'title': title,
            'description': description,
            'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
            'series': try_get(player_page, lambda x: x['show']['title']),
        })
        return info
							
								
								
									
										133
									
								
								youtube_dl/extractor/arkena.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								youtube_dl/extractor/arkena.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     mimetype2ext, | ||||
|     parse_iso8601, | ||||
|     strip_jsonp, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ArkenaIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                        https?://
                            (?:
                                video\.arkena\.com/play2/embed/player\?|
                                play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P<id>[^/]+)/[^/]+/(?P<account_id>\d+)
                            )
                        '''
    _TESTS = [{
        'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411',
        'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
        'info_dict': {
            'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'description': 'Royalty free test video',
            'timestamp': 1432816365,
            'upload_date': '20150528',
            'is_live': False,
        },
    }, {
        'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972',
        'only_matching': True,
    }, {
        'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/',
        'only_matching': True,
    }, {
        'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of an embedded Arkena player iframe, or None."""
        # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video
        embed = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1',
            webpage)
        return embed.group('url') if embed else None

    def _real_extract(self, url):
        """Fetch the Arkena playlist config (JSONP) and build formats."""
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        account_id = url_match.group('account_id')

        # Handle http://video.arkena.com/play2/embed/player URL
        if not video_id:
            query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            video_id = query.get('mediaId', [None])[0]
            account_id = query.get('accountId', [None])[0]
            if not (video_id and account_id):
                raise ExtractorError('Invalid URL', expected=True)

        playlist = self._download_json(
            'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_'
            % (video_id, account_id),
            video_id, transform_source=strip_jsonp)['Playlist'][0]

        media_info = playlist['MediaInfo']
        title = media_info['Title']
        media_files = playlist['MediaFiles']

        is_live = False
        formats = []
        for kind_case, kind_formats in media_files.items():
            kind = kind_case.lower()
            for media in kind_formats:
                media_url = media.get('Url')
                if not media_url:
                    continue
                is_live = media.get('Live') == 'true'
                exts = (mimetype2ext(media.get('Type')), determine_ext(media_url, None))
                if kind == 'm3u8' or 'm3u8' in exts:
                    formats.extend(self._extract_m3u8_formats(
                        media_url, video_id, 'mp4', 'm3u8_native',
                        m3u8_id=kind, fatal=False, live=is_live))
                elif kind == 'flash' or 'f4m' in exts:
                    formats.extend(self._extract_f4m_formats(
                        media_url, video_id, f4m_id=kind, fatal=False))
                elif kind == 'dash' or 'mpd' in exts:
                    formats.extend(self._extract_mpd_formats(
                        media_url, video_id, mpd_id=kind, fatal=False))
                elif kind == 'silverlight':
                    # TODO: process when ism is supported (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8118)
                    continue
                else:
                    # Plain progressive stream; 'Bitrate' comes in bps.
                    tbr = float_or_none(media.get('Bitrate'), 1000)
                    formats.append({
                        'url': media_url,
                        'format_id': '%s-%d' % (kind, tbr) if tbr else kind,
                        'tbr': tbr,
                    })
        self._sort_formats(formats)

        description = media_info.get('Description')
        video_id = media_info.get('VideoId') or video_id
        timestamp = parse_iso8601(media_info.get('PublishDate'))
        thumbnails = [{
            'url': poster['Url'],
            'width': int_or_none(poster.get('Size')),
        } for poster in (media_info.get('Poster') or []) if poster.get('Url')]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'is_live': is_live,
            'thumbnails': thumbnails,
            'formats': formats,
        }
							
								
								
									
										201
									
								
								youtube_dl/extractor/arte.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										201
									
								
								youtube_dl/extractor/arte.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,201 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     try_get, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
| # There are different sources of video in arte.tv, the extraction process | ||||
| # is different for each one. The videos usually expire in 7 days, so we can't | ||||
| # add tests. | ||||
|  | ||||
|  | ||||
class ArteTVBaseIE(InfoExtractor):
    def _extract_from_json_url(self, json_url, video_id, lang, title=None):
        """Build an info dict from an arte.tv videoJsonPlayer config URL.

        json_url -- player/v1 config endpoint to download
        video_id -- id used for logging and as title fallback in errors
        lang     -- requested site language code ('fr', 'de', 'en', ...)
        title    -- optional fallback title when the config has no 'VTI'

        Raises ExtractorError when the config contains no streams ('VSR').
        """
        info = self._download_json(json_url, video_id)
        player_info = info['videoJsonPlayer']

        vsr = try_get(player_info, lambda x: x['VSR'], dict)
        if not vsr:
            # No streams available; surface the server message if any.
            error = None
            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
                error = try_get(
                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
            if not error:
                # BUGFIX: '%' binds tighter than 'or', so the original
                # "'...' % player_info.get('VID') or video_id" never used
                # video_id and could print "Video None is not available".
                error = 'Video %s is not available' % (
                    player_info.get('VID') or video_id)
            raise ExtractorError(error, expected=True)

        upload_date_str = player_info.get('shootingDate')
        if not upload_date_str:
            # VRA/VDA carry 'date time'; keep only the date part.
            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

        title = (player_info.get('VTI') or title or player_info['VID']).strip()
        subtitle = player_info.get('VSU', '').strip()
        if subtitle:
            title += ' - %s' % subtitle

        info_dict = {
            'id': player_info['VID'],
            'title': title,
            'description': player_info.get('VDE'),
            'upload_date': unified_strdate(upload_date_str),
            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
        }
        qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])

        LANGS = {
            'fr': 'F',
            'de': 'A',
            'en': 'E[ANG]',
            'es': 'E[ESP]',
            'it': 'E[ITA]',
            'pl': 'E[POL]',
        }

        langcode = LANGS.get(lang, lang)
        lang_re = re.escape(langcode)

        # Language preference from most to least priority
        # Reference: section 6.8 of
        # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
        # (hoisted out of the format loop: it only depends on langcode)
        PREFERENCES = (
            # original version in requested language, without subtitles
            r'VO{0}$'.format(lang_re),
            # original version in requested language, with partial subtitles in requested language
            r'VO{0}-ST{0}$'.format(lang_re),
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO{0}-STM{0}$'.format(lang_re),
            # non-original (dubbed) version in requested language, without subtitles
            r'V{0}$'.format(lang_re),
            # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
            r'V{0}-ST{0}$'.format(lang_re),
            # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'V{0}-STM{0}$'.format(lang_re),
            # original version in requested language, with partial subtitles in different language
            r'VO{0}-ST(?!{0}).+?$'.format(lang_re),
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO{0}-STM(?!{0}).+?$'.format(lang_re),
            # original version in different language, with partial subtitles in requested language
            r'VO(?:(?!{0}).+?)?-ST{0}$'.format(lang_re),
            # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO(?:(?!{0}).+?)?-STM{0}$'.format(lang_re),
            # original version in different language, without subtitles
            r'VO(?:(?!{0}))?$'.format(lang_re),
            # original version in different language, with partial subtitles in different language
            r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(lang_re),
            # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(lang_re),
        )

        formats = []
        for format_id, format_dict in vsr.items():
            f = dict(format_dict)
            version_code = f.get('versionCode')

            # Guard against a missing versionCode: re.match(p, None)
            # would raise TypeError; treat it as lowest preference.
            lang_pref = -1
            if version_code is not None:
                for pref, pattern in enumerate(PREFERENCES):
                    if re.match(pattern, version_code):
                        lang_pref = len(PREFERENCES) - pref
                        break

            format_info = {
                'format_id': format_id,
                # Deprioritize HLS ('M3U8') entries.
                'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
                'language_preference': lang_pref,
                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'tbr': int_or_none(f.get('bitrate')),
                'quality': qfunc(f.get('quality')),
            }

            if f.get('mediaType') == 'rtmp':
                # RTMP: server URL plus 'mp4:' play path.
                format_info['url'] = f['streamer']
                format_info['play_path'] = 'mp4:' + f['url']
                format_info['ext'] = 'flv'
            else:
                format_info['url'] = f['url']

            formats.append(format_info)

        # arte.tv URLs expire quickly; drop formats that no longer work.
        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        info_dict['formats'] = formats
        return info_dict
|  | ||||
|  | ||||
class ArteTVPlus7IE(ArteTVBaseIE):
    IE_NAME = 'arte.tv:+7'
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'

    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
        'info_dict': {
            'id': '088501-000-A',
            'ext': 'mp4',
            'title': 'Mexico: Stealing Petrol to Survive',
            'upload_date': '20190628',
        },
    }]

    def _real_extract(self, url):
        """Map an arte.tv site URL to its player/v1 config endpoint."""
        url_match = re.match(self._VALID_URL, url)
        lang = url_match.group('lang')
        video_id = url_match.group('id')
        json_url = 'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id)
        return self._extract_from_json_url(json_url, video_id, lang)
|  | ||||
|  | ||||
class ArteTVEmbedIE(ArteTVPlus7IE):
    IE_NAME = 'arte.tv:embed'
    _VALID_URL = r'''(?x)
        https://www\.arte\.tv
        /player/v3/index\.php\?json_url=
        (?P<json_url>
            https?://api\.arte\.tv/api/player/v1/config/
            (?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
        )
    '''

    _TESTS = []

    def _real_extract(self, url):
        """The embed URL already carries the config URL; use it directly."""
        url_match = re.match(self._VALID_URL, url)
        return self._extract_from_json_url(
            url_match.group('json_url'),
            url_match.group('id'),
            url_match.group('lang'))
|  | ||||
|  | ||||
class ArteTVPlaylistIE(ArteTVBaseIE):
    IE_NAME = 'arte.tv:playlist'
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'

    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
        'info_dict': {
            'id': 'RC-016954',
            'title': 'Earn a Living',
            'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
        },
        'playlist_mincount': 6,
    }]

    def _real_extract(self, url):
        """Resolve an RC- collection into a playlist of its videos."""
        lang, playlist_id = re.match(self._VALID_URL, url).groups()
        collection = self._download_json(
            'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
            % (lang, playlist_id), playlist_id)
        title = collection.get('title')
        description = collection.get('shortDescription') or collection.get('teaserText')

        entries = []
        for video in collection['videos']:
            # Skip entries that carry no player config URL.
            json_url = video.get('jsonUrl')
            if not json_url:
                continue
            entries.append(self._extract_from_json_url(
                json_url, video.get('programId') or playlist_id, lang))

        return self.playlist_result(entries, playlist_id, title, description)
							
								
								
									
										145
									
								
								youtube_dl/extractor/asiancrush.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										145
									
								
								youtube_dl/extractor/asiancrush.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,145 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .kaltura import KalturaIE | ||||
| from ..utils import extract_attributes | ||||
|  | ||||
|  | ||||
class AsianCrushIE(InfoExtractor):
    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
    _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
        'info_dict': {
            'id': '1_y4tmjm5r',
            'ext': 'mp4',
            'title': 'Women Who Flirt',
            'description': 'md5:7e986615808bcfb11756eb503a751487',
            'timestamp': 1496936429,
            'upload_date': '20170608',
            'uploader_id': 'craig@crifkin.com',
        },
    }, {
        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
        'only_matching': True,
    }, {
        'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a site video page to its Kaltura entry (url_transparent)."""
        url_match = re.match(self._VALID_URL, url)
        host = url_match.group('host')
        video_id = url_match.group('id')

        webpage = self._download_webpage(url, video_id)

        entry_id = partner_id = title = None

        # Newer pages inline the player config as an iEmbedVars object.
        embed_vars = self._parse_json(
            self._search_regex(
                r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
                default='{}'), video_id, fatal=False)
        if embed_vars:
            entry_id = embed_vars.get('entry_id')
            partner_id = embed_vars.get('partner_id')
            title = embed_vars.get('vid_label')

        if not entry_id:
            entry_id = self._search_regex(
                r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')

        # The embedded-player endpoint maps the site entry id onto the
        # Kaltura entry id and partner id.
        player = self._download_webpage(
            'https://api.%s/embeddedVideoPlayer' % host, video_id,
            query={'id': entry_id})

        kaltura_id = self._search_regex(
            r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
            'kaltura id', group='id')

        if not partner_id:
            partner_id = self._search_regex(
                r'/p(?:artner_id)?/(\d+)', player, 'partner id',
                default='513551')

        description = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
            webpage, 'description', fatal=False)

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
            'ie_key': KalturaIE.ie_key(),
            'id': video_id,
            'title': title,
            'description': description,
        }
|  | ||||
|  | ||||
class AsianCrushPlaylistIE(InfoExtractor):
    _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
        'info_dict': {
            'id': '12481',
            'title': 'Scholar Who Walks the Night',
            'description': 'md5:7addd7c5132a09fd4741152d96cce886',
        },
        'playlist_count': 20,
    }, {
        'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
        'only_matching': True,
    }, {
        'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
        'only_matching': True,
    }, {
        'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Collect all episode links of a series page into a playlist."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        entries = []

        # Episode anchors link to URLs matching the single-video extractor;
        # only the 'clearfix' anchors are episode cards (others are e.g.
        # navigation links to the same URLs).
        for mobj in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
                webpage):
            attrs = extract_attributes(mobj.group(0))
            if attrs.get('class') == 'clearfix':
                entries.append(self.url_result(
                    mobj.group('url'), ie=AsianCrushIE.ie_key()))

        # BUGFIX: the original pattern read '<h1\b[^>]\bid=' -- the missing
        # '+' allowed exactly one character between the tag name and 'id',
        # so any <h1> with extra attributes before id never matched and the
        # extractor always fell through to the og:title/meta fallbacks.
        title = self._html_search_regex(
            r'(?s)<h1\b[^>]+\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
            'title', default=None) or self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'twitter:title', webpage, 'title',
            default=None) or self._search_regex(
            r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
        if title:
            # Drop the ' | Site Name' suffix from page titles.
            title = re.sub(r'\s*\|\s*.+?$', '', title)

        description = self._og_search_description(
            webpage, default=None) or self._html_search_meta(
            'twitter:description', webpage, 'description', fatal=False)

        return self.playlist_result(entries, playlist_id, title, description)
							
								
								
									
										118
									
								
								youtube_dl/extractor/atresplayer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										118
									
								
								youtube_dl/extractor/atresplayer.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,118 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AtresPlayerIE(InfoExtractor):
    """Extractor for atresplayer.com episode pages (Antena 3 / laSexta VOD).

    Most content requires a registered account; credentials come from
    --username/--password or the 'atresplayer' .netrc machine.
    """
    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
    _NETRC_MACHINE = 'atresplayer'
    _TESTS = [
        {
            'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
            'info_dict': {
                'id': '5d4aa2c57ed1a88fc715a615',
                'ext': 'mp4',
                'title': 'Capítulo 7: Asuntos pendientes',
                'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
                'duration': 3413,
            },
            'params': {
                'format': 'bestvideo',
            },
            'skip': 'This video is only available for registered users'
        },
        {
            'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
            'only_matching': True,
        },
        {
            'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
            'only_matching': True,
        },
    ]
    _API_BASE = 'https://api.atresplayer.com/'

    def _real_initialize(self):
        # Log in eagerly so every subsequent API request carries the session.
        self._login()

    def _handle_error(self, e, code):
        """Re-raise *e*; if it wraps an HTTP error with status *code*, first
        turn the API's JSON error payload into a readable ExtractorError
        (or a login-required error for unregistered users)."""
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
            error = self._parse_json(e.cause.read(), None)
            if error.get('error') == 'required_registered':
                self.raise_login_required()
            raise ExtractorError(error['error_description'], expected=True)
        # Bare raise re-raises the exception currently being handled in the
        # caller's except block.
        raise

    def _login(self):
        """Authenticate against account.atresmedia.com; no-op without credentials."""
        username, password = self._get_login_info()
        if username is None:
            return

        # Prime session cookies before posting credentials.
        self._request_webpage(
            self._API_BASE + 'login', None, 'Downloading login page')

        try:
            target_url = self._download_json(
                'https://account.atresmedia.com/api/login', None,
                'Logging in', headers={
                    'Content-Type': 'application/x-www-form-urlencoded'
                }, data=urlencode_postdata({
                    'username': username,
                    'password': password,
                }))['targetUrl']
        except ExtractorError as e:
            # HTTP 400 carries the API's structured login-failure payload.
            self._handle_error(e, 400)

        # Visiting the returned URL completes the cross-domain session setup.
        self._request_webpage(target_url, None, 'Following Target URL')

    def _real_extract(self, url):
        display_id, video_id = re.match(self._VALID_URL, url).groups()

        try:
            episode = self._download_json(
                self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
        except ExtractorError as e:
            # HTTP 403 means the episode needs registration/subscription.
            self._handle_error(e, 403)

        title = episode['titulo']

        formats = []
        for source in episode.get('sources', []):
            src = source.get('src')
            if not src:
                continue
            src_type = source.get('type')
            if src_type == 'application/vnd.apple.mpegurl':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif src_type == 'application/dash+xml':
                formats.extend(self._extract_mpd_formats(
                    src, video_id, mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        # Analytics blobs duplicate some metadata; prefer heartbeat, fall
        # back to omniture.
        heartbeat = episode.get('heartbeat') or {}
        omniture = episode.get('omniture') or {}
        get_meta = lambda x: heartbeat.get(x) or omniture.get(x)

        return {
            'display_id': display_id,
            'id': video_id,
            'title': title,
            'description': episode.get('descripcion'),
            'thumbnail': episode.get('imgPoster'),
            'duration': int_or_none(episode.get('duration')),
            'formats': formats,
            'channel': get_meta('channel'),
            'season': get_meta('season'),
            'episode_number': int_or_none(get_meta('episodeNumber')),
        }
							
								
								
									
										55
									
								
								youtube_dl/extractor/atttechchannel.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/atttechchannel.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
|  | ||||
class ATTTechChannelIE(InfoExtractor):
    """Extractor for techchannel.att.com videos (single RTMP stream per page)."""
    _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
    _TEST = {
        'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
        'info_dict': {
            'id': '11316',
            'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
            'ext': 'flv',
            'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
            'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140127',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)

        # The player config embeds a single RTMP stream URL.
        rtmp_url = self._search_regex(
            r"url\s*:\s*'(rtmp://[^']+)'",
            page, 'video URL')

        media_id = self._search_regex(
            r'mediaid\s*=\s*(\d+)',
            page, 'video id', fatal=False)

        # Release date on the page is US-style m/d/Y.
        release_date = unified_strdate(
            self._search_regex(
                r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
                page, 'upload date', fatal=False),
            False)

        return {
            'id': media_id,
            'display_id': display_id,
            'url': rtmp_url,
            'ext': 'flv',
            'title': self._og_search_title(page),
            'description': self._og_search_description(page),
            'thumbnail': self._og_search_thumbnail(page),
            'upload_date': release_date,
        }
							
								
								
									
										75
									
								
								youtube_dl/extractor/atvat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								youtube_dl/extractor/atvat.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,75 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ATVAtIE(InfoExtractor):
    """Extractor for atv.at; a page hosts one video split into parts,
    returned as a multi_video entry list."""
    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
    _TESTS = [{
        'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
        'md5': 'c3b6b975fb3150fc628572939df205f2',
        'info_dict': {
            'id': '1698447',
            'ext': 'mp4',
            'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
        }
    }, {
        'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Player configuration is JSON, HTML-escaped, stored either in a JS
        # variable or in a data-jsb attribute depending on the page variant.
        player_json = self._search_regex(
            [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
             r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
            webpage, 'player data', group='json')
        video_data = self._parse_json(
            unescapeHTML(player_json), display_id)['config']['initial_video']

        video_id = video_data['id']
        video_title = video_data['title']

        entries = []
        for part in video_data.get('parts', []):
            part_id = part['id']
            part_title = part['title']

            formats = []
            for source in part.get('sources', []):
                src = source.get('src')
                if not src:
                    continue
                if determine_ext(src) == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        src, part_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
                else:
                    # Progressive source; delivery field names the transport.
                    formats.append({
                        'format_id': source.get('delivery'),
                        'url': src,
                    })
            self._sort_formats(formats)

            entries.append({
                'id': part_id,
                'title': part_title,
                'thumbnail': part.get('preview_image_url'),
                'duration': int_or_none(part.get('duration')),
                'is_live': part.get('is_livestream'),
                'formats': formats,
            })

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': video_title,
            'entries': entries,
        }
							
								
								
									
										93
									
								
								youtube_dl/extractor/audimedia.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								youtube_dl/extractor/audimedia.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,93 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AudiMediaIE(InfoExtractor):
    """Extractor for videos from the Audi MediaCenter (audi-mediacenter.com)."""
    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
        'md5': '79a8b71c46d49042609795ab59779b66',
        'info_dict': {
            'id': '1565',
            'ext': 'mp4',
            'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
            'description': 'md5:60e5d30a78ced725f7b8d34370762941',
            'upload_date': '20151124',
            'timestamp': 1448354940,
            'duration': 74022,
            'view_count': int,
        }
    }, {
        'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The embed element id encodes four dash-separated fields; it may
        # appear with plain or backslash-escaped quotes depending on whether
        # the markup is embedded inside a JS string.
        raw_payload = self._search_regex([
            r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
            r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
            r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
            r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
            r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
        ], webpage, 'raw payload')
        # Payload shape: <prefix>-<stage mode>-<video id>-<suffix>
        _, stage_mode, video_id, _ = raw_payload.split('-')

        # TODO: handle s and e stage_mode (live streams and ended live streams)
        # NOTE(review): for stage modes 's'/'e' this method currently falls
        # through and implicitly returns None.
        if stage_mode not in ('s', 'e'):
            video_data = self._download_json(
                'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
                video_id, query={
                    'embed[]': ['video_versions', 'thumbnail_image'],
                })['results']
            formats = []

            stream_url_hls = video_data.get('stream_url_hls')
            if stream_url_hls:
                formats.extend(self._extract_m3u8_formats(
                    stream_url_hls, video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))

            stream_url_hds = video_data.get('stream_url_hds')
            if stream_url_hds:
                formats.extend(self._extract_f4m_formats(
                    stream_url_hds + '?hdcore=3.4.0',
                    video_id, f4m_id='hds', fatal=False))

            # Progressive MP4 versions with explicit dimensions/bitrates.
            for video_version in video_data.get('video_versions', []):
                video_version_url = video_version.get('download_url') or video_version.get('stream_url')
                if not video_version_url:
                    continue
                f = {
                    'url': video_version_url,
                    'width': int_or_none(video_version.get('width')),
                    'height': int_or_none(video_version.get('height')),
                    'abr': int_or_none(video_version.get('audio_bitrate')),
                    'vbr': int_or_none(video_version.get('video_bitrate')),
                }
                # Bitrate is only encoded in the URL itself (e.g. "...-1500k...").
                bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
                if bitrate:
                    f.update({
                        'format_id': 'http-%s' % bitrate,
                    })
                formats.append(f)
            self._sort_formats(formats)

            return {
                'id': video_id,
                'title': video_data['title'],
                'description': video_data.get('subtitle'),
                'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
                'timestamp': parse_iso8601(video_data.get('publication_date')),
                'duration': int_or_none(video_data.get('duration')),
                'view_count': int_or_none(video_data.get('view_count')),
                'formats': formats,
            }
							
								
								
									
										73
									
								
								youtube_dl/extractor/audioboom.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/audioboom.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     float_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AudioBoomIE(InfoExtractor):
    """Extractor for audioboom.com posts (single audio clips)."""
    _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
        'md5': '7b00192e593ff227e6a315486979a42d',
        'info_dict': {
            'id': '7398103',
            'ext': 'mp3',
            'title': 'Asim Chaudhry',
            'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
            'duration': 4000.99,
            'uploader': 'Sue Perkins: An hour or so with...',
            'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
        }
    }, {
        'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Newer pages carry clip metadata as JSON in a data-new-clip-store
        # attribute; fall back to an empty dict so every clip.get() below
        # simply yields None and the meta-tag fallbacks kick in.
        clip = {}
        clip_store = self._parse_json(
            self._html_search_regex(
                r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
                webpage, 'clip store', default='{}', group='json'),
            video_id, fatal=False)
        if clip_store:
            clips = clip_store.get('clips')
            if clips and isinstance(clips, list) and isinstance(clips[0], dict):
                clip = clips[0]

        audio_url = clip.get('clipURLPriorToLoading') or self._og_search_property(
            'audio', webpage, 'audio url')
        title = clip.get('title') or self._html_search_meta(
            ['og:title', 'og:audio:title', 'audio_title'], webpage)
        description = clip.get('description') or clean_html(clip.get('formattedDescription')) or self._og_search_description(webpage)

        duration = float_or_none(clip.get('duration') or self._html_search_meta(
            'weibo:audio:duration', webpage))

        uploader = clip.get('author') or self._html_search_meta(
            ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
        uploader_url = clip.get('author_url') or self._html_search_meta(
            'audioboo:channel', webpage, 'uploader url')

        return {
            'id': video_id,
            'url': audio_url,
            'title': title,
            'description': description,
            'duration': duration,
            'uploader': uploader,
            'uploader_url': uploader_url,
        }
							
								
								
									
										145
									
								
								youtube_dl/extractor/audiomack.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										145
									
								
								youtube_dl/extractor/audiomack.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,145 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .soundcloud import SoundcloudIE | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AudiomackIE(InfoExtractor):
    """Extractor for single songs on audiomack.com."""
    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
    IE_NAME = 'audiomack'
    _TESTS = [
        # hosted on audiomack
        {
            'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary',
            'info_dict':
            {
                'id': '310086',
                'ext': 'mp3',
                'uploader': 'Roosh Williams',
                'title': 'Extraordinary'
            }
        },
        # audiomack wrapper around soundcloud song
        {
            'add_ie': ['Soundcloud'],
            'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
            'info_dict': {
                'id': '258901379',
                'ext': 'mp3',
                'description': 'mamba day freestyle for the legend Kobe Bryant ',
                'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
                'uploader': 'ILOVEMAKONNEN',
                'upload_date': '20160414',
            }
        },
    ]

    def _real_extract(self, url):
        # The URL tail is a user-typed uploader/title slug and rarely the
        # real song title; authoritative metadata comes from the API.
        slug = self._match_id(url)

        # extended=1 adds artist/title fields; the timestamp defeats caching.
        api_response = self._download_json(
            'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % (
                slug, time.time()),
            slug)

        # The API reports failure inconsistently: an explicit error key,
        # a missing url, or an empty url all mean the same thing.
        if 'error' in api_response or not api_response.get('url'):
            raise ExtractorError('Invalid url %s' % url)

        # Audiomack wraps many soundcloud tracks in a branded player; hand
        # those off to the soundcloud extractor.
        if SoundcloudIE.suitable(api_response['url']):
            return self.url_result(api_response['url'], SoundcloudIE.ie_key())

        return {
            'id': compat_str(api_response.get('id', slug)),
            'uploader': api_response.get('artist'),
            'title': api_response.get('title'),
            'url': api_response['url'],
        }
|  | ||||
|  | ||||
class AudiomackAlbumIE(InfoExtractor):
    """Extractor for audiomack.com album pages, yielding one playlist entry
    per track fetched from the per-track API."""
    _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
    IE_NAME = 'audiomack:album'
    _TESTS = [
        # Standard album playlist
        {
            'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
            'playlist_count': 15,
            'info_dict':
            {
                'id': '812251',
                'title': 'Tha Tour: Part 2 (Official Mixtape)'
            }
        },
        # Album playlist ripped from fakeshoredrive with no metadata
        {
            'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project',
            'info_dict': {
                'title': 'PPP (Pistol P Project)',
                'id': '837572',
            },
            'playlist': [{
                'info_dict': {
                    'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
                    'id': '837577',
                    'ext': 'mp3',
                    'uploader': 'Lil Herb a.k.a. G Herbo',
                }
            }],
            'params': {
                'playliststart': 9,
                'playlistend': 9,
            }
        }
    ]

    def _real_extract(self, url):
        # URLs end with [uploader name]/[uploader title]
        # this title is whatever the user types in, and is rarely
        # the proper song title.  Real metadata is in the api response
        album_url_tag = self._match_id(url)
        result = {'_type': 'playlist', 'entries': []}
        # There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
        # Therefore we don't know how many songs the album has and must infi-loop until failure
        for track_no in itertools.count():
            # Get song's metadata
            api_response = self._download_json(
                'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
                % (album_url_tag, track_no, time.time()), album_url_tag,
                note='Querying song information (%d)' % (track_no + 1))

            # Total failure, only occurs when url is totally wrong
            # Won't happen in middle of valid playlist (next case)
            if 'url' not in api_response or 'error' in api_response:
                raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
            # URL is good but song id doesn't exist - usually means end of playlist
            elif not api_response['url']:
                break
            else:
                # Pull out the album metadata and add to result (if it exists)
                for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
                    if apikey in api_response and resultkey not in result:
                        result[resultkey] = api_response[apikey]
                # Fall back to the URL basename (without extension) when the
                # API response lacks an id or title for this track.
                song_id = url_basename(api_response['url']).rpartition('.')[0]
                result['entries'].append({
                    'id': compat_str(api_response.get('id', song_id)),
                    'uploader': api_response.get('artist'),
                    'title': api_response.get('title', song_id),
                    'url': api_response['url'],
                })
        return result
							
								
								
									
										185
									
								
								youtube_dl/extractor/awaan.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										185
									
								
								youtube_dl/extractor/awaan.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,185 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import base64 | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_str, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     smuggle_url, | ||||
|     unsmuggle_url, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
class AWAANIE(InfoExtractor):
    """Dispatcher for generic AWAAN show URLs: routes to the video, season,
    or whole-program extractor depending on which ids are present."""
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?'

    def _real_extract(self, url):
        show_id, video_id, season_id = re.match(self._VALID_URL, url).groups()
        # A positive video id takes precedence over the season id.
        if video_id and int(video_id) > 0:
            return self.url_result(
                'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
        if season_id and int(season_id) > 0:
            # Smuggle the show id so the season extractor can attribute it.
            return self.url_result(smuggle_url(
                'http://awaan.ae/program/season/%s' % season_id,
                {'show_id': show_id}), 'AWAANSeason')
        # Neither id present/positive: hand over the whole program.
        return self.url_result(
            'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
|  | ||||
|  | ||||
class AWAANBaseIE(InfoExtractor):
    def _parse_video_data(self, video_data, video_id, is_live):
        """Map an AWAAN API payload onto a partial info dict."""
        # English metadata is preferred; Arabic title is the mandatory
        # fallback (KeyError if both are absent is intentional).
        title = video_data.get('title_en') or video_data['title_ar']
        thumb_path = video_data.get('img')

        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
            'description': video_data.get('description_en') or video_data.get('description_ar'),
            'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % thumb_path if thumb_path else None,
            'duration': int_or_none(video_data.get('duration')),
            # create_time separates date and time with a space.
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,
        }
|  | ||||
|  | ||||
class AWAANVideoIE(AWAANBaseIE):
    """Single AWAAN video; metadata comes from the Mangomolo analytics API,
    playback is delegated to the MangomoloVideo extractor via an embed URL."""
    IE_NAME = 'awaan:video'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375',
        'md5': '5f61c33bfc7794315c671a62d43116aa',
        'info_dict': {
            'id': '17375',
            'ext': 'mp4',
            'title': 'رحلة العمر : الحلقة 1',
            'description': 'md5:0156e935d870acb8ef0a66d24070c6d6',
            'duration': 2041,
            'timestamp': 1227504126,
            'upload_date': '20081124',
            'uploader_id': '71',
        },
    }, {
        'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Origin header is required by the Mangomolo API.
        video_data = self._download_json(
            'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
            video_id, headers={'Origin': 'http://awaan.ae'})
        result = self._parse_video_data(video_data, video_id, False)

        embed_query = compat_urllib_parse_urlencode({
            'id': video_data['id'],
            'user_id': video_data['user_id'],
            'signature': video_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        # Hand the actual stream extraction off to MangomoloVideo while
        # keeping the metadata gathered above.
        result.update({
            '_type': 'url_transparent',
            'url': 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + embed_query,
            'ie_key': 'MangomoloVideo',
        })
        return result
|  | ||||
|  | ||||
class AWAANLiveIE(AWAANBaseIE):
    """AWAAN live channel; delegates playback to MangomoloLive."""
    IE_NAME = 'awaan:live'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)'
    _TEST = {
        'url': 'http://awaan.ae/live/6/dubai-tv',
        'info_dict': {
            'id': '6',
            'ext': 'mp4',
            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'upload_date': '20150107',
            'timestamp': 1420588800,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        channel_data = self._download_json(
            'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
            channel_id, headers={'Origin': 'http://awaan.ae'})
        result = self._parse_video_data(channel_data, channel_id, True)

        # The live embed endpoint expects base64-encoded ids.
        embed_query = compat_urllib_parse_urlencode({
            'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
            'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
            'signature': channel_data['signature'],
            'countries': 'Q0M=',
            'filter': 'DENY',
        })
        result.update({
            '_type': 'url_transparent',
            'url': 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + embed_query,
            'ie_key': 'MangomoloLive',
        })
        return result
|  | ||||
|  | ||||
class AWAANSeasonIE(InfoExtractor):
    """Playlist extractor for an AWAAN program (show) or one of its seasons."""
    IE_NAME = 'awaan:season'
    _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))'
    _TEST = {
        'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A',
        'info_dict':
        {
            'id': '7910',
            'title': 'محاضرات الشيخ الشعراوي',
        },
        'playlist_mincount': 27,
    }

    def _real_extract(self, url):
        # AWAANIE may have smuggled the show id along with a season URL so
        # we can avoid the season_info lookup below.
        url, smuggled_data = unsmuggle_url(url, {})
        show_id, season_id = re.match(self._VALID_URL, url).groups()

        data = {}
        if season_id:
            data['season'] = season_id
            show_id = smuggled_data.get('show_id')
            if show_id is None:
                # No smuggled show id: resolve it from the season itself.
                season = self._download_json(
                    'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
                    season_id, headers={'Origin': 'http://awaan.ae'})
                show_id = season['id']
        data['show_id'] = show_id
        # POST (not GET): the show endpoint takes form-encoded parameters.
        show = self._download_json(
            'http://admin.mangomolo.com/analytics/index.php/plus/show',
            show_id, data=urlencode_postdata(data), headers={
                'Origin': 'http://awaan.ae',
                'Content-Type': 'application/x-www-form-urlencoded'
            })
        if not season_id:
            season_id = show['default_season']
        # NOTE(review): season_id from the URL is a str while season['id'] /
        # show['default_season'] may be ints from JSON — confirm the types
        # actually match here, otherwise no season is ever selected.
        for season in show['seasons']:
            if season['id'] == season_id:
                title = season.get('title_en') or season['title_ar']

                entries = []
                for video in show['videos']:
                    video_id = compat_str(video['id'])
                    entries.append(self.url_result(
                        'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))

                # Returns None (implicitly) when no season matches.
                return self.playlist_result(entries, season_id, title)
							
								
								
									
										78
									
								
								youtube_dl/extractor/aws.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								youtube_dl/extractor/aws.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import hashlib | ||||
| import hmac | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
|  | ||||
|  | ||||
class AWSIE(InfoExtractor):
    """Mixin that performs AWS Signature Version 4 signed GET requests
    against an API Gateway host (subclasses supply _AWS_PROXY_HOST and
    _AWS_API_KEY)."""
    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
    _AWS_REGION = 'us-east-1'

    def _aws_execute_api(self, aws_dict, video_id, query=None):
        """Sign and download JSON from aws_dict['uri'] using the SigV4 scheme.

        aws_dict carries 'uri', 'access_key', 'secret_key' and optionally
        'session_token' for temporary credentials.
        """
        query = query or {}
        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        date_stamp = amz_date[:8]
        headers = {
            'Accept': 'application/json',
            'Host': self._AWS_PROXY_HOST,
            'X-Amz-Date': amz_date,
            'X-Api-Key': self._AWS_API_KEY
        }
        session_token = aws_dict.get('session_token')
        if session_token:
            headers['X-Amz-Security-Token'] = session_token

        def sha256_hex(s):
            return hashlib.sha256(s.encode('utf-8')).hexdigest()

        def hmac_sha256(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        canonical_querystring = compat_urllib_parse_urlencode(query)
        sorted_headers = sorted(headers.items())
        canonical_headers = ''.join(
            '%s:%s\n' % (name.lower(), value) for name, value in sorted_headers)
        signed_headers = ';'.join(name.lower() for name, _ in sorted_headers)
        canonical_request = '\n'.join([
            'GET',
            aws_dict['uri'],
            canonical_querystring,
            canonical_headers,
            signed_headers,
            sha256_hex('')  # empty payload hash for GET
        ])

        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
        scope_parts = [date_stamp, self._AWS_REGION, 'execute-api', 'aws4_request']
        credential_scope = '/'.join(scope_parts)
        string_to_sign = '\n'.join([
            self._AWS_ALGORITHM, amz_date, credential_scope,
            sha256_hex(canonical_request)])

        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
        # Derive the signing key by chaining HMACs over the scope parts.
        signing_key = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
        for scope_part in scope_parts:
            signing_key = hmac_sha256(signing_key, scope_part).digest()
        signature = hmac_sha256(signing_key, string_to_sign).hexdigest()

        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
        headers['Authorization'] = ', '.join([
            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
            'SignedHeaders=%s' % signed_headers,
            'Signature=%s' % signature,
        ])

        return self._download_json(
            'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
            video_id, headers=headers)
							
								
								
									
										66
									
								
								youtube_dl/extractor/azmedien.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								youtube_dl/extractor/azmedien.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .kaltura import KalturaIE | ||||
|  | ||||
|  | ||||
class AZMedienIE(InfoExtractor):
    """Extractor for AZ Medien sites; resolves the article's Kaltura entry id
    (unless already present in the URL fragment) and delegates to Kaltura."""
    IE_DESC = 'AZ Medien videos'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?P<host>
                            telezueri\.ch|
                            telebaern\.tv|
                            telem1\.ch
                        )/
                        [^/]+/
                        (?P<id>
                            [^/]+-(?P<article_id>\d+)
                        )
                        (?:
                            \#video=
                            (?P<kaltura_id>
                                [_0-9a-z]+
                            )
                        )?
                    '''

    _TESTS = [{
        'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
        'info_dict': {
            'id': '1_anruz3wy',
            'ext': 'mp4',
            'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
            'uploader_id': 'TVOnline',
            'upload_date': '20180930',
            'timestamp': 1538328802,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
        'only_matching': True
    }]
    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
    _PARTNER_ID = '1719221'

    def _real_extract(self, url):
        host, display_id, article_id, kaltura_id = re.match(
            self._VALID_URL, url).groups()

        if not kaltura_id:
            # No #video= fragment: look the entry id up through the site's
            # persisted GraphQL query.
            gql_url = self._API_TEMPL % (host, host.split('.')[0])
            response = self._download_json(gql_url, display_id, query={
                'variables': json.dumps({
                    'contextId': 'NewsArticle:' + article_id,
                }),
            })
            kaltura_id = response['data']['context']['mainAsset']['video']['kaltura']['kalturaId']

        return self.url_result(
            'kaltura:%s:%s' % (self._PARTNER_ID, kaltura_id),
            ie=KalturaIE.ie_key(), video_id=kaltura_id)
							
								
								
									
										56
									
								
								youtube_dl/extractor/baidu.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/baidu.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unescapeHTML | ||||
|  | ||||
|  | ||||
class BaiduVideoIE(InfoExtractor):
    """Playlist extractor for Baidu Video (v.baidu.com) shows and comics."""
    IE_DESC = '百度视频'
    _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
    _TESTS = [{
        'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
        'info_dict': {
            'id': '1069',
            'title': '中华小当家 TV版国语',
            'description': 'md5:51be07afe461cf99fa61231421b5397c',
        },
        'playlist_count': 52,
    }, {
        'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand',
        'info_dict': {
            'id': '11595',
            'title': 're:^奔跑吧兄弟',
            'description': 'md5:1bf88bad6d850930f542d51547c089b8',
        },
        'playlist_mincount': 12,
    }]

    def _call_api(self, path, category, playlist_id, note):
        """Fetch one of the app.video.baidu.com JSON endpoints."""
        return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
            path, category, playlist_id), playlist_id, note)

    def _real_extract(self, url):
        category, playlist_id = re.match(self._VALID_URL, url).groups()
        # URL categories differ from the API's "worktype" names.
        category = {'show': 'tvshow', 'tv': 'tvplay'}.get(category, category)

        playlist_detail = self._call_api(
            'xqinfo', category, playlist_id, 'Download playlist JSON metadata')

        playlist_title = playlist_detail['title']
        playlist_description = unescapeHTML(playlist_detail.get('intro'))

        episodes_detail = self._call_api(
            'xqsingle', category, playlist_id, 'Download episodes JSON metadata')

        entries = [
            self.url_result(episode['url'], video_title=episode['title'])
            for episode in episodes_detail['videos']]

        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)
							
								
								
									
										417
									
								
								youtube_dl/extractor/bandcamp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										417
									
								
								youtube_dl/extractor/bandcamp.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,417 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import random | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     KNOWN_EXTENSIONS, | ||||
|     parse_filesize, | ||||
|     str_or_none, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
|     update_url_query, | ||||
|     unified_strdate, | ||||
|     unified_timestamp, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BandcampIE(InfoExtractor):
    """Extract a single Bandcamp track.

    Formats come from two sources: the streaming 'trackinfo' JSON embedded
    in the track page, and — when a free-download link is present — the
    download page, whose per-format URLs must first be refreshed through a
    'statdownload' request.
    """
    _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song',
        'md5': 'c557841d5e50261777a6585648adf439',
        'info_dict': {
            'id': '1812978515',
            'ext': 'mp3',
            'title': "youtube-dlc  \"'/\\\u00e4\u21ad - youtube-dlc test song \"'/\\\u00e4\u21ad",
            'duration': 9.8485,
        },
        '_skip': 'There is a limit of 200 free downloads / month for the test song'
    }, {
        # free download
        'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
        'md5': '853e35bf34aa1d6fe2615ae612564b36',
        'info_dict': {
            'id': '2650410135',
            'ext': 'aiff',
            'title': 'Ben Prunty - Lanius (Battle)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Ben Prunty',
            'timestamp': 1396508491,
            'upload_date': '20140403',
            'release_date': '20140403',
            'duration': 260.877,
            'track': 'Lanius (Battle)',
            'track_number': 1,
            'track_id': '2650410135',
            'artist': 'Ben Prunty',
            'album': 'FTL: Advanced Edition Soundtrack',
        },
    }, {
        # no free download, mp3 128
        'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
        'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
        'info_dict': {
            'id': '2584466013',
            'ext': 'mp3',
            'title': 'Mastodon - Hail to Fire',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Mastodon',
            'timestamp': 1322005399,
            'upload_date': '20111122',
            'release_date': '20040207',
            'duration': 120.79,
            'track': 'Hail to Fire',
            'track_number': 5,
            'track_id': '2584466013',
            'artist': 'Mastodon',
            'album': 'Call of the Mastodon',
        },
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
        thumbnail = self._html_search_meta('og:image', webpage, default=None)

        track_id = None
        track = None
        track_number = None
        duration = None

        formats = []
        # Streaming formats live in the page's inline trackinfo JSON.
        track_info = self._parse_json(
            self._search_regex(
                r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
                webpage, 'track info', default='{}'), title)
        if track_info:
            file_ = track_info.get('file')
            if isinstance(file_, dict):
                # Keys look like '<ext>-<abr>' (e.g. 'mp3-128').
                for format_id, format_url in file_.items():
                    if not url_or_none(format_url):
                        continue
                    ext, abr_str = format_id.split('-', 1)
                    formats.append({
                        'format_id': format_id,
                        'url': self._proto_relative_url(format_url, 'http:'),
                        'ext': ext,
                        'vcodec': 'none',
                        'acodec': ext,
                        'abr': int_or_none(abr_str),
                    })
            track = track_info.get('title')
            track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
            track_number = int_or_none(track_info.get('track_num'))
            duration = float_or_none(track_info.get('duration'))

        def extract(key):
            # Pull a quoted scalar out of the page's inline JS data.
            return self._search_regex(
                r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
                webpage, key, default=None, group='value')

        artist = extract('artist')
        album = extract('album_title')
        timestamp = unified_timestamp(
            extract('publish_date') or extract('album_publish_date'))
        release_date = unified_strdate(extract('album_release_date'))

        # Optional free-download flow: yields higher-quality formats.
        download_link = self._search_regex(
            r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'download link', default=None, group='url')
        if download_link:
            track_id = self._search_regex(
                r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
                webpage, 'track id')

            download_webpage = self._download_webpage(
                download_link, track_id, 'Downloading free downloads page')

            blob = self._parse_json(
                self._search_regex(
                    r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
                    'blob', group='blob'),
                track_id, transform_source=unescapeHTML)

            info = try_get(
                blob, (lambda x: x['digital_items'][0],
                       lambda x: x['download_items'][0]), dict)
            if info:
                downloads = info.get('downloads')
                if isinstance(downloads, dict):
                    # Prefer download-page metadata only where the track
                    # page left gaps.
                    if not track:
                        track = info.get('title')
                    if not artist:
                        artist = info.get('artist')
                    if not thumbnail:
                        thumbnail = info.get('thumb_url')

                    # Map encoding names to file extensions.
                    download_formats = {}
                    download_formats_list = blob.get('download_formats')
                    if isinstance(download_formats_list, list):
                        for f in blob['download_formats']:
                            name, ext = f.get('name'), f.get('file_extension')
                            if all(isinstance(x, compat_str) for x in (name, ext)):
                                download_formats[name] = ext.strip('.')

                    for format_id, f in downloads.items():
                        format_url = f.get('url')
                        if not format_url:
                            continue
                        # Stat URL generation algorithm is reverse engineered from
                        # download_*_bundle_*.js
                        stat_url = update_url_query(
                            format_url.replace('/download/', '/statdownload/'), {
                                '.rand': int(time.time() * 1000 * random.random()),
                            })
                        format_id = f.get('encoding_name') or format_id
                        # The stat response is JSONP-ish; slice out the JSON body.
                        stat = self._download_json(
                            stat_url, track_id, 'Downloading %s JSON' % format_id,
                            transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
                            fatal=False)
                        if not stat:
                            continue
                        retry_url = url_or_none(stat.get('retry_url'))
                        if not retry_url:
                            continue
                        formats.append({
                            'url': self._proto_relative_url(retry_url, 'http:'),
                            'ext': download_formats.get(format_id),
                            'format_id': format_id,
                            'format_note': f.get('description'),
                            'filesize': parse_filesize(f.get('size_mb')),
                            'vcodec': 'none',
                        })

        self._sort_formats(formats)

        title = '%s - %s' % (artist, track) if artist else track

        if not duration:
            duration = float_or_none(self._html_search_meta(
                'duration', webpage, default=None))

        return {
            'id': track_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': artist,
            'timestamp': timestamp,
            'release_date': release_date,
            'duration': duration,
            'track': track,
            'track_number': track_number,
            'track_id': track_id,
            'artist': artist,
            'album': album,
            'formats': formats,
        }
|  | ||||
|  | ||||
| class BandcampAlbumIE(InfoExtractor): | ||||
|     IE_NAME = 'Bandcamp:album' | ||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', | ||||
|         'playlist': [ | ||||
|             { | ||||
|                 'md5': '39bc1eded3476e927c724321ddf116cf', | ||||
|                 'info_dict': { | ||||
|                     'id': '1353101989', | ||||
|                     'ext': 'mp3', | ||||
|                     'title': 'Intro', | ||||
|                 } | ||||
|             }, | ||||
|             { | ||||
|                 'md5': '1a2c32e2691474643e912cc6cd4bffaa', | ||||
|                 'info_dict': { | ||||
|                     'id': '38097443', | ||||
|                     'ext': 'mp3', | ||||
|                     'title': 'Kero One - Keep It Alive (Blazo remix)', | ||||
|                 } | ||||
|             }, | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'title': 'Jazz Format Mixtape vol.1', | ||||
|             'id': 'jazz-format-mixtape-vol-1', | ||||
|             'uploader_id': 'blazo', | ||||
|         }, | ||||
|         'params': { | ||||
|             'playlistend': 2 | ||||
|         }, | ||||
|         'skip': 'Bandcamp imposes download limits.' | ||||
|     }, { | ||||
|         'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', | ||||
|         'info_dict': { | ||||
|             'title': 'Hierophany of the Open Grave', | ||||
|             'uploader_id': 'nightbringer', | ||||
|             'id': 'hierophany-of-the-open-grave', | ||||
|         }, | ||||
|         'playlist_mincount': 9, | ||||
|     }, { | ||||
|         'url': 'http://dotscale.bandcamp.com', | ||||
|         'info_dict': { | ||||
|             'title': 'Loom', | ||||
|             'id': 'dotscale', | ||||
|             'uploader_id': 'dotscale', | ||||
|         }, | ||||
|         'playlist_mincount': 7, | ||||
|     }, { | ||||
|         # with escaped quote in title | ||||
|         'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', | ||||
|         'info_dict': { | ||||
|             'title': '"Entropy" EP', | ||||
|             'uploader_id': 'jstrecords', | ||||
|             'id': 'entropy-ep', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     }, { | ||||
|         # not all tracks have songs | ||||
|         'url': 'https://insulters.bandcamp.com/album/we-are-the-plague', | ||||
|         'info_dict': { | ||||
|             'id': 'we-are-the-plague', | ||||
|             'title': 'WE ARE THE PLAGUE', | ||||
|             'uploader_id': 'insulters', | ||||
|         }, | ||||
|         'playlist_count': 2, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return (False | ||||
|                 if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) | ||||
|                 else super(BandcampAlbumIE, cls).suitable(url)) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         uploader_id = mobj.group('subdomain') | ||||
|         album_id = mobj.group('album_id') | ||||
|         playlist_id = album_id or uploader_id | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         track_elements = re.findall( | ||||
|             r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage) | ||||
|         if not track_elements: | ||||
|             raise ExtractorError('The page doesn\'t contain any tracks') | ||||
|         # Only tracks with duration info have songs | ||||
|         entries = [ | ||||
|             self.url_result( | ||||
|                 compat_urlparse.urljoin(url, t_path), | ||||
|                 ie=BandcampIE.ie_key(), | ||||
|                 video_title=self._search_regex( | ||||
|                     r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', | ||||
|                     elem_content, 'track title', fatal=False)) | ||||
|             for elem_content, t_path in track_elements | ||||
|             if self._html_search_meta('duration', elem_content, default=None)] | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"', | ||||
|             webpage, 'title', fatal=False) | ||||
|         if title: | ||||
|             title = title.replace(r'\"', '"') | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'uploader_id': uploader_id, | ||||
|             'id': playlist_id, | ||||
|             'title': title, | ||||
|             'entries': entries, | ||||
|         } | ||||
|  | ||||
|  | ||||
class BandcampWeeklyIE(InfoExtractor):
    """Extractor for Bandcamp Weekly radio-show episodes (bandcamp.com/?show=N)."""
    IE_NAME = 'Bandcamp:weekly'
    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://bandcamp.com/?show=224',
        'md5': 'b00df799c733cf7e0c567ed187dea0fd',
        'info_dict': {
            'id': '224',
            'ext': 'opus',
            'title': 'BC Weekly April 4th 2017 - Magic Moments',
            'description': 'md5:5d48150916e8e02d030623a48512c874',
            'duration': 5829.77,
            'release_date': '20170404',
            'series': 'Bandcamp Weekly',
            'episode': 'Magic Moments',
            'episode_number': 208,
            'episode_id': '224',
        }
    }, {
        'url': 'https://bandcamp.com/?blah/blah@&show=228',
        'only_matching': True
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # All episode metadata is embedded as an HTML-escaped JSON data-blob.
        blob = self._parse_json(
            self._search_regex(
                r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
                'blob', group='blob'),
            video_id, transform_source=unescapeHTML)

        show = blob['bcw_show']

        # This is desired because any invalid show id redirects to `bandcamp.com`
        # which happens to expose the latest Bandcamp Weekly episode.
        show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)

        formats = []
        for format_id, format_url in show['audio_stream'].items():
            if not url_or_none(format_url):
                continue
            # Derive the container extension from the stream name if it
            # embeds a known one (e.g. "opus-lo", "mp3-128").
            ext = next(
                (known_ext for known_ext in KNOWN_EXTENSIONS
                 if known_ext in format_id), None)
            formats.append({
                'format_id': format_id,
                'url': format_url,
                'ext': ext,
                'vcodec': 'none',
            })
        self._sort_formats(formats)

        title = show.get('audio_title') or 'Bandcamp Weekly'
        subtitle = show.get('subtitle')
        if subtitle:
            title = '%s - %s' % (title, subtitle)

        # Locate this episode's number in the season sequence, when present.
        episode_number = None
        seq = blob.get('bcw_seq')
        if isinstance(seq, list) and seq:
            for entry in seq:
                if isinstance(entry, dict) and int_or_none(entry.get('id')) == show_id:
                    episode_number = int_or_none(entry.get('episode_number'))
                    break

        return {
            'id': video_id,
            'title': title,
            'description': show.get('desc') or show.get('short_desc'),
            'duration': float_or_none(show.get('audio_duration')),
            'is_live': False,
            'release_date': unified_strdate(show.get('published_date')),
            'series': 'Bandcamp Weekly',
            'episode': show.get('subtitle'),
            'episode_number': episode_number,
            'episode_id': compat_str(video_id),
            'formats': formats
        }
							
								
								
									
										1359
									
								
								youtube_dl/extractor/bbc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1359
									
								
								youtube_dl/extractor/bbc.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										194
									
								
								youtube_dl/extractor/beampro.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										194
									
								
								youtube_dl/extractor/beampro.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,194 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     clean_html, | ||||
|     compat_str, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
|     urljoin, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BeamProBaseIE(InfoExtractor):
    """Shared constants and helpers for the Mixer (formerly Beam) extractors."""
    _API_BASE = 'https://mixer.com/api/v1'
    # Audience rating -> minimum viewer age.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}

    def _extract_channel_info(self, chan):
        # The uploader id may live directly on the channel or on its user object.
        user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
        uploader = chan.get('token') or try_get(
            chan, lambda x: x['user']['username'], compat_str)
        return {
            'uploader': uploader,
            'uploader_id': compat_str(user_id) if user_id else None,
            'age_limit': self._RATINGS.get(chan.get('audience')),
        }
|  | ||||
|  | ||||
class BeamProLiveIE(BeamProBaseIE):
    """Extractor for live Mixer channel streams."""
    IE_NAME = 'Mixer:live'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://mixer.com/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 //  The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }

    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE

    @classmethod
    def suitable(cls, url):
        # VOD URLs also match this loose pattern; let the VOD extractor win.
        if BeamProVodIE.suitable(url):
            return False
        return super(BeamProLiveIE, cls).suitable(url)

    def _real_extract(self, url):
        channel_name = self._match_id(url)

        chan = self._download_json(
            '%s/channels/%s' % (self._API_BASE, channel_name), channel_name)

        if chan.get('online') is False:
            raise ExtractorError(
                '{0} is offline'.format(channel_name), expected=True)

        channel_id = chan['id']

        # The API exposes both an HLS and a SMIL manifest for live channels.
        formats = self._extract_m3u8_formats(
            self._MANIFEST_URL_TEMPLATE % (channel_id, 'm3u8'), channel_name,
            ext='mp4', m3u8_id='hls', fatal=False)
        formats.extend(self._extract_smil_formats(
            self._MANIFEST_URL_TEMPLATE % (channel_id, 'smil'), channel_name,
            fatal=False))
        self._sort_formats(formats)

        info = {
            'id': compat_str(chan.get('id') or channel_name),
            'title': self._live_title(chan.get('name') or channel_name),
            'description': clean_html(chan.get('description')),
            'thumbnail': try_get(
                chan, lambda x: x['thumbnail']['url'], compat_str),
            'timestamp': parse_iso8601(chan.get('updatedAt')),
            'is_live': True,
            'view_count': int_or_none(chan.get('viewersTotal')),
            'formats': formats,
        }
        info.update(self._extract_channel_info(chan))

        return info
|  | ||||
|  | ||||
class BeamProVodIE(BeamProBaseIE):
    """Extractor for Mixer VOD recordings (?vod=<id> URLs)."""
    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
        'only_matching': True,
    }, {
        'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_format(vod, vod_type):
        """Return a one-element format list for a VOD entry, or [] if unusable."""
        base_url = vod.get('baseUrl')
        if not base_url:
            return []

        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8_native'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            # Caller only passes 'hls' or 'raw'.
            assert False

        data = vod.get('data')
        if not isinstance(data, dict):
            data = {}

        format_id_parts = [vod_type]
        if isinstance(data.get('Height'), compat_str):
            format_id_parts.append('%sp' % data['Height'])

        return [{
            'url': urljoin(base_url, filename),
            'format_id': '-'.join(format_id_parts),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]

    def _real_extract(self, url):
        vod_id = self._match_id(url)

        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)

        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)

        formats = []
        thumbnail_url = None
        for vod in vod_info['vods']:
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
        self._sort_formats(formats)

        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))

        return info
							
								
								
									
										103
									
								
								youtube_dl/extractor/beatport.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								youtube_dl/extractor/beatport.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,103 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
class BeatportIE(InfoExtractor):
    """Extractor for Beatport track preview audio (96 kbps MP3/AAC clips)."""
    _VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://beatport.com/track/synesthesia-original-mix/5379371',
        'md5': 'b3c34d8639a2f6a7f734382358478887',
        'info_dict': {
            'id': '5379371',
            'display_id': 'synesthesia-original-mix',
            'ext': 'mp4',
            'title': 'Froxic - Synesthesia (Original Mix)',
        },
    }, {
        'url': 'https://beatport.com/track/love-and-war-original-mix/3756896',
        'md5': 'e44c3025dfa38c6577fbaeb43da43514',
        'info_dict': {
            'id': '3756896',
            'display_id': 'love-and-war-original-mix',
            'ext': 'mp3',
            'title': 'Wolfgang Gartner - Love & War (Original Mix)',
        },
    }, {
        'url': 'https://beatport.com/track/birds-original-mix/4991738',
        'md5': 'a1fd8e8046de3950fd039304c186c05f',
        'info_dict': {
            'id': '4991738',
            'display_id': 'birds-original-mix',
            'ext': 'mp4',
            'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
        }
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        track_id = mobj.group('id')
        display_id = mobj.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # Track metadata is embedded as `window.Playables = {...};` JS.
        playables = self._parse_json(
            self._search_regex(
                r'window\.Playables\s*=\s*({.+?});', webpage,
                'playables info', flags=re.DOTALL),
            track_id)

        track = next(t for t in playables['tracks'] if t['id'] == int(track_id))

        title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
        if track['mix']:
            title += ' (' + track['mix'] + ')'

        formats = []
        for ext, info in track['preview'].items():
            if not info['url']:
                continue
            fmt = {
                'url': info['url'],
                'ext': ext,
                'format_id': ext,
                'vcodec': 'none',
            }
            # Previews are fixed-quality 96 kbps audio; prefer the AAC (mp4)
            # variant over mp3.
            if ext == 'mp3':
                fmt['preference'] = 0
                fmt['acodec'] = 'mp3'
                fmt['abr'] = 96
                fmt['asr'] = 44100
            elif ext == 'mp4':
                fmt['preference'] = 1
                fmt['acodec'] = 'aac'
                fmt['abr'] = 96
                fmt['asr'] = 44100
            formats.append(fmt)
        self._sort_formats(formats)

        images = []
        for name, info in track['images'].items():
            image_url = info.get('url')
            # 'dynamic' is a sizing URL template, not a concrete image.
            if name == 'dynamic' or not image_url:
                continue
            images.append({
                'id': name,
                'url': image_url,
                'height': int_or_none(info.get('height')),
                'width': int_or_none(info.get('width')),
            })

        return {
            # Apply the track_id fallback *before* stringifying: the previous
            # `compat_str(track.get('id')) or track_id` turned a missing id
            # into the truthy string 'None', so the fallback never fired.
            'id': compat_str(track.get('id') or track_id),
            'display_id': track.get('slug') or display_id,
            'title': title,
            'formats': formats,
            'thumbnails': images,
        }
							
								
								
									
										116
									
								
								youtube_dl/extractor/beeg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								youtube_dl/extractor/beeg.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unified_timestamp, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BeegIE(InfoExtractor):
    """Extractor for beeg.com / beeg.porn videos (api/v6, versions 1 and 2)."""
    _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
    _TESTS = [{
        # api/v6 v1
        'url': 'http://beeg.com/5416503',
        'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
        'info_dict': {
            'id': '5416503',
            'ext': 'mp4',
            'title': 'Sultry Striptease',
            'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
            'timestamp': 1391813355,
            'upload_date': '20140207',
            'duration': 383,
            'tags': list,
            'age_limit': 18,
        }
    }, {
        # api/v6 v2
        'url': 'https://beeg.com/1941093077?t=911-1391',
        'only_matching': True,
    }, {
        # api/v6 v2 w/o t
        'url': 'https://beeg.com/1277207756',
        'only_matching': True,
    }, {
        'url': 'https://beeg.porn/video/5416503',
        'only_matching': True,
    }, {
        'url': 'https://beeg.porn/5416503',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        beeg_version = self._search_regex(
            r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
            default='1546225636701')

        # Long ids belong to the v2 API, which also accepts an optional
        # start/end clip range passed via the `t` query parameter as "s-e".
        if len(video_id) >= 10:
            query = {'v': 2}
            qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            t = qs.get('t', [''])[0].split('-')
            if len(t) > 1:
                query['s'] = t[0]
                query['e'] = t[1]
        else:
            query = {'v': 1}

        # Try the bare domain first (non-fatal); fall back to api. (fatal).
        video = None
        for api_path in ('', 'api.'):
            video = self._download_json(
                'https://%sbeeg.com/api/v6/%s/video/%s'
                % (api_path, beeg_version, video_id), video_id,
                fatal=api_path == 'api.', query=query)
            if video:
                break

        # Format URLs are keyed by their vertical resolution, e.g. '720p'.
        formats = []
        for format_id, video_url in video.items():
            if not video_url:
                continue
            height = self._search_regex(
                r'^(\d+)[pP]$', format_id, 'height', default=None)
            if not height:
                continue
            formats.append({
                'url': self._proto_relative_url(
                    video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
                'format_id': format_id,
                'height': int(height),
            })
        self._sort_formats(formats)

        title = video['title']

        tags = None
        if video.get('tags'):
            tags = [tag.strip() for tag in video['tags'].split(',')]

        return {
            'id': compat_str(video.get('id') or video_id),
            'display_id': video.get('code'),
            'title': title,
            'description': video.get('desc'),
            'series': video.get('ps_name'),
            'timestamp': unified_timestamp(video.get('date')),
            'duration': int_or_none(video.get('duration')),
            'tags': tags,
            'formats': formats,
            'age_limit': self._rta_search(webpage),
        }
							
								
								
									
										46
									
								
								youtube_dl/extractor/behindkink.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								youtube_dl/extractor/behindkink.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import url_basename | ||||
|  | ||||
|  | ||||
class BehindKinkIE(InfoExtractor):
    """Extractor for behindkink.com blog-post videos (date-based URLs)."""
    _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
    _TEST = {
        'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
        'md5': '507b57d8fdcd75a41a9a7bdb7989c762',
        'info_dict': {
            'id': '37127',
            'ext': 'mp4',
            'title': 'What are you passionate about – Marley Blaze',
            'description': 'md5:aee8e9611b4ff70186f752975d9b94b4',
            'upload_date': '20141205',
            'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        webpage = self._download_webpage(url, display_id)

        video_url = self._search_regex(
            r'<source src="([^"]+)"', webpage, 'video URL')
        # The numeric video id is the leading component of the media file name.
        video_id = url_basename(video_url).split('_')[0]
        # The upload date comes straight from the URL path (YYYYMMDD).
        upload_date = '%s%s%s' % (
            mobj.group('year'), mobj.group('month'), mobj.group('day'))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': self._og_search_title(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': self._og_search_description(webpage),
            'upload_date': upload_date,
            'age_limit': 18,
        }
							
								
								
									
										88
									
								
								youtube_dl/extractor/bellmedia.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								youtube_dl/extractor/bellmedia.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class BellMediaIE(InfoExtractor):
    """Extractor for Bell Media properties; delegates to NineCNineMedia."""
    _VALID_URL = r'''(?x)https?://(?:www\.)?
        (?P<domain>
            (?:
                ctv|
                tsn|
                bnn(?:bloomberg)?|
                thecomedynetwork|
                discovery|
                discoveryvelocity|
                sciencechannel|
                investigationdiscovery|
                animalplanet|
                bravo|
                mtv|
                space|
                etalk|
                marilyn
            )\.ca|
            (?:much|cp24)\.com
        )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
    _TESTS = [{
        'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
        'md5': '36d3ef559cfe8af8efe15922cd3ce950',
        'info_dict': {
            'id': '1403070',
            'ext': 'flv',
            'title': 'David Cockfield\'s Top Picks',
            'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
            'upload_date': '20180525',
            'timestamp': 1527288600,
        },
    }, {
        'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
        'only_matching': True,
    }, {
        'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
        'only_matching': True,
    }, {
        'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
        'only_matching': True,
    }, {
        'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
        'only_matching': True,
    }, {
        'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
        'only_matching': True,
    }, {
        'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
        'only_matching': True,
    }, {
        'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
        'only_matching': True,
    }, {
        'url': 'http://www.etalk.ca/video?videoid=663455',
        'only_matching': True,
    }, {
        'url': 'https://www.cp24.com/video?clipId=1982548',
        'only_matching': True,
    }]
    # Maps a site's domain label to its 9c9media destination code; labels
    # absent here pass through unchanged.
    _DOMAINS = {
        'thecomedynetwork': 'comedy',
        'discoveryvelocity': 'discvel',
        'sciencechannel': 'discsci',
        'investigationdiscovery': 'invdisc',
        'animalplanet': 'aniplan',
        'etalk': 'ctv',
        'bnnbloomberg': 'bnn',
        'marilyn': 'ctv_marilyn',
    }

    def _real_extract(self, url):
        domain, video_id = re.match(self._VALID_URL, url).groups()
        # Keep only the first label ('ctv' from 'ctv.ca', 'much' from 'much.com').
        brand = domain.split('.')[0]
        destination = self._DOMAINS.get(brand, brand)
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': '9c9media:%s_web:%s' % (destination, video_id),
            'ie_key': 'NineCNineMedia',
        }
							
								
								
									
										80
									
								
								youtube_dl/extractor/bet.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								youtube_dl/extractor/bet.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
|  | ||||
class BetIE(MTVServicesInfoExtractor):
    """Extractor for bet.com article pages embedding an MTV-services player."""
    _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html'
    _TESTS = [{
        'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html',
        'info_dict': {
            'id': '07e96bd3-8850-3051-b856-271b457f0ab8',
            'display_id': 'in-bet-exclusive-obama-talks-race-and-racism',
            'ext': 'flv',
            'title': 'A Conversation With President Obama',
            'description': 'President Obama urges persistence in confronting racism and bias.',
            'duration': 1534,
            'upload_date': '20141208',
            'thumbnail': r're:(?i)^https?://.*\.jpg$',
            'subtitles': {
                'en': 'mincount:2',
            }
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html',
        'info_dict': {
            'id': '9f516bf1-7543-39c4-8076-dd441b459ba9',
            'display_id': 'justice-for-ferguson-a-community-reacts',
            'ext': 'flv',
            'title': 'Justice for Ferguson: A Community Reacts',
            'description': 'A BET News special.',
            'duration': 1696,
            'upload_date': '20141125',
            'thumbnail': r're:(?i)^https?://.*\.jpg$',
            'subtitles': {
                'en': 'mincount:2',
            }
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }]

    _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"

    def _get_feed_query(self, uri):
        """The BET feed is keyed on the player mgid via a ``uuid`` parameter."""
        return {'uuid': uri}

    def _extract_mgid(self, webpage):
        """Pull the player mgid out of the page's ``data-uri`` attribute."""
        return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')

    def _real_extract(self, url):
        """Fetch the feed entry for the embedded player and augment it with
        page-level metadata (display id, description, upload date)."""
        display_id = self._match_id(url)

        page = self._download_webpage(url, display_id)
        feed_info = self._get_videos_info(self._extract_mgid(page))

        result = feed_info['entries'][0]
        result.update({
            'display_id': display_id,
            'description': self._html_search_meta('description', page),
            'upload_date': unified_strdate(self._html_search_meta('date', page)),
        })
        return result
							
								
								
									
										37
									
								
								youtube_dl/extractor/bfi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								youtube_dl/extractor/bfi.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import extract_attributes | ||||
|  | ||||
|  | ||||
class BFIPlayerIE(InfoExtractor):
    """Extractor for BFI Player film pages (player.bfi.org.uk)."""
    IE_NAME = 'bfi:player'
    _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
    _TEST = {
        'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
        'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
        'info_dict': {
            'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
            'ext': 'mp4',
            'title': 'Computer Doctor',
            'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
        },
        'skip': 'BFI Player films cannot be played outside of the UK',
    }

    def _real_extract(self, url):
        """Collect every Ooyala embed on the page into a playlist."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        results = []
        # Each player element carries its Ooyala id in data-video-id and an
        # optional human-readable label in data-label.
        for markup in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
            attrs = extract_attributes(markup)
            embed_id = attrs.get('data-video-id')
            if embed_id:
                results.append(self.url_result(
                    'ooyala:' + embed_id, 'Ooyala',
                    embed_id, attrs.get('data-label')))
        return self.playlist_result(results)
							
								
								
									
										78
									
								
								youtube_dl/extractor/bigflix.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								youtube_dl/extractor/bigflix.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,78 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_b64decode, | ||||
|     compat_urllib_parse_unquote, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BigflixIE(InfoExtractor):
    """Extractor for bigflix.com movie pages.

    Stream URLs are advertised in the page markup as URL-quoted,
    base64-encoded ``ContentURL_<height>p`` parameters, with an optional
    extra ``file=`` parameter carrying a fallback stream.
    """
    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P<id>[0-9]+)'
    _TESTS = [{
        # 2 formats
        'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070',
        'info_dict': {
            'id': '16070',
            'ext': 'mp4',
            'title': 'Madarasapatinam',
            'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b',
            'formats': 'mincount:2',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        # multiple formats
        'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
            webpage, 'title')

        def decode_url(quoted_b64_url):
            # The page stores stream URLs URL-quoted on top of base64.
            return compat_b64decode(compat_urllib_parse_unquote(
                quoted_b64_url)).decode('utf-8')

        formats = []
        for height, encoded_url in re.findall(
                r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
            video_url = decode_url(encoded_url)
            f = {
                'url': video_url,
                'format_id': '%sp' % height,
                'height': int(height),
            }
            if video_url.startswith('rtmp'):
                f['ext'] = 'flv'
            formats.append(f)

        file_url = self._search_regex(
            r'file=([^&]+)', webpage, 'video url', default=None)
        if file_url:
            video_url = decode_url(file_url)
            # Add the fallback stream only if it is not already listed; reuse
            # the decoded URL instead of decoding the same parameter twice.
            if all(f['url'] != video_url for f in formats):
                formats.append({
                    'url': video_url,
                })

        self._sort_formats(formats)

        description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats
        }
							
								
								
									
										40
									
								
								youtube_dl/extractor/bild.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								youtube_dl/extractor/bild.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BildIE(InfoExtractor):
    """Extractor for Bild.de video clips."""
    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
    IE_DESC = 'Bild.de'
    _TEST = {
        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
        'md5': 'dd495cbd99f2413502a1713a1156ac8a',
        'info_dict': {
            'id': '38184146',
            'ext': 'mp4',
            'title': 'Das können die  neuen iPads',
            'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 196,
        }
    }

    def _real_extract(self, url):
        """Fetch the clip's JSON view and map it to an info dict."""
        video_id = self._match_id(url)

        # The machine-readable representation lives at the same path with a
        # ',view=json' suffix before the '.bild.html' extension.
        json_url = url.split('.bild.html')[0] + ',view=json.bild.html'
        video_data = self._download_json(json_url, video_id)

        return {
            'id': video_id,
            'title': unescapeHTML(video_data['title']).strip(),
            'description': unescapeHTML(video_data.get('description')),
            'url': video_data['clipList'][0]['srces'][0]['src'],
            'thumbnail': video_data.get('poster'),
            'duration': int_or_none(video_data.get('durationSec')),
        }
							
								
								
									
										450
									
								
								youtube_dl/extractor/bilibili.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										450
									
								
								youtube_dl/extractor/bilibili.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,450 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import hashlib | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     parse_iso8601, | ||||
|     smuggle_url, | ||||
|     str_or_none, | ||||
|     strip_jsonp, | ||||
|     unified_timestamp, | ||||
|     unsmuggle_url, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BiliBiliIE(InfoExtractor):
    """Extractor for bilibili.tv/.com videos (legacy numeric av ids and new
    alphanumeric BV ids) as well as single bangumi (anime) episodes."""
    # NOTE(review): numeric av/anime ids are captured by the group named
    # 'id_bv' while BV ids land in 'id'; _real_extract ORs both groups so
    # extraction still works, but the names look swapped — confirm before
    # relying on the group names elsewhere.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:(?:www|bangumi)\.)?
                        bilibili\.(?:tv|com)/
                        (?:
                            (?:
                                video/[aA][vV]|
                                anime/(?P<anime_id>\d+)/play\#
                            )(?P<id_bv>\d+)|
                            video/[bB][vV](?P<id>[^/?#&]+)
                        )
                    '''

    _TESTS = [{
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
        'info_dict': {
            'id': '1074402',
            'ext': 'flv',
            'title': '【金坷垃】金泡沫',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'duration': 308.067,
            'timestamp': 1398012678,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': '菊子桑',
            'uploader_id': '156160',
        },
    }, {
        # Tested in BiliBiliBangumiIE
        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
        'only_matching': True,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
        'md5': '3f721ad1e75030cc06faf73587cfec57',
        'info_dict': {
            'id': '100643',
            'ext': 'mp4',
            'title': 'CHAOS;CHILD',
            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
        },
        'skip': 'Geo-restricted to China',
    }, {
        # Title with double quotes
        'url': 'http://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': '8903802',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
        },
        'playlist': [{
            'info_dict': {
                'id': '8903802_part1',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,  # Test metadata only
            },
        }, {
            'info_dict': {
                'id': '8903802_part2',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,  # Test metadata only
            },
        }]
    }, {
        # new BV video id format
        'url': 'https://www.bilibili.com/video/BV1JE411F741',
        'only_matching': True,
    }]

    # App key/secret pair used to sign playurl API requests (see the md5
    # signing in _real_extract).
    _APP_KEY = 'iVGUTjsxvpLeuDCf'
    _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'

    def _report_error(self, result):
        """Raise an ExtractorError describing an API error payload *result*.

        Prefers the server-supplied 'message', then the numeric 'code', and
        falls back to a generic episode-ID error. Always raises.
        """
        if 'message' in result:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
        elif 'code' in result:
            raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
        else:
            raise ExtractorError('Can\'t extract Bangumi episode ID')

    def _real_extract(self, url):
        """Extract a single video (possibly multi-part) or bangumi episode.

        Returns either a single entry dict (one part) or a multi_video
        result whose entries are the individual parts.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        # Either the BV group or the numeric group matched; see the NOTE on
        # _VALID_URL about the group naming.
        video_id = mobj.group('id') or mobj.group('id_bv')
        anime_id = mobj.group('anime_id')
        webpage = self._download_webpage(url, video_id)

        if 'anime/' not in url:
            # Regular video page: the cid is embedded in the page markup or,
            # failing that, in the embedded player's query string.
            cid = self._search_regex(
                r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
                default=None
            ) or compat_parse_qs(self._search_regex(
                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
                 r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                webpage, 'player parameters'))['cid'][0]
        else:
            # Bangumi episode: the cid must be resolved via the web API.
            # The tip is suppressed when BiliBiliBangumiIE smuggled
            # 'no_bangumi_tip' into the URL.
            if 'no_bangumi_tip' not in smuggled_data:
                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dlc with %s' % (
                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': url
            }
            headers.update(self.geo_verification_headers())

            js = self._download_json(
                'http://bangumi.bilibili.com/web_api/get_source', video_id,
                data=urlencode_postdata({'episode_id': video_id}),
                headers=headers)
            if 'result' not in js:
                self._report_error(js)
            cid = js['result']['cid']

        headers = {
            'Referer': url
        }
        headers.update(self.geo_verification_headers())

        entries = []

        # Try the high-quality FLV rendition first, fall back to MP4. Only
        # the last attempt is fatal; earlier failures just move on.
        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
        for num, rendition in enumerate(RENDITIONS, start=1):
            # The playurl API requires an md5 signature over the query string
            # plus the app secret.
            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

            video_info = self._download_json(
                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                video_id, note='Downloading video info page',
                headers=headers, fatal=num == len(RENDITIONS))

            if not video_info:
                continue

            if 'durl' not in video_info:
                if num < len(RENDITIONS):
                    continue
                self._report_error(video_info)

            # Each 'durl' element is one part of the video, with optional
            # backup mirrors at lower priority.
            for idx, durl in enumerate(video_info['durl']):
                formats = [{
                    'url': durl['url'],
                    'filesize': int_or_none(durl['size']),
                }]
                for backup_url in durl.get('backup_url', []):
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'preference': -2 if 'hd.mp4' in backup_url else -3,
                    })

                for a_format in formats:
                    a_format.setdefault('http_headers', {}).update({
                        'Referer': url,
                    })

                self._sort_formats(formats)

                entries.append({
                    'id': '%s_part%s' % (video_id, idx),
                    'duration': float_or_none(durl.get('length'), 1000),
                    'formats': formats,
                })
            break

        # NOTE(review): this pattern tuple is not raw strings, so '\btitle'
        # contains a literal backspace (\x08) rather than a regex word
        # boundary; in practice the second pattern appears to do the
        # matching — confirm before changing.
        title = self._html_search_regex(
            ('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
            group='title')
        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
            default=None) or self._html_search_meta(
            'uploadDate', webpage, 'timestamp', default=None))
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        # NOTE: video_info here is the loop variable leaked from the
        # renditions loop above (the last successful response).
        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        }

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
            webpage)
        if uploader_mobj:
            info.update({
                'uploader': uploader_mobj.group('name'),
                'uploader_id': uploader_mobj.group('id'),
            })
        if not info.get('uploader'):
            info['uploader'] = self._html_search_meta(
                'author', webpage, 'uploader', default=None)

        # Page-level metadata applies to every part.
        for entry in entries:
            entry.update(info)

        if len(entries) == 1:
            return entries[0]
        else:
            # Re-number parts 1-based for the multi_video result.
            for idx, entry in enumerate(entries):
                entry['id'] = '%s_part%d' % (video_id, (idx + 1))

            return {
                '_type': 'multi_video',
                'id': video_id,
                'title': title,
                'description': description,
                'entries': entries,
            }
|  | ||||
|  | ||||
class BiliBiliBangumiIE(InfoExtractor):
    """Playlist extractor for whole bangumi (anime) seasons."""
    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Defer single-episode play URLs to BiliBiliIE."""
        if BiliBiliIE.suitable(url):
            return False
        return super(BiliBiliBangumiIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist of every episode in the season."""
        bangumi_id = self._match_id(url)

        # Sometimes this API returns a JSONP response, hence strip_jsonp.
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']

        episodes = []
        for episode in season_info['episodes']:
            episodes.append({
                '_type': 'url_transparent',
                # Smuggle a flag so BiliBiliIE skips its per-episode tip.
                'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
                'ie_key': BiliBiliIE.ie_key(),
                'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
                'episode': episode.get('index_title'),
                'episode_number': int_or_none(episode.get('index')),
            })
        episodes.sort(key=lambda entry: entry.get('episode_number'))

        return self.playlist_result(
            episodes, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))
|  | ||||
|  | ||||
class BilibiliAudioBaseIE(InfoExtractor):
    """Shared helper for the Bilibili audio (music) extractors."""

    def _call_api(self, path, sid, query=None):
        """Fetch the ``data`` payload from the audio web API for *sid*."""
        return self._download_json(
            'https://www.bilibili.com/audio/music-service-c/web/' + path,
            sid, query=query or {'sid': sid})['data']
|  | ||||
|  | ||||
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for single Bilibili audio tracks (au… ids)."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Resolve the CDN stream and song metadata for one track."""
        au_id = self._match_id(url)

        stream = self._call_api('url', au_id)
        formats = [{
            'url': stream['cdns'][0],
            'filesize': int_or_none(stream.get('size')),
        }]

        song = self._call_api('song/info', au_id)
        stats = song.get('statistic') or {}

        # Lyrics, when present, are exposed as an LRC file and surfaced as
        # an 'origin' subtitle track.
        lrc_url = song.get('lyric')
        subtitles = {'origin': [{'url': lrc_url}]} if lrc_url else None

        return {
            'id': au_id,
            'title': song['title'],
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(stats.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(stats.get('play')),
        }
|  | ||||
|  | ||||
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Playlist extractor for Bilibili audio albums/menus (am… ids)."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """List every track in the album and, when available, annotate the
        entries with the album title."""
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        entries = []
        for song in songs:
            sid = str_or_none(song.get('id'))
            if sid:
                entries.append(self.url_result(
                    'https://www.bilibili.com/audio/au' + sid,
                    BilibiliAudioIE.ie_key(), sid))

        if entries:
            album_data = self._call_api('menu/info', am_id) or {}
            album_title = album_data.get('title')
            if album_title:
                for entry in entries:
                    entry['album'] = album_title
                return self.playlist_result(
                    entries, am_id, album_title, album_data.get('intro'))

        # No entries, or the album metadata had no title.
        return self.playlist_result(entries, am_id)
|  | ||||
|  | ||||
class BiliBiliPlayerIE(InfoExtractor):
    """Redirect extractor for embedded player.bilibili.com URLs."""
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Delegate to BiliBiliIE via the canonical av-id video URL."""
        aid = self._match_id(url)
        return self.url_result(
            'http://www.bilibili.tv/video/av%s/' % aid,
            ie=BiliBiliIE.ie_key(), video_id=aid)
							
								
								
									
										86
									
								
								youtube_dl/extractor/biobiochiletv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								youtube_dl/extractor/biobiochiletv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,86 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BioBioChileTVIE(InfoExtractor):
    """Extractor for tv.biobiochile.cl / www.biobiochile.cl article pages.

    The article page embeds a rudo.video player; extraction of the actual
    formats is delegated to that embed via a ``url_transparent`` result,
    while title, thumbnail and uploader are scraped from the page itself.
    """
    _VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'

    _TESTS = [{
        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
        'md5': '26f51f03cf580265defefb4518faec09',
        'info_dict': {
            'id': 'sobre-camaras-y-camarillas-parlamentarias',
            'ext': 'mp4',
            'title': 'Sobre Cámaras y camarillas parlamentarias',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Fernando Atria',
        },
        'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
    }, {
        # different uploader layout
        'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
        'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
        'info_dict': {
            'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
            'ext': 'mp4',
            'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Piangella Obrador',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html',
    }, {
        'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
        'info_dict': {
            'id': 'b4xd0LK3SK',
            'ext': 'mp4',
            # TODO: fix url_transparent information overriding
            # 'uploader': 'Juan Pablo Echenique',
            'title': 'Comentario Oscar Cáceres',
        },
        'params': {
            # empty m3u8 manifest
            'skip_download': True,
        },
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
        'only_matching': True,
    }, {
        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Locate the rudo.video iframe embed; the search is non-fatal so
        # we can raise a clearer "no videos" error below instead.
        rudo_url = self._search_regex(
            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
            webpage, 'embed URL', None, group='url')
        if not rudo_url:
            raise ExtractorError('No videos found')

        # Page titles carry a ' - BioBioChile TV' suffix; drop it.
        title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')

        thumbnail = self._og_search_thumbnail(webpage)
        # Two author link layouts exist (see the second test case), hence
        # the alternations in the regex; missing uploader is tolerated.
        uploader = self._html_search_regex(
            r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
            webpage, 'uploader', fatal=False)

        # url_transparent: the rudo.video extractor supplies the formats,
        # augmented by the metadata scraped here.
        return {
            '_type': 'url_transparent',
            'url': rudo_url,
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
        }
							
								
								
									
										105
									
								
								youtube_dl/extractor/biqle.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								youtube_dl/extractor/biqle.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,105 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .vk import VKIE | ||||
| from ..compat import ( | ||||
|     compat_b64decode, | ||||
|     compat_urllib_parse_unquote, | ||||
| ) | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
class BIQLEIE(InfoExtractor):
    """Extractor for biqle.com/org/ru watch pages.

    BIQLE mirrors VK-hosted videos: the page embeds a daxab.com-style
    player whose credentials (video id, signature, access token) are used
    to query the VK ``video.get`` API directly.
    """
    _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
    _TESTS = [{
        # Youtube embed
        'url': 'https://biqle.ru/watch/-115995369_456239081',
        'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
        'info_dict': {
            'id': '8v4f-avW-VI',
            'ext': 'mp4',
            'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
            'description': 'Passe-Partout',
            'uploader_id': 'mrsimpsonstef3',
            'uploader': 'Phanolito',
            'upload_date': '20120822',
        },
    }, {
        'url': 'http://biqle.org/watch/-44781847_168547604',
        'md5': '7f24e72af1db0edf7c1aaba513174f97',
        'info_dict': {
            'id': '-44781847_168547604',
            'ext': 'mp4',
            'title': 'Ребенок в шоке от автоматической мойки',
            'timestamp': 1396633454,
            'uploader': 'Dmitry Kotov',
            'upload_date': '20140404',
            'uploader_id': '47850140',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        embed_url = self._proto_relative_url(self._search_regex(
            r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
            webpage, 'embed url'))
        # If the embed is a direct VK URL, just delegate to the VK extractor.
        if VKIE.suitable(embed_url):
            return self.url_result(embed_url, VKIE.ie_key(), video_id)

        embed_page = self._download_webpage(
            embed_url, video_id, headers={'Referer': url})
        # The player credentials ('video_ext') come either from a cookie set
        # by the embed page, or base64-encoded in the page source.
        video_ext = self._get_cookies(embed_url).get('video_ext')
        if video_ext:
            video_ext = compat_urllib_parse_unquote(video_ext.value)
        if not video_ext:
            video_ext = compat_b64decode(self._search_regex(
                r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
                embed_page, 'video_ext')).decode()
        # Colon-separated: VK video id, request signature, (unused), token.
        video_id, sig, _, access_token = video_ext.split(':')
        # Query the VK API directly; the okhttp User-Agent mimics the
        # mobile client this token/signature pair is issued for.
        item = self._download_json(
            'https://api.vk.com/method/video.get', video_id,
            headers={'User-Agent': 'okhttp/3.4.1'}, query={
                'access_token': access_token,
                'sig': sig,
                'v': 5.44,
                'videos': video_id,
            })['response']['items'][0]
        title = item['title']

        formats = []
        for f_id, f_url in item.get('files', {}).items():
            # 'external' marks an off-VK hosting (e.g. YouTube); hand over.
            if f_id == 'external':
                return self.url_result(f_url)
            # Remaining keys look like '<ext>_<height>', e.g. 'mp4_720'.
            ext, height = f_id.split('_')
            formats.append({
                'format_id': height + 'p',
                'url': f_url,
                'height': int_or_none(height),
                'ext': ext,
            })
        self._sort_formats(formats)

        # Thumbnails are exposed as 'photo_<width>' keys on the item.
        thumbnails = []
        for k, v in item.items():
            if k.startswith('photo_') and v:
                width = k.replace('photo_', '')
                thumbnails.append({
                    'id': width,
                    'url': v,
                    'width': int_or_none(width),
                })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'comment_count': int_or_none(item.get('comments')),
            'description': item.get('description'),
            'duration': int_or_none(item.get('duration')),
            'thumbnails': thumbnails,
            'timestamp': int_or_none(item.get('date')),
            # NOTE(review): owner_id looks like a numeric VK id rather than
            # a display name — confirm this is the intended uploader value.
            'uploader': item.get('owner_id'),
            'view_count': int_or_none(item.get('views')),
        }
							
								
								
									
										142
									
								
								youtube_dl/extractor/bitchute.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										142
									
								
								youtube_dl/extractor/bitchute.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,142 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     orderedSet, | ||||
|     unified_strdate, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BitChuteIE(InfoExtractor):
    """Extractor for single BitChute videos (video/embed/torrent URLs)."""
    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
        'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
        'info_dict': {
            'id': 'szoMrox2JEI',
            'ext': 'mp4',
            'title': 'Fuck bitches get money',
            'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'Victoria X Rave',
            'upload_date': '20170813',
        },
    }, {
        'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
        'only_matching': True,
    }, {
        'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Always fetch the canonical /video/ page (embed/torrent URLs are
        # normalized away); a desktop Chrome User-Agent is sent explicitly —
        # presumably required by the site, TODO confirm.
        webpage = self._download_webpage(
            'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
            })

        # Title fallback chain: page heading -> <title> -> meta description
        # -> og:description.
        title = self._html_search_regex(
            (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
            webpage, 'title', default=None) or self._html_search_meta(
            'description', webpage, 'title',
            default=None) or self._og_search_description(webpage)

        # Direct URLs come from torrent web seeds (addWebSeed(...)) and
        # 'as=' query parameters embedded in the page.
        format_urls = []
        for mobj in re.finditer(
                r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
            format_urls.append(mobj.group('url'))
        format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))

        formats = [
            {'url': format_url}
            for format_url in orderedSet(format_urls)]

        # Fall back to the HTML5 <video> sources if no seed URLs were found.
        if not formats:
            formats = self._parse_html5_media_entries(
                url, webpage, video_id)[0]['formats']

        # Seed URLs can be stale, so verify each format is reachable.
        self._check_formats(formats, video_id)
        self._sort_formats(formats)

        description = self._html_search_regex(
            r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
            webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail')
        # Two page layouts: channel banner vs. inline video author.
        uploader = self._html_search_regex(
            (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
             r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
            webpage, 'uploader', fatal=False)

        upload_date = unified_strdate(self._search_regex(
            r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
            webpage, 'upload date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
|  | ||||
|  | ||||
class BitChuteChannelIE(InfoExtractor):
    """Playlist extractor for BitChute channel pages.

    Pages are fetched through the channel's ``extend/`` AJAX endpoint,
    which returns HTML fragments that are scanned for video ids.
    """
    _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.bitchute.com/channel/victoriaxrave/',
        'playlist_mincount': 185,
        'info_dict': {
            'id': 'victoriaxrave',
        },
    }

    # Static CSRF token, sent both as a cookie and as a form field to
    # satisfy the endpoint's CSRF check.
    _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'

    def _entries(self, channel_id):
        """Yield url_results for all videos of the channel, page by page."""
        channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
        offset = 0
        for page_num in itertools.count(1):
            data = self._download_json(
                '%sextend/' % channel_url, channel_id,
                'Downloading channel page %d' % page_num,
                data=urlencode_postdata({
                    'csrfmiddlewaretoken': self._TOKEN,
                    'name': '',
                    'offset': offset,
                }), headers={
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                    'Referer': channel_url,
                    'X-Requested-With': 'XMLHttpRequest',
                    'Cookie': 'csrftoken=%s' % self._TOKEN,
                })
            # Stop on an explicit failure, an empty fragment, or a fragment
            # containing no further video links.
            if data.get('success') is False:
                break
            html = data.get('html')
            if not html:
                break
            video_ids = re.findall(
                r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
                html)
            if not video_ids:
                break
            offset += len(video_ids)
            for video_id in video_ids:
                yield self.url_result(
                    'https://www.bitchute.com/video/%s' % video_id,
                    ie=BitChuteIE.ie_key(), video_id=video_id)

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        return self.playlist_result(
            self._entries(channel_id), playlist_id=channel_id)
							
								
								
									
										106
									
								
								youtube_dl/extractor/bleacherreport.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								youtube_dl/extractor/bleacherreport.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,106 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .amp import AMPIE | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BleacherReportIE(InfoExtractor):
    """Extractor for bleacherreport.com articles containing a video.

    Article metadata comes from the public articles API; the video itself
    is dispatched by hosting type to the appropriate extractor via a
    ``url_transparent`` result.
    """
    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football',
        'md5': 'a3ffc3dc73afdbc2010f02d98f990f20',
        'info_dict': {
            'id': '2496438',
            'ext': 'mp4',
            'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?',
            'uploader_id': 3992341,
            'description': 'CFB, ACC, Florida State',
            'timestamp': 1434380212,
            'upload_date': '20150615',
            'uploader': 'Team Stream Now ',
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
        'md5': '6a5cd403418c7b01719248ca97fb0692',
        'info_dict': {
            'id': '2586817',
            'ext': 'webm',
            'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
            'timestamp': 1446839961,
            'uploader': 'Sean Fay',
            'description': 'md5:b1601e2314c4d8eec23b6eafe086a757',
            'uploader_id': 6466954,
            'upload_date': '20151011',
        },
        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        article_id = self._match_id(url)

        article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']

        # The article's primary photo (if any) doubles as the thumbnail.
        thumbnails = []
        primary_photo = article_data.get('primaryPhoto')
        if primary_photo:
            thumbnails = [{
                'url': primary_photo['url'],
                'width': primary_photo.get('width'),
                'height': primary_photo.get('height'),
            }]

        info = {
            '_type': 'url_transparent',
            'id': article_id,
            'title': article_data['title'],
            'uploader': article_data.get('author', {}).get('name'),
            'uploader_id': article_data.get('authorId'),
            'timestamp': parse_iso8601(article_data.get('createdAt')),
            'thumbnails': thumbnails,
            'comment_count': int_or_none(article_data.get('commentsCount')),
            'view_count': int_or_none(article_data.get('hitCount')),
        }

        video = article_data.get('video')
        if video:
            # Dispatch by hosting type; the fallback concatenates type and
            # id so generic URL handling may still pick it up.
            video_type = video['type']
            if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
                info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
            elif video_type == 'ooyala.com':
                info['url'] = 'ooyala:%s' % video['id']
            elif video_type == 'youtube.com':
                info['url'] = video['id']
            elif video_type == 'vine.co':
                info['url'] = 'https://vine.co/v/%s' % video['id']
            else:
                info['url'] = video_type + video['id']
            return info
        else:
            raise ExtractorError('no video in the article', expected=True)
|  | ||||
|  | ||||
class BleacherReportCMSIE(AMPIE):
    """Extractor for Bleacher Report CMS video embeds (AMP feed based)."""

    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
    _TESTS = [{
        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
            'ext': 'flv',
            'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
            'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
        },
    }]

    def _real_extract(self, url):
        cms_id = self._match_id(url)
        feed_url = 'http://vid.bleacherreport.com/videos/%s.akamai' % cms_id
        # The AMP feed does not carry the CMS id, so set it afterwards.
        info = self._extract_feed_info(feed_url)
        info['id'] = cms_id
        return info
							
								
								
									
										86
									
								
								youtube_dl/extractor/blinkx.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								youtube_dl/extractor/blinkx.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,86 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     remove_start, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BlinkxIE(InfoExtractor):
    """Extractor for blinkx.com videos via its play_video JSON API."""
    _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
    IE_NAME = 'blinkx'

    _TEST = {
        'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
        'md5': '337cf7a344663ec79bf93a526a2e06c7',
        'info_dict': {
            'id': 'Da0Gw3xc',
            'ext': 'mp4',
            'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
            'uploader': 'IGN News',
            'upload_date': '20150217',
            'timestamp': 1424215740,
            'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
            'duration': 47.743333,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # The short (display) id is the first 8 characters of the full hash.
        display_id = video_id[:8]

        # Use _download_json instead of manually json.loads()-ing the page:
        # same request, but with proper download notes and JSON error context.
        data = self._download_json(
            'https://apib4.blinkx.com/api.php?action=play_video&video=%s' % video_id,
            display_id)['api']['results'][0]
        duration = None
        thumbnails = []
        formats = []
        for m in data['media']:
            if m['type'] == 'jpg':
                thumbnails.append({
                    'url': m['link'],
                    'width': int(m['w']),
                    'height': int(m['h']),
                })
            elif m['type'] == 'original':
                duration = float(m['d'])
            elif m['type'] == 'youtube':
                # Off-site hosting: delegate to the YouTube extractor.
                yt_id = m['link']
                self.to_screen('Youtube video detected: %s' % yt_id)
                return self.url_result(yt_id, 'Youtube', video_id=yt_id)
            elif m['type'] in ('flv', 'mp4'):
                # Codec names carry an 'ff' (ffmpeg) prefix; strip it.
                vcodec = remove_start(m['vcodec'], 'ff')
                acodec = remove_start(m['acodec'], 'ff')
                # Bitrates are reported in bps; scale down to kbps.
                vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
                abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
                tbr = vbr + abr if vbr and abr else None
                format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
                formats.append({
                    'format_id': format_id,
                    'url': m['link'],
                    'vcodec': vcodec,
                    'acodec': acodec,
                    'abr': abr,
                    'vbr': vbr,
                    'tbr': tbr,
                    'width': int_or_none(m.get('w')),
                    'height': int_or_none(m.get('h')),
                })

        self._sort_formats(formats)

        return {
            'id': display_id,
            'fullid': video_id,
            'title': data['title'],
            'formats': formats,
            'uploader': data['channel_name'],
            'timestamp': data['pubdate_epoch'],
            'description': data.get('description'),
            'thumbnails': thumbnails,
            'duration': duration,
        }
							
								
								
									
										83
									
								
								youtube_dl/extractor/bloomberg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								youtube_dl/extractor/bloomberg.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,83 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class BloombergIE(InfoExtractor):
    """Extractor for bloomberg.com video pages (HLS/HDS via the embed API)."""
    _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
        # The md5 checksum changes
        'info_dict': {
            'id': 'qurhIVlJSB6hzkVi229d8g',
            'ext': 'flv',
            'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
            'description': 'md5:a8ba0302912d03d246979735c17d2761',
        },
        'params': {
            'format': 'best[format_id^=hds]',
        },
    }, {
        # video ID in BPlayer(...)
        'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/',
        'info_dict': {
            'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74',
            'ext': 'flv',
            'title': 'Meet the Real-Life Tech Wizards of Middle Earth',
            'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.',
        },
        'params': {
            'format': 'best[format_id^=hds]',
        },
    }, {
        # data-bmmrid=
        'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money',
        'only_matching': True,
    }, {
        'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets',
        'only_matching': True,
    }, {
        'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        name = self._match_id(url)
        webpage = self._download_webpage(url, name)
        # The real video id appears in several page layouts: bmmrId,
        # videoId, or a data-bmmrid attribute; try each in turn.
        video_id = self._search_regex(
            (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
             r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
             r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'),
            webpage, 'id', group='id', default=None)
        if not video_id:
            # Fallback: parse the BPlayer(null, {...}) initialization blob.
            bplayer_data = self._parse_json(self._search_regex(
                r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name)
            video_id = bplayer_data['id']
        # Page titles end in ': Video'; strip that suffix.
        title = re.sub(': Video$', '', self._og_search_title(webpage))

        embed_info = self._download_json(
            'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
        formats = []
        for stream in embed_info['streams']:
            stream_url = stream.get('url')
            if not stream_url:
                continue
            # TS-muxed streams are HLS; everything else is an HDS manifest.
            if stream['muxing_format'] == 'TS':
                formats.extend(self._extract_m3u8_formats(
                    stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
            else:
                formats.extend(self._extract_f4m_formats(
                    stream_url, video_id, f4m_id='hds', fatal=False))
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }
							
								
								
									
										60
									
								
								youtube_dl/extractor/bokecc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								youtube_dl/extractor/bokecc.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_parse_qs | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
class BokeCCBaseIE(InfoExtractor):
    def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
        """Locate the embedded BokeCC player on *webpage* and return a
        sorted format list obtained from the playinfo servlet."""
        query = self._html_search_regex(
            r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
            webpage, 'player params', group='query')

        params = compat_parse_qs(query)

        playinfo = self._download_xml(
            'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % (
                params['siteid'][0], params['vid'][0]), video_id)

        formats = []
        for quality in playinfo.findall('./video/quality'):
            # each <quality> node carries a numeric preference in @value and
            # a <copy> child holding the actual stream URL
            formats.append({
                'format_id': format_id,
                'url': quality.find('./copy').attrib['playurl'],
                'preference': int(quality.attrib['value']),
            })

        self._sort_formats(formats)

        return formats
|  | ||||
|  | ||||
class BokeCCIE(BokeCCBaseIE):
    # The attribute the InfoExtractor framework reads (e.g. for extractor
    # listings) is IE_DESC; the original `_IE_DESC` was never picked up, so
    # the description was silently dropped.  Keep the old name as an alias
    # in case anything referenced it.
    IE_DESC = 'CC视频'
    _IE_DESC = IE_DESC
    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'

    _TESTS = [{
        'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
        'info_dict': {
            'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
            'ext': 'flv',
            'title': 'BokeCC Video',
        },
    }]

    def _real_extract(self, url):
        """Extract a BokeCC video; both `uid` and `vid` query parameters
        are required to build the composite video id."""
        qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query'))
        if not qs.get('vid') or not qs.get('uid'):
            raise ExtractorError('Invalid URL', expected=True)

        video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0])

        webpage = self._download_webpage(url, video_id)

        return {
            'id': video_id,
            'title': 'BokeCC Video',  # no title provided in the webpage
            'formats': self._extract_bokecc_formats(webpage, video_id),
        }
							
								
								
									
										72
									
								
								youtube_dl/extractor/bostonglobe.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								youtube_dl/extractor/bostonglobe.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,72 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BostonGlobeIE(InfoExtractor):
    _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?'
    _TESTS = [
        {
            'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html',
            'md5': '0a62181079c85c2d2b618c9a738aedaf',
            'info_dict': {
                'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood',
                'id': '5320421710001',
                'ext': 'mp4',
                'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.',
                'timestamp': 1486877593,
                'upload_date': '20170212',
                'uploader_id': '245991542',
            },
        },
        {
            # Embedded youtube video; we hand it off to the Generic extractor.
            'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html',
            'md5': '582b40327089d5c0c949b3c54b13c24b',
            'info_dict': {
                'title': "Who Is Matt Damon's Favorite Batman?",
                'id': 'ZW1QCnlA6Qc',
                'ext': 'mp4',
                'upload_date': '20170217',
                'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb',
                'uploader': 'The Late Late Show with James Corden',
                'uploader_id': 'TheLateLateShow',
            },
            'expected_warnings': ['404'],
        },
    ]

    def _real_extract(self, url):
        """Collect Brightcove embeds from the page and dispatch accordingly."""
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        page_title = self._og_search_title(webpage, default=None)

        # Brightcove players are embedded as <video> tags carrying data-*
        # attributes, e.g.:
        # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject">
        entries = []
        for video_tag in re.findall(r'(?i)(<video[^>]+>)', webpage):
            attrs = extract_attributes(video_tag)

            video_id = attrs.get('data-brightcove-video-id')
            account_id = attrs.get('data-account')
            player_id = attrs.get('data-player')
            embed = attrs.get('data-embed')

            # only complete embeds (all four attributes) yield a player URL
            if all((video_id, account_id, player_id, embed)):
                entries.append(
                    'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
                    % (account_id, player_id, embed, video_id))

        if not entries:
            # no Brightcove embed found -- defer to the generic extractor
            return self.url_result(url, 'Generic')
        if len(entries) == 1:
            return self.url_result(entries[0], 'BrightcoveNew')
        return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew')
							
								
								
									
										62
									
								
								youtube_dl/extractor/bpb.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								youtube_dl/extractor/bpb.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BpbIE(InfoExtractor):
    IE_DESC = 'Bundeszentrale für politische Bildung'
    _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'

    _TEST = {
        'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
        # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2
        'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
        'info_dict': {
            'id': '297',
            'ext': 'mp4',
            'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
            'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
        }
    }

    def _real_extract(self, url):
        """Extract a video from a bpb.de mediathek page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'<h2 class="white">(.*?)</h2>', webpage, 'title')

        # the page embeds one JS object literal per available stream
        formats = []
        for raw_info in re.findall(
                r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage):
            info = self._parse_json(
                raw_info, video_id, transform_source=js_to_json, fatal=False)
            video_url = info.get('src') if info else None
            if not video_url:
                continue
            # quality is only signalled via a '_high' marker in the URL
            is_high = '_high' in video_url
            quality = 'high' if is_high else 'low'
            formats.append({
                'url': video_url,
                'preference': 10 if is_high else 0,
                'format_note': quality,
                'format_id': '%s-%s' % (quality, determine_ext(video_url)),
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': self._og_search_description(webpage),
        }
							
								
								
									
										311
									
								
								youtube_dl/extractor/br.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										311
									
								
								youtube_dl/extractor/br.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,311 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     xpath_element, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BRIE(InfoExtractor):
    IE_DESC = 'Bayerischer Rundfunk'
    _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html'

    _TESTS = [
        {
            'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html',
            'md5': '83a0477cf0b8451027eb566d88b51106',
            'info_dict': {
                'id': '48f656ef-287e-486f-be86-459122db22cc',
                'ext': 'mp4',
                'title': 'Die böse Überraschung',
                'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9',
                'duration': 180,
                'uploader': 'Reinhard Weber',
                'upload_date': '20150422',
            },
            'skip': '404 not found',
        },
        {
            'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html',
            'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef',
            'info_dict': {
                'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05',
                'ext': 'flv',
                'title': 'Manfred Schreiber ist tot',
                'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97',
                'duration': 26,
            },
            'skip': '404 not found',
        },
        {
            'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html',
            'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d',
            'info_dict': {
                'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b',
                'ext': 'aac',
                'title': 'Kurzweilig und sehr bewegend',
                'description': 'md5:0351996e3283d64adeb38ede91fac54e',
                'duration': 296,
            },
            'skip': '404 not found',
        },
        {
            'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html',
            'md5': 'dbab0aef2e047060ea7a21fc1ce1078a',
            'info_dict': {
                'id': '6ba73750-d405-45d3-861d-1ce8c524e059',
                'ext': 'mp4',
                'title': 'Umweltbewusster Häuslebauer',
                'description': 'md5:d52dae9792d00226348c1dbb13c9bae2',
                'duration': 116,
            }
        },
        {
            'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html',
            'md5': '23bca295f1650d698f94fc570977dae3',
            'info_dict': {
                'id': 'd982c9ce-8648-4753-b358-98abb8aec43d',
                'ext': 'mp4',
                'title': 'Folge 1 - Metaphysik',
                'description': 'md5:bb659990e9e59905c3d41e369db1fbe3',
                'duration': 893,
                'uploader': 'Eva Maria Steimle',
                'upload_date': '20170208',
            }
        },
    ]

    def _real_extract(self, url):
        """Resolve a br.de page to its single media entry.

        The page registers an av player whose dataURL points to an XML
        document describing all video/audio assets; that XML is the sole
        source of metadata and formats here.
        """
        base_url, display_id = re.search(self._VALID_URL, url).groups()
        page = self._download_webpage(url, display_id)
        xml_url = self._search_regex(
            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')
        xml = self._download_xml(base_url + xml_url, display_id)

        medias = []

        for xml_media in xml.findall('video') + xml.findall('audio'):
            media_id = xml_media.get('externalId')
            media = {
                'id': media_id,
                'title': xpath_text(xml_media, 'title', 'title', True),
                'duration': parse_duration(xpath_text(xml_media, 'duration')),
                'formats': self._extract_formats(xpath_element(
                    xml_media, 'assets'), media_id),
                'thumbnails': self._extract_thumbnails(xpath_element(
                    xml_media, 'teaserImage/variants'), base_url),
                'description': xpath_text(xml_media, 'desc'),
                'webpage_url': xpath_text(xml_media, 'permalink'),
                'uploader': xpath_text(xml_media, 'author'),
            }
            broadcast_date = xpath_text(xml_media, 'broadcastDate')
            if broadcast_date:
                # DD.MM.YYYY -> YYYYMMDD
                media['upload_date'] = ''.join(reversed(broadcast_date.split('.')))
            medias.append(media)

        if len(medias) > 1:
            self._downloader.report_warning(
                'found multiple medias; please '
                'report this with the video URL to http://yt-dl.org/bug')
        if not medias:
            raise ExtractorError('No media entries found')
        return medias[0]

    def _extract_formats(self, assets, media_id):
        """Build a sorted format list from an <assets> XML element.

        HDS/HLS assets are expanded via manifest parsing; everything else
        yields a progressive HTTP format and, when a serverPrefix is
        present, an additional RTMP format.
        """
        formats = []
        for asset in assets.findall('asset'):
            format_url = xpath_text(asset, ['downloadUrl', 'url'])
            # `type` may be absent; default to '' so startswith() is safe
            asset_type = asset.get('type') or ''
            if asset_type.startswith('HDS'):
                if format_url:  # avoid TypeError on missing URL
                    formats.extend(self._extract_f4m_formats(
                        format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False))
            elif asset_type.startswith('HLS'):
                # BUGFIX: these HLS formats were mislabelled m3u8_id='hds'
                formats.extend(self._extract_m3u8_formats(
                    format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                format_info = {
                    'ext': xpath_text(asset, 'mediaType'),
                    'width': int_or_none(xpath_text(asset, 'frameWidth')),
                    'height': int_or_none(xpath_text(asset, 'frameHeight')),
                    'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')),
                    'abr': int_or_none(xpath_text(asset, 'bitrateAudio')),
                    'vcodec': xpath_text(asset, 'codecVideo'),
                    'acodec': xpath_text(asset, 'codecAudio'),
                    'container': xpath_text(asset, 'mediaType'),
                    'filesize': int_or_none(xpath_text(asset, 'size')),
                }
                format_url = self._proto_relative_url(format_url)
                if format_url:
                    http_format_info = format_info.copy()
                    http_format_info.update({
                        'url': format_url,
                        'format_id': 'http-%s' % asset_type,
                    })
                    formats.append(http_format_info)
                server_prefix = xpath_text(asset, 'serverPrefix')
                if server_prefix:
                    rtmp_format_info = format_info.copy()
                    rtmp_format_info.update({
                        'url': server_prefix,
                        'play_path': xpath_text(asset, 'fileName'),
                        'format_id': 'rtmp-%s' % asset_type,
                    })
                    formats.append(rtmp_format_info)
        self._sort_formats(formats)
        return formats

    def _extract_thumbnails(self, variants, base_url):
        """Collect thumbnails from a <variants> element, largest first."""
        thumbnails = [{
            'url': base_url + xpath_text(variant, 'url'),
            'width': int_or_none(xpath_text(variant, 'width')),
            'height': int_or_none(xpath_text(variant, 'height')),
        } for variant in variants.findall('variant') if xpath_text(variant, 'url')]
        # width/height may be None (int_or_none); treat missing dimensions
        # as 0 so sorting cannot raise a TypeError
        thumbnails.sort(
            key=lambda t: (t['width'] or 0) * (t['height'] or 0), reverse=True)
        return thumbnails
|  | ||||
|  | ||||
class BRMediathekIE(InfoExtractor):
    IE_DESC = 'Bayerischer Rundfunk Mediathek'
    _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})'

    _TESTS = [{
        'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e',
        'md5': 'fdc3d485835966d1622587d08ba632ec',
        'info_dict': {
            'id': 'av:5a1e6a6e8fce6d001871cc8e',
            'ext': 'mp4',
            'title': 'Die Sendung vom 28.11.2017',
            'description': 'md5:6000cdca5912ab2277e5b7339f201ccc',
            'timestamp': 1511942766,
            'upload_date': '20171129',
        }
    }]

    def _real_extract(self, url):
        """Fetch clip metadata (formats, subtitles, thumbnails) from the
        Mediathek GraphQL endpoint."""
        clip_id = self._match_id(url)

        gql_query = """{
  viewer {
    clip(id: "%s") {
      title
      description
      duration
      createdAt
      ageRestriction
      videoFiles {
        edges {
          node {
            publicLocation
            fileSize
            videoProfile {
              width
              height
              bitrate
              encoding
            }
          }
        }
      }
      captionFiles {
        edges {
          node {
            publicLocation
          }
        }
      }
      teaserImages {
        edges {
          node {
            imageFiles {
              edges {
                node {
                  publicLocation
                  width
                  height
                }
              }
            }
          }
        }
      }
    }
  }
}""" % clip_id
        clip = self._download_json(
            'https://proxy-base.master.mango.express/graphql',
            clip_id, data=json.dumps({
                "query": gql_query}).encode(), headers={
                'Content-Type': 'application/json',
            })['data']['viewer']['clip']
        title = clip['title']

        def nodes(container, key):
            # GraphQL connection pattern: {key: {edges: [{node: {...}}]}}
            for edge in container.get(key, {}).get('edges', []):
                yield edge.get('node', {})

        formats = []
        for video_node in nodes(clip, 'videoFiles'):
            file_url = video_node.get('publicLocation')
            if not file_url:
                continue
            if determine_ext(file_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    file_url, clip_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
            profile = video_node.get('videoProfile', {})
            tbr = int_or_none(profile.get('bitrate'))
            formats.append({
                'format_id': 'http-%d' % tbr if tbr else 'http',
                'url': file_url,
                'width': int_or_none(profile.get('width')),
                'height': int_or_none(profile.get('height')),
                'tbr': tbr,
                'filesize': int_or_none(video_node.get('fileSize')),
            })
        self._sort_formats(formats)

        subtitles = {}
        for caption_node in nodes(clip, 'captionFiles'):
            caption_url = caption_node.get('publicLocation')
            if caption_url:
                # no language info in the API; assume German
                subtitles.setdefault('de', []).append({
                    'url': caption_url,
                })

        thumbnails = []
        for teaser_node in nodes(clip, 'teaserImages'):
            for image_node in nodes(teaser_node, 'imageFiles'):
                image_url = image_node.get('publicLocation')
                if not image_url:
                    continue
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image_node.get('width')),
                    'height': int_or_none(image_node.get('height')),
                })

        return {
            'id': clip_id,
            'title': title,
            'description': clip.get('description'),
            'duration': int_or_none(clip.get('duration')),
            'timestamp': parse_iso8601(clip.get('createdAt')),
            'age_limit': int_or_none(clip.get('ageRestriction')),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
							
								
								
									
										84
									
								
								youtube_dl/extractor/bravotv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/bravotv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .adobepass import AdobePassIE | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BravoTVIE(AdobePassIE):
    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
        'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
        'info_dict': {
            'id': 'epL0pmK1kQlT',
            'ext': 'mp4',
            'title': 'The Top Chef Season 16 Winner Is...',
            'description': 'Find out who takes the title of Top Chef!',
            'uploader': 'NBCU-BRAV',
            'upload_date': '20190314',
            'timestamp': 1552591860,
        }
    }, {
        'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Delegates the actual download to the ThePlatform extractor via a
        # smuggled url_transparent result; this method only works out the
        # account PID / release PID and, for TV-Everywhere content, the
        # Adobe Pass auth token.
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # page metadata lives in Drupal's settings JSON blob
        settings = self._parse_json(self._search_regex(
            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
            display_id)
        info = {}
        query = {
            'mbr': 'true',
        }
        account_pid, release_pid = [None] * 2
        tve = settings.get('ls_tve')
        if tve:
            # TV-Everywhere (possibly auth-gated) flow: prefer IDs scraped
            # from the embedded PDK player, else fall back to the settings
            # blob with a hard-coded account PID.
            query['manifest'] = 'm3u'
            mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
            if mobj:
                account_pid, tp_path = mobj.groups()
                # release PID is the last path component of the select path
                release_pid = tp_path.strip('/').split('/')[-1]
            else:
                # hard-coded Bravo account PID -- TODO confirm still valid
                account_pid = 'HNK2IC'
                tp_path = release_pid = tve['release_pid']
            if tve.get('entitlement') == 'auth':
                # authenticated content: obtain an Adobe Pass token and
                # attach it to the ThePlatform query
                adobe_pass = settings.get('tve_adobe_auth', {})
                resource = self._get_mvpd_resource(
                    adobe_pass.get('adobePassResourceId', 'bravo'),
                    tve['title'], release_pid, tve.get('rating'))
                query['auth'] = self._extract_mvpd_auth(
                    url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
        else:
            # unauthenticated playlist flow: IDs and metadata come from the
            # shared playlist section of the Drupal settings
            shared_playlist = settings['ls_playlist']
            account_pid = shared_playlist['account_pid']
            metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
            tp_path = release_pid = metadata.get('release_pid')
            if not release_pid:
                # no release PID -- address the media by GUID instead
                release_pid = metadata['guid']
                tp_path = 'media/guid/2140479951/' + release_pid
            info.update({
                'title': metadata['title'],
                'description': metadata.get('description'),
                'season_number': int_or_none(metadata.get('season_num')),
                'episode_number': int_or_none(metadata.get('episode_num')),
            })
            query['switch'] = 'progressive'
        info.update({
            '_type': 'url_transparent',
            'id': release_pid,
            'url': smuggle_url(update_url_query(
                'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
                query), {'force_smil_url': True}),
            'ie_key': 'ThePlatform',
        })
        return info
							
								
								
									
										91
									
								
								youtube_dl/extractor/breakcom.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								youtube_dl/extractor/breakcom.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BreakIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)'
    _TESTS = [{
        'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
        'info_dict': {
            'id': '2468056',
            'ext': 'mp4',
            'title': 'When Girls Act Like D-Bags',
            'age_limit': 13,
        },
    }, {
        # youtube embed
        'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work',
        'info_dict': {
            'id': 'RrrDLdeL2HQ',
            'ext': 'mp4',
            'title': 'Whale Watching Boat Crashing Into San Diego Dock',
            'description': 'md5:afc1b2772f0a8468be51dd80eb021069',
            'upload_date': '20160331',
            'uploader': 'Steve Holden',
            'uploader_id': 'sdholden07',
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://www.break.com/video/ugc/baby-flex-2773063',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a break.com video, deferring to YouTube for embeds."""
        display_id, video_id = re.match(self._VALID_URL, url).groups()

        webpage = self._download_webpage(url, display_id)

        # some pages are plain YouTube embeds; hand those off wholesale
        youtube_url = YoutubeIE._extract_url(webpage)
        if youtube_url:
            return self.url_result(youtube_url, ie=YoutubeIE.ie_key())

        content = self._parse_json(
            self._search_regex(
                r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage,
                'content'),
            display_id)

        formats = []
        for item in content:
            item_url = url_or_none(item.get('url'))
            if not item_url:
                continue
            # the bitrate is only available encoded in the URL itself
            tbr = int_or_none(self._search_regex(
                r'(\d+)_kbps', item_url, 'tbr', default=None))
            formats.append({
                'url': item_url,
                'format_id': 'http-%d' % tbr if tbr else 'http',
                'tbr': tbr,
            })
        self._sort_formats(formats)

        title = self._search_regex(
            (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
             r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value')

        def page_int(key, name):
            # pull a quoted integer page property, e.g. ratings: "13"
            return int_or_none(self._search_regex(
                r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name,
                default=None))

        age_limit = page_int('ratings', 'age limit')
        video_id = video_id or page_int('pid', 'video id') or display_id

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': age_limit,
            'formats': formats,
        }
							
								
								
									
										677
									
								
								youtube_dl/extractor/brightcove.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										677
									
								
								youtube_dl/extractor/brightcove.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,677 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import re | ||||
| import struct | ||||
|  | ||||
| from .adobepass import AdobePassIE | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_etree_fromstring, | ||||
|     compat_HTTPError, | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
|     compat_xml_parse_error, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     extract_attributes, | ||||
|     ExtractorError, | ||||
|     find_xpath_attr, | ||||
|     fix_xml_ampersands, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     parse_iso8601, | ||||
|     smuggle_url, | ||||
|     str_or_none, | ||||
|     unescapeHTML, | ||||
|     unsmuggle_url, | ||||
|     UnsupportedError, | ||||
|     update_url_query, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BrightcoveLegacyIE(InfoExtractor):
    """Extractor for legacy Brightcove (Flash-era) player URLs.

    Accepts c.brightcove.com / link.brightcove.com viewer URLs as well as
    the internal ``brightcove:<query>`` scheme, and resolves them to a
    players.brightcove.net URL handled by BrightcoveNewIE.
    """
    IE_NAME = 'brightcove:legacy'
    _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'

    _TESTS = [
        {
            # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
            'md5': '5423e113865d26e40624dce2e4b45d95',
            'note': 'Test Brightcove downloads and detection in GenericIE',
            'info_dict': {
                'id': '2371591881001',
                'ext': 'mp4',
                'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
                'uploader': '8TV',
                'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
                'timestamp': 1368213670,
                'upload_date': '20130510',
                'uploader_id': '1589608506001',
            },
            'skip': 'The player has been deactivated by the content owner',
        },
        {
            # From http://medianetwork.oracle.com/video/player/1785452137001
            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
            'info_dict': {
                'id': '1785452137001',
                'ext': 'flv',
                'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
                'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
                'uploader': 'Oracle',
                'timestamp': 1344975024,
                'upload_date': '20120814',
                'uploader_id': '1460825906',
            },
            'skip': 'video not playable',
        },
        {
            # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
            'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
            'info_dict': {
                'id': '2750934548001',
                'ext': 'mp4',
                'title': 'This Bracelet Acts as a Personal Thermostat',
                'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
                # 'uploader': 'Mashable',
                'timestamp': 1382041798,
                'upload_date': '20131017',
                'uploader_id': '1130468786001',
            },
        },
        {
            # test that the default referer works
            # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
            'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
            'info_dict': {
                'id': '2878862109001',
                'ext': 'mp4',
                'title': 'Lost in Motion II',
                'description': 'md5:363109c02998fee92ec02211bd8000df',
                'uploader': 'National Ballet of Canada',
            },
            'skip': 'Video gone',
        },
        {
            # test flv videos served by akamaihd.net
            # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
            # The md5 checksum changes on each download
            'info_dict': {
                'id': '3750436379001',
                'ext': 'flv',
                'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
                'uploader': 'RBTV Old (do not use)',
                'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
                'timestamp': 1409122195,
                'upload_date': '20140827',
                'uploader_id': '710858724001',
            },
            'skip': 'Video gone',
        },
        {
            # playlist with 'videoList'
            # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
            'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
            'info_dict': {
                'title': 'Sealife',
                'id': '3550319591001',
            },
            'playlist_mincount': 7,
            'skip': 'Unsupported URL',
        },
        {
            # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
            'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
            'info_dict': {
                'id': '1522758701001',
                'title': 'Lesson 08',
            },
            'playlist_mincount': 10,
            'skip': 'Unsupported URL',
        },
        {
            # playerID inferred from bcpid
            # from http://www.un.org/chinese/News/story.asp?NewsID=27724
            'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
            'only_matching': True,  # Tested in GenericIE
        }
    ]

    @classmethod
    def _build_brighcove_url(cls, object_str):
        """
        Build a Brightcove url from a xml string containing
        <object class="BrightcoveExperience">{params}</object>
        """

        # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
        object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
                            lambda m: m.group(1) + '/>', object_str)
        # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
        object_str = object_str.replace('<--', '<!--')
        # remove namespace to simplify extraction
        object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
        object_str = fix_xml_ampersands(object_str)

        # If the markup is beyond repair, give up silently (returns None).
        try:
            object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
        except compat_xml_parse_error:
            return

        # Player parameters may live in a flashVars <param>, in individual
        # <param> elements, or in the query string of the object's data URL.
        fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
        if fv_el is not None:
            flashvars = dict(
                (k, v[0])
                for k, v in compat_parse_qs(fv_el.attrib['value']).items())
        else:
            flashvars = {}

        data_url = object_doc.attrib.get('data', '')
        data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)

        def find_param(name):
            # Lookup order: flashVars, then <param> elements, then data URL.
            if name in flashvars:
                return flashvars[name]
            node = find_xpath_attr(object_doc, './param', 'name', name)
            if node is not None:
                return node.attrib['value']
            return data_url_params.get(name)

        params = {}

        playerID = find_param('playerID') or find_param('playerId')
        if playerID is None:
            raise ExtractorError('Cannot find player ID')
        params['playerID'] = playerID

        playerKey = find_param('playerKey')
        # Not all pages define this value
        if playerKey is not None:
            params['playerKey'] = playerKey
        # These fields hold the id of the video
        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
        if videoPlayer is not None:
            if isinstance(videoPlayer, list):
                videoPlayer = videoPlayer[0]
            videoPlayer = videoPlayer.strip()
            # UUID is also possible for videoPlayer (e.g.
            # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
            # or http://www8.hp.com/cn/zh/home.html)
            if not (re.match(
                    r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
                    videoPlayer) or videoPlayer.startswith('ref:')):
                return None
            params['@videoPlayer'] = videoPlayer
        linkBase = find_param('linkBaseURL')
        if linkBase is not None:
            params['linkBaseURL'] = linkBase
        return cls._make_brightcove_url(params)

    @classmethod
    def _build_brighcove_url_from_js(cls, object_js):
        # The layout of JS is as follows:
        # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
        #   // build Brightcove <object /> XML
        # }
        m = re.search(
            r'''(?x)customBC\.createVideo\(
                .*?                                                  # skipping width and height
                ["\'](?P<playerID>\d+)["\']\s*,\s*                   # playerID
                ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s*  # playerKey begins with AQ and is 50 characters
                                                                     # in length, however it's appended to itself
                                                                     # in places, so truncate
                ["\'](?P<videoID>\d+)["\']                           # @videoPlayer
            ''', object_js)
        if m:
            return cls._make_brightcove_url(m.groupdict())

    @classmethod
    def _make_brightcove_url(cls, params):
        """Serialize extracted player params into a legacy viewer URL."""
        return update_url_query(
            'http://c.brightcove.com/services/viewer/htmlFederated', params)

    @classmethod
    def _extract_brightcove_url(cls, webpage):
        """Try to extract the brightcove url from the webpage, returns None
        if it can't be found
        """
        urls = cls._extract_brightcove_urls(webpage)
        return urls[0] if urls else None

    @classmethod
    def _extract_brightcove_urls(cls, webpage):
        """Return a list of all Brightcove URLs from the webpage """

        # 1) <meta property="og:video"/itemprop="embedURL"> pointing at brightcove.com
        url_m = re.search(
            r'''(?x)
                <meta\s+
                    (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
                    content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
            ''', webpage)
        if url_m:
            url = unescapeHTML(url_m.group('url'))
            # Some sites don't add it, we can't download with this url, for example:
            # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
            if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
                return [url]

        # 2) <object class="BrightcoveExperience"> embeds, rebuilt from params
        matches = re.findall(
            r'''(?sx)<object
            (?:
                [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
                [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
            ).+?>\s*</object>''',
            webpage)
        if matches:
            return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))

        # 3) customBC.createVideo(...) JS calls
        matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
        if matches:
            return list(filter(None, [
                cls._build_brighcove_url_from_js(custom_bc)
                for custom_bc in matches]))
        # 4) last resort: <iframe> embeds of link.brightcove.com players
        return [src for _, src in re.findall(
            r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]

    def _real_extract(self, url):
        """Resolve a legacy viewer URL to a BrightcoveNewIE URL.

        Raises UnsupportedError when the publisher id (needed for the new
        player URL) cannot be determined.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        # Change the 'videoId' and others field to '@videoPlayer'
        url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
        # Change bckey (used by bcove.me urls) to playerKey
        url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
        mobj = re.match(self._VALID_URL, url)
        query_str = mobj.group('query')
        query = compat_urlparse.parse_qs(query_str)

        videoPlayer = query.get('@videoPlayer')
        if videoPlayer:
            # We set the original url as the default 'Referer' header
            referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
            video_id = videoPlayer[0]
            if 'playerID' not in query:
                # playerID can be inferred from the /bcpid<digits> path segment
                mobj = re.search(r'/bcpid(\d+)', url)
                if mobj is not None:
                    query['playerID'] = [mobj.group(1)]
            publisher_id = query.get('publisherId')
            if publisher_id and publisher_id[0].isdigit():
                publisher_id = publisher_id[0]
            if not publisher_id:
                player_key = query.get('playerKey')
                if player_key and ',' in player_key[0]:
                    player_key = player_key[0]
                else:
                    # No usable playerKey in the URL: fetch the player page
                    # and scrape the key out of its <param> tag.
                    player_id = query.get('playerID')
                    if player_id and player_id[0].isdigit():
                        headers = {}
                        if referer:
                            headers['Referer'] = referer
                        player_page = self._download_webpage(
                            'http://link.brightcove.com/services/player/bcpid' + player_id[0],
                            video_id, headers=headers, fatal=False)
                        if player_page:
                            player_key = self._search_regex(
                                r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
                                player_page, 'player key', fatal=False)
                if player_key:
                    # The publisher id is the second comma-separated component
                    # of playerKey: URL-safe base64 ('~' as padding) encoding a
                    # big-endian unsigned 64-bit integer.
                    enc_pub_id = player_key.split(',')[1].replace('~', '=')
                    publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
            if publisher_id:
                brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
                if referer:
                    brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
                return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
        # TODO: figure out if it's possible to extract playlistId from playerKey
        # elif 'playerKey' in query:
        #     player_key = query['playerKey']
        #     return self._get_playlist_info(player_key[0])
        raise UnsupportedError(url)
|  | ||||
|  | ||||
| class BrightcoveNewIE(AdobePassIE): | ||||
|     IE_NAME = 'brightcove:new' | ||||
|     _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', | ||||
|         'md5': 'c8100925723840d4b0d243f7025703be', | ||||
|         'info_dict': { | ||||
|             'id': '4463358922001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Meet the man behind Popcorn Time', | ||||
|             'description': 'md5:eac376a4fe366edc70279bfb681aea16', | ||||
|             'duration': 165.768, | ||||
|             'timestamp': 1441391203, | ||||
|             'upload_date': '20150904', | ||||
|             'uploader_id': '929656772001', | ||||
|             'formats': 'mincount:20', | ||||
|         }, | ||||
|     }, { | ||||
|         # with rtmp streams | ||||
|         'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001', | ||||
|         'info_dict': { | ||||
|             'id': '4279049078001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Titansgrave: Chapter 0', | ||||
|             'description': 'Titansgrave: Chapter 0', | ||||
|             'duration': 1242.058, | ||||
|             'timestamp': 1433556729, | ||||
|             'upload_date': '20150606', | ||||
|             'uploader_id': '4036320279001', | ||||
|             'formats': 'mincount:39', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         # playlist stream | ||||
|         'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', | ||||
|         'info_dict': { | ||||
|             'id': '5718313430001', | ||||
|             'title': 'No Audio Playlist', | ||||
|         }, | ||||
|         'playlist_count': 7, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # ref: prefixed video id | ||||
|         'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # non numeric ref: prefixed video id | ||||
|         'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # unavailable video without message but with error_code | ||||
|         'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(ie, webpage): | ||||
|         urls = BrightcoveNewIE._extract_urls(ie, webpage) | ||||
|         return urls[0] if urls else None | ||||
|  | ||||
    @staticmethod
    def _extract_urls(ie, webpage):
        """Return a list of Brightcove player URLs embedded in webpage.

        ``ie`` is the calling extractor instance; it is only used to probe
        ambiguous <video>-tag embeds via ``_is_valid_url``.
        """
        # Reference:
        # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
        # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
        # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player

        entries = []

        # Look for iframe embeds [1]
        for _, url in re.findall(
                r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
            # Protocol-relative src attributes get a default http: scheme.
            entries.append(url if url.startswith('http') else 'http:' + url)

        # Look for <video> tags [2] and embed_in_page embeds [3]
        # [2] looks like a <video data-video-id=...> tag optionally followed
        # by the player's <script src=".../index(.min).js"> tag, from which
        # account id, player id and embed name are captured positionally.
        for video, script_tag, account_id, player_id, embed in re.findall(
                r'''(?isx)
                    (<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
                    (?:.*?
                        (<script[^>]+
                            src=["\'](?:https?:)?//players\.brightcove\.net/
                            (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
                        )
                    )?
                ''', webpage):
            attrs = extract_attributes(video)

            # According to examples from [4] it's unclear whether video id
            # may be optional and what to do when it is
            video_id = attrs.get('data-video-id')
            if not video_id:
                continue

            # Script-tag captures take precedence; data-* attributes are the fallback.
            account_id = account_id or attrs.get('data-account')
            if not account_id:
                continue

            player_id = player_id or attrs.get('data-player') or 'default'
            embed = embed or attrs.get('data-embed') or 'default'

            bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
                account_id, player_id, embed, video_id)

            # Some brightcove videos may be embedded with video tag only and
            # without script tag or any mentioning of brightcove at all. Such
            # embeds are considered ambiguous since they are matched based only
            # on data-video-id and data-account attributes and in the wild may
            # not be brightcove embeds at all. Let's check reconstructed
            # brightcove URLs in case of such embeds and only process valid
            # ones. By this we ensure there is indeed a brightcove embed.
            if not script_tag and not ie._is_valid_url(
                    bc_url, video_id, 'possible brightcove video'):
                continue

            entries.append(bc_url)

        return entries
|  | ||||
|     def _parse_brightcove_metadata(self, json_data, video_id, headers={}): | ||||
|         title = json_data['name'].strip() | ||||
|  | ||||
|         formats = [] | ||||
|         for source in json_data.get('sources', []): | ||||
|             container = source.get('container') | ||||
|             ext = mimetype2ext(source.get('type')) | ||||
|             src = source.get('src') | ||||
|             # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object | ||||
|             if ext == 'ism' or container == 'WVM' or source.get('key_systems'): | ||||
|                 continue | ||||
|             elif ext == 'm3u8' or container == 'M2TS': | ||||
|                 if not src: | ||||
|                     continue | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) | ||||
|             elif ext == 'mpd': | ||||
|                 if not src: | ||||
|                     continue | ||||
|                 formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) | ||||
|             else: | ||||
|                 streaming_src = source.get('streaming_src') | ||||
|                 stream_name, app_name = source.get('stream_name'), source.get('app_name') | ||||
|                 if not src and not streaming_src and (not stream_name or not app_name): | ||||
|                     continue | ||||
|                 tbr = float_or_none(source.get('avg_bitrate'), 1000) | ||||
|                 height = int_or_none(source.get('height')) | ||||
|                 width = int_or_none(source.get('width')) | ||||
|                 f = { | ||||
|                     'tbr': tbr, | ||||
|                     'filesize': int_or_none(source.get('size')), | ||||
|                     'container': container, | ||||
|                     'ext': ext or container.lower(), | ||||
|                 } | ||||
|                 if width == 0 and height == 0: | ||||
|                     f.update({ | ||||
|                         'vcodec': 'none', | ||||
|                     }) | ||||
|                 else: | ||||
|                     f.update({ | ||||
|                         'width': width, | ||||
|                         'height': height, | ||||
|                         'vcodec': source.get('codec'), | ||||
|                     }) | ||||
|  | ||||
|                 def build_format_id(kind): | ||||
|                     format_id = kind | ||||
|                     if tbr: | ||||
|                         format_id += '-%dk' % int(tbr) | ||||
|                     if height: | ||||
|                         format_id += '-%dp' % height | ||||
|                     return format_id | ||||
|  | ||||
|                 if src or streaming_src: | ||||
|                     f.update({ | ||||
|                         'url': src or streaming_src, | ||||
|                         'format_id': build_format_id('http' if src else 'http-streaming'), | ||||
|                         'source_preference': 0 if src else -1, | ||||
|                     }) | ||||
|                 else: | ||||
|                     f.update({ | ||||
|                         'url': app_name, | ||||
|                         'play_path': stream_name, | ||||
|                         'format_id': build_format_id('rtmp'), | ||||
|                     }) | ||||
|                 formats.append(f) | ||||
|         if not formats: | ||||
|             # for sonyliv.com DRM protected videos | ||||
|             s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl') | ||||
|             if s3_source_url: | ||||
|                 formats.append({ | ||||
|                     'url': s3_source_url, | ||||
|                     'format_id': 'source', | ||||
|                 }) | ||||
|  | ||||
|         errors = json_data.get('errors') | ||||
|         if not formats and errors: | ||||
|             error = errors[0] | ||||
|             raise ExtractorError( | ||||
|                 error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         for f in formats: | ||||
|             f.setdefault('http_headers', {}).update(headers) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for text_track in json_data.get('text_tracks', []): | ||||
|             if text_track.get('kind') != 'captions': | ||||
|                 continue | ||||
|             text_track_url = url_or_none(text_track.get('src')) | ||||
|             if not text_track_url: | ||||
|                 continue | ||||
|             lang = (str_or_none(text_track.get('srclang')) | ||||
|                     or str_or_none(text_track.get('label')) or 'en').lower() | ||||
|             subtitles.setdefault(lang, []).append({ | ||||
|                 'url': text_track_url, | ||||
|             }) | ||||
|  | ||||
|         is_live = False | ||||
|         duration = float_or_none(json_data.get('duration'), 1000) | ||||
|         if duration is not None and duration <= 0: | ||||
|             is_live = True | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': self._live_title(title) if is_live else title, | ||||
|             'description': clean_html(json_data.get('description')), | ||||
|             'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), | ||||
|             'duration': duration, | ||||
|             'timestamp': parse_iso8601(json_data.get('published_at')), | ||||
|             'uploader_id': json_data.get('account_id'), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'tags': json_data.get('tags', []), | ||||
|             'is_live': is_live, | ||||
|         } | ||||
|  | ||||
    def _real_extract(self, url):
        """Download Playback API metadata for a Brightcove video/playlist URL.

        Handles geo-bypass hints smuggled into the URL, per-player policy-key
        caching with one retry on a stale key, TVE (Adobe Pass) re-auth, and
        playlist expansion.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        self._initialize_geo_bypass({
            'countries': smuggled_data.get('geo_countries'),
            'ip_blocks': smuggled_data.get('geo_ip_blocks'),
        })

        account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()

        # The policy key is tied to the (account, player) pair and cached on
        # disk so the player JS does not have to be re-fetched on every run.
        policy_key_id = '%s_%s' % (account_id, player_id)
        policy_key = self._downloader.cache.load('brightcove', policy_key_id)
        policy_key_extracted = False
        store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)

        def extract_policy_key():
            # Scrape the policy key out of the player's minified JS bundle:
            # first from the catalog() initializer, then from a bare
            # policyKey field as fallback.
            webpage = self._download_webpage(
                'http://players.brightcove.net/%s/%s_%s/index.min.js'
                % (account_id, player_id, embed), video_id)

            policy_key = None

            catalog = self._search_regex(
                r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
            if catalog:
                catalog = self._parse_json(
                    js_to_json(catalog), video_id, fatal=False)
                if catalog:
                    policy_key = catalog.get('policyKey')

            if not policy_key:
                policy_key = self._search_regex(
                    r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
                    webpage, 'policy key', group='pk')

            store_pk(policy_key)
            return policy_key

        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
        headers = {}
        referrer = smuggled_data.get('referrer')
        if referrer:
            headers.update({
                'Referer': referrer,
                'Origin': re.search(r'https?://[^/]+', referrer).group(0),
            })

        # Two attempts: if a cached policy key turns out to be invalid, the
        # second iteration extracts a fresh one and retries the API call.
        for _ in range(2):
            if not policy_key:
                policy_key = extract_policy_key()
                policy_key_extracted = True
            headers['Accept'] = 'application/json;pk=%s' % policy_key
            try:
                json_data = self._download_json(api_url, video_id, headers=headers)
                break
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
                    json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
                    message = json_data.get('message') or json_data['error_code']
                    if json_data.get('error_subcode') == 'CLIENT_GEO':
                        self.raise_geo_restricted(msg=message)
                    elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
                        # Stale cached key: drop it and retry with a fresh one.
                        policy_key = None
                        store_pk(None)
                        continue
                    raise ExtractorError(message, expected=True)
                raise

        # TVE-protected content requires an additional Adobe Pass token.
        errors = json_data.get('errors')
        if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
            custom_fields = json_data['custom_fields']
            tve_token = self._extract_mvpd_auth(
                smuggled_data['source_url'], video_id,
                custom_fields['bcadobepassrequestorid'],
                custom_fields['bcadobepassresourceid'])
            json_data = self._download_json(
                api_url, video_id, headers={
                    'Accept': 'application/json;pk=%s' % policy_key
                }, query={
                    'tveToken': tve_token,
                })

        if content_type == 'playlist':
            return self.playlist_result(
                [self._parse_brightcove_metadata(vid, vid.get('id'), headers)
                 for vid in json_data.get('videos', []) if vid.get('id')],
                json_data.get('id'), json_data.get('name'),
                json_data.get('description'))

        return self._parse_brightcove_metadata(
            json_data, video_id, headers=headers)
							
								
								
									
										48
									
								
								youtube_dl/extractor/businessinsider.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								youtube_dl/extractor/businessinsider.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .jwplatform import JWPlatformIE | ||||
|  | ||||
|  | ||||
class BusinessInsiderIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
        'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
        'info_dict': {
            'id': 'cjGDb0X9',
            'ext': 'mp4',
            'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
            'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
            'upload_date': '20160611',
            'timestamp': 1465675620,
        },
    }, {
        'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
        'md5': '43f438dbc6da0b89f5ac42f68529d84a',
        'info_dict': {
            'id': '5zJwd4FK',
            'ext': 'mp4',
            'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
            'description': 'md5:2af8975825d38a4fed24717bbe51db49',
            'upload_date': '20170705',
            'timestamp': 1499270528,
        },
    }, {
        'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a Business Insider article to its embedded JW Platform video."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        # Known embed markers, tried in order of specificity.
        embed_patterns = (
            r'data-media-id=["\']([a-zA-Z0-9]{8})',
            r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
            r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
            r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})',
        )
        media_id = self._search_regex(embed_patterns, page, 'jwplatform id')
        return self.url_result(
            'jwplatform:%s' % media_id, ie=JWPlatformIE.ie_key(),
            video_id=display_id)
							
								
								
									
										98
									
								
								youtube_dl/extractor/buzzfeed.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								youtube_dl/extractor/buzzfeed.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .facebook import FacebookIE | ||||
|  | ||||
|  | ||||
class BuzzFeedIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)'
    _TESTS = [{
        'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia',
        'info_dict': {
            'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss',
            'title': 'This Angry Ram Destroys A Punching Bag Like A Boss',
            'description': 'Rambro!',
        },
        'playlist': [{
            'info_dict': {
                'id': 'aVCR29aE_OQ',
                'ext': 'mp4',
                'title': 'Angry Ram destroys a punching bag..',
                'description': 'md5:c59533190ef23fd4458a5e8c8c872345',
                'upload_date': '20141024',
                'uploader_id': 'Buddhanz1',
                'uploader': 'Angry Ram',
            }
        }]
    }, {
        'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia',
        'params': {
            'skip_download': True,  # Got enough YouTube download tests
        },
        'info_dict': {
            'id': 'look-at-this-cute-dog-omg',
            'description': 're:Munchkin the Teddy Bear is back ?!',
            'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill',
        },
        'playlist': [{
            'info_dict': {
                'id': 'mVmBL8B-In0',
                'ext': 'mp4',
                'title': 're:Munchkin the Teddy Bear gets her exercise',
                'description': 'md5:28faab95cda6e361bcff06ec12fc21d8',
                'upload_date': '20141124',
                'uploader_id': 'CindysMunchkin',
                'uploader': 're:^Munchkin the',
            },
        }]
    }, {
        'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK',
        'info_dict': {
            'id': 'the-most-adorable-crash-landing-ever',
            'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing',
            'description': 'This gosling knows how to stick a landing.',
        },
        'playlist': [{
            'md5': '763ca415512f91ca62e4621086900a23',
            'info_dict': {
                'id': '971793786185728',
                'ext': 'mp4',
                'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...',
                'uploader': 'Calgary Outdoor Centre-University of Calgary',
            },
        }],
        'add_ie': ['Facebook'],
    }]

    def _real_extract(self, url):
        """Collect every embedded video bucket and Facebook embed as a playlist."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        entries = []
        # Each embed carries its metadata as JSON in a rel:bf_bucket_data
        # attribute on the embed container.
        for bucket_json in re.findall(
                r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'',
                webpage):
            bucket = json.loads(bucket_json)
            video = bucket.get('video') or bucket.get('progload_video')
            if video:
                entries.append(self.url_result(video['url']))

        entries.extend(
            self.url_result(facebook_url)
            for facebook_url in FacebookIE._extract_urls(webpage))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'entries': entries,
        }
							
								
								
									
										117
									
								
								youtube_dl/extractor/byutv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										117
									
								
								youtube_dl/extractor/byutv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,117 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     merge_dicts, | ||||
|     parse_duration, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class BYUtvIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
    _TESTS = [{
        # ooyalaVOD
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
        'info_dict': {
            'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
            'display_id': 'studio-c-season-5-episode-5',
            'ext': 'mp4',
            'title': 'Season 5 Episode 5',
            'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65',
            'thumbnail': r're:^https?://.*',
            'duration': 1486.486,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }, {
        # dvr
        'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
        'info_dict': {
            'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451',
            'display_id': 'byu-softball-pacific-vs-byu-41219---game-2',
            'ext': 'mp4',
            'title': 'Pacific vs. BYU (4/12/19)',
            'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3',
            'duration': 11645,
        },
        'params': {
            'skip_download': True
        },
    }, {
        'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
        'only_matching': True,
    }, {
        'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a BYUtv video, delegating Ooyala-hosted content.

        Non-Ooyala responses map format kinds to per-format dicts; formats
        and episode metadata are collected across all of them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id') or video_id

        video = self._download_json(
            'https://api.byutv.org/api3/catalog/getvideosforcontent',
            display_id, query={
                'contentid': video_id,
                'channel': 'byutv',
                'x-byutv-context': 'web$US',
            }, headers={
                'x-byutv-context': 'web$US',
                'x-byutv-platformkey': 'xsaaw9c7y5',
            })

        ep = video.get('ooyalaVOD')
        if ep:
            return {
                '_type': 'url_transparent',
                'ie_key': 'Ooyala',
                'url': 'ooyala:%s' % ep['providerId'],
                'id': video_id,
                'display_id': display_id,
                'title': ep.get('title'),
                'description': ep.get('description'),
                'thumbnail': ep.get('imageThumbnail'),
            }

        info = {}
        formats = []
        for format_id, ep in video.items():
            if not isinstance(ep, dict):
                continue
            video_url = url_or_none(ep.get('videoUrl'))
            if not video_url:
                continue
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    video_url, video_id, mpd_id='dash', fatal=False))
            else:
                formats.append({
                    'url': video_url,
                    'format_id': format_id,
                })
            # BUGFIX: merge_dicts() returns a new merged dict rather than
            # mutating its first argument, so the result must be reassigned.
            # Previously the return value was discarded, leaving `info` empty
            # and the title always falling back to display_id.
            info = merge_dicts(info, {
                'title': ep.get('title'),
                'description': ep.get('description'),
                'thumbnail': ep.get('imageThumbnail'),
                'duration': parse_duration(ep.get('length')),
            })
        self._sort_formats(formats)

        # merge_dicts keeps the first non-empty value, so episode metadata
        # (when present) takes precedence over the display_id fallback title.
        return merge_dicts(info, {
            'id': video_id,
            'display_id': display_id,
            'title': display_id,
            'formats': formats,
        })
							
								
								
									
										65
									
								
								youtube_dl/extractor/c56.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/c56.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import js_to_json | ||||
|  | ||||
|  | ||||
class C56IE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)'
    IE_NAME = '56.com'
    _TESTS = [{
        'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html',
        'md5': 'e59995ac63d0457783ea05f93f12a866',
        'info_dict': {
            'id': '93440716',
            'ext': 'flv',
            'title': '网事知多少 第32期:车怒',
            'duration': 283.813,
        },
    }, {
        'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html',
        'md5': '',
        'info_dict': {
            'id': '82247482',
            'title': '爱的诅咒之杜鹃花开',
        },
        'playlist_count': 7,
        'add_ie': ['Sohu'],
    }]

    def _real_extract(self, url):
        """Extract a 56.com video, delegating Sohu-backed pages to SohuIE."""
        text_id = re.match(self._VALID_URL, url, flags=re.VERBOSE).group('textid')

        webpage = self._download_webpage(url, text_id)
        # Some pages merely wrap a Sohu player; hand those off.
        sohu_info_raw = self._search_regex(
            r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None)
        if sohu_info_raw:
            sohu_info = self._parse_json(
                sohu_info_raw, text_id, transform_source=js_to_json)
            return self.url_result(sohu_info['url'], 'Sohu')

        page = self._download_json(
            'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info')

        info = page['info']

        formats = []
        for rfile in info['rfiles']:
            formats.append({
                'format_id': rfile['type'],
                'filesize': int(rfile['filesize']),
                'url': rfile['url'],
            })
        self._sort_formats(formats)

        return {
            'id': info['vid'],
            'title': info['Subject'],
            'duration': int(info['duration']) / 1000.0,
            'formats': formats,
            'thumbnail': info.get('bimg') or info.get('img'),
        }
							
								
								
									
										161
									
								
								youtube_dl/extractor/camdemy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										161
									
								
								youtube_dl/extractor/camdemy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,161 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     parse_duration, | ||||
|     str_to_int, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CamdemyIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        # single file
        'url': 'http://www.camdemy.com/media/5181/',
        'md5': '5a5562b6a98b37873119102e052e311b',
        'info_dict': {
            'id': '5181',
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'ss11spring',
            'duration': 1591,
            'upload_date': '20130114',
            'view_count': int,
        }
    }, {
        # With non-empty description
        # webpage returns "No permission or not login"
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
            'id': '13885',
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
            'creator': 'evercam',
            'duration': 318,
        }
    }, {
        # External source (YouTube)
        'url': 'http://www.camdemy.com/media/14842',
        'info_dict': {
            'id': '2vsYQzNIsJo',
            'ext': 'mp4',
            'title': 'Excel 2013 Tutorial - How to add Password Protection',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'upload_date': '20130211',
            'uploader': 'Hun Kim',
            'uploader_id': 'hunkimtutorials',
        },
        'params': {
            'skip_download': True,
        },
    }] 

    def _real_extract(self, url):
        """Extract a Camdemy media page, delegating externally-hosted sources."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        # Media hosted elsewhere (e.g. YouTube) is only linked from the page;
        # hand such URLs off to the matching extractor.
        external_url = self._html_search_regex(
            r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
            webpage, 'external source', default=None, group='url')
        if external_url:
            return self.url_result(external_url)

        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)

        title = oembed_obj['title']
        thumbnail_url = oembed_obj['thumbnail_url']
        # The media file lives in a video/ folder next to the thumbnail.
        media_folder = compat_urlparse.urljoin(thumbnail_url, 'video/')
        filelist = self._download_xml(
            compat_urlparse.urljoin(media_folder, 'fileList.xml'),
            video_id, 'Downloading filelist XML')
        media_file = filelist.find('./video/item/fileName').text
        video_url = compat_urlparse.urljoin(media_folder, media_file)

        # Some URLs return "No permission or not login" in a webpage despite being
        # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
        upload_date = unified_strdate(self._search_regex(
            r'>published on ([^<]+)<', webpage,
            'upload date', default=None))
        view_count = str_to_int(self._search_regex(
            r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
            webpage, 'view count', default=None))
        description = self._html_search_meta(
            'description', webpage, default=None) or clean_html(
            oembed_obj.get('description'))

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'thumbnail': thumbnail_url,
            'description': description,
            'creator': oembed_obj.get('author_name'),
            'duration': parse_duration(oembed_obj.get('duration')),
            'upload_date': upload_date,
            'view_count': view_count,
        }
|  | ||||
|  | ||||
class CamdemyFolderIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # links with trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145
    }, {
        # links without trailing slash
        # and multi-page
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }, {
        # with displayMode parameter. For testing the codes to add parameters
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }]

    def _real_extract(self, url):
        """Extract all media entries of a Camdemy folder as a playlist."""
        folder_id = self._match_id(url)

        # Force displayMode=list so every link appears on a single page.
        url_parts = list(compat_urlparse.urlparse(url))
        params = dict(compat_urlparse.parse_qsl(url_parts[4]))
        params.update({'displayMode': 'list'})
        url_parts[4] = compat_urllib_parse_urlencode(params)
        listing_url = compat_urlparse.urlunparse(url_parts)

        page = self._download_webpage(listing_url, folder_id)

        entries = [
            self.url_result('http://www.camdemy.com' + media_path)
            for media_path in re.findall(r"href='(/media/\d+/?)'", page)
        ]

        folder_title = self._html_search_meta('keywords', page)

        return self.playlist_result(entries, folder_id, folder_title)
							
								
								
									
										98
									
								
								youtube_dl/extractor/cammodels.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								youtube_dl/extractor/cammodels.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CamModelsIE(InfoExtractor):
    """Extractor for live streams on cammodels.com."""

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.cammodels.com/cam/AutumnKnight/',
        'only_matching': True,
        'age_limit': 18
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)

        webpage = self._download_webpage(
            url, user_id, headers=self.geo_verification_headers())

        manifest_root = self._html_search_regex(
            r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None)

        if not manifest_root:
            # Map known "no stream" page states onto friendly, expected errors;
            # anything else is reported as an extraction failure.
            known_errors = (
                ("I'm offline, but let's stay connected", 'This user is currently offline'),
                ('in a private show', 'This user is in a private show'),
                ('is currently performing LIVE', 'This model is currently performing live'),
            )
            error, expected = 'Unable to find manifest URL root', False
            for needle, message in known_errors:
                if needle in webpage:
                    error, expected = message, True
                    break
            raise ExtractorError(error, expected=expected)

        manifest = self._download_json(
            '%s%s.json' % (manifest_root, user_id), user_id)

        formats = []
        for fmt_name, fmt_info in manifest['formats'].items():
            if not isinstance(fmt_info, dict):
                continue
            streams = fmt_info.get('encodings')
            if not isinstance(streams, list):
                continue
            vcodec = fmt_info.get('videoCodec')
            acodec = fmt_info.get('audioCodec')
            for stream in streams:
                if not isinstance(stream, dict):
                    continue
                stream_url = url_or_none(stream.get('location'))
                if not stream_url:
                    continue

                height = int_or_none(stream.get('videoHeight'))
                # Append the height (e.g. "720p") to the format id when known.
                if height is None:
                    format_id = fmt_name
                else:
                    format_id = '%s-%dp' % (fmt_name, height)
                f = {
                    'url': stream_url,
                    'format_id': format_id,
                    'width': int_or_none(stream.get('videoWidth')),
                    'height': height,
                    'vbr': int_or_none(stream.get('videoKbps')),
                    'abr': int_or_none(stream.get('audioKbps')),
                    'fps': int_or_none(stream.get('fps')),
                    'vcodec': vcodec,
                    'acodec': acodec,
                }
                if 'rtmp' in fmt_name:
                    f['ext'] = 'flv'
                elif 'hls' in fmt_name:
                    f.update({
                        'ext': 'mp4',
                        # hls skips fragments, preferring rtmp
                        'preference': -1,
                    })
                else:
                    # Unknown delivery protocol: skip rather than guess.
                    continue
                formats.append(f)
        self._sort_formats(formats)

        return {
            'id': user_id,
            'title': self._live_title(user_id),
            'is_live': True,
            'formats': formats,
            'age_limit': 18
        }
							
								
								
									
										71
									
								
								youtube_dl/extractor/camtube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								youtube_dl/extractor/camtube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,71 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unified_timestamp, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CamTubeIE(InfoExtractor):
    """Extractor for recorded streams on camtube.co."""

    _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female',
        'info_dict': {
            'id': '42ad3956-dd5b-445a-8313-803ea6079fac',
            'display_id': 'minafay-030618-1136-chaturbate-female',
            'ext': 'mp4',
            'title': 'minafay-030618-1136-chaturbate-female',
            'duration': 1274,
            'timestamp': 1528018608,
            'upload_date': '20180603',
            'age_limit': 18
        },
        'params': {
            'skip_download': True,
        },
    }]

    _API_BASE = 'https://api.camtube.co'

    def _real_extract(self, url):
        display_id = self._match_id(url)

        # A fresh session token must be obtained first and passed back to
        # the API as a cookie before recording metadata can be fetched.
        token = self._download_json(
            '%s/rpc/session/new' % self._API_BASE, display_id,
            'Downloading session token')['token']
        self._set_cookie('api.camtube.co', 'session', token)

        video = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, display_id), display_id,
            headers={'Referer': url})

        video_id = video['uuid']

        # Single HLS manifest served by the API; no other formats exposed.
        formats = [{
            'url': '%s/recordings/%s/manifest.m3u8'
                   % (self._API_BASE, video_id),
            'format_id': 'hls',
            'ext': 'mp4',
            'protocol': 'm3u8_native',
        }]

        return {
            'id': video_id,
            'display_id': display_id,
            'title': display_id,
            'timestamp': unified_timestamp(video.get('createdAt')),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('viewCount')),
            'like_count': int_or_none(video.get('likeCount')),
            'creator': video.get('stageName'),
            'formats': formats,
            'age_limit': 18
        }
							
								
								
									
										89
									
								
								youtube_dl/extractor/camwithher.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								youtube_dl/extractor/camwithher.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,89 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CamWithHerIE(InfoExtractor):
    """Extractor for clip pages on camwithher.tv (RTMP delivery)."""

    _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)'

    _TESTS = [{
        'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=',
        'info_dict': {
            'id': '5644',
            'ext': 'flv',
            'title': 'Periscope Tease',
            'description': 'In the clouds teasing on periscope to my favorite song',
            'duration': 240,
            'view_count': int,
            'comment_count': int,
            'uploader': 'MileenaK',
            'upload_date': '20160322',
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937',
        'only_matching': True,
    }, {
        'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=',
        'only_matching': True,
    }, {
        'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        flv_id = self._html_search_regex(
            r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id')

        # Video URL construction algorithm is reverse-engineered from
        # cwhplayer.swf: newer clips (numeric id > 2010) use an mp4 play path.
        if int(flv_id) > 2010:
            play_path = 'mp4:%s.mp4' % flv_id
        else:
            play_path = flv_id
        rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % play_path

        title = self._html_search_regex(
            r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title')
        description = self._html_search_regex(
            r'>Description:</span>(.+?)</div>', webpage, 'description', default=None)

        # The rendered runtime may contain spaces/dashes; strip them before
        # handing the value to parse_duration.
        runtime = self._search_regex(
            r'Runtime\s*:\s*(.+?) \|', webpage, 'duration', default=None)
        duration = parse_duration(re.sub(r'[\s-]', '', runtime)) if runtime else None

        view_count = int_or_none(self._search_regex(
            r'Views\s*:\s*(\d+)', webpage, 'view count', default=None))
        comment_count = int_or_none(self._search_regex(
            r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None))
        uploader = self._search_regex(
            r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None)
        upload_date = unified_strdate(self._search_regex(
            r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None))

        return {
            'id': flv_id,
            'url': rtmp_url,
            'ext': 'flv',
            'no_resume': True,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'comment_count': comment_count,
            'uploader': uploader,
            'upload_date': upload_date,
            'age_limit': 18
        }
							
								
								
									
										73
									
								
								youtube_dl/extractor/canalc2.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/canalc2.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import parse_duration | ||||
|  | ||||
|  | ||||
class Canalc2IE(InfoExtractor):
    """Extractor for canalc2.tv (and the Strasbourg archive mirror).

    Formats are scraped from ``file = "..."`` assignments in the player
    markup; RTMP URLs are split into app/play_path components, anything
    else is treated as a plain HTTP format.  When no such assignment is
    found, the HTML5 <video> element is used as a fallback.
    """

    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.canalc2.tv/video/12163',
        'md5': '060158428b650f896c542dfbb3d6487f',
        'info_dict': {
            'id': '12163',
            'ext': 'mp4',
            'title': 'Terrasses du Numérique',
            'duration': 122,
        },
    }, {
        'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Archive URLs carry the same numeric id; always fetch the
        # canonical page on www.canalc2.tv.
        webpage = self._download_webpage(
            'http://www.canalc2.tv/video/%s' % video_id, video_id)

        title = self._html_search_regex(
            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
            webpage, 'title')

        formats = []
        for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
            if video_url.startswith('rtmp://'):
                rtmp = re.search(
                    r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
                if not rtmp:
                    # Bug fix: an rtmp:// URL not matching the expected
                    # layout previously crashed with AttributeError on
                    # rtmp.group(); skip such URLs instead.
                    continue
                formats.append({
                    'url': rtmp.group('url'),
                    'format_id': 'rtmp',
                    'ext': 'flv',
                    'app': rtmp.group('app'),
                    'play_path': rtmp.group('play_path'),
                    'page_url': url,
                })
            else:
                formats.append({
                    'url': video_url,
                    'format_id': 'http',
                })

        if formats:
            info = {
                'formats': formats,
            }
        else:
            # No file= assignment found: fall back to the HTML5 player.
            info = self._parse_html5_media_entries(url, webpage, url)[0]

        self._sort_formats(info['formats'])

        info.update({
            'id': video_id,
            'title': title,
            'duration': parse_duration(self._search_regex(
                r'id=["\']video_duree["\'][^>]*>([^<]+)',
                webpage, 'duration', fatal=False)),
        })
        return info
							
								
								
									
										116
									
								
								youtube_dl/extractor/canalplus.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								youtube_dl/extractor/canalplus.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     # ExtractorError, | ||||
|     # HEADRequest, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CanalplusIE(InfoExtractor):
    """Extractor for mycanal.fr and piwiplus.fr video pages."""

    IE_DESC = 'mycanal.fr and piwiplus.fr'
    _VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)'
    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json'
    # Maps the URL host part onto the site id used by the info API.
    _SITE_ID_MAP = {
        'mycanal': 'cplus',
        'piwiplus': 'teletoon',
    }

    # Only works for direct mp4 URLs
    _GEO_COUNTRIES = ['FR']

    _TESTS = [{
        'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061',
        'info_dict': {
            'id': '1397061',
            'display_id': 'lolywood',
            'ext': 'mp4',
            'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34',
            'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e',
            'upload_date': '20160602',
        },
    }, {
        # geo restricted, bypassed
        'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
        'info_dict': {
            'id': '1108190',
            'display_id': 'pid1405-le-labyrinthe-boing-super-ranger',
            'ext': 'mp4',
            'title': 'BOING SUPER RANGER - Ep : Le labyrinthe',
            'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff',
            'upload_date': '20140724',
        },
        'expected_warnings': ['HTTP Error 403: Forbidden'],
    }]

    def _real_extract(self, url):
        site, display_id, video_id = re.match(self._VALID_URL, url).groups()
        site_id = self._SITE_ID_MAP[site]

        video_data = self._download_json(
            self._VIDEO_INFO_TEMPLATE % (site_id, video_id),
            video_id, 'Downloading video JSON')

        # The endpoint may answer with a list of related videos; keep ours.
        if isinstance(video_data, list):
            matching = [video for video in video_data if video.get('ID') == video_id]
            video_data = matching[0]
        media = video_data['MEDIA']
        infos = video_data['INFOS']

        quality_key = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD'])

        # NOTE(review): an earlier HEAD-request check against the first video
        # URL (detecting a '/blocage' redirect for geo-restricted content)
        # was disabled here; geo handling now relies on _GEO_COUNTRIES.

        formats = []
        for format_id, format_url in media['VIDEOS'].items():
            if not format_url:
                continue
            if format_id == 'HLS':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False))
            elif format_id == 'HDS':
                formats.extend(self._extract_f4m_formats(
                    format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False))
            else:
                formats.append({
                    # the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js
                    'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes',
                    'format_id': format_id,
                    'preference': quality_key(format_id),
                })
        self._sort_formats(formats)

        thumbnails = [{
            'id': image_id,
            'url': image_url,
        } for image_id, image_url in media.get('images', {}).items()]

        titrage = infos['TITRAGE']

        return {
            'id': video_id,
            'display_id': display_id,
            'title': '%s - %s' % (titrage['TITRE'], titrage['SOUS_TITRE']),
            'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')),
            'thumbnails': thumbnails,
            'description': infos.get('DESCRIPTION'),
            'duration': int_or_none(infos.get('DURATION')),
            'view_count': int_or_none(infos.get('NB_VUES')),
            'like_count': int_or_none(infos.get('NB_LIKES')),
            'comment_count': int_or_none(infos.get('NB_COMMENTS')),
            'formats': formats,
        }
							
								
								
									
										368
									
								
								youtube_dl/extractor/canvas.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										368
									
								
								youtube_dl/extractor/canvas.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,368 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .gigya import GigyaBaseIE | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     strip_or_none, | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     merge_dicts, | ||||
|     parse_iso8601, | ||||
|     str_or_none, | ||||
|     url_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CanvasIE(InfoExtractor):
    """Extractor for assets served by the VRT mediazone API
    (canvas, een, ketnet, vrtvideo, vrtnieuws, sporza)."""

    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
        'md5': '68993eda72ef62386a15ea2cf3c93107',
        'info_dict': {
            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
            'ext': 'mp4',
            'title': 'Nachtwacht: De Greystook',
            'description': 'Nachtwacht: De Greystook',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1468.04,
        },
        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
    }, {
        'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
        'only_matching': True,
    }]
    # Plain HLS entries can be downloaded natively; AES-encrypted ones
    # must go through the generic m3u8 downloader.
    _HLS_ENTRY_PROTOCOLS_MAP = {
        'HLS': 'm3u8_native',
        'HLS_AES': 'm3u8',
    }
    _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        site_id, video_id = mobj.group('site_id'), mobj.group('id')

        # Old API endpoint, serves more formats but may fail for some videos
        data = self._download_json(
            'https://mediazone.vrt.be/api/v1/%s/assets/%s'
            % (site_id, video_id), video_id, 'Downloading asset JSON',
            'Unable to download asset JSON', fatal=False)

        if not data:
            # Fall back to the newer token-based endpoint.
            token = self._download_json(
                '%s/tokens' % self._REST_API_BASE, video_id,
                'Downloading token', data=b'',
                headers={'Content-Type': 'application/json'})['vrtPlayerToken']
            data = self._download_json(
                '%s/videos/%s' % (self._REST_API_BASE, video_id),
                video_id, 'Downloading video JSON', fatal=False, query={
                    'vrtPlayerToken': token,
                    'client': '%s@PROD' % site_id,
                }, expected_status=400)
            message = data.get('message')
            if message and not data.get('title'):
                if data.get('code') == 'AUTHENTICATION_REQUIRED':
                    self.raise_login_required(message)
                raise ExtractorError(message, expected=True)

        title = data['title']

        formats = []
        for target in data['targetUrls']:
            format_url = url_or_none(target.get('url'))
            format_type = str_or_none(target.get('type'))
            if not format_url or not format_type:
                continue
            format_type = format_type.upper()
            hls_protocol = self._HLS_ENTRY_PROTOCOLS_MAP.get(format_type)
            if hls_protocol:
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', hls_protocol,
                    m3u8_id=format_type, fatal=False))
            elif format_type == 'HDS':
                formats.extend(self._extract_f4m_formats(
                    format_url, video_id, f4m_id=format_type, fatal=False))
            elif format_type == 'MPEG_DASH':
                formats.extend(self._extract_mpd_formats(
                    format_url, video_id, mpd_id=format_type, fatal=False))
            elif format_type == 'HSS':
                formats.extend(self._extract_ism_formats(
                    format_url, video_id, ism_id='mss', fatal=False))
            else:
                formats.append({
                    'format_id': format_type,
                    'url': format_url,
                })
        self._sort_formats(formats)

        subtitles = {}
        subtitle_urls = data.get('subtitleUrls')
        if isinstance(subtitle_urls, list):
            for subtitle in subtitle_urls:
                subtitle_url = subtitle.get('url')
                # Only "CLOSED" captions are exposed; language is always Dutch.
                if subtitle_url and subtitle.get('type') == 'CLOSED':
                    subtitles.setdefault('nl', []).append({'url': subtitle_url})

        return {
            'id': video_id,
            'display_id': video_id,
            'title': title,
            'description': data.get('description'),
            'formats': formats,
            'duration': float_or_none(data.get('duration'), 1000),
            'thumbnail': data.get('posterImageUrl'),
            'subtitles': subtitles,
        }
|  | ||||
|  | ||||
class CanvasEenIE(InfoExtractor):
    """Extractor for canvas.be and een.be pages; resolves the embedded
    mediazone asset id and delegates extraction to CanvasIE."""

    IE_DESC = 'canvas.be and een.be'
    _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
        'md5': 'ed66976748d12350b118455979cca293',
        'info_dict': {
            'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
            'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
            'ext': 'flv',
            'title': 'De afspraak veilt voor de Warmste Week',
            'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 49.02,
        },
        'expected_warnings': ['is not a supported codec'],
    }, {
        # with subtitles
        'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
        'info_dict': {
            'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
            'display_id': 'pieter-0167',
            'ext': 'mp4',
            'title': 'Pieter 0167',
            'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2553.08,
            'subtitles': {
                'nl': [{
                    'ext': 'vtt',
                }],
            },
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Pagina niet gevonden',
    }, {
        'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
        'info_dict': {
            'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
            'display_id': 'emma-pakt-thilly-aan',
            'ext': 'mp4',
            'title': 'Emma pakt Thilly aan',
            'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 118.24,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['is not a supported codec'],
    }, {
        'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        site_id, display_id = mobj.group('site_id'), mobj.group('id')

        webpage = self._download_webpage(url, display_id)

        # Prefer the explicit page header; fall back to Open Graph metadata.
        title = strip_or_none(
            self._search_regex(
                r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
                webpage, 'title', default=None)
            or self._og_search_title(webpage, default=None))

        video_id = self._html_search_regex(
            r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
            group='id')

        return {
            '_type': 'url_transparent',
            'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': self._og_search_description(webpage),
        }
|  | ||||
|  | ||||
class VrtNUIE(GigyaBaseIE):
    """Extractor for VRT NU (vrt.be/vrtnu), which requires a (free) account.

    Login goes through the Gigya SSO service (via GigyaBaseIE); the media
    itself lives on the mediazone.vrt.be API and is delegated to CanvasIE
    through a url_transparent result.
    """
    IE_DESC = 'VrtNU.be'
    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        # Available via old API endpoint
        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
        'info_dict': {
            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
            'ext': 'mp4',
            'title': 'De zwarte weduwe',
            'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
            'duration': 1457.04,
            'thumbnail': r're:^https?://.*\.jpg$',
            'season': 'Season 1',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'This video is only available for registered users',
        'params': {
            'username': '<snip>',
            'password': '<snip>',
        },
        'expected_warnings': ['is not a supported codec'],
    }, {
        # Only available via new API endpoint
        'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
        'info_dict': {
            'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
            'ext': 'mp4',
            'title': 'Aflevering 5',
            'description': 'Wie valt door de mand tijdens een missie?',
            'duration': 2967.06,
            'season': 'Season 1',
            'season_number': 1,
            'episode_number': 5,
        },
        'skip': 'This video is only available for registered users',
        'params': {
            'username': '<snip>',
            'password': '<snip>',
        },
        'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
    }]
    _NETRC_MACHINE = 'vrtnu'
    # Gigya API key and context id used for the SSO login call
    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
    _CONTEXT_ID = 'R3595707040'

    def _real_initialize(self):
        self._login()

    def _login(self):
        """Log in via Gigya, then fetch the session cookies from token.vrt.be.

        Silently returns when no credentials are configured; a protected
        video will then trigger raise_login_required() in _real_extract.
        """
        username, password = self._get_login_info()
        if username is None:
            return

        auth_info = self._gigya_login({
            'APIKey': self._APIKEY,
            'targetEnv': 'jssdk',
            'loginID': username,
            'password': password,
            'authMode': 'cookie',
        })

        # Sometimes authentication fails for no good reason, so retry up to
        # three times. The token endpoint returns no actual token; on success
        # it only sets the cookies needed for subsequent requests.
        for _ in range(3):
            try:
                self._request_webpage(
                    'https://token.vrt.be',
                    None, note='Requesting a token', errnote='Could not get a token',
                    headers={
                        'Content-Type': 'application/json',
                        'Referer': 'https://www.vrt.be/vrtnu/',
                    },
                    data=json.dumps({
                        'uid': auth_info['UID'],
                        'uidsig': auth_info['UIDSignature'],
                        'ts': auth_info['signatureTimestamp'],
                        'email': auth_info['profile']['email'],
                    }).encode('utf-8'))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                    # Best-effort retry on 401; extraction of protected
                    # content will fail later if all attempts are exhausted.
                    self.report_warning('Authentication failed')
                    self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
                else:
                    # Unexpected failure: re-raise preserving the traceback
                    raise
            else:
                break

    def _real_extract(self, url):
        display_id = self._match_id(url)

        webpage, urlh = self._download_webpage_handle(url, display_id)

        info = self._search_json_ld(webpage, display_id, default={})

        # title is optional here since it may be extracted by the extractor
        # that is delegated to via url_transparent below
        title = strip_or_none(self._html_search_regex(
            r'(?ms)<h1 class="content__heading">(.+?)</h1>',
            webpage, 'title', default=None))

        description = self._html_search_regex(
            r'(?ms)<div class="content__description">(.+?)</div>',
            webpage, 'description', default=None)

        season = self._html_search_regex(
            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
                    <span>seizoen\ (.+?)</span>\s*
                </div>''',
             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
            webpage, 'season', default=None)

        season_number = int_or_none(season)

        episode_number = int_or_none(self._html_search_regex(
            r'''(?xms)<div\ class="content__episode">\s*
                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
                </div>''',
            webpage, 'episode_number', default=None))

        release_date = parse_iso8601(self._html_search_regex(
            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
            webpage, 'release_date', default=None))

        # Strip query string, fragment and trailing slashes before appending
        # the metadata suffix
        clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
        securevideo_url = clean_url + '.mssecurevideo.json'

        try:
            video = self._download_json(securevideo_url, display_id)
        except ExtractorError as e:
            # The metadata JSON is behind the login wall: a 401 here means
            # credentials are missing or invalid.
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                self.raise_login_required()
            raise

        # We are dealing with a '../<show>.relevant' URL that redirects to
        # the actual episode page
        redirect_url = video.get('url')
        if redirect_url:
            return self.url_result(self._proto_relative_url(redirect_url, 'https:'))

        # There is only one entry, but with an unknown key, so just take
        # the first value without materializing the whole list
        video_id = next(iter(video.values())).get('videoid')

        return merge_dicts(info, {
            '_type': 'url_transparent',
            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'season': season,
            'season_number': season_number,
            'episode_number': episode_number,
            'release_date': release_date,
        })
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	 Unknown
					Unknown