mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-30 19:41:27 +00:00

Merge branch 'yt-dlp:master' into threads

Renan D.
2024-07-25 03:55:32 -03:00
committed by GitHub
946 changed files with 11894 additions and 9382 deletions

View File

@@ -4,6 +4,7 @@ import copy
import datetime as dt
import errno
import fileinput
import functools
import http.cookiejar
import io
import itertools
@@ -24,7 +25,7 @@ import traceback
import unicodedata
from .cache import Cache
from .compat import functools, urllib # isort: split
from .compat import urllib # isort: split
from .compat import compat_os_name, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
@@ -109,7 +110,6 @@ from .utils import (
determine_protocol,
encode_compat_str,
encodeFilename,
error_to_compat_str,
escapeHTML,
expand_path,
extract_basic_auth,
@@ -159,7 +159,7 @@ from .utils import (
write_json_file,
write_string,
)
from .utils._utils import _YDLLogger
from .utils._utils import _UnsafeExtensionError, _YDLLogger
from .utils.networking import (
HTTPHeaderDict,
clean_headers,
@@ -172,6 +172,20 @@ if compat_os_name == 'nt':
import ctypes
def _catch_unsafe_extension_error(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except _UnsafeExtensionError as error:
self.report_error(
f'The extracted extension ({error.extension!r}) is unusual '
'and will be skipped for safety reasons. '
f'If you believe this is an error{bug_reports_message(",")}')
return wrapper
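
As a side note, here is a minimal self-contained sketch of the wrapper pattern this hunk introduces, using stand-in names (DemoUnsafeExtensionError, Reporter) rather than yt-dlp's real classes; the point is that the decorated method reports the error and returns None instead of letting it propagate:

import functools

class DemoUnsafeExtensionError(ValueError):
    # stand-in for yt-dlp's _UnsafeExtensionError
    def __init__(self, extension):
        super().__init__(f'unsafe extension: {extension!r}')
        self.extension = extension

def catch_unsafe_extension(func):
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except DemoUnsafeExtensionError as error:
            self.report_error(f'The extracted extension ({error.extension!r}) is unusual')
    return wrapper

class Reporter:
    def report_error(self, message):
        print('ERROR:', message)

    @catch_unsafe_extension
    def prepare_filename(self, ext):
        if ext not in ('mp4', 'mkv', 'webm'):
            raise DemoUnsafeExtensionError(ext)
        return f'video.{ext}'

print(Reporter().prepare_filename('exe'))  # reports the error, then prints None
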
class YoutubeDL:
"""YoutubeDL class.
@@ -438,7 +452,8 @@ class YoutubeDL:
Can also just be a single color policy,
in which case it applies to all outputs.
Valid stream names are 'stdout' and 'stderr'.
Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
Valid color policies are one of 'always', 'auto',
'no_color', 'never', 'auto-tty' or 'no_color-tty'.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
HTTP header
geo_bypass_country:
@@ -454,8 +469,9 @@ class YoutubeDL:
Set the value to 'native' to use the native downloader
compat_opts: Compatibility options. See "Differences in default behavior".
The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, format-sort
no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
filename, abort-on-error, multistreams, no-live-chat,
format-sort, no-clean-infojson, no-playlist-metafiles,
no-keep-subs, no-attach-info-json, allow-unsafe-ext.
Refer __init__.py for their implementation
progress_template: Dictionary of templates for progress outputs.
Allowed keys are 'download', 'postprocess',
@@ -582,8 +598,9 @@ class YoutubeDL:
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}
_deprecated_multivalue_fields = {
'album_artist': 'album_artists',
@@ -594,7 +611,7 @@ class YoutubeDL:
}
_format_selection_exts = {
'audio': set(MEDIA_EXTENSIONS.common_audio),
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
'storyboards': set(MEDIA_EXTENSIONS.storyboards),
}
@@ -628,7 +645,7 @@ class YoutubeDL:
error=sys.stderr,
screen=sys.stderr if self.params.get('quiet') else stdout,
console=None if compat_os_name == 'nt' else next(
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
)
try:
@@ -643,12 +660,15 @@ class YoutubeDL:
self.params['color'] = 'no_color'
term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
no_color = bool(os.getenv('NO_COLOR'))
base_no_color = bool(os.getenv('NO_COLOR'))
def process_color_policy(stream):
stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
if policy in ('auto', None):
policy = traverse_obj(self.params, ('color', (stream_name, None), {str}, any)) or 'auto'
if policy in ('auto', 'auto-tty', 'no_color-tty'):
no_color = base_no_color
if policy.endswith('tty'):
no_color = policy.startswith('no_color')
if term_allow_color and supports_terminal_sequences(stream):
return 'no_color' if no_color else True
return False
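
A brief usage sketch of the new per-stream policies, assuming only the 'color' parameter shape documented in the hunk above; no other behaviour is implied here:

from yt_dlp import YoutubeDL

# 'color' maps stream names to policies; this diff adds 'auto-tty' and
# 'no_color-tty' to the accepted values
ydl = YoutubeDL({'color': {'stdout': 'auto-tty', 'stderr': 'no_color-tty'}})

# per the docstring above, a single policy string applies to all outputs
ydl = YoutubeDL({'color': 'no_color'})
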
@@ -679,9 +699,9 @@ class YoutubeDL:
width_args = [] if width is None else ['-w', str(width)]
sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
try:
self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
except OSError:
self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == errno.ENOENT:
@@ -822,8 +842,7 @@ class YoutubeDL:
)
self.report_warning(
'Long argument string detected. '
'Use -- to separate parameters and URLs, like this:\n%s' %
shell_quote(correct_argv))
f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
@@ -922,7 +941,7 @@ class YoutubeDL:
if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
return
self._write_string(
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
'{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')),
self._out_files.screen, only_once=only_once)
def to_stderr(self, message, only_once=False):
@@ -1045,10 +1064,10 @@ class YoutubeDL:
return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
"""
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
"""
if self.params.get('logger') is not None:
self.params['logger'].warning(message)
else:
@@ -1066,14 +1085,14 @@ class YoutubeDL:
self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
'''
"""
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
"""
self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
def write_debug(self, message, only_once=False):
'''Log debug message or Print message to stderr'''
"""Log debug message or Print message to stderr"""
if not self.params.get('verbose', False):
return
message = f'[debug] {message}'
@@ -1085,14 +1104,14 @@ class YoutubeDL:
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
self.to_screen('[download] %s has already been downloaded' % file_name)
self.to_screen(f'[download] {file_name} has already been downloaded')
except UnicodeEncodeError:
self.to_screen('[download] The file has already been downloaded')
def report_file_delete(self, file_name):
"""Report that existing file will be deleted."""
try:
self.to_screen('Deleting existing file %s' % file_name)
self.to_screen(f'Deleting existing file {file_name}')
except UnicodeEncodeError:
self.to_screen('Deleting existing file')
@@ -1147,7 +1166,7 @@ class YoutubeDL:
@staticmethod
def escape_outtmpl(outtmpl):
''' Escape any remaining strings like %s, %abc% etc. '''
""" Escape any remaining strings like %s, %abc% etc. """
return re.sub(
STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
@@ -1155,7 +1174,7 @@ class YoutubeDL:
@classmethod
def validate_outtmpl(cls, outtmpl):
''' @return None or Exception object '''
""" @return None or Exception object """
outtmpl = re.sub(
STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
lambda mobj: f'{mobj.group(0)[:-1]}s',
@@ -1208,13 +1227,13 @@ class YoutubeDL:
}
# Field is of the form key1.key2...
# where keys (except first) can be string, int, slice or "{field, ...}"
FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} # noqa: UP031
FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { # noqa: UP031
'inner': FIELD_INNER_RE,
'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
}
MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
(?P<negate>-)?
(?P<fields>{FIELD_RE})
@@ -1337,7 +1356,7 @@ class YoutubeDL:
value, default = None, na
fmt = outer_mobj.group('format')
if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
fmt = f'0{field_size_compat_map[last_field]:d}d'
flags = outer_mobj.group('conversion') or ''
@@ -1362,7 +1381,7 @@ class YoutubeDL:
elif fmt[-1] == 'U': # unicode normalized
value, fmt = unicodedata.normalize(
# "+" = compatibility equivalence, "#" = NFD
'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
value), str_fmt
elif fmt[-1] == 'D': # decimal suffix
num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
@@ -1390,7 +1409,7 @@ class YoutubeDL:
if fmt[-1] in 'csra':
value = sanitizer(last_field, value)
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
TMPL_DICT[key] = value
return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
@@ -1400,6 +1419,7 @@ class YoutubeDL:
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
return self.escape_outtmpl(outtmpl) % info_dict
@_catch_unsafe_extension_error
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
if outtmpl is None:
@@ -1479,9 +1499,9 @@ class YoutubeDL:
date = info_dict.get('upload_date')
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
date_range = self.params.get('daterange', DateRange())
if date not in date_range:
return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
view_count = info_dict.get('view_count')
if view_count is not None:
min_views = self.params.get('min_views')
@@ -1491,7 +1511,7 @@ class YoutubeDL:
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
return 'Skipping "%s" because it is age restricted' % video_title
return f'Skipping "{video_title}" because it is age restricted'
match_filter = self.params.get('match_filter')
if match_filter is None:
@@ -1544,7 +1564,7 @@ class YoutubeDL:
@staticmethod
def add_extra_info(info_dict, extra_info):
'''Set the keys from extra_info in info dict if they are missing'''
"""Set the keys from extra_info in info dict if they are missing"""
for key, value in extra_info.items():
info_dict.setdefault(key, value)
@@ -1590,7 +1610,7 @@ class YoutubeDL:
self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
'has already been recorded in the archive')
if self.params.get('break_on_existing', False):
raise ExistingVideoReached()
raise ExistingVideoReached
break
return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
else:
@@ -1616,8 +1636,8 @@ class YoutubeDL:
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
msg += '\nThis video is available in %s.' % ', '.join(
map(ISO3166Utils.short2full, e.countries))
msg += '\nThis video is available in {}.'.format(', '.join(
map(ISO3166Utils.short2full, e.countries)))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected
@@ -1826,8 +1846,8 @@ class YoutubeDL:
if isinstance(additional_urls, str):
additional_urls = [additional_urls]
self.to_screen(
'[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
'[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
ie_result['additional_entries'] = [
self.extract_info(
url, download, extra_info=extra_info,
@@ -1879,8 +1899,8 @@ class YoutubeDL:
webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url
if webpage_url and webpage_url in self._playlist_urls:
self.to_screen(
'[download] Skipping already downloaded playlist: %s'
% ie_result.get('title') or ie_result.get('id'))
'[download] Skipping already downloaded playlist: {}'.format(
ie_result.get('title')) or ie_result.get('id'))
return
self._playlist_level += 1
@@ -1895,8 +1915,8 @@ class YoutubeDL:
self._playlist_urls.clear()
elif result_type == 'compat_list':
self.report_warning(
'Extractor %s returned a compat_list result. '
'It needs to be updated.' % ie_result.get('extractor'))
'Extractor {} returned a compat_list result. '
'It needs to be updated.'.format(ie_result.get('extractor')))
def _fixup(r):
self.add_extra_info(r, {
@@ -1913,7 +1933,7 @@ class YoutubeDL:
]
return ie_result
else:
raise Exception('Invalid result type: %s' % result_type)
raise Exception(f'Invalid result type: {result_type}')
def _ensure_dir_exists(self, path):
return make_dir(path, self.report_error)
@@ -1927,6 +1947,8 @@ class YoutubeDL:
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_channel': ie_result.get('channel'),
'playlist_channel_id': ie_result.get('channel_id'),
**kwargs,
}
if strict:
@@ -2029,8 +2051,9 @@ class YoutubeDL:
resolved_entries[i] = (playlist_index, NO_DEFAULT)
continue
self.to_screen('[download] Downloading item %s of %s' % (
self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
self.to_screen(
f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
'playlist_index': playlist_index,
@@ -2080,9 +2103,9 @@ class YoutubeDL:
}
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[\w.-]+)\s*
(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<op>{})(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
''' % '|'.join(map(re.escape, OPERATORS.keys())))
'''.format('|'.join(map(re.escape, OPERATORS.keys()))))
m = operator_rex.fullmatch(filter_spec)
if m:
try:
@@ -2093,7 +2116,7 @@ class YoutubeDL:
comparison_value = parse_filesize(m.group('value') + 'B')
if comparison_value is None:
raise ValueError(
'Invalid value %r in format specification %r' % (
'Invalid value {!r} in format specification {!r}'.format(
m.group('value'), filter_spec))
op = OPERATORS[m.group('op')]
@@ -2103,15 +2126,15 @@ class YoutubeDL:
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
'~=': lambda attr, value: value.search(attr) is not None
'~=': lambda attr, value: value.search(attr) is not None,
}
str_operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-zA-Z0-9._-]+)\s*
(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
(?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
(?P<quote>["'])?
(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
(?(quote)(?P=quote))\s*
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
'''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))
m = str_operator_rex.fullmatch(filter_spec)
if m:
if m.group('op') == '~=':
@@ -2125,7 +2148,7 @@ class YoutubeDL:
op = str_op
if not m:
raise SyntaxError('Invalid filter specification %r' % filter_spec)
raise SyntaxError(f'Invalid filter specification {filter_spec!r}')
def _filter(f):
actual_value = f.get(m.group('key'))
@@ -2141,7 +2164,7 @@ class YoutubeDL:
if working:
yield f
continue
self.to_screen('[info] Testing format %s' % f['format_id'])
self.to_screen('[info] Testing format {}'.format(f['format_id']))
path = self.get_output_path('temp')
if not self._ensure_dir_exists(f'{path}/'):
continue
@@ -2149,19 +2172,19 @@ class YoutubeDL:
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
except (DownloadError, OSError, ValueError) + network_exceptions:
except (DownloadError, OSError, ValueError, *network_exceptions):
success = False
finally:
if os.path.exists(temp_file.name):
try:
os.remove(temp_file.name)
except OSError:
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
f['__working'] = success
if success:
yield f
else:
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
def _select_formats(self, formats, selector):
return list(selector({
@@ -2171,9 +2194,8 @@ class YoutubeDL:
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
}))
def _default_format_spec(self, info_dict, download=True):
download = download and not self.params.get('simulate')
prefer_best = download and (
def _default_format_spec(self, info_dict):
prefer_best = (
self.params['outtmpl']['default'] == '-'
or info_dict.get('is_live') and not self.params.get('live_from_start'))
@@ -2181,7 +2203,7 @@ class YoutubeDL:
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
if not prefer_best and download and not can_merge():
if not prefer_best and not can_merge():
prefer_best = True
formats = self._get_formats(info_dict)
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
@@ -2214,8 +2236,8 @@ class YoutubeDL:
def _parse_filter(tokens):
filter_parts = []
for type, string_, start, _, _ in tokens:
if type == tokenize.OP and string_ == ']':
for type_, string_, _start, _, _ in tokens:
if type_ == tokenize.OP and string_ == ']':
return ''.join(filter_parts)
else:
filter_parts.append(string_)
@@ -2225,23 +2247,23 @@ class YoutubeDL:
# E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None
for type, string_, start, end, line in tokens:
if type == tokenize.OP and string_ == '[':
for type_, string_, start, end, line in tokens:
if type_ == tokenize.OP and string_ == '[':
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
last_string = None
yield type, string_, start, end, line
yield type_, string_, start, end, line
# everything inside brackets will be handled by _parse_filter
for type, string_, start, end, line in tokens:
yield type, string_, start, end, line
if type == tokenize.OP and string_ == ']':
for type_, string_, start, end, line in tokens:
yield type_, string_, start, end, line
if type_ == tokenize.OP and string_ == ']':
break
elif type == tokenize.OP and string_ in ALLOWED_OPS:
elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
last_string = None
yield type, string_, start, end, line
elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
yield type_, string_, start, end, line
elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
if not last_string:
last_string = string_
last_start = start
@@ -2254,13 +2276,13 @@ class YoutubeDL:
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
selectors = []
current_selector = None
for type, string_, start, _, _ in tokens:
for type_, string_, start, _, _ in tokens:
# ENCODING is only defined in Python 3.x
if type == getattr(tokenize, 'ENCODING', None):
if type_ == getattr(tokenize, 'ENCODING', None):
continue
elif type in [tokenize.NAME, tokenize.NUMBER]:
elif type_ in [tokenize.NAME, tokenize.NUMBER]:
current_selector = FormatSelector(SINGLE, string_, [])
elif type == tokenize.OP:
elif type_ == tokenize.OP:
if string_ == ')':
if not inside_group:
# ')' will be handled by the parentheses group
@@ -2303,7 +2325,7 @@ class YoutubeDL:
current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
else:
raise syntax_error(f'Operator not recognized: "{string_}"', start)
elif type == tokenize.ENDMARKER:
elif type_ == tokenize.ENDMARKER:
break
if current_selector:
selectors.append(current_selector)
@@ -2378,7 +2400,7 @@ class YoutubeDL:
'acodec': the_only_audio.get('acodec'),
'abr': the_only_audio.get('abr'),
'asr': the_only_audio.get('asr'),
'audio_channels': the_only_audio.get('audio_channels')
'audio_channels': the_only_audio.get('audio_channels'),
})
return new_dict
@@ -2459,9 +2481,9 @@ class YoutubeDL:
format_fallback = not format_type and not format_modified # for b, w
_filter_f = (
(lambda f: f.get('%scodec' % format_type) != 'none')
(lambda f: f.get(f'{format_type}codec') != 'none')
if format_type and format_modified # bv*, ba*, wv*, wa*
else (lambda f: f.get('%scodec' % not_format_type) == 'none')
else (lambda f: f.get(f'{not_format_type}codec') == 'none')
if format_type # bv, ba, wv, wa
else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
if not format_modified # b, w
@@ -2529,7 +2551,7 @@ class YoutubeDL:
def __next__(self):
if self.counter >= len(self.tokens):
raise StopIteration()
raise StopIteration
value = self.tokens[self.counter]
self.counter += 1
return value
@@ -2612,7 +2634,7 @@ class YoutubeDL:
self._sort_thumbnails(thumbnails)
for i, t in enumerate(thumbnails):
if t.get('id') is None:
t['id'] = '%d' % i
t['id'] = str(i)
if t.get('width') and t.get('height'):
t['resolution'] = '%dx%d' % (t['width'], t['height'])
t['url'] = sanitize_url(t['url'])
@@ -2673,8 +2695,8 @@ class YoutubeDL:
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
for field in ('chapter', 'season', 'episode'):
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number'])
for old_key, new_key in self._deprecated_multivalue_fields.items():
if new_key in info_dict and old_key in info_dict:
@@ -2706,8 +2728,8 @@ class YoutubeDL:
def report_force_conversion(field, field_not, conversion):
self.report_warning(
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
% (field, field_not, conversion))
f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
'there is an error in extractor')
def sanitize_string_field(info, string_field):
field = info.get(string_field)
@@ -2824,28 +2846,28 @@ class YoutubeDL:
if not formats:
self.raise_no_formats(info_dict)
for format in formats:
sanitize_string_field(format, 'format_id')
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
if format.get('ext') is None:
format['ext'] = determine_ext(format['url']).lower()
if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if format.get('acodec') is None:
format['acodec'] = format['ext']
if format.get('protocol') is None:
format['protocol'] = determine_protocol(format)
if format.get('resolution') is None:
format['resolution'] = self.format_resolution(format, default=None)
if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
format['dynamic_range'] = 'SDR'
if format.get('aspect_ratio') is None:
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
for fmt in formats:
sanitize_string_field(fmt, 'format_id')
sanitize_numeric_fields(fmt)
fmt['url'] = sanitize_url(fmt['url'])
if fmt.get('ext') is None:
fmt['ext'] = determine_ext(fmt['url']).lower()
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if fmt.get('acodec') is None:
fmt['acodec'] = fmt['ext']
if fmt.get('protocol') is None:
fmt['protocol'] = determine_protocol(fmt)
if fmt.get('resolution') is None:
fmt['resolution'] = self.format_resolution(fmt, default=None)
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
fmt['dynamic_range'] = 'SDR'
if fmt.get('aspect_ratio') is None:
fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
# For fragmented formats, "tbr" is often max bitrate and not average
if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
and not format.get('filesize') and not format.get('filesize_approx')):
format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration'))
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
and not fmt.get('filesize') and not fmt.get('filesize_approx')):
fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)
# Safeguard against old/insecure infojson when using --load-info-json
if info_dict.get('http_headers'):
@@ -2858,36 +2880,36 @@ class YoutubeDL:
self.sort_formats({
'formats': formats,
'_format_sort_fields': info_dict.get('_format_sort_fields')
'_format_sort_fields': info_dict.get('_format_sort_fields'),
})
# Sanitize and group by format_id
formats_dict = {}
for i, format in enumerate(formats):
if not format.get('format_id'):
format['format_id'] = str(i)
for i, fmt in enumerate(formats):
if not fmt.get('format_id'):
fmt['format_id'] = str(i)
else:
# Sanitize format_id from characters used in format selector expression
format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
formats_dict.setdefault(format['format_id'], []).append(format)
fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
formats_dict.setdefault(fmt['format_id'], []).append(fmt)
# Make sure all formats have unique format_id
common_exts = set(itertools.chain(*self._format_selection_exts.values()))
for format_id, ambiguous_formats in formats_dict.items():
ambigious_id = len(ambiguous_formats) > 1
for i, format in enumerate(ambiguous_formats):
for i, fmt in enumerate(ambiguous_formats):
if ambigious_id:
format['format_id'] = '%s-%d' % (format_id, i)
fmt['format_id'] = f'{format_id}-{i}'
# Ensure there is no conflict between id and ext in format selection
# See https://github.com/yt-dlp/yt-dlp/issues/1282
if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
format['format_id'] = 'f%s' % format['format_id']
if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
fmt['format_id'] = 'f{}'.format(fmt['format_id'])
if format.get('format') is None:
format['format'] = '{id} - {res}{note}'.format(
id=format['format_id'],
res=self.format_resolution(format),
note=format_field(format, 'format_note', ' (%s)'),
if fmt.get('format') is None:
fmt['format'] = '{id} - {res}{note}'.format(
id=fmt['format_id'],
res=self.format_resolution(fmt),
note=format_field(fmt, 'format_note', ' (%s)'),
)
if self.params.get('check_formats') is True:
@@ -2940,7 +2962,7 @@ class YoutubeDL:
continue
if format_selector is None:
req_format = self._default_format_spec(info_dict, download=download)
req_format = self._default_format_spec(info_dict)
self.write_debug(f'Default format spec: {req_format}')
format_selector = self.build_format_selector(req_format)
@@ -3009,7 +3031,7 @@ class YoutubeDL:
info_dict['requested_downloads'] = downloaded_formats
info_dict = self.run_all_pps('after_video', info_dict)
if max_downloads_reached:
raise MaxDownloadsReached()
raise MaxDownloadsReached
# We update the info dict with the selected best quality format (backwards compatibility)
info_dict.update(best_format)
@@ -3070,8 +3092,8 @@ class YoutubeDL:
else:
f = formats[-1]
self.report_warning(
'No subtitle format found matching "%s" for language %s, '
'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
'No subtitle format found matching "{}" for language {}, '
'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext']))
subs[lang] = f
return subs
@@ -3150,11 +3172,12 @@ class YoutubeDL:
if test:
verbose = self.params.get('verbose')
quiet = self.params.get('quiet') or not verbose
params = {
'test': True,
'quiet': self.params.get('quiet') or not verbose,
'quiet': quiet,
'verbose': verbose,
'noprogress': not verbose,
'noprogress': quiet,
'nopart': True,
'skip_unavailable_fragments': False,
'keep_fragments': False,
@@ -3189,6 +3212,7 @@ class YoutubeDL:
os.remove(file)
return None
@_catch_unsafe_extension_error
def process_info(self, info_dict):
"""Process a single resolved IE result. (Modifies it in-place)"""
@@ -3226,7 +3250,7 @@ class YoutubeDL:
def check_max_downloads():
if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
raise MaxDownloadsReached()
raise MaxDownloadsReached
if self.params.get('simulate'):
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
@@ -3400,7 +3424,7 @@ class YoutubeDL:
for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
f['filepath'] = fname = prepend_extension(
correct_ext(temp_filename, info_dict['ext']),
'f%s' % f['format_id'], info_dict['ext'])
'f{}'.format(f['format_id']), info_dict['ext'])
downloaded.append(fname)
info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
success, real_download = self.dl(temp_filename, info_dict)
@@ -3433,7 +3457,7 @@ class YoutubeDL:
if temp_filename != '-':
fname = prepend_extension(
correct_ext(temp_filename, new_info['ext']),
'f%s' % f['format_id'], new_info['ext'])
'f{}'.format(f['format_id']), new_info['ext'])
if not self._ensure_dir_exists(fname):
return
f['filepath'] = fname
@@ -3465,11 +3489,11 @@ class YoutubeDL:
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
except network_exceptions as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
self.report_error(f'unable to download video data: {err}')
return
except OSError as err:
raise UnavailableVideoError(err)
except (ContentTooShortError, ) as err:
except ContentTooShortError as err:
self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
return
@@ -3536,13 +3560,13 @@ class YoutubeDL:
try:
replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
self.report_error(f'Postprocessing: {err}')
return
try:
for ph in self._post_hooks:
ph(info_dict['filepath'])
except Exception as err:
self.report_error('post hooks: %s' % str(err))
self.report_error(f'post hooks: {err}')
return
info_dict['__write_download_archive'] = True
@@ -3609,7 +3633,7 @@ class YoutubeDL:
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
''' Sanitize the infodict for converting to json '''
""" Sanitize the infodict for converting to json """
if info_dict is None:
return info_dict
info_dict.setdefault('epoch', int(time.time()))
@@ -3644,7 +3668,7 @@ class YoutubeDL:
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
''' Alias of sanitize_info for backward compatibility '''
""" Alias of sanitize_info for backward compatibility """
return YoutubeDL.sanitize_info(info_dict, actually_filter)
def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
@@ -3666,7 +3690,7 @@ class YoutubeDL:
actual_post_extract(video_dict or {})
return
post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
post_extractor = info_dict.pop('__post_extractor', None) or dict
info_dict.update(post_extractor())
actual_post_extract(info_dict or {})
@@ -3771,7 +3795,7 @@ class YoutubeDL:
if format.get('width') and format.get('height'):
return '%dx%d' % (format['width'], format['height'])
elif format.get('height'):
return '%sp' % format['height']
return '{}p'.format(format['height'])
elif format.get('width'):
return '%dx?' % format['width']
return default
@@ -3788,7 +3812,7 @@ class YoutubeDL:
if fdict.get('language'):
if res:
res += ' '
res += '[%s]' % fdict['language']
res += '[{}]'.format(fdict['language'])
if fdict.get('format_note') is not None:
if res:
res += ' '
@@ -3800,7 +3824,7 @@ class YoutubeDL:
if fdict.get('container') is not None:
if res:
res += ', '
res += '%s container' % fdict['container']
res += '{} container'.format(fdict['container'])
if (fdict.get('vcodec') is not None
and fdict.get('vcodec') != 'none'):
if res:
@@ -3815,7 +3839,7 @@ class YoutubeDL:
if fdict.get('fps') is not None:
if res:
res += ', '
res += '%sfps' % fdict['fps']
res += '{}fps'.format(fdict['fps'])
if fdict.get('acodec') is not None:
if res:
res += ', '
@@ -3858,7 +3882,7 @@ class YoutubeDL:
format_field(f, 'format_id'),
format_field(f, 'ext'),
self.format_resolution(f),
self._format_note(f)
self._format_note(f),
] for f in formats if (f.get('preference') or 0) >= -1000]
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
@@ -3964,11 +3988,11 @@ class YoutubeDL:
from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import (
_PLUGIN_CLASSES as plugin_ies,
_PLUGIN_OVERRIDES as plugin_ie_overrides
_PLUGIN_OVERRIDES as plugin_ie_overrides,
)
def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
additional_info = []
if os.environ.get('TERM', '').lower() == 'dumb':
additional_info.append('dumb')
@@ -3979,13 +4003,13 @@ class YoutubeDL:
ret = f'{ret} ({",".join(additional_info)})'
return ret
encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
self.get_encoding(),
', '.join(
f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
if stream is not None and key != 'console')
if stream is not None and key != 'console'),
)
logger = self.params.get('logger')
@@ -4017,7 +4041,7 @@ class YoutubeDL:
else:
write_debug('Lazy loading extractors is disabled')
if self.params['compat_opts']:
write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))
if current_git_head():
write_debug(f'Git HEAD: {current_git_head()}')
@@ -4026,14 +4050,14 @@ class YoutubeDL:
exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
if ffmpeg_features:
exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features)))
exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
) or 'none'
write_debug('exe versions: %s' % exe_str)
write_debug(f'exe versions: {exe_str}')
from .compat.compat_utils import get_package_info
from .dependencies import available_dependencies
@@ -4045,7 +4069,7 @@ class YoutubeDL:
write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['%s%s' % (
display_list = ['{}{}'.format(
klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in plugins.items()]
if plugin_type == 'Extractor':
@@ -4062,14 +4086,13 @@ class YoutubeDL:
# Not implemented
if False and self.params.get('call_home'):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
write_debug('Public IP address: %s' % ipaddr)
write_debug(f'Public IP address: {ipaddr}')
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode()
if version_tuple(latest_version) > version_tuple(__version__):
self.report_warning(
'You are using an outdated version (newest version: %s)! '
'See https://yt-dl.org/update if you need help updating.' %
latest_version)
f'You are using an outdated version (newest version: {latest_version})! '
'See https://yt-dl.org/update if you need help updating.')
@functools.cached_property
def proxies(self):
@@ -4103,7 +4126,7 @@ class YoutubeDL:
return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
def _get_available_impersonate_targets(self):
# todo(future): make available as public API
# TODO(future): make available as public API
return [
(target, rh.RH_NAME)
for rh in self._request_director.handlers.values()
@@ -4112,7 +4135,7 @@ class YoutubeDL:
]
def _impersonate_target_available(self, target):
# todo(future): make available as public API
# TODO(future): make available as public API
return any(
rh.is_supported_target(target)
for rh in self._request_director.handlers.values()
@@ -4238,7 +4261,7 @@ class YoutubeDL:
return encoding
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
""" Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """
if overwrite is None:
overwrite = self.params.get('overwrites', True)
if not self.params.get('writeinfojson'):
@@ -4261,7 +4284,7 @@ class YoutubeDL:
return None
def _write_description(self, label, ie_result, descfn):
''' Write description and returns True = written, False = skip, None = error '''
""" Write description and returns True = written, False = skip, None = error """
if not self.params.get('writedescription'):
return False
elif not descfn:
@@ -4285,7 +4308,7 @@ class YoutubeDL:
return True
def _write_subtitles(self, info_dict, filename):
''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
""" Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error"""
ret = []
subtitles = info_dict.get('requested_subtitles')
if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
@@ -4331,7 +4354,7 @@ class YoutubeDL:
self.dl(sub_filename, sub_copy, subtitle=True)
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
if self.params.get('ignoreerrors') is not True: # False or 'only_download'
if not self.params.get('ignoreerrors'):
@@ -4341,7 +4364,7 @@ class YoutubeDL:
return ret
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
""" Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """
write_all = self.params.get('write_all_thumbnails', False)
thumbnails, ret = [], []
if write_all or self.params.get('writethumbnail', False):
@@ -4368,8 +4391,8 @@ class YoutubeDL:
existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
if existing_thumb:
self.to_screen('[info] %s is already present' % (
thumb_display_id if multiple else f'{label} thumbnail').capitalize())
self.to_screen('[info] {} is already present'.format((
thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
t['filepath'] = existing_thumb
ret.append((existing_thumb, thumb_filename_final))
else:

View File

@@ -14,7 +14,7 @@ import os
import re
import traceback
from .compat import compat_os_name, compat_shlex_quote
from .compat import compat_os_name
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes
@@ -58,11 +58,13 @@ from .utils import (
read_stdin,
render_table,
setproctitle,
shell_quote,
traverse_obj,
variadic,
write_string,
)
from .utils.networking import std_headers
from .utils._utils import _UnsafeExtensionError
from .YoutubeDL import YoutubeDL
_IN_CLI = False
@@ -115,9 +117,9 @@ def print_extractor_information(opts, urls):
ie.description(markdown=False, search_examples=_SEARCHES)
for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
elif opts.ap_list_mso:
out = 'Supported TV Providers:\n%s\n' % render_table(
out = 'Supported TV Providers:\n{}\n'.format(render_table(
['mso', 'mso name'],
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
else:
return False
write_string(out, out=sys.stdout)
@@ -129,7 +131,7 @@ def set_compat_opts(opts):
if name not in opts.compat_opts:
return False
opts.compat_opts.discard(name)
opts.compat_opts.update(['*%s' % name])
opts.compat_opts.update([f'*{name}'])
return True
def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
@@ -222,7 +224,7 @@ def validate_options(opts):
validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
if opts.wait_for_video is not None:
min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None])
validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
'time range to wait for video', opts.wait_for_video)
validate_minmax(min_wait, max_wait, 'time range to wait for video')
@@ -264,9 +266,9 @@ def validate_options(opts):
# Retry sleep function
def parse_sleep_func(expr):
NUMBER_RE = r'\d+(?:\.\d+)?'
op, start, limit, step, *_ = tuple(re.fullmatch(
op, start, limit, step, *_ = (*tuple(re.fullmatch(
rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
expr.strip()).groups()) + (None, None)
expr.strip()).groups()), None, None)
if op == 'exp':
return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
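
For readability, a hedged stand-alone sketch of the expression parsing shown above; only the 'exp' branch appears in this hunk, so the sketch stops there. Under these assumptions, 'exp=1:120' backs off 1, 2, 4, 8, ... seconds, capped at 120:

import re

def demo_parse_sleep(expr):
    # mirrors the regex and the 'exp' branch from the hunk above
    NUMBER_RE = r'\d+(?:\.\d+)?'
    op, start, limit, step, *_ = (*re.fullmatch(
        rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
        expr.strip()).groups(), None, None)
    if op == 'exp':
        return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
    raise NotImplementedError('only the exp branch is shown in this hunk')

sleep = demo_parse_sleep('exp=1:120')
print([sleep(n) for n in range(4)])  # [1.0, 2.0, 4.0, 8.0]
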
@@ -396,13 +398,13 @@ def validate_options(opts):
# MetadataParser
def metadataparser_actions(f):
if isinstance(f, str):
cmd = '--parse-metadata %s' % compat_shlex_quote(f)
cmd = f'--parse-metadata {shell_quote(f)}'
try:
actions = [MetadataFromFieldPP.to_action(f)]
except Exception as err:
raise ValueError(f'{cmd} is invalid; {err}')
else:
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
cmd = f'--replace-in-metadata {shell_quote(f)}'
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
for action in actions:
@@ -413,7 +415,7 @@ def validate_options(opts):
yield action
if opts.metafromtitle is not None:
opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle)
opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}')
opts.parse_metadata = {
k: list(itertools.chain(*map(metadataparser_actions, v)))
for k, v in opts.parse_metadata.items()
@@ -466,7 +468,7 @@ def validate_options(opts):
default_downloader = ed.get_basename()
for policy in opts.color.values():
if policy not in ('always', 'auto', 'no_color', 'never'):
if policy not in ('always', 'auto', 'auto-tty', 'no_color', 'no_color-tty', 'never'):
raise ValueError(f'"{policy}" is not a valid color policy')
warnings, deprecation_warnings = [], []
@@ -592,6 +594,13 @@ def validate_options(opts):
if opts.ap_username is not None and opts.ap_password is None:
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
# compat option changes global state destructively; only allow from cli
if 'allow-unsafe-ext' in opts.compat_opts:
warnings.append(
'Using allow-unsafe-ext opens you up to potential attacks. '
'Use with great care!')
_UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x
return warnings, deprecation_warnings
@@ -602,7 +611,7 @@ def get_postprocessors(opts):
yield {
'key': 'MetadataParser',
'actions': actions,
'when': when
'when': when,
}
sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
if sponsorblock_query:
@@ -610,19 +619,19 @@ def get_postprocessors(opts):
'key': 'SponsorBlock',
'categories': sponsorblock_query,
'api': opts.sponsorblock_api,
'when': 'after_filter'
'when': 'after_filter',
}
if opts.convertsubtitles:
yield {
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
'when': 'before_dl'
'when': 'before_dl',
}
if opts.convertthumbnails:
yield {
'key': 'FFmpegThumbnailsConvertor',
'format': opts.convertthumbnails,
'when': 'before_dl'
'when': 'before_dl',
}
if opts.extractaudio:
yield {
@@ -647,7 +656,7 @@ def get_postprocessors(opts):
yield {
'key': 'FFmpegEmbedSubtitle',
# already_have_subtitle = True prevents the file from being deleted after embedding
'already_have_subtitle': opts.writesubtitles and keep_subs
'already_have_subtitle': opts.writesubtitles and keep_subs,
}
if not opts.writeautomaticsub and keep_subs:
opts.writesubtitles = True
@@ -660,7 +669,7 @@ def get_postprocessors(opts):
'remove_sponsor_segments': opts.sponsorblock_remove,
'remove_ranges': opts.remove_ranges,
'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
'force_keyframes': opts.force_keyframes_at_cuts
'force_keyframes': opts.force_keyframes_at_cuts,
}
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
# FFmpegExtractAudioPP as containers before conversion may not support
@@ -694,7 +703,7 @@ def get_postprocessors(opts):
yield {
'key': 'EmbedThumbnail',
# already_have_thumbnail = True prevents the file from being deleted after embedding
'already_have_thumbnail': opts.writethumbnail
'already_have_thumbnail': opts.writethumbnail,
}
if not opts.writethumbnail:
opts.writethumbnail = True
@@ -741,7 +750,7 @@ def parse_options(argv=None):
print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
any_getting = any(getattr(opts, k) for k in (
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl',
))
if opts.quiet is None:
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
@@ -1002,7 +1011,7 @@ def _real_main(argv=None):
def make_row(target, handler):
return [
join_nonempty(target.client.title(), target.version, delim='-') or '-',
join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-',
join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-',
handler,
]

View File

@@ -68,7 +68,7 @@ def pad_block(block, padding_mode):
raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
if padding_mode == 'iso7816' and padding_size:
block = block + [0x80] # NB: += mutates list
block = [*block, 0x80] # NB: += mutates list
padding_size -= 1
return block + [PADDING_BYTE[padding_mode]] * padding_size
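
A short aside on the 'NB: += mutates list' comment above: a hedged sketch of why the helper rebinds the name instead of using augmented assignment, which would modify the caller's list in place:

data = [0x01, 0x02]

def pad_in_place(block):
    block += [0x80]            # augmented assignment mutates the caller's list
    return block

def pad_rebound(block):
    block = [*block, 0x80]     # builds a new list; the caller's data is untouched
    return block

pad_rebound(data)
print(data)                    # [1, 2]
pad_in_place(data)
print(data)                    # [1, 2, 128]
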
@@ -110,9 +110,7 @@ def aes_ecb_decrypt(data, key, iv=None):
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
encrypted_data += aes_decrypt(block, expanded_key)
encrypted_data = encrypted_data[:len(data)]
return encrypted_data
return encrypted_data[:len(data)]
def aes_ctr_decrypt(data, key, iv):
@@ -148,9 +146,7 @@ def aes_ctr_encrypt(data, key, iv):
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
encrypted_data += xor(block, cipher_counter_block)
encrypted_data = encrypted_data[:len(data)]
return encrypted_data
return encrypted_data[:len(data)]
def aes_cbc_decrypt(data, key, iv):
@@ -174,9 +170,7 @@ def aes_cbc_decrypt(data, key, iv):
decrypted_block = aes_decrypt(block, expanded_key)
decrypted_data += xor(decrypted_block, previous_cipher_block)
previous_cipher_block = block
decrypted_data = decrypted_data[:len(data)]
return decrypted_data
return decrypted_data[:len(data)]
def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
@@ -224,7 +218,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
if len(nonce) == 12:
j0 = nonce + [0, 0, 0, 1]
j0 = [*nonce, 0, 0, 0, 1]
else:
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
@@ -242,11 +236,11 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
data
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
+ ((len(data) * 8).to_bytes(8, 'big'))) # length of data
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
)
if tag != aes_ctr_encrypt(s_tag, key, j0):
raise ValueError("Mismatching authentication tag")
raise ValueError('Mismatching authentication tag')
return decrypted_data
@@ -288,9 +282,7 @@ def aes_decrypt(data, expanded_key):
data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
data = shift_rows_inv(data)
data = sub_bytes_inv(data)
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
return data
return xor(data, expanded_key[:BLOCK_SIZE_BYTES])
def aes_decrypt_text(data, password, key_size_bytes):
@@ -318,9 +310,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
cipher = data[NONCE_LENGTH_BYTES:]
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
plaintext = intlist_to_bytes(decrypted_data)
return plaintext
return intlist_to_bytes(decrypted_data)
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
@@ -428,9 +418,7 @@ def key_expansion(data):
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
temp = data[-4:]
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
data = data[:expanded_key_size_bytes]
return data
return data[:expanded_key_size_bytes]
def iter_vector(iv):
@@ -511,7 +499,7 @@ def block_product(block_x, block_y):
# NIST SP 800-38D, Algorithm 1
if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes')
block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
block_v = block_y[:]
@@ -534,7 +522,7 @@ def ghash(subkey, data):
# NIST SP 800-38D, Algorithm 2
if len(data) % BLOCK_SIZE_BYTES:
raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes')
last_y = [0] * BLOCK_SIZE_BYTES
for i in range(0, len(data), BLOCK_SIZE_BYTES):

View File

@@ -81,10 +81,10 @@ class Cache:
cachedir = self._get_root_dir()
if not any((term in cachedir) for term in ('cache', 'tmp')):
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')
self._ydl.to_screen(
'Removing cache dir %s .' % cachedir, skip_eol=True)
f'Removing cache dir {cachedir} .', skip_eol=True)
if os.path.exists(cachedir):
self._ydl.to_screen('.', skip_eol=True)
shutil.rmtree(cachedir)

View File

@@ -35,7 +35,7 @@ from .compat_utils import passthrough_module
from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))

View File

@@ -7,6 +7,6 @@ passthrough_module(__name__, 'functools')
del passthrough_module
try:
cache # >= 3.9
_ = cache # >= 3.9
except NameError:
cache = lru_cache(maxsize=None)

View File

@@ -1,16 +1,22 @@
tests = {
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
}
def what(file=None, h=None):
"""Detect format of image (Currently supports jpeg, png, webp, gif only)
Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py
Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
"""
if h is None:
with open(file, 'rb') as f:
h = f.read(12)
return next((type_ for type_, test in tests.items() if test(h)), None)
if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
return 'webp'
if h.startswith(b'\x89PNG'):
return 'png'
if h.startswith(b'\xFF\xD8\xFF'):
return 'jpeg'
if h.startswith(b'GIF'):
return 'gif'
return None
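
A hedged self-check of the rewritten detector, assuming the what() shown above is in scope; the header bytes are the standard magic numbers for each format:

print(what(h=b'\x89PNG\r\n\x1a\n' + b'\x00' * 4))      # 'png'
print(what(h=b'RIFF' + b'\x00' * 4 + b'WEBP'))         # 'webp'
print(what(h=b'\xff\xd8\xff\xe0' + b'\x00' * 8))       # 'jpeg'
print(what(h=b'GIF89a' + b'\x00' * 6))                 # 'gif'
print(what(h=b'BM' + b'\x00' * 10))                    # None (unsupported format)
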

View File

@@ -2,7 +2,9 @@ import base64
import collections
import contextlib
import datetime as dt
import functools
import glob
import hashlib
import http.cookiejar
import http.cookies
import io
@@ -17,14 +19,12 @@ import tempfile
import time
import urllib.request
from enum import Enum, auto
from hashlib import pbkdf2_hmac
from .aes import (
aes_cbc_decrypt_bytes,
aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7,
)
from .compat import functools # isort: split
from .compat import compat_os_name
from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON,
@@ -146,7 +146,7 @@ def _extract_firefox_cookies(profile, container, logger):
identities = json.load(containers).get('identities', [])
container_id = next((context.get('userContextId') for context in identities if container in (
context.get('name'),
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()),
)), None)
if not isinstance(container_id, int):
raise ValueError(f'could not find firefox container "{container}" in containers.json')
@@ -263,7 +263,7 @@ def _get_chromium_based_browser_settings(browser_name):
return {
'browser_dir': browser_dir,
'keyring_name': keyring_name,
'supports_profiles': browser_name not in browsers_without_profiles
'supports_profiles': browser_name not in browsers_without_profiles,
}
@@ -740,40 +740,38 @@ def _get_linux_desktop_environment(env, logger):
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
desktop_session = env.get('DESKTOP_SESSION', None)
if xdg_current_desktop is not None:
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
if xdg_current_desktop == 'Unity':
if desktop_session is not None and 'gnome-fallback' in desktop_session:
for part in map(str.strip, xdg_current_desktop.split(':')):
if part == 'Unity':
if desktop_session is not None and 'gnome-fallback' in desktop_session:
return _LinuxDesktopEnvironment.GNOME
else:
return _LinuxDesktopEnvironment.UNITY
elif part == 'Deepin':
return _LinuxDesktopEnvironment.DEEPIN
elif part == 'GNOME':
return _LinuxDesktopEnvironment.GNOME
else:
return _LinuxDesktopEnvironment.UNITY
elif xdg_current_desktop == 'Deepin':
return _LinuxDesktopEnvironment.DEEPIN
elif xdg_current_desktop == 'GNOME':
return _LinuxDesktopEnvironment.GNOME
elif xdg_current_desktop == 'X-Cinnamon':
return _LinuxDesktopEnvironment.CINNAMON
elif xdg_current_desktop == 'KDE':
kde_version = env.get('KDE_SESSION_VERSION', None)
if kde_version == '5':
return _LinuxDesktopEnvironment.KDE5
elif kde_version == '6':
return _LinuxDesktopEnvironment.KDE6
elif kde_version == '4':
return _LinuxDesktopEnvironment.KDE4
else:
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
return _LinuxDesktopEnvironment.KDE4
elif xdg_current_desktop == 'Pantheon':
return _LinuxDesktopEnvironment.PANTHEON
elif xdg_current_desktop == 'XFCE':
return _LinuxDesktopEnvironment.XFCE
elif xdg_current_desktop == 'UKUI':
return _LinuxDesktopEnvironment.UKUI
elif xdg_current_desktop == 'LXQt':
return _LinuxDesktopEnvironment.LXQT
else:
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
elif part == 'X-Cinnamon':
return _LinuxDesktopEnvironment.CINNAMON
elif part == 'KDE':
kde_version = env.get('KDE_SESSION_VERSION', None)
if kde_version == '5':
return _LinuxDesktopEnvironment.KDE5
elif kde_version == '6':
return _LinuxDesktopEnvironment.KDE6
elif kde_version == '4':
return _LinuxDesktopEnvironment.KDE4
else:
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
return _LinuxDesktopEnvironment.KDE4
elif part == 'Pantheon':
return _LinuxDesktopEnvironment.PANTHEON
elif part == 'XFCE':
return _LinuxDesktopEnvironment.XFCE
elif part == 'UKUI':
return _LinuxDesktopEnvironment.UKUI
elif part == 'LXQt':
return _LinuxDesktopEnvironment.LXQT
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
elif desktop_session is not None:
if desktop_session == 'deepin':
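
The loop introduced above walks every colon-separated entry of XDG_CURRENT_DESKTOP instead of only the first, so a value such as "ubuntu:GNOME" now matches on its GNOME part. A rough standalone illustration of that iteration (the helper and desktop names below are stand-ins, not the module's real enum handling):

def first_known_desktop(xdg_current_desktop):
    # XDG_CURRENT_DESKTOP may hold a colon-separated list, e.g. "ubuntu:GNOME"
    known = {'Unity', 'Deepin', 'GNOME', 'X-Cinnamon', 'KDE', 'Pantheon', 'XFCE', 'UKUI', 'LXQt'}
    return next((part for part in map(str.strip, xdg_current_desktop.split(':')) if part in known), None)

assert first_known_desktop('ubuntu:GNOME') == 'GNOME'
assert first_known_desktop('Budgie:GNOME') == 'GNOME'
assert first_known_desktop('SomethingElse') is None
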
@@ -826,7 +824,7 @@ def _choose_linux_keyring(logger):
elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
linux_keyring = _LinuxKeyring.KWALLET6
elif desktop_environment in (
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER,
):
linux_keyring = _LinuxKeyring.BASICTEXT
else:
@@ -861,7 +859,7 @@ def _get_kwallet_network_wallet(keyring, logger):
'dbus-send', '--session', '--print-reply=literal',
f'--dest={service_name}',
wallet_path,
'org.kde.KWallet.networkWallet'
'org.kde.KWallet.networkWallet',
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
if returncode:
@@ -891,7 +889,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger):
'kwallet-query',
'--read-password', f'{browser_keyring_name} Safe Storage',
'--folder', f'{browser_keyring_name} Keys',
network_wallet
network_wallet,
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
if returncode:
@@ -931,9 +929,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger):
for item in col.get_all_items():
if item.get_label() == f'{browser_keyring_name} Safe Storage':
return item.get_secret()
else:
logger.error('failed to read from keyring')
return b''
logger.error('failed to read from keyring')
return b''
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
@@ -1002,7 +999,7 @@ def _get_windows_v10_key(browser_root, logger):
def pbkdf2_sha1(password, salt, iterations, key_length):
return pbkdf2_hmac('sha1', password, salt, iterations, key_length)
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
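
pbkdf2_sha1 above is the key-derivation step used when turning a browser's stored password into an AES key for cookie decryption; only the import style changed. A standalone call showing the same stdlib primitive (password, salt, iteration count and key length below are illustrative values, not the browser's real parameters):

import hashlib

key = hashlib.pbkdf2_hmac('sha1', b'password', b'saltysalt', 1003, 16)
print(key.hex())  # 16-byte key derived from the password
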
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
@@ -1053,7 +1050,7 @@ def _decrypt_windows_dpapi(ciphertext, logger):
None, # pvReserved: must be NULL
None, # pPromptStruct: information about prompts to display
0, # dwFlags
ctypes.byref(blob_out) # pDataOut
ctypes.byref(blob_out), # pDataOut
)
if not ret:
logger.warning('failed to decrypt with DPAPI', only_once=True)
@@ -1129,24 +1126,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
_RESERVED = {
"expires",
"path",
"comment",
"domain",
"max-age",
"secure",
"httponly",
"version",
"samesite",
'expires',
'path',
'comment',
'domain',
'max-age',
'secure',
'httponly',
'version',
'samesite',
}
_FLAGS = {"secure", "httponly"}
_FLAGS = {'secure', 'httponly'}
# Added 'bad' group to catch the remaining value
_COOKIE_PATTERN = re.compile(r"""
_COOKIE_PATTERN = re.compile(r'''
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
@@ -1156,7 +1153,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
| # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
| # or
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
) # End of group 'val'
| # or
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
@@ -1164,7 +1161,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
""", re.ASCII | re.VERBOSE)
''', re.ASCII | re.VERBOSE)
def load(self, data):
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
@@ -1260,14 +1257,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
# with no name, whereas http.cookiejar regards it as a
# cookie with no value.
name, value = '', name
f.write('%s\n' % '\t'.join((
f.write('{}\n'.format('\t'.join((
cookie.domain,
self._true_or_false(cookie.domain.startswith('.')),
cookie.path,
self._true_or_false(cookie.secure),
str_or_none(cookie.expires, default=''),
name, value
)))
name, value,
))))
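
The f-string rewrite above still emits the classic Netscape cookies.txt record: seven tab-separated fields per line. A hand-rolled line in the same layout (all values are illustrative):

fields = (
    '.example.com',  # domain
    'TRUE',          # domain_specified: domain starts with a dot
    '/',             # path
    'FALSE',         # secure
    '1735689600',    # expiry as a unix timestamp ('' for session cookies)
    'session_id',    # name
    'abc123',        # value
)
print('{}\n'.format('\t'.join(fields)), end='')
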
def save(self, filename=None, ignore_discard=True, ignore_expires=True):
"""
@@ -1306,10 +1303,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
return line
cf = io.StringIO()

View File

@@ -404,7 +404,7 @@ class FileDownloader:
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
self.to_screen(f'[download] Resuming download at byte {resume_len}')
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
"""Report retry"""

View File

@@ -1,4 +1,5 @@
import enum
import functools
import json
import os
import re
@@ -9,7 +10,6 @@ import time
import uuid
from .fragment import FragmentFD
from ..compat import functools
from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
@@ -55,7 +55,7 @@ class ExternalFD(FragmentFD):
# correct and expected termination thus all postprocessing
# should take place
retval = 0
self.to_screen('[%s] Interrupted by user' % self.get_basename())
self.to_screen(f'[{self.get_basename()}] Interrupted by user')
finally:
if self._cookies_tempfile:
self.try_remove(self._cookies_tempfile)
@@ -108,7 +108,7 @@ class ExternalFD(FragmentFD):
return all((
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'),
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'),
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
))
@@ -172,7 +172,7 @@ class ExternalFD(FragmentFD):
decrypt_fragment = self.decrypter(info_dict)
dest, _ = self.sanitize_open(tmpfilename, 'wb')
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
fragment_filename = f'{tmpfilename}-Frag{frag_index}'
try:
src, _ = self.sanitize_open(fragment_filename, 'rb')
except OSError as err:
@@ -186,7 +186,7 @@ class ExternalFD(FragmentFD):
if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(fragment_filename))
dest.close()
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls'))
return 0
def _call_process(self, cmd, info_dict):
@@ -336,11 +336,11 @@ class Aria2cFD(ExternalFD):
if 'fragments' in info_dict:
cmd += ['--uri-selector=inorder']
url_list_file = '%s.frag.urls' % tmpfilename
url_list_file = f'{tmpfilename}.frag.urls'
url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}'
url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode())
stream.close()
@@ -357,7 +357,7 @@ class Aria2cFD(ExternalFD):
'id': sanitycheck,
'method': method,
'params': [f'token:{rpc_secret}', *params],
}).encode('utf-8')
}).encode()
request = Request(
f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={
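
The .encode() change above touches the body of aria2c's JSON-RPC sanity check; the payload shape is easier to see in isolation. A minimal sketch of the same request against a local aria2c RPC endpoint (the port, secret and id below are placeholders; aria2.getVersion is a standard aria2 RPC method):

import json
import urllib.request

rpc_port = 6800
rpc_secret = 'my-secret'
payload = json.dumps({
    'jsonrpc': '2.0',
    'id': 'sanitycheck',
    'method': 'aria2.getVersion',
    'params': [f'token:{rpc_secret}'],
}).encode()
request = urllib.request.Request(
    f'http://localhost:{rpc_port}/jsonrpc',
    data=payload, headers={'Content-Type': 'application/json'})
# json.load(urllib.request.urlopen(request))  # needs a running `aria2c --enable-rpc`
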
@@ -416,7 +416,7 @@ class Aria2cFD(ExternalFD):
'total_bytes_estimate': total,
'eta': (total - downloaded) / (speed or 1),
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
'elapsed': time.time() - started
'elapsed': time.time() - started,
})
self._hook_progress(status, info_dict)
@@ -509,12 +509,12 @@ class FFmpegFD(ExternalFD):
proxy = self.params.get('proxy')
if proxy:
if not re.match(r'^[\da-zA-Z]+://', proxy):
proxy = 'http://%s' % proxy
proxy = f'http://{proxy}'
if proxy.startswith('socks'):
self.report_warning(
'%s does not support SOCKS proxies. Downloading is likely to fail. '
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. '
'Consider adding --hls-prefer-native to your command.')
# Since December 2015 ffmpeg supports -http_proxy option (see
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
@@ -575,7 +575,7 @@ class FFmpegFD(ExternalFD):
if end_time:
args += ['-t', str(end_time - start_time)]
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy']

View File

@@ -67,12 +67,12 @@ class FlvReader(io.BytesIO):
self.read_bytes(3)
quality_entry_count = self.read_unsigned_char()
# QualityEntryCount
for i in range(quality_entry_count):
for _ in range(quality_entry_count):
self.read_string()
segment_run_count = self.read_unsigned_int()
segments = []
for i in range(segment_run_count):
for _ in range(segment_run_count):
first_segment = self.read_unsigned_int()
fragments_per_segment = self.read_unsigned_int()
segments.append((first_segment, fragments_per_segment))
@@ -91,12 +91,12 @@ class FlvReader(io.BytesIO):
quality_entry_count = self.read_unsigned_char()
# QualitySegmentUrlModifiers
for i in range(quality_entry_count):
for _ in range(quality_entry_count):
self.read_string()
fragments_count = self.read_unsigned_int()
fragments = []
for i in range(fragments_count):
for _ in range(fragments_count):
first = self.read_unsigned_int()
first_ts = self.read_unsigned_long_long()
duration = self.read_unsigned_int()
@@ -135,11 +135,11 @@ class FlvReader(io.BytesIO):
self.read_string() # MovieIdentifier
server_count = self.read_unsigned_char()
# ServerEntryTable
for i in range(server_count):
for _ in range(server_count):
self.read_string()
quality_count = self.read_unsigned_char()
# QualityEntryTable
for i in range(quality_count):
for _ in range(quality_count):
self.read_string()
# DrmData
self.read_string()
@@ -148,14 +148,14 @@ class FlvReader(io.BytesIO):
segments_count = self.read_unsigned_char()
segments = []
for i in range(segments_count):
for _ in range(segments_count):
box_size, box_type, box_data = self.read_box_info()
assert box_type == b'asrt'
segment = FlvReader(box_data).read_asrt()
segments.append(segment)
fragments_run_count = self.read_unsigned_char()
fragments = []
for i in range(fragments_run_count):
for _ in range(fragments_run_count):
box_size, box_type, box_data = self.read_box_info()
assert box_type == b'afrt'
fragments.append(FlvReader(box_data).read_afrt())
@@ -309,7 +309,7 @@ class F4mFD(FragmentFD):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url
@@ -326,8 +326,8 @@ class F4mFD(FragmentFD):
formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1]
else:
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
rate, media = next(filter(
lambda f: int(f[0]) == requested_bitrate, formats))
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url

View File

@@ -199,7 +199,7 @@ class FragmentFD(FileDownloader):
'.ytdl file is corrupt' if is_corrupt else
'Inconsistent state of incomplete fragment download')
self.report_warning(
'%s. Restarting from the beginning ...' % message)
f'{message}. Restarting from the beginning ...')
ctx['fragment_index'] = resume_len = 0
if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt']
@@ -366,10 +366,10 @@ class FragmentFD(FileDownloader):
return decrypt_fragment
def download_and_append_fragments_multiple(self, *args, **kwargs):
'''
"""
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
all args must be either tuple or list
'''
"""
interrupt_trigger = [True]
max_progress = len(args)
if max_progress == 1:
@@ -424,7 +424,7 @@ class FragmentFD(FileDownloader):
finally:
tpe.shutdown(wait=True)
if not interrupt_trigger[0] and not is_live:
raise KeyboardInterrupt()
raise KeyboardInterrupt
# we expect the user wants to stop and DO WANT the preceding postprocessors to run;
# so returning a intermediate result here instead of KeyboardInterrupt on live
return result

View File

@@ -72,7 +72,7 @@ class HlsFD(FragmentFD):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url
@@ -160,10 +160,12 @@ class HlsFD(FragmentFD):
extra_state = ctx.setdefault('extra_state', {})
format_index = info_dict.get('format_index')
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
extra_segment_query = None
if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'):
extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url)
extra_key_query = None
if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'):
extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url)
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
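
The new extra_param_to_key_url handling above reuses the same recipe as segment URLs: parse the stored query string once with parse_qs, then merge it into each URL. A standalone sketch of that parse-and-append step using only the standard library (the URL and parameter are invented; yt-dlp itself does this via update_url_query):

import urllib.parse

def append_extra_query(url, extra_param):
    # extra_param is an 'a=b&c=d' style string, as passed on the command line
    extra_query = urllib.parse.parse_qs(extra_param)
    parsed = urllib.parse.urlparse(url)
    query = urllib.parse.parse_qs(parsed.query)
    query.update(extra_query)
    return parsed._replace(query=urllib.parse.urlencode(query, doseq=True)).geturl()

print(append_extra_query('https://cdn.example.com/key?kid=1', 'token=abc'))
# https://cdn.example.com/key?kid=1&token=abc
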
@@ -190,8 +192,8 @@ class HlsFD(FragmentFD):
if frag_index <= ctx['fragment_index']:
continue
frag_url = urljoin(man_url, line)
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
if extra_segment_query:
frag_url = update_url_query(frag_url, extra_segment_query)
fragments.append({
'frag_index': frag_index,
@@ -212,8 +214,8 @@ class HlsFD(FragmentFD):
frag_index += 1
map_info = parse_m3u8_attributes(line[11:])
frag_url = urljoin(man_url, map_info.get('URI'))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
if extra_segment_query:
frag_url = update_url_query(frag_url, extra_segment_query)
if map_info.get('BYTERANGE'):
splitted_byte_range = map_info.get('BYTERANGE').split('@')
@@ -228,7 +230,7 @@ class HlsFD(FragmentFD):
'url': frag_url,
'decrypt_info': decrypt_info,
'byte_range': byte_range,
'media_sequence': media_sequence
'media_sequence': media_sequence,
})
media_sequence += 1
@@ -244,8 +246,10 @@ class HlsFD(FragmentFD):
decrypt_info['KEY'] = external_aes_key
else:
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
if extra_key_query or extra_segment_query:
# Fall back to extra_segment_query to key for backwards compat
decrypt_info['URI'] = update_url_query(
decrypt_info['URI'], extra_key_query or extra_segment_query)
if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None
@@ -350,9 +354,8 @@ class HlsFD(FragmentFD):
# XXX: this should probably be silent as well
# or verify that all segments contain the same data
self.report_warning(bug_reports_message(
'Discarding a %s block found in the middle of the stream; '
'if the subtitles display incorrectly,'
% (type(block).__name__)))
f'Discarding a {type(block).__name__} block found in the middle of the stream; '
'if the subtitles display incorrectly,'))
continue
block.write_into(output)

View File

@@ -176,7 +176,7 @@ class HttpFD(FileDownloader):
'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len,
}, info_dict)
raise SucceedDownload()
raise SucceedDownload
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
@@ -194,7 +194,7 @@ class HttpFD(FileDownloader):
def close_stream():
if ctx.stream is not None:
if not ctx.tmpfilename == '-':
if ctx.tmpfilename != '-':
ctx.stream.close()
ctx.stream = None
@@ -268,20 +268,20 @@ class HttpFD(FileDownloader):
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
except OSError as err:
self.report_error('unable to open for writing: %s' % str(err))
self.report_error(f'unable to open for writing: {err}')
return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
self.report_error(f'unable to set filesize xattr: {err}')
try:
ctx.stream.write(data_block)
except OSError as err:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
self.report_error(f'unable to write data: {err}')
return False
# Apply rate limit
@@ -327,7 +327,7 @@ class HttpFD(FileDownloader):
elif now - ctx.throttle_start > 3:
if ctx.stream is not None and ctx.tmpfilename != '-':
ctx.stream.close()
raise ThrottledDownload()
raise ThrottledDownload
elif speed:
ctx.throttle_start = None
@@ -338,7 +338,7 @@ class HttpFD(FileDownloader):
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
raise NextFragment()
raise NextFragment
if ctx.tmpfilename != '-':
ctx.stream.close()

View File

@@ -251,7 +251,7 @@ class IsmFD(FragmentFD):
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
for i, segment in enumerate(segments):
for segment in segments:
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue

View File

@@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION
class MhtmlFD(FragmentFD):
_STYLESHEET = """\
_STYLESHEET = '''\
html, body {
margin: 0;
padding: 0;
@@ -45,7 +45,7 @@ body > figure > img {
max-width: 100%;
max-height: calc(100vh - 5em);
}
"""
'''
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
_STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
@@ -57,24 +57,19 @@ body > figure > img {
)).decode('us-ascii') + '?='
def _gen_cid(self, i, fragment, frag_boundary):
return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid'
def _gen_stub(self, *, fragments, frag_boundary, title):
output = io.StringIO()
output.write((
output.write(
'<!DOCTYPE html>'
'<html>'
'<head>'
'' '<meta name="generator" content="yt-dlp {version}">'
'' '<title>{title}</title>'
'' '<style>{styles}</style>'
'<body>'
).format(
version=escapeHTML(YT_DLP_VERSION),
styles=self._STYLESHEET,
title=escapeHTML(title)
))
f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">'
f'<title>{escapeHTML(title)}</title>'
f'<style>{self._STYLESHEET}</style>'
'<body>')
t0 = 0
for i, frag in enumerate(fragments):
@@ -87,15 +82,12 @@ body > figure > img {
num=i + 1,
t0=srt_subtitles_timecode(t0),
t1=srt_subtitles_timecode(t1),
duration=formatSeconds(frag['duration'], msec=True)
duration=formatSeconds(frag['duration'], msec=True),
))
except (KeyError, ValueError, TypeError):
t1 = None
output.write((
'<figcaption>Slide #{num}</figcaption>'
).format(num=i + 1))
output.write('<img src="cid:{cid}">'.format(
cid=self._gen_cid(i, frag, frag_boundary)))
output.write(f'<figcaption>Slide #{i + 1}</figcaption>')
output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">')
output.write('</figure>')
t0 = t1
@@ -126,31 +118,24 @@ body > figure > img {
stub = self._gen_stub(
fragments=fragments,
frag_boundary=frag_boundary,
title=title
title=title,
)
ctx['dest_stream'].write((
'MIME-Version: 1.0\r\n'
'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
'Subject: {title}\r\n'
f'Subject: {self._escape_mime(title)}\r\n'
'Content-type: multipart/related; '
'' 'boundary="{boundary}"; '
'' 'type="text/html"\r\n'
'X.yt-dlp.Origin: {origin}\r\n'
f'boundary="{frag_boundary}"; '
'type="text/html"\r\n'
f'X.yt-dlp.Origin: {origin}\r\n'
'\r\n'
'--{boundary}\r\n'
f'--{frag_boundary}\r\n'
'Content-Type: text/html; charset=utf-8\r\n'
'Content-Length: {length}\r\n'
f'Content-Length: {len(stub)}\r\n'
'\r\n'
'{stub}\r\n'
).format(
origin=origin,
boundary=frag_boundary,
length=len(stub),
title=self._escape_mime(title),
stub=stub
).encode())
f'{stub}\r\n').encode())
extra_state['header_written'] = True
for i, fragment in enumerate(fragments):

View File

@@ -15,7 +15,7 @@ class NiconicoDmcFD(FileDownloader):
def real_download(self, filename, info_dict):
from ..extractor.niconico import NiconicoIE
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Downloading from DMC')
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
@@ -34,7 +34,7 @@ class NiconicoDmcFD(FileDownloader):
try:
self.ydl.urlopen(request).read()
except Exception:
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Heartbeat failed')
with heartbeat_lock:
if not download_complete:
@@ -85,14 +85,14 @@ class NiconicoLiveFD(FileDownloader):
'quality': live_quality,
'protocol': 'hls+fmp4',
'latency': live_latency,
'chasePlay': False
'chasePlay': False,
},
'room': {
'protocol': 'webSocket',
'commentable': True
'commentable': True,
},
'reconnect': True,
}
},
}))
else:
ws = ws_extractor
@@ -118,7 +118,7 @@ class NiconicoLiveFD(FileDownloader):
elif self.ydl.params.get('verbose', False):
if len(recv) > 100:
recv = recv[:100] + '...'
self.to_screen('[debug] Server said: %s' % recv)
self.to_screen(f'[debug] Server said: {recv}')
def ws_main():
reconnect = False
@@ -128,7 +128,7 @@ class NiconicoLiveFD(FileDownloader):
if ret is True:
return
except BaseException as e:
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
time.sleep(10)
continue
finally:

View File

@@ -180,9 +180,9 @@ class RtmpFD(FileDownloader):
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
time.sleep(5.0) # This seems to be needed
args = basic_args + ['--resume']
args = [*basic_args, '--resume']
if retval == RD_FAILED:
args += ['--skip', '1']
args = [encodeArgument(a) for a in args]
@@ -197,7 +197,7 @@ class RtmpFD(FileDownloader):
break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,

View File

@@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD):
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
self.report_warning('Live chat download runs until the livestream ends. '
'If you wish to download the video simultaneously, run a separate yt-dlp instance')

View File

@@ -76,6 +76,7 @@ from .aenetworks import (
)
from .aeonco import AeonCoIE
from .afreecatv import (
AfreecaTVCatchStoryIE,
AfreecaTVIE,
AfreecaTVLiveIE,
AfreecaTVUserIE,
@@ -503,7 +504,6 @@ from .dhm import DHMIE
from .digitalconcerthall import DigitalConcertHallIE
from .digiteka import DigitekaIE
from .discogs import DiscogsReleasePlaylistIE
from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .dlf import (
@@ -531,16 +531,12 @@ from .dplay import (
DiscoveryPlusIndiaShowIE,
DiscoveryPlusItalyIE,
DiscoveryPlusItalyShowIE,
DIYNetworkIE,
DPlayIE,
FoodNetworkIE,
GlobalCyclingNetworkPlusIE,
GoDiscoveryIE,
HGTVDeIE,
HGTVUsaIE,
InvestigationDiscoveryIE,
MotorTrendIE,
MotorTrendOnDemandIE,
ScienceChannelIE,
TravelChannelIE,
)
@@ -779,6 +775,7 @@ from .gopro import GoProIE
from .goshgay import GoshgayIE
from .gotostage import GoToStageIE
from .gputechconf import GPUTechConfIE
from .graspop import GraspopIE
from .gronkh import (
GronkhFeedIE,
GronkhIE,
@@ -969,6 +966,10 @@ from .la7 import (
LA7PodcastEpisodeIE,
LA7PodcastIE,
)
from .laracasts import (
LaracastsIE,
LaracastsPlaylistIE,
)
from .lastfm import (
LastFMIE,
LastFMPlaylistIE,
@@ -1113,12 +1114,15 @@ from .meipai import MeipaiIE
from .melonvod import MelonVODIE
from .metacritic import MetacriticIE
from .mgtv import MGTVIE
from .microsoftembed import MicrosoftEmbedIE
from .microsoftstream import MicrosoftStreamIE
from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyCourseIE,
MicrosoftVirtualAcademyIE,
from .microsoftembed import (
MicrosoftBuildIE,
MicrosoftEmbedIE,
MicrosoftLearnEpisodeIE,
MicrosoftLearnPlaylistIE,
MicrosoftLearnSessionIE,
MicrosoftMediusIE,
)
from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
@@ -1603,6 +1607,7 @@ from .qqmusic import (
QQMusicPlaylistIE,
QQMusicSingerIE,
QQMusicToplistIE,
QQMusicVideoIE,
)
from .r7 import (
R7IE,
@@ -1755,7 +1760,10 @@ from .rtve import (
RTVETelevisionIE,
)
from .rtvs import RTVSIE
from .rtvslo import RTVSLOIE
from .rtvslo import (
RTVSLOIE,
RTVSLOShowIE,
)
from .rudovideo import RudoVideoIE
from .rule34video import Rule34VideoIE
from .rumble import (
@@ -1925,6 +1933,10 @@ from .spreaker import (
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .sproutvideo import (
SproutVideoIE,
VidsIoIE,
)
from .srgssr import (
SRGSSRIE,
SRGSSRPlayIE,
@@ -2311,6 +2323,7 @@ from .vidio import (
)
from .vidlii import VidLiiIE
from .vidly import VidlyIE
from .vidyard import VidyardIE
from .viewlift import (
ViewLiftEmbedIE,
ViewLiftIE,
@@ -2376,6 +2389,10 @@ from .vrt import (
VrtNUIE,
)
from .vtm import VTMIE
from .vtv import (
VTVIE,
VTVGoIE,
)
from .vuclip import VuClipIE
from .vvvvid import (
VVVVIDIE,

View File

@@ -4,7 +4,6 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
dict_get,
@@ -67,7 +66,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4',
'title': 'WWI Centenary',
'description': 'md5:c2379ec0ca84072e86b446e536954546',
}
},
}, {
'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
'info_dict': {
@@ -75,7 +74,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4',
'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
}
},
}, {
'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
'info_dict': {
@@ -86,7 +85,7 @@ class ABCIE(InfoExtractor):
'upload_date': '20200813',
'uploader': 'Behind the News',
'uploader_id': 'behindthenews',
}
},
}, {
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
'info_dict': {
@@ -95,7 +94,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4',
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
}
},
}]
def _real_extract(self, url):
@@ -126,7 +125,7 @@ class ABCIE(InfoExtractor):
if mobj is None:
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
if expired:
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True)
raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json(
@@ -164,7 +163,7 @@ class ABCIE(InfoExtractor):
'height': height,
'tbr': bitrate,
'filesize': int_or_none(url_info.get('filesize')),
'format_id': format_id
'format_id': format_id,
})
return {
@@ -288,13 +287,12 @@ class ABCIViewIE(InfoExtractor):
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
house_number = video_params.get('episodeHouseNumber') or video_id
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
int(time.time()), house_number)
path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
sig = hmac.new(
b'android.content.res.Resources',
path.encode('utf-8'), hashlib.sha256).hexdigest()
path.encode(), hashlib.sha256).hexdigest()
token = self._download_webpage(
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
f'http://iview.abc.net.au{path}&sig={sig}', video_id)
def tokenize_url(url, token):
return update_url_query(url, {
@@ -303,7 +301,7 @@ class ABCIViewIE(InfoExtractor):
for sd in ('1080', '720', 'sd', 'sd-low'):
sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str)
stream, lambda x: x['streams']['hls'][sd], str)
if not sd_url:
continue
formats = self._extract_m3u8_formats(
@@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'description': 'md5:93119346c24a7c322d446d8eece430ff',
'series': 'Upper Middle Bogan',
'season': 'Series 1',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
},
'playlist_count': 8,
}, {
@@ -386,7 +384,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
'series': '7.30 Mark Humphries Satire',
'season': 'Episodes',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
},
'playlist_count': 15,
}]
@@ -398,7 +396,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
webpage, 'initial state')
video_data = self._parse_json(
unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
video_data = video_data['route']['pageData']['_embedded']
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])

View File

@@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
display_id = mobj.group('display_id')
video_id = mobj.group('id')
info_dict = self._extract_feed_info(
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
f'http://abcnews.go.com/video/itemfeed?id={video_id}')
info_dict.update({
'id': video_id,
'display_id': display_id,

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
dict_get,
int_or_none,
@@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor):
data = self._download_json(
'https://api.abcotvs.com/v2/content', display_id, query={
'id': video_id,
'key': 'otv.web.%s.story' % station,
'key': f'otv.web.{station}.story',
'station': station,
})['data']
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
title = video.get('title') or video['linkText']
formats = []

View File

@@ -9,12 +9,12 @@ import re
import struct
import time
import urllib.parse
import urllib.request
import urllib.response
import uuid
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..networking import RequestHandler, Response
from ..networking.exceptions import TransportError
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -26,37 +26,36 @@ from ..utils import (
traverse_obj,
update_url_query,
)
from ..utils.networking import clean_proxies
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
"""Add a handler for opening URLs, like _download_webpage"""
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
rh = ydl._request_director.handlers['Urllib']
if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
return
headers = ydl.params['http_headers'].copy()
proxies = ydl.proxies.copy()
clean_proxies(proxies, headers)
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=proxies)
assert isinstance(opener, urllib.request.OpenerDirector)
opener.add_handler(handler)
rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
class AbemaLicenseRH(RequestHandler):
_SUPPORTED_URL_SCHEMES = ('abematv-license',)
_SUPPORTED_PROXY_SCHEMES = None
_SUPPORTED_FEATURES = None
RH_NAME = 'abematv_license'
_STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
_HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
class AbemaLicenseHandler(urllib.request.BaseHandler):
handler_order = 499
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
def __init__(self, ie: 'AbemaTVIE'):
# the protocol that this should really handle is 'abematv-license://'
# abematv_license_open is just a placeholder for development purposes
# ref. https://github.com/python/cpython/blob/f4c03484da59049eb62a9bf7777b963e2267d187/Lib/urllib/request.py#L510
setattr(self, 'abematv-license_open', getattr(self, 'abematv_license_open', None))
def __init__(self, *, ie: 'AbemaTVIE', **kwargs):
super().__init__(**kwargs)
self.ie = ie
def _send(self, request):
url = request.url
ticket = urllib.parse.urlparse(url).netloc
try:
response_data = self._get_videokey_from_ticket(ticket)
except ExtractorError as e:
raise TransportError(cause=e.cause) from e
except (IndexError, KeyError, TypeError) as e:
raise TransportError(cause=repr(e)) from e
return Response(
io.BytesIO(response_data), url,
headers={'Content-Length': str(len(response_data))})
def _get_videokey_from_ticket(self, ticket):
to_show = self.ie.get_param('verbose', False)
media_token = self.ie._get_media_token(to_show=to_show)
@@ -66,31 +65,23 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
query={'t': media_token},
data=json.dumps({
'kv': 'a',
'lt': ticket
}).encode('utf-8'),
'lt': ticket,
}).encode(),
headers={
'Content-Type': 'application/json',
})
res = decode_base_n(license_response['k'], table=self.STRTABLE)
res = decode_base_n(license_response['k'], table=self._STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
h = hmac.new(
binascii.unhexlify(self.HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
binascii.unhexlify(self._HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest())
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
def abematv_license_open(self, url):
url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': str(len(response_data)),
}, url=url, code=200)
class AbemaTVBaseIE(InfoExtractor):
_NETRC_MACHINE = 'abematv'
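
The renamed constants above feed the same key derivation as before: the license response's cid concatenated with the device id is HMAC-SHA256'd with the static hex key, and the digest decrypts the video key with AES-ECB. A compact sketch of that flow reusing yt-dlp's own AES helpers (all inputs are dummy values, not real license data):

import binascii
import hashlib
import hmac

from yt_dlp.aes import aes_ecb_decrypt
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes

HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'

def derive_videokey(encrypted_key, cid, device_id):
    # HMAC-SHA256 of (cid + device id), keyed with the unhexlified constant, yields the AES key
    mac = hmac.new(binascii.unhexlify(HKEY), (cid + device_id).encode(), digestmod=hashlib.sha256)
    return intlist_to_bytes(aes_ecb_decrypt(bytes_to_intlist(encrypted_key), bytes_to_intlist(mac.digest())))

print(derive_videokey(b'\x00' * 16, 'dummy-cid', 'dummy-device-id').hex())
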
@@ -103,11 +94,11 @@ class AbemaTVBaseIE(InfoExtractor):
@classmethod
def _generate_aks(cls, deviceid):
deviceid = deviceid.encode('utf-8')
deviceid = deviceid.encode()
# add 1 hour and then drop minute and secs
ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
time_struct = time.gmtime(ts_1hour)
ts_1hour_str = str(ts_1hour).encode('utf-8')
ts_1hour_str = str(ts_1hour).encode()
tmp = None
@@ -119,7 +110,7 @@ class AbemaTVBaseIE(InfoExtractor):
def mix_tmp(count):
nonlocal tmp
for i in range(count):
for _ in range(count):
mix_once(tmp)
def mix_twist(nonce):
@@ -139,7 +130,7 @@ class AbemaTVBaseIE(InfoExtractor):
if self._USERTOKEN:
return self._USERTOKEN
add_opener(self._downloader, AbemaLicenseHandler(self))
self._downloader._request_director.add_handler(AbemaLicenseRH(ie=self, logger=None))
username, _ = self._get_login_info()
auth_cache = username and self.cache.load(self._NETRC_MACHINE, username, min_ver='2024.01.19')
@@ -160,7 +151,7 @@ class AbemaTVBaseIE(InfoExtractor):
data=json.dumps({
'deviceId': self._DEVICE_ID,
'applicationKeySecret': aks,
}).encode('utf-8'),
}).encode(),
headers={
'Content-Type': 'application/json',
})
@@ -180,7 +171,7 @@ class AbemaTVBaseIE(InfoExtractor):
'osLang': 'ja_JP',
'osTimezone': 'Asia/Tokyo',
'appId': 'tv.abema',
'appVersion': '3.27.1'
'appVersion': '3.27.1',
}, headers={
'Authorization': f'bearer {self._get_device_token()}',
})['token']
@@ -202,8 +193,8 @@ class AbemaTVBaseIE(InfoExtractor):
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password
}).encode('utf-8'), headers={
'password': password,
}).encode(), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
@@ -344,7 +335,7 @@ class AbemaTVIE(AbemaTVBaseIE):
description = self._html_search_regex(
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'),
webpage, 'description', default=None, group=1)
if not description:
og_desc = self._html_search_meta(
@@ -368,6 +359,7 @@ class AbemaTVIE(AbemaTVBaseIE):
info['episode_number'] = epis if epis < 2000 else None
is_live, m3u8_url = False, None
availability = 'public'
if video_type == 'now-on-air':
is_live = True
channel_url = 'https://api.abema.io/v1/channels'
@@ -389,6 +381,7 @@ class AbemaTVIE(AbemaTVBaseIE):
if 3 not in ondemand_types:
# cannot acquire decryption key for these streams
self.report_warning('This is a premium-only stream')
availability = 'premium_only'
info.update(traverse_obj(api_response, {
'series': ('series', 'title'),
'season': ('season', 'name'),
@@ -408,6 +401,7 @@ class AbemaTVIE(AbemaTVBaseIE):
headers=headers)
if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False):
self.report_warning('This is a premium-only stream')
availability = 'premium_only'
m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8'
else:
@@ -425,6 +419,7 @@ class AbemaTVIE(AbemaTVBaseIE):
'description': description,
'formats': formats,
'is_live': is_live,
'availability': availability,
})
return info

View File

@@ -67,7 +67,7 @@ class ACastIE(ACastBaseIE):
'display_id': '2.raggarmordet-rosterurdetforflutna',
'season_number': 4,
'season': 'Season 4',
}
},
}, {
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
'only_matching': True,
@@ -93,13 +93,13 @@ class ACastIE(ACastBaseIE):
'series': 'Democracy Sausage with Mark Kenny',
'timestamp': 1684826362,
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
}
},
}]
def _real_extract(self, url):
channel, display_id = self._match_valid_url(url).groups()
episode = self._call_api(
'%s/episodes/%s' % (channel, display_id),
f'{channel}/episodes/{display_id}',
display_id, {'showInfo': 'true'})
return self._extract_episode(
episode, self._extract_show_info(episode.get('show') or {}))
@@ -130,7 +130,7 @@ class ACastChannelIE(ACastBaseIE):
@classmethod
def suitable(cls, url):
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
return False if ACastIE.suitable(url) else super().suitable(url)
def _real_extract(self, url):
show_slug = self._match_id(url)

View File

@@ -25,7 +25,7 @@ class AcFunVideoBaseIE(InfoExtractor):
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
'tbr': float_or_none(video.get('avgBitrate')),
**parse_codecs(video.get('codecs', ''))
**parse_codecs(video.get('codecs', '')),
})
return {
@@ -77,7 +77,7 @@ class AcFunVideoIE(AcFunVideoBaseIE):
'comment_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
}
},
}]
def _real_extract(self, url):

View File

@@ -7,7 +7,6 @@ import time
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
@@ -17,6 +16,7 @@ from ..utils import (
float_or_none,
int_or_none,
intlist_to_bytes,
join_nonempty,
long_to_bytes,
parse_iso8601,
pkcs1pad,
@@ -49,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
class ADNIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': {
'id': '9841',
@@ -71,10 +71,10 @@ class ADNIE(ADNBaseIE):
},
'skip': 'Only available in French and German speaking Europe',
}, {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'only_matching': True,
}, {
'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
'info_dict': {
'id': '23550',
@@ -111,9 +111,9 @@ class ADNIE(ADNBaseIE):
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
compat_b64decode(enc_subtitles[24:]),
base64.b64decode(enc_subtitles[24:]),
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
compat_b64decode(enc_subtitles[:24])))
base64.b64decode(enc_subtitles[:24])))
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
if not subtitles_json:
return None
@@ -136,7 +136,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
if start is None or end is None or text is None:
continue
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format(
ass_subtitles_timecode(start),
ass_subtitles_timecode(end),
'{\\a%d}' % alignment if alignment != 2 else '',
@@ -178,7 +178,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
def _real_extract(self, url):
lang, video_id = self._match_valid_url(url).group('lang', 'id')
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
player = self._download_json(
video_base_url + 'configuration', video_id,
'Downloading player config JSON metadata',
@@ -218,13 +218,13 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
links_data = self._download_json(
links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization,
'X-Target-Distribution': lang,
**self._HEADERS
'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'freeWithAds': 'true',
'adaptive': 'false',
'withMetadata': 'true',
'source': 'Web'
'source': 'Web',
})
break
except ExtractorError as e:
@@ -256,7 +256,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
for quality, load_balancer_url in qualities.items():
load_balancer_data = self._download_json(
load_balancer_url, video_id,
'Downloading %s %s JSON metadata' % (format_id, quality),
f'Downloading {format_id} {quality} JSON metadata',
fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
@@ -276,7 +276,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
self.raise_login_required('This video requires a subscription', method='password')
video = (self._download_json(
self._API_BASE_URL + 'video/%s' % video_id, video_id,
self._API_BASE_URL + f'video/{video_id}', video_id,
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
show = video.get('show') or {}
@@ -299,9 +299,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
class ADNSeasonIE(ADNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])'
_TESTS = [{
'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new',
'playlist_count': 12,
'info_dict': {
'id': '911',
@@ -319,8 +319,8 @@ class ADNSeasonIE(ADNBaseIE):
episodes = self._download_json(
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
'Downloading episode list', headers={
'X-Target-Distribution': lang,
**self._HEADERS
'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'order': 'asc',
'limit': '-1',
@@ -328,8 +328,8 @@ class ADNSeasonIE(ADNBaseIE):
def entries():
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
yield self.url_result(
f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
ADNIE, episode_id)
yield self.url_result(join_nonempty(
'https://animationdigitalnetwork.com', lang, 'video',
video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
return self.playlist_result(entries(), show_id, show.get('title'))
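
join_nonempty above drops empty parts before joining, which is how the optional language segment vanishes for French URLs while /de/ is kept for German ones. A tiny illustration, assuming join_nonempty is importable from yt_dlp.utils (the slug and episode id are invented):

from yt_dlp.utils import join_nonempty

base = 'https://animationdigitalnetwork.com'
print(join_nonempty(base, None, 'video', 'show-slug', '12345', delim='/'))
# https://animationdigitalnetwork.com/video/show-slug/12345
print(join_nonempty(base, 'de', 'video', 'show-slug', '12345', delim='/'))
# https://animationdigitalnetwork.com/de/video/show-slug/12345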

View File

@@ -1,8 +1,6 @@
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
class AdobeConnectIE(InfoExtractor):
@@ -12,13 +10,13 @@ class AdobeConnectIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_extract_title(webpage)
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
is_live = qs.get('isLive', ['false'])[0] == 'true'
formats = []
for con_string in qs['conStrings'][0].split(','):
formats.append({
'format_id': con_string.split('://')[0],
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
'app': urllib.parse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
'ext': 'flv',
'play_path': 'mp4:' + qs['streamName'][0],
'rtmp_conn': 'S:' + qs['ticket'][0],
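
The compat shims above are dropped in favour of urllib.parse directly; the parsing itself is unchanged. A quick look at what parse_qs returns for a query string like the one split off the swf URL (the URL below is invented), which is why the extractor indexes every value with [0]:

import urllib.parse

swf_url = 'https://example.adobeconnect.com/player.swf?isLive=true&appInstance=7&streamName=stream1'
qs = urllib.parse.parse_qs(swf_url.split('?', 1)[1])
print(qs)  # every value is a list, e.g. {'isLive': ['true'], ...}
print(qs.get('isLive', ['false'])[0] == 'true')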

File diff suppressed because it is too large

View File

@@ -2,7 +2,6 @@ import functools
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ISO639Utils,
OnDemandPagedList,
@@ -36,7 +35,7 @@ class AdobeTVBaseIE(InfoExtractor):
return subtitles
def _parse_video_data(self, video_data):
video_id = compat_str(video_data['id'])
video_id = str(video_data['id'])
title = video_data['title']
s3_extracted = False
@@ -151,7 +150,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
page += 1
query['page'] = page
for element_data in self._call_api(
self._RESOURCE, display_id, query, 'Download Page %d' % page):
self._RESOURCE, display_id, query, f'Download Page {page}'):
yield self._process_data(element_data)
def _extract_playlist_entries(self, display_id, query):

View File

@@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE):
getShowBySlug(slug:"%s") {
%%s
}
}''' % show_path
}''' % show_path # noqa: UP031
if episode_path:
query = query % '''title
getVideoBySlug(slug:"%s") {
@@ -128,7 +128,7 @@ class AdultSwimIE(TurnerBaseIE):
episode_title = title = video_data['title']
series = show_data.get('title')
if series:
title = '%s - %s' % (series, title)
title = f'{series} - {title}'
info = {
'id': video_id,
'title': title,
@@ -191,7 +191,7 @@ class AdultSwimIE(TurnerBaseIE):
if not slug:
continue
entries.append(self.url_result(
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
f'http://adultswim.com/videos/{show_path}/{slug}',
'AdultSwim', video.get('_id')))
return self.playlist_result(
entries, show_path, show_data.get('title'),

View File

@@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
requestor_id, brand = self._DOMAIN_MAP[domain]
result = self._download_json(
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
filter_value, query={'filter[%s]' % filter_key: filter_value})
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
filter_value, query={f'filter[{filter_key}]': filter_value})
result = traverse_obj(
result, ('results',
lambda k, v: k == 0 and v[filter_key] == filter_value),
@@ -142,7 +142,7 @@ class AENetworksIE(AENetworksBaseIE):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'skip': 'Geo-restricted - This content is not available in your location.'
'skip': 'Geo-restricted - This content is not available in your location.',
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'info_dict': {
@@ -171,28 +171,28 @@ class AENetworksIE(AENetworksBaseIE):
'skip': 'This video is only available for users of participating TV providers.',
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://www.history.com/videos/history-of-valentines-day',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
'only_matching': True
'only_matching': True,
}]
def _real_extract(self, url):
@@ -209,14 +209,14 @@ class AENetworksListBaseIE(AENetworksBaseIE):
%s(slug: "%s") {
%s
}
}''' % (resource, slug, fields),
}''' % (resource, slug, fields), # noqa: UP031
}))['data'][resource]
def _real_extract(self, url):
domain, slug = self._match_valid_url(url).groups()
_, brand = self._DOMAIN_MAP[domain]
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
base_url = 'http://watch.%s' % domain
base_url = f'http://watch.{domain}'
entries = []
for item in (playlist.get(self._ITEMS_KEY) or []):
@@ -248,10 +248,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE):
'playlist_mincount': 12,
}, {
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://www.historyvault.com/collections/mysteryquest',
'only_matching': True
'only_matching': True,
}]
_RESOURCE = 'list'
_ITEMS_KEY = 'items'
@@ -309,7 +309,7 @@ class HistoryTopicIE(AENetworksBaseIE):
'info_dict': {
'id': '40700995724',
'ext': 'mp4',
'title': "History of Valentines Day",
'title': 'History of Valentines Day',
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
'timestamp': 1375819729,
'upload_date': '20130806',
@@ -364,6 +364,6 @@ class BiographyIE(AENetworksBaseIE):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
player_url = self._search_regex(
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
webpage, 'player URL')
return self.url_result(player_url, HistoryPlayerIE.ie_key())

View File

@@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor):
'uploader': 'Semiconductor',
'uploader_id': 'semiconductor',
'uploader_url': 'https://vimeo.com/semiconductor',
'duration': 348
}
'duration': 348,
},
}, {
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
'md5': '03582d795382e49f2fd0b427b55de409',
@@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor):
'uploader': 'Aeon Video',
'uploader_id': 'aeonvideo',
'uploader_url': 'https://vimeo.com/aeonvideo',
'duration': 1344
}
'duration': 1344,
},
}, {
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',

View File

@@ -1,6 +1,7 @@
import functools
from .common import InfoExtractor
from ..networking import Request
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -55,9 +56,16 @@ class AfreecaTVBaseIE(InfoExtractor):
if result != 1:
error = _ERRORS.get(result, 'You have failed to log in.')
raise ExtractorError(
'Unable to login: %s said: %s' % (self.IE_NAME, error),
f'Unable to login: {self.IE_NAME} said: {error}',
expected=True)
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
return self._download_json(Request(
f'https://api.m.afreecatv.com/{endpoint}',
data=data, headers=headers, query=query,
extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON')
class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv'
@@ -72,7 +80,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/(PLAYER/STATION|player)/
)
(?P<id>\d+)
(?P<id>\d+)/?(?:$|[?#&])
'''
_TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
@@ -184,9 +192,9 @@ class AfreecaTVIE(AfreecaTVBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'https://api.m.afreecatv.com/station/video/a/view', video_id,
headers={'Referer': url}, data=urlencode_postdata({
data = self._call_api(
'station/video/a/view', video_id, headers={'Referer': url},
data=urlencode_postdata({
'nTitleNo': video_id,
'nApiLevel': 10,
}))['data']
@@ -227,7 +235,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
**traverse_obj(file_element, {
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
'timestamp': ('file_start', {unified_timestamp}),
})
}),
})
if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
@@ -253,6 +261,43 @@ class AfreecaTVIE(AfreecaTVBaseIE):
return self.playlist_result(entries, video_id, multi_video=True, **common_info)
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:catchstory'
IE_DESC = 'afreecatv.com catch story'
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
_TESTS = [{
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
'info_dict': {
'id': '103247',
},
'playlist_count': 2,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._call_api(
'catchstory/a/view', video_id, headers={'Referer': url},
query={'aStoryListIdx': '', 'nStoryIdx': video_id})
return self.playlist_result(self._entries(data), video_id)
@staticmethod
def _entries(data):
# 'files' is always a list with 1 element
yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch',
'catch_list', lambda _, v: v['files'][0]['file'], {
'id': ('files', 0, 'file_info_key', {str}),
'url': ('files', 0, 'file', {url_or_none}),
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
'title': ('title', {str}),
'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}),
'timestamp': ('write_timestamp', {int_or_none}),
}))
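A minimal sketch, not part of the patch, of what the functools.partial(int_or_none, scale=1000) steps in the traverse_obj paths above do, assuming yt-dlp's int_or_none divides its input by the scale argument; the sample values are made up:

    from functools import partial
    from yt_dlp.utils import int_or_none

    ms_to_s = partial(int_or_none, scale=1000)
    ms_to_s('83000')  # -> 83; the AfreecaTV API reports durations in milliseconds
    ms_to_s(None)     # -> None; missing fields stay None instead of raising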
class AfreecaTVLiveIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:live'
IE_DESC = 'afreecatv.com livestreams'


@@ -168,7 +168,7 @@ class TokFMPodcastIE(InfoExtractor):
for ext in ('aac', 'mp3'):
url_data = self._download_json(
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
media_id, 'Downloading podcast %s URL' % ext)
media_id, f'Downloading podcast {ext} URL')
# prevents inserting the mp3 (default) multiple times
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
formats.append({
@@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor):
}
@staticmethod
def _create_url(id):
return f'https://audycje.tokfm.pl/audycja/{id}'
def _create_url(video_id):
return f'https://audycje.tokfm.pl/audycja/{video_id}'
def _real_extract(self, url):
audition_id = self._match_id(url)
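Illustrative aside, not part of the patch: the _create_url(id) to _create_url(video_id) rename above is one of several in this commit (see also AlJazeeraIE, AmazonStoreIE, AtScaleConfEventIE and AudiodraftCustomIE below) that stop local names from shadowing Python built-ins; behaviour is unchanged:

    def _create_url(video_id):  # was: def _create_url(id), which shadowed the built-in id()
        return f'https://audycje.tokfm.pl/audycja/{video_id}'

    _create_url(12345)  # -> 'https://audycje.tokfm.pl/audycja/12345'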


@@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor):
'view_count': int,
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
'timestamp': 1664792603,
}
},
}, {
# with youtube_id
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
@@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor):
'channel': 'Newsflare',
'duration': 37,
'upload_date': '20180511',
}
},
}]
def _get_formats_and_subtitle(self, json_data, video_id):


@@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor):
'timestamp': 1667370519,
'title': 'Ангел хранитель 1 серия',
'channel_follower_count': int,
}
},
}, {
# embed url
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',


@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
try_get,
@@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor):
'title': title,
'thumbnail': data.get('coverUrl'),
'uploader': try_get(
data, lambda x: x['followBar']['name'], compat_str),
data, lambda x: x['followBar']['name'], str),
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
'formats': formats,
}


@@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor):
'timestamp': 1636219149,
'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
'upload_date': '20211106',
}
},
}, {
'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
'info_dict': {
@@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor):
BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
def _real_extract(self, url):
base, post_type, id = self._match_valid_url(url).groups()
base, post_type, display_id = self._match_valid_url(url).groups()
wp = {
'balkans.aljazeera.net': 'ajb',
'chinese.aljazeera.net': 'chinese',
@@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor):
'news': 'news',
}[post_type.split('/')[0]]
video = self._download_json(
f'https://{base}/graphql', id, query={
f'https://{base}/graphql', display_id, query={
'wp-site': wp,
'operationName': 'ArchipelagoSingleArticleQuery',
'variables': json.dumps({
'name': id,
'name': display_id,
'postType': post_type,
}),
}, headers={
@@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor):
embed = 'default'
if video_id is None:
webpage = self._download_webpage(url, id)
webpage = self._download_webpage(url, display_id)
account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
group=(1, 2, 3, 4), default=(None, None, None, None))
@@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor):
return {
'_type': 'url_transparent',
'url': url,
'ie_key': 'Generic'
'ie_key': 'Generic',
}
return {
'_type': 'url_transparent',
'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
'ie_key': 'BrightcoveNew'
'ie_key': 'BrightcoveNew',
}


@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
qualities,
@@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor):
duration = int_or_none(video.get('duration'))
view_count = int_or_none(video.get('view_count'))
timestamp = unified_timestamp(try_get(
video, lambda x: x['added_at']['date'], compat_str))
video, lambda x: x['added_at']['date'], str))
else:
video_id = display_id
media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id)
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
for key, value in media_data['video'].items():
if not key.endswith('Path'):


@@ -33,27 +33,27 @@ _QUERIES = {
video: getClip(clipIdentifier: $id) {
%s %s
}
}''' % (_FIELDS, _EXTRA_FIELDS),
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
'montage': '''query ($id: String!) {
video: getMontage(clipIdentifier: $id) {
%s
}
}''' % _FIELDS,
}''' % _FIELDS, # noqa: UP031
'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS),
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
'Montages': '''query ($page: Int!, $user: String!) {
videos: montages(search: createdDate, page: $page, user: $user) {
data { %s }
}
}''' % _FIELDS,
}''' % _FIELDS, # noqa: UP031
'Mobile Clips': '''query ($page: Int!, $user: String!) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS),
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
}
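Illustrative note, not part of the patch: the "noqa: UP031" markers silence ruff's printf-formatting rule for these templates; they presumably stay on %-interpolation because the GraphQL bodies are full of literal braces that str.format() or f-strings would require doubling. A hedged sketch with a made-up field list:

    _FIELDS = 'id title'  # hypothetical
    query = '''query ($id: String!) {
        video: getClip(clipIdentifier: $id) { %s }
    }''' % _FIELDS  # braces pass through untouched; an f-string would need {{ ... }}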
@@ -121,7 +121,7 @@ class AllstarIE(AllstarBaseIE):
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230425',
'view_count': int,
}
},
}, {
'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
'info_dict': {
@@ -139,7 +139,7 @@ class AllstarIE(AllstarBaseIE):
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230702',
'view_count': int,
}
},
}, {
'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
'info_dict': {
@@ -155,7 +155,7 @@ class AllstarIE(AllstarBaseIE):
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230418',
'view_count': int,
}
},
}, {
'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
'info_dict': {
@@ -171,7 +171,7 @@ class AllstarIE(AllstarBaseIE):
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230703',
'view_count': int,
}
},
}]
def _real_extract(self, url):
@@ -191,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE):
'id': '62b8bdfc9021052f7905882d-clips',
'title': 'cherokee - Clips',
},
'playlist_mincount': 15
'playlist_mincount': 15,
}, {
'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-clips-730',
'title': 'cherokee - Clips - 730',
},
'playlist_mincount': 15
'playlist_mincount': 15,
}, {
'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-montages',
'title': 'cherokee - Montages',
},
'playlist_mincount': 4
'playlist_mincount': 4,
}, {
'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-mobile',
'title': 'cherokee - Mobile Clips',
},
'playlist_mincount': 1
'playlist_mincount': 1,
}]
_PAGE_SIZE = 10


@@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor):
'tbr': 1145,
'categories': list,
'age_limit': 18,
}
},
}
def _real_extract(self, url):


@@ -12,7 +12,7 @@ from ..utils import (
class Alsace20TVBaseIE(InfoExtractor):
def _extract_video(self, video_id, url=None):
info = self._download_json(
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
video_id) or {}
title = info.get('titre')
@@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor):
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
return {
'id': video_id,
'title': title,
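A small worked example, not from the patch, of the Alsace20TV upload_date rewrite above: the regex captures a six-digit YYMMDD token from the thumbnail path and the f-string expands it into a date string for unified_strdate; the capture value is hypothetical:

    upload_date = '220314'  # hypothetical capture from a thumbnail URL
    f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}'  # -> '2022-03-14'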


@@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor):
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
'view_count': int,
'categories': ['News & Politics'],
}
},
}]
def _real_extract(self, url):


@@ -1,7 +1,7 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
clean_html,
@@ -21,7 +21,7 @@ class AluraIE(InfoExtractor):
'info_dict': {
'id': '60095',
'ext': 'mp4',
'title': 'Referências, ref-set e alter'
'title': 'Referências, ref-set e alter',
},
'skip': 'Requires alura account credentials'},
{
@@ -30,7 +30,7 @@ class AluraIE(InfoExtractor):
'only_matching': True},
{
'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
'only_matching': True}
'only_matching': True},
]
def _real_extract(self, url):
@@ -62,7 +62,7 @@ class AluraIE(InfoExtractor):
return {
'id': video_id,
'title': video_title,
"formats": formats
'formats': formats,
}
def _perform_login(self, username, password):
@@ -91,7 +91,7 @@ class AluraIE(InfoExtractor):
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in',
@@ -103,7 +103,7 @@ class AluraIE(InfoExtractor):
r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError(f'Unable to login: {error}', expected=True)
raise ExtractorError('Unable to log in')
@@ -119,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
@classmethod
def suitable(cls, url):
return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url)
return False if AluraIE.suitable(url) else super().suitable(url)
def _real_extract(self, url):
@@ -157,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
'url': video_url,
'id_key': self.ie_key(),
'chapter': chapter,
'chapter_number': chapter_number
'chapter_number': chapter_number,
}
entries.append(entry)
return self.playlist_result(entries, course_path, course_title)


@@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor):
'display_id': '65091a87ff85af59d9fc54c3',
'view_count': int,
'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
}
},
}]
def _real_extract(self, url):


@@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor):
'uploader': 'PBS NewsHour',
'uploader_id': 'PBSNewsHour',
'timestamp': 1549639570,
}
},
}, {
# Vimeo
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
@@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor):
'timestamp': 1294763658,
'upload_date': '20110111',
'uploader': 'Sam Morrill',
'uploader_id': 'sammorrill'
}
'uploader_id': 'sammorrill',
},
}, {
# Direct Link
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
@@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor):
'subtitles': dict,
'upload_date': '20091007',
'timestamp': 1254942511,
}
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
meta = self._download_json(
'https://amara.org/api/videos/%s/' % video_id,
f'https://amara.org/api/videos/{video_id}/',
video_id, query={'format': 'json'})
title = meta['title']
video_url = meta['all_urls'][0]


@@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor):
}]
def _real_extract(self, url):
id = self._match_id(url)
playlist_id = self._match_id(url)
for retry in self.RetryManager():
webpage = self._download_webpage(url, id)
webpage = self._download_webpage(url, playlist_id)
try:
data_json = self._search_json(
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
transform_source=js_to_json)
except ExtractorError as e:
retry.error = e
@@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor):
'height': int_or_none(video.get('videoHeight')),
'width': int_or_none(video.get('videoWidth')),
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
class AmazonReviewsIE(InfoExtractor):


@@ -25,7 +25,7 @@ class AmazonMiniTVBaseIE(InfoExtractor):
asin, note=note, headers={
'Content-Type': 'application/json',
'currentpageurl': '/',
'currentplatform': 'dWeb'
'currentplatform': 'dWeb',
}, data=json.dumps(data).encode() if data else None,
query=None if data else {
'deviceType': 'A1WMMUXPCUJL4N',


@@ -64,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
site, display_id = self._match_valid_url(url).groups()
requestor_id = self._REQUESTOR_ID_MAP[site]
page_data = self._download_json(
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
% (requestor_id.lower(), display_id), display_id)['data']
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
display_id)['data']
properties = page_data.get('properties') or {}
query = {
'mbr': 'true',
@@ -76,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
try:
for v in page_data['children']:
if v.get('type') == 'video-player':
releasePid = v['properties']['currentVideo']['meta']['releasePid']
tp_path = 'M_UwQC/' + releasePid
release_pid = v['properties']['currentVideo']['meta']['releasePid']
tp_path = 'M_UwQC/' + release_pid
media_url = 'https://link.theplatform.com/s/' + tp_path
video_player_count += 1
except KeyError:
pass
if video_player_count > 1:
self.report_warning(
'The JSON data has %d video players. Only one will be extracted' % video_player_count)
f'The JSON data has {video_player_count} video players. Only one will be extracted')
# Fall back to videoPid if releasePid not found.
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
@@ -131,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
})
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
if ns_keys:
ns = list(ns_keys)[0]
ns = next(iter(ns_keys))
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
episode_number = int_or_none(
theplatform_metadata.get(ns + '$episode'))
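A brief sketch, not part of the patch, of the next(iter(...)) idiom introduced above (and again in the ATVAt change later in this diff): it fetches the first key of a mapping without building a throwaway list; the namespace mapping is a made-up example:

    ns_keys = {'pl1$': 'http://example.com/ns'}.keys()  # hypothetical $xmlns mapping
    ns = next(iter(ns_keys))  # -> 'pl1$'; same result as list(ns_keys)[0], without the copy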


@@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor):
resource_type = 'episodes'
resource = self._download_json(
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id)
video = resource['video'] if is_episode else resource
episode = resource if is_episode else resource.get('episode') or {}
return {
'_type': 'url_transparent',
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']),
'ie_key': 'Zype',
'description': clean_html(video.get('description')),
'timestamp': unified_timestamp(video.get('publishDate')),
@@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
]
if season_number:
playlist_id = 'season_%d' % season_number
playlist_title = 'Season %d' % season_number
playlist_id = f'season_{season_number}'
playlist_title = f'Season {season_number}'
facet_filters.append('search_season_list:' + playlist_title)
else:
playlist_id = show
playlist_title = title
season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production',
playlist_id, headers={
'Origin': 'https://www.americastestkitchen.com',
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={
'facetFilters': json.dumps(facet_filters),
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
'attributesToHighlight': '',
'hitsPerPage': 1000,
})
@@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
'description': episode.get('description'),
'timestamp': unified_timestamp(episode.get('search_document_date')),
'season_number': season_number,
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')),
'ie_key': AmericasTestKitchenIE.ie_key(),
}


@@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
item = feed.get('channel', {}).get('item')
if not item:
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error']))
video_id = item['guid']
def get_media_node(name, default=None):
media_name = 'media-%s' % name
media_name = f'media-{name}'
media_group = item.get('media-group') or item
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)


@@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'release_date': '20230121',
'release_timestamp': 1674285179,
'episode_id': 'e1tpt3d',
}
},
}, {
# embed url
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
@@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'season': 'Season 2',
'season_number': 2,
'episode_id': 'e1shjqd',
}
},
}]
_WEBPAGE_TESTS = [{
@@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
'uploader': 'Podcast Tempo',
'channel': 'apakatatempo',
}
},
}]
def _real_extract(self, url):


@@ -15,8 +15,8 @@ class AngelIE(InfoExtractor):
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
'duration': 1359.0
}
'duration': 1359.0,
},
}, {
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
@@ -26,8 +26,8 @@ class AngelIE(InfoExtractor):
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
'duration': 3276.0
}
'duration': 3276.0,
},
}]
def _real_extract(self, url):
@@ -44,7 +44,7 @@ class AngelIE(InfoExtractor):
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'formats': formats,
'subtitles': subtitles
'subtitles': subtitles,
}
# Angel uses cloudinary in the background and supports image transformations.


@@ -105,7 +105,7 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True)
raise ExtractorError(f'no videos found for {video_id}', expected=True)
return self.playlist_from_matches(
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})


@@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor):
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
}
def _generate_nfl_token(self, anvack, mcp_id):
@@ -255,7 +255,7 @@ class AnvatoIE(InfoExtractor):
token
}
}
}''' % (anvack, mcp_id),
}''' % (anvack, mcp_id), # noqa: UP031
}).encode(), headers={
'Authorization': auth_token,
'Content-Type': 'application/json',
@@ -299,7 +299,7 @@ class AnvatoIE(InfoExtractor):
return self._download_json(
video_data_url, video_id, transform_source=strip_jsonp, query=query,
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
data=json.dumps({'api': api}, separators=(',', ':')).encode())
def _get_anvato_videos(self, access_key, video_id, token):
video_data = self._get_video_json(access_key, video_id, token)
@@ -358,7 +358,7 @@ class AnvatoIE(InfoExtractor):
for caption in video_data.get('captions', []):
a_caption = {
'url': caption['url'],
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
}
subtitles.setdefault(caption['language'], []).append(a_caption)
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)


@@ -30,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
'params': {
# m3u8 download
'skip_download': True,
}
},
}, {
# video with vidible ID
'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
@@ -46,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
'params': {
# m3u8 download
'skip_download': True,
}
},
}, {
'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
'only_matching': True,
@@ -83,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
return self._extract_yahoo_video(video_id, 'us')
response = self._download_json(
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details',
video_id)['response']
if response['statusText'] != 'Ok':
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True)
video_data = response['data']
formats = []


@@ -34,7 +34,7 @@ class APAIE(InfoExtractor):
video_id, base_url = mobj.group('id', 'base_url')
webpage = self._download_webpage(
'%s/player/%s' % (base_url, video_id), video_id)
f'{base_url}/player/{video_id}', video_id)
jwplatform_id = self._search_regex(
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@@ -47,7 +47,7 @@ class APAIE(InfoExtractor):
def extract(field, name=None):
return self._search_regex(
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage, name or field, default=None, group='value')
title = extract('title') or video_id


@@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor):
'duration': 6454,
'series': 'The Tim Dillon Show',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
}
},
}, {
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'only_matching': True,


@@ -1,8 +1,8 @@
import json
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
parse_duration,
@@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor):
'uploader_id': 'wb',
},
},
]
],
}, {
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
'info_dict': {
@@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor):
webpage = self._download_webpage(url, movie)
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
film_data = self._download_json(
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
film_id, fatal=False)
if film_data:
@@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor):
if not src:
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'format_id': f'{version}-{size}',
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
@@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor):
page_data = film_data.get('page', {})
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
@@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor):
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;'))
s = re.sub(self._JSON_RE, _clean_json, s)
s = '<html>%s</html>' % s
return s
return f'<html>{s}</html>'
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
playlist = []
@@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor):
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
formats = []
for format in settings['metadata']['sizes']:
for fmt in settings['metadata']['sizes']:
# The src is a file pointing to the real video file
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
formats.append({
'url': format_url,
'format': format['type'],
'width': int_or_none(format['width']),
'height': int_or_none(format['height']),
'format': fmt['type'],
'width': int_or_none(fmt['width']),
'height': int_or_none(fmt['height']),
})
playlist.append({
@@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor):
'title': 'Movie Studios',
},
}
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
_TESTS = [{
'url': 'http://trailers.apple.com/#section=justadded',
'info_dict': {
@@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor):
def _real_extract(self, url):
section = self._match_id(url)
section_data = self._download_json(
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']),
section)
entries = [
self.url_result('http://trailers.apple.com' + e['location'])


@@ -1,10 +1,11 @@
from __future__ import annotations
import json
import re
import urllib.parse
from .common import InfoExtractor
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
@@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'Bells Of Rostov',
'ext': 'mp3',
},
'skip': 'restricted'
'skip': 'restricted',
}, {
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
@@ -158,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor):
'description': 'md5:012b2d668ae753be36896f343d12a236',
'upload_date': '20190928',
},
'skip': 'restricted'
'skip': 'restricted',
}, {
# Original formats are private
'url': 'https://archive.org/details/irelandthemakingofarepublic',
@@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor):
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
'display_id': 'irelandthemakingofarepublicreel2.mov',
},
}
]
},
],
}]
@staticmethod
@@ -220,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor):
def _real_extract(self, url):
video_id = urllib.parse.unquote_plus(self._match_id(url))
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
identifier, _, entry_id = video_id.partition('/')
# Archive.org metadata API doesn't clearly demarcate playlist entries
# or subtitle tracks, so we get them from the embeddable player.
@@ -246,7 +247,7 @@ class ArchiveOrgIE(InfoExtractor):
if track['kind'] != 'subtitles':
continue
entries[p['orig']][track['label']] = {
'url': 'https://archive.org/' + track['file'].lstrip('/')
'url': 'https://archive.org/' + track['file'].lstrip('/'),
}
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
@@ -293,7 +294,9 @@ class ArchiveOrgIE(InfoExtractor):
'height': int_or_none(f.get('width')),
'filesize': int_or_none(f.get('size'))})
extension = (f['name'].rsplit('.', 1) + [None])[1]
_, has_ext, extension = f['name'].rpartition('.')
if not has_ext:
extension = None
# We don't want to skip private formats if the user has access to them,
# however without access to an account with such privileges we can't implement/test this.
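A quick sketch, not part of the patch, of why the rpartition rewrite above matches the old rsplit-plus-padding trick (and mirrors the identifier/entry_id partition change earlier in this file): str.rpartition always returns a 3-tuple, so the middle element doubles as a found/not-found flag:

    'movie.ia.mp4'.rpartition('.')  # -> ('movie.ia', '.', 'mp4')
    'README'.rpartition('.')        # -> ('', '', 'README'); has_ext is '' so extension is set to None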
@@ -308,7 +311,7 @@ class ArchiveOrgIE(InfoExtractor):
'filesize': int_or_none(f.get('size')),
'protocol': 'https',
'source_preference': 0 if f.get('source') == 'original' else -1,
'format_note': f.get('source')
'format_note': f.get('source'),
})
for entry in entries.values():
@@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/Zeurel',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
}
},
}, {
# Internal link
'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
@@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/1veritasium',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
}
},
}, {
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
# Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
@@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_id': 'machinima',
'uploader_url': 'https://www.youtube.com/user/machinima',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'machinima'
}
'uploader': 'machinima',
},
}, {
# FLV video. Video file URL does not provide itag information
'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
@@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'jawed',
}
},
}, {
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
'info_dict': {
@@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/itsmadeon',
'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
'thumbnail': r're:https?://.*\.(jpg|webp)',
}
},
}, {
# First capture is of dead video, second is the oldest from CDX response.
'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
@@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'ETC News',
}
},
}, {
# First capture of dead video, capture date in link links to dead capture.
'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
@@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader': 'ETC News',
},
'expected_warnings': [
r'unable to download capture webpage \(it may not be archived\)'
]
r'unable to download capture webpage \(it may not be archived\)',
],
}, { # Very old YouTube page, has - YouTube in title.
'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
'info_dict': {
'id': '-06-KB9XTzg',
'ext': 'flv',
'title': 'New Coin Hack!! 100% Safe!!'
}
'title': 'New Coin Hack!! 100% Safe!!',
},
}, {
'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
'info_dict': {
@@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'DankPods',
}
},
}, {
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
@@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_id': 'PewDiePie',
'uploader_url': 'https://www.youtube.com/user/PewDiePie',
'thumbnail': r're:https?://.*\.(jpg|webp)',
}
},
}, {
# ~June 2010 Capture. swfconfig
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
@@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
'upload_date': '20090520',
}
},
}, {
# Jan 2011: watch-video-date/eow-date surrounded by whitespace
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
@@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)',
'duration': 132,
'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
}
},
}, {
# ~May 2009 swfArgs. ytcfg is spread out over various vars
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
@@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'duration': 754,
}
},
}, {
# ~June 2012. Upload date is in another lang so cannot extract.
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
@@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader': 'BlackNerdComedy',
'duration': 182,
'thumbnail': r're:https?://.*\.(jpg|webp)',
}
},
}, {
# ~July 2013
'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
@@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
'upload_date': '20060428',
'uploader': 'punkybird',
}
},
}, {
# April 2020: Player response in player config
'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
@@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)',
'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
'uploader_url': 'https://www.youtube.com/user/GameGrumps',
}
},
}, {
# watch7-user-header with yt-user-info
'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
@@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)',
'upload_date': '20150503',
'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
}
},
}, {
# April 2012
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
@@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor):
'duration': 200,
'upload_date': '20120407',
'uploader_id': 'thecomputernerd01',
}
},
}, {
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
'only_matching': True
'only_matching': True,
}, {
# Video not archived, only capture is unavailable video page
'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
'only_matching': True
'only_matching': True,
}, { # Encoded url
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&amp;search=soccer',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
'only_matching': True
'only_matching': True,
}, {
'url': 'ytarchive:BaW_jenozKc:20050214000000',
'only_matching': True
'only_matching': True,
}, {
'url': 'ytarchive:BaW_jenozKc',
'only_matching': True
'only_matching': True,
},
]
_YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
@@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
_YT_ALL_THUMB_SERVERS = orderedSet(
_YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
[*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]])
_WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
_OLDEST_CAPTURE_DATE = 20050214000000
_NEWEST_CAPTURE_DATE = 20500101000000
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
query = {
'url': url,
@@ -688,14 +691,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
'limit': 500,
'filter': ['statuscode:200'] + (filters or []),
'collapse': collapse or [],
**(query or {})
**(query or {}),
}
res = self._download_json(
'https://web.archive.org/cdx/search/cdx', item_id,
note or 'Downloading CDX API JSON', query=query, fatal=fatal)
if isinstance(res, list) and len(res) >= 2:
# format response to make it easier to use
return list(dict(zip(res[0], v)) for v in res[1:])
return [dict(zip(res[0], v)) for v in res[1:]]
elif not isinstance(res, list) or len(res) != 0:
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
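An illustrative sketch, not part of the patch, of the list-comprehension rewrite above: the CDX API answers with a header row followed by capture rows, and zipping each row against the header produces the per-capture dicts the extractor consumes; the sample rows are hypothetical:

    res = [
        ['urlkey', 'timestamp', 'original'],  # header row
        ['com,youtube)/watch?v=abc', '20120712231619', 'http://www.youtube.com/watch?v=abc'],
    ]
    captures = [dict(zip(res[0], v)) for v in res[1:]]
    # -> [{'urlkey': 'com,youtube)/watch?v=abc', 'timestamp': '20120712231619', 'original': '...'}]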
@@ -852,7 +855,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
{
'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
'filesize': int_or_none(thumbnail_dict.get('length')),
'preference': int_or_none(thumbnail_dict.get('length'))
'preference': int_or_none(thumbnail_dict.get('length')),
} for thumbnail_dict in response)
if not try_all:
break
@@ -893,7 +896,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
for retry in retry_manager:
try:
urlh = self._request_webpage(
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
video_id, note='Fetching archived video file url', expected_status=True)
except ExtractorError as e:
# HTTP Error 404 is expected if the video is not saved.
@@ -924,21 +927,21 @@ class YoutubeWebArchiveIE(InfoExtractor):
info['thumbnails'] = self._extract_thumbnails(video_id)
if urlh:
url = compat_urllib_parse_unquote(urlh.url)
url = urllib.parse.unquote(urlh.url)
video_file_url_qs = parse_qs(url)
# Attempt to recover any ext & format info from playback url & response headers
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
if itag and itag in YoutubeIE._formats:
format.update(YoutubeIE._formats[itag])
format.update({'format_id': itag})
fmt.update(YoutubeIE._formats[itag])
fmt.update({'format_id': itag})
else:
mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
ext = (mimetype2ext(mime)
or urlhandle_detect_ext(urlh)
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
format.update({'ext': ext})
info['formats'] = [format]
fmt.update({'ext': ext})
info['formats'] = [fmt]
if not info.get('duration'):
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))


@@ -4,6 +4,7 @@ from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
join_nonempty,
parse_iso8601,
try_get,
)
@@ -11,7 +12,7 @@ from ..utils import (
class ArcPublishingIE(InfoExtractor):
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
_VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
_TESTS = [{
# https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
@@ -74,12 +75,12 @@ class ArcPublishingIE(InfoExtractor):
def _extract_embed_urls(cls, url, webpage):
entries = []
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage):
powa = extract_attributes(powa_el) or {}
org = powa.get('data-org')
uuid = powa.get('data-uuid')
if org and uuid:
entries.append('arcpublishing:%s:%s' % (org, uuid))
entries.append(f'arcpublishing:{org}:{uuid}')
return entries
def _real_extract(self, url):
@@ -122,7 +123,7 @@ class ArcPublishingIE(InfoExtractor):
elif stream_type in ('ts', 'hls'):
m3u8_formats = self._extract_m3u8_formats(
s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
if all(f.get('acodec') == 'none' for f in m3u8_formats):
continue
for f in m3u8_formats:
height = f.get('height')
@@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
else:
vbr = int_or_none(s.get('bitrate'))
formats.append({
'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
'format_id': join_nonempty(stream_type, vbr),
'vbr': vbr,
'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')),
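A hedged sketch, not part of the patch, of the join_nonempty swap above, assuming yt-dlp's join_nonempty drops falsy parts and joins the rest with '-', which reproduces the old conditional %-format:

    from yt_dlp.utils import join_nonempty

    join_nonempty('hls', 2176)  # -> 'hls-2176'
    join_nonempty('hls', None)  # -> 'hls'; matches the old "if vbr else stream_type" branch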


@@ -85,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
update_url_query(stream_url, {
'hdcore': '3.1.1',
'plugin': 'aasp-3.1.1.69.124'
'plugin': 'aasp-3.1.1.69.124',
}), video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
@@ -96,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor):
f = {
'url': server,
'play_path': stream_url,
'format_id': 'a%s-rtmp-%s' % (num, quality),
'format_id': f'a{num}-rtmp-{quality}',
}
else:
f = {
'url': stream_url,
'format_id': 'a%s-%s-%s' % (num, ext, quality)
'format_id': f'a{num}-{ext}-{quality}',
}
m = re.search(
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',


@@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor):
raise ExtractorError('Invalid URL', expected=True)
media = self._download_json(
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}',
video_id, query={
# https://video.qbrick.com/docs/api/examples/library-api.html
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
@@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
href, video_id, f4m_id='hds', fatal=False))
elif mime_type == 'application/dash+xml':
formats.extend(self._extract_f4m_formats(
href, video_id, f4m_id='hds', fatal=False))
formats.extend(self._extract_mpd_formats(
href, video_id, mpd_id='dash', fatal=False))
elif mime_type == 'application/vnd.ms-sstr+xml':
formats.extend(self._extract_ism_formats(
href, video_id, ism_id='mss', fatal=False))


@@ -1,8 +1,6 @@
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
float_or_none,
format_field,
@@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor):
'view_count': int,
'tags': ['linearna_algebra'],
'start_time': 10,
}
},
}, {
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
'only_matching': True,
@@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor):
'duration': float_or_none(video.get('duration'), 1000),
'view_count': int_or_none(video.get('views')),
'tags': video.get('hashtags'),
'start_time': int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
'start_time': int_or_none(urllib.parse.parse_qs(
urllib.parse.urlparse(url).query).get('t', [None])[0]),
}


@@ -153,7 +153,7 @@ class Art19IE(InfoExtractor):
'series_id': ('series_id', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'release_timestamp': ('released_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601})
'modified_timestamp': ('updated_at', {parse_iso8601}),
})),
**traverse_obj(rss_metadata, ('content', {
'title': ('episode_title', {str}),


@@ -20,15 +20,15 @@ class ArteTVBaseIE(InfoExtractor):
class ArteTVIE(ArteTVBaseIE):
_VALID_URL = r'''(?x)
_VALID_URL = rf'''(?x)
(?:https?://
(?:
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES})
)
|arte://program)
/(?P<id>\d{6}-\d{3}-[AF]|LIVE)
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
/(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)
'''
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'only_matching': True,
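Illustrative note, not part of the patch: when _VALID_URL above moves from %-interpolation to an rf-string, the regex quantifier braces must be doubled so they reach the compiled pattern as literal braces; the language list here is a stand-in value:

    langs = 'fr|de|en|es|it|pl'  # hypothetical stand-in for ArteTVBaseIE._ARTE_LANGUAGES
    rf'https?://(?:www\.)?arte\.tv/(?P<lang>{langs})/videos/(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)'
    # the {{6}} and {{3}} render as {6} and {3} in the resulting pattern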
@@ -145,7 +145,7 @@ class ArteTVIE(ArteTVBaseIE):
language_code = self._LANG_MAP.get(lang)
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
'x-validated-age': '18'
'x-validated-age': '18',
})
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
@@ -247,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor):
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
'upload_date': '20201116',
},
'skip': 'No video available'
'skip': 'No video available',
}, {
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
@@ -262,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor):
class ArteTVPlaylistIE(ArteTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})'
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
'only_matching': True,
@@ -298,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
class ArteTVCategoryIE(ArteTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$'
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/politics-and-society/',
'info_dict': {
@@ -312,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
@classmethod
def suitable(cls, url):
return (
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE))
and super().suitable(url))
def _real_extract(self, url):
@@ -321,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE):
items = []
for video in re.finditer(
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)',
webpage):
video = video.group('url')
if video == url:
continue
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)):
items.append(video)
title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None


@@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor):
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
'duration': 3413,
},
'skip': 'This video is only available for registered users'
'skip': 'This video is only available for registered users',
},
{
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
@@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
]
_API_BASE = 'https://api.atresplayer.com/'
def _handle_error(self, e, code):
if isinstance(e.cause, HTTPError) and e.cause.status == code:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
def _perform_login(self, username, password):
self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page')
@@ -49,13 +41,15 @@ class AtresPlayerIE(InfoExtractor):
target_url = self._download_json(
'https://account.atresmedia.com/api/login', None,
'Logging in', headers={
'Content-Type': 'application/x-www-form-urlencoded'
'Content-Type': 'application/x-www-form-urlencoded',
}, data=urlencode_postdata({
'username': username,
'password': password,
}))['targetUrl']
except ExtractorError as e:
self._handle_error(e, 400)
if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Invalid username and/or password', expected=True)
raise
self._request_webpage(target_url, None, 'Following Target URL')
@@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor):
episode = self._download_json(
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
except ExtractorError as e:
self._handle_error(e, 403)
if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
title = episode['titulo']


@@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor):
'info_dict': {
'id': 'data-scale-spring-2022',
'title': 'Data @Scale Spring 2022',
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
},
}, {
'url': 'https://atscaleconference.com/events/video-scale-2021/',
@@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor):
'info_dict': {
'id': 'video-scale-2021',
'title': 'Video @Scale 2021',
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
},
}]
def _real_extract(self, url):
id = self._match_id(url)
webpage = self._download_webpage(url, id)
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
return self.playlist_from_matches(
re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
ie='Generic', playlist_id=id,
ie='Generic', playlist_id=playlist_id,
title=self._og_search_title(webpage), description=self._og_search_description(webpage))


@@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor):
'id': 'v-ce9cgn1e70n5-1',
'ext': 'mp4',
'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
}
},
}, {
'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
'only_matching': True,
@@ -66,10 +66,10 @@ class ATVAtIE(InfoExtractor):
video_id=video_id)
video_title = json_data['views']['default']['page']['title']
contentResource = json_data['views']['default']['page']['contentResource']
content_id = contentResource[0]['id']
content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
for id, content in enumerate(contentResource)]
content_resource = json_data['views']['default']['page']['contentResource']
content_id = content_resource[0]['id']
content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
for id_, content in enumerate(content_resource)]
time_of_request = dt.datetime.now()
not_before = time_of_request - dt.timedelta(minutes=5)
@@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor):
videos = self._download_json(
'https://vas-v4.p7s1video.net/4.0/getsources',
content_id, 'Downloading videos JSON', query={
'token': jwt_token.decode('utf-8')
'token': jwt_token.decode('utf-8'),
})
video_id, videos_data = list(videos['data'].items())[0]
video_id, videos_data = next(iter(videos['data'].items()))
error_msg = try_get(videos_data, lambda x: x['error']['title'])
if error_msg == 'Geo check failed':
self.raise_geo_restricted(error_msg)
elif error_msg:
raise ExtractorError(error_msg)
entries = [
self._extract_video_info(url, contentResource[video['id']], video)
self._extract_video_info(url, content_resource[video['id']], video)
for video in videos_data]
return {


@@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor):
'timestamp': 1448354940,
'duration': 74022,
'view_count': int,
}
},
}, {
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
'only_matching': True,
@@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor):
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
if bitrate:
f.update({
'format_id': 'http-%s' % bitrate,
'format_id': f'http-{bitrate}',
})
formats.append(f)


@@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor):
'duration': 4000.99,
'uploader': 'Sue Perkins: An hour or so with...',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
}
},
}, { # Direct mp3-file link
'url': 'https://audioboom.com/posts/8128496.mp3',
'md5': 'e329edf304d450def95c7f86a9165ee1',
@@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor):
'duration': 1689.7,
'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
}
},
}, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
'only_matching': True,


@@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor):
headers={
'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With': 'XMLHttpRequest',
}, data=f'id={player_entry_id}'.encode('utf-8'))
}, data=f'id={player_entry_id}'.encode())
return {
'id': str(data_json['entry_id']),
@@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE):
}]
def _real_extract(self, url):
id = self._match_id(url)
webpage = self._download_webpage(url, id)
player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_entry_id = self._search_regex(
r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id')
return self._audiodraft_extract_from_id(player_entry_id)
@@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE):
}]
def _real_extract(self, url):
id = self._match_id(url)
return self._audiodraft_extract_from_id(f'player_entry_{id}')
video_id = self._match_id(url)
return self._audiodraft_extract_from_id(f'player_entry_{video_id}')


@@ -3,7 +3,6 @@ import time
from .common import InfoExtractor
from .soundcloud import SoundcloudIE
from ..compat import compat_str
from ..utils import (
ExtractorError,
url_basename,
@@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor):
'id': '310086',
'ext': 'mp3',
'uploader': 'Roosh Williams',
'title': 'Extraordinary'
}
'title': 'Extraordinary',
},
},
# audiomack wrapper around soundcloud song
# Needs new test URL.
@@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor):
# API is inconsistent with errors
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
raise ExtractorError('Invalid url %s' % url)
raise ExtractorError(f'Invalid url {url}')
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor
@@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor):
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
return {
'id': compat_str(api_response.get('id', album_url_tag)),
'id': str(api_response.get('id', album_url_tag)),
'uploader': api_response.get('artist'),
'title': api_response.get('title'),
'url': api_response['url'],
@@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor):
'info_dict':
{
'id': '812251',
'title': 'Tha Tour: Part 2 (Official Mixtape)'
}
'title': 'Tha Tour: Part 2 (Official Mixtape)',
},
},
# Album playlist ripped from fakeshoredrive with no metadata
{
@@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor):
'id': '837576',
'ext': 'mp3',
'uploader': 'Lil Herb a.k.a. G Herbo',
}
},
}, {
'info_dict': {
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
'id': '837580',
'ext': 'mp3',
'uploader': 'Lil Herb a.k.a. G Herbo',
}
},
}],
}
},
]
def _real_extract(self, url):
@@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor):
api_response = self._download_json(
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
% (album_url_tag, track_no, time.time()), album_url_tag,
note='Querying song information (%d)' % (track_no + 1))
note=f'Querying song information ({track_no + 1})')
# Total failure, only occurs when url is totally wrong
# Won't happen in middle of valid playlist (next case)
if 'url' not in api_response or 'error' in api_response:
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
raise ExtractorError(f'Invalid url for track {track_no} of album url {url}')
# URL is good but song id doesn't exist - usually means end of playlist
elif not api_response['url']:
break
@@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor):
# Pull out the album metadata and add to result (if it exists)
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
if apikey in api_response and resultkey not in result:
result[resultkey] = compat_str(api_response[apikey])
result[resultkey] = str(api_response[apikey])
song_id = url_basename(api_response['url']).rpartition('.')[0]
result['entries'].append({
'id': compat_str(api_response.get('id', song_id)),
'id': str(api_response.get('id', song_id)),
'uploader': api_response.get('artist'),
'title': api_response.get('title', song_id),
'url': api_response['url'],


@@ -1,7 +1,7 @@
import random
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import ExtractorError, str_or_none, try_get
@@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor):
if response_data is not None:
return response_data
if len(response) == 1 and 'message' in response:
raise ExtractorError('API error: %s' % response['message'],
raise ExtractorError('API error: {}'.format(response['message']),
expected=True)
raise ExtractorError('Unexpected API response')
def _select_api_base(self):
"""Selecting one of the currently available API hosts"""
response = super(AudiusBaseIE, self)._download_json(
response = super()._download_json(
'https://api.audius.co/', None,
note='Requesting available API hosts',
errnote='Unable to request available API hosts')
@@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor):
anything from this link, since the Audius API won't be able to resolve
this url
"""
url = compat_urllib_parse_unquote(url)
title = compat_urllib_parse_unquote(title)
url = urllib.parse.unquote(url)
title = urllib.parse.unquote(title)
if '/' in title or '%2F' in title:
fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
return url.replace(title, fixed_title)
@@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor):
if self._API_BASE is None:
self._select_api_base()
try:
response = super(AudiusBaseIE, self)._download_json(
'%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
response = super()._download_json(
f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
errnote=errnote, expected_status=expected_status)
except ExtractorError as exc:
# some of Audius API hosts may not work as expected and return HTML
if 'Failed to parse JSON' in compat_str(exc):
if 'Failed to parse JSON' in str(exc):
raise ExtractorError('An error occurred while receiving data. Try again',
expected=True)
raise exc
return self._get_response_data(response)
def _resolve_url(self, url, item_id):
return self._api_request('/resolve?url=%s' % url, item_id,
return self._api_request(f'/resolve?url={url}', item_id,
expected_status=404)
@@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE):
'view_count': int,
'like_count': int,
'repost_count': int,
}
},
},
{
# Regular track
@@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE):
'view_count': int,
'like_count': int,
'repost_count': int,
}
},
},
]
_ARTWORK_MAP = {
"150x150": 150,
"480x480": 480,
"1000x1000": 1000
'150x150': 150,
'480x480': 480,
'1000x1000': 1000,
}
def _real_extract(self, url):
@@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE):
else: # API link
title = None
# uploader = None
track_data = self._api_request('/tracks/%s' % track_id, track_id)
track_data = self._api_request(f'/tracks/{track_id}', track_id)
if not isinstance(track_data, dict):
raise ExtractorError('Unexpected API response')
@@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE):
if isinstance(artworks_data, dict):
for quality_key, thumbnail_url in artworks_data.items():
thumbnail = {
"url": thumbnail_url
'url': thumbnail_url,
}
quality_code = self._ARTWORK_MAP.get(quality_key)
if quality_code is not None:
@@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE):
return {
'id': track_id,
'title': track_data.get('title', title),
'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream',
'ext': 'mp3',
'description': track_data.get('description'),
'duration': track_data.get('duration'),
'track': track_data.get('title'),
'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
'artist': try_get(track_data, lambda x: x['user']['name'], str),
'genre': track_data.get('genre'),
'thumbnails': thumbnails,
'view_count': track_data.get('play_count'),
@@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
_TESTS = [
{
'url': 'audius:9RWlo',
'only_matching': True
'only_matching': True,
},
{
'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
'only_matching': True
'only_matching': True,
},
]
@@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
if not track_id:
raise ExtractorError('Unable to get track ID from playlist')
entries.append(self.url_result(
'audius:%s' % track_id,
f'audius:{track_id}',
ie=AudiusTrackIE.ie_key(), video_id=track_id))
return entries
@@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
raise ExtractorError('Unable to get playlist ID')
playlist_tracks = self._api_request(
'/playlists/%s/tracks' % playlist_id,
f'/playlists/{playlist_id}/tracks',
title, note='Downloading playlist tracks metadata',
errnote='Unable to download playlist tracks metadata')
if not isinstance(playlist_tracks, list):
@@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I
profile_audius_id = _profile_data[0]['id']
profile_bio = _profile_data[0].get('bio')
api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)


@@ -1,10 +1,7 @@
import base64
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_str,
compat_urllib_parse_urlencode,
)
from ..utils import (
format_field,
int_or_none,
@@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor):
show_id, video_id, season_id = self._match_valid_url(url).groups()
if video_id and int(video_id) > 0:
return self.url_result(
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
f'http://awaan.ae/media/{video_id}', 'AWAANVideo')
elif season_id and int(season_id) > 0:
return self.url_result(smuggle_url(
'http://awaan.ae/program/season/%s' % season_id,
f'http://awaan.ae/program/season/{season_id}',
{'show_id': show_id}), 'AWAANSeason')
else:
return self.url_result(
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
f'http://awaan.ae/program/{show_id}', 'AWAANSeason')
class AWAANBaseIE(InfoExtractor):
@@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE):
video_id = self._match_id(url)
video_data = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}',
video_id, headers={'Origin': 'http://awaan.ae'})
info = self._parse_video_data(video_data, video_id, False)
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({
'id': video_data['id'],
'user_id': video_data['user_id'],
'signature': video_data['signature'],
@@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE):
channel_id = self._match_id(url)
channel_data = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}',
channel_id, headers={'Origin': 'http://awaan.ae'})
info = self._parse_video_data(channel_data, channel_id, True)
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
'signature': channel_data['signature'],
@@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor):
show_id = smuggled_data.get('show_id')
if show_id is None:
season = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}',
season_id, headers={'Origin': 'http://awaan.ae'})
show_id = season['id']
data['show_id'] = show_id
@@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor):
'http://admin.mangomolo.com/analytics/index.php/plus/show',
show_id, data=urlencode_postdata(data), headers={
'Origin': 'http://awaan.ae',
'Content-Type': 'application/x-www-form-urlencoded'
'Content-Type': 'application/x-www-form-urlencoded',
})
if not season_id:
season_id = show['default_season']
@@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor):
entries = []
for video in show['videos']:
video_id = compat_str(video['id'])
video_id = str(video['id'])
entries.append(self.url_result(
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id))
return self.playlist_result(entries, season_id, title)


@@ -1,9 +1,9 @@
import datetime as dt
import hashlib
import hmac
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
@@ -18,20 +18,20 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
'Accept': 'application/json',
'Host': self._AWS_PROXY_HOST,
'X-Amz-Date': amz_date,
'X-Api-Key': self._AWS_API_KEY
'X-Api-Key': self._AWS_API_KEY,
}
session_token = aws_dict.get('session_token')
if session_token:
headers['X-Amz-Security-Token'] = session_token
def aws_hash(s):
return hashlib.sha256(s.encode('utf-8')).hexdigest()
return hashlib.sha256(s.encode()).hexdigest()
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
canonical_querystring = compat_urllib_parse_urlencode(query)
canonical_querystring = urllib.parse.urlencode(query)
canonical_headers = ''
for header_name, header_value in sorted(headers.items()):
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
canonical_headers += f'{header_name.lower()}:{header_value}\n'
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
canonical_request = '\n'.join([
'GET',
@@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
canonical_querystring,
canonical_headers,
signed_headers,
aws_hash('')
aws_hash(''),
])
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
@@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
def aws_hmac(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
return hmac.new(key, msg.encode(), hashlib.sha256)
def aws_hmac_digest(key, msg):
return aws_hmac(key, msg).digest()
@@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
def aws_hmac_hexdigest(key, msg):
return aws_hmac(key, msg).hexdigest()
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
k_signing = ('AWS4' + aws_dict['secret_key']).encode()
for value in credential_scope_list:
k_signing = aws_hmac_digest(k_signing, value)
@@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
headers['Authorization'] = ', '.join([
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
'SignedHeaders=%s' % signed_headers,
'Signature=%s' % signature,
'{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
f'SignedHeaders={signed_headers}',
f'Signature={signature}',
])
return self._download_json(
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
video_id, headers=headers)
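
For readers unfamiliar with the signing flow this hunk reworks: an AWS Signature Version 4 request derives its signing key by chaining HMAC-SHA256 over the credential-scope parts and then signs the canonical string-to-sign with that key. A minimal standalone sketch of that chain follows; the helper name and argument layout are illustrative only and not part of this commit.

import hashlib
import hmac

def sigv4_signature(secret_key, date_stamp, region, service, string_to_sign):
    # Derive the signing key: chain HMAC-SHA256 over each credential-scope part,
    # starting from 'AWS4' + secret key (mirrors k_signing in the hunk above)
    key = ('AWS4' + secret_key).encode()
    for part in (date_stamp, region, service, 'aws4_request'):
        key = hmac.new(key, part.encode(), hashlib.sha256).digest()
    # The final signature is the hex HMAC of the string-to-sign under the derived key
    return hmac.new(key, string_to_sign.encode(), hashlib.sha256).hexdigest()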


@@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor):
'timestamp': 1538328802,
'view_count': int,
'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031',
'duration': 1930
'duration': 1930,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True
'only_matching': True,
}]
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
_PARTNER_ID = '1719221'
@@ -62,5 +62,5 @@ class AZMedienIE(InfoExtractor):
})['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
return self.url_result(
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
f'kaltura:{self._PARTNER_ID}:{entry_id}',
ie=KalturaIE.ie_key(), video_id=entry_id)


@@ -24,8 +24,9 @@ class BaiduVideoIE(InfoExtractor):
}]
def _call_api(self, path, category, playlist_id, note):
return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
path, category, playlist_id), playlist_id, note)
return self._download_json(
f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}',
playlist_id, note)
def _real_extract(self, url):
category, playlist_id = self._match_valid_url(url).groups()
@@ -44,7 +45,7 @@ class BaiduVideoIE(InfoExtractor):
'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
entries = [self.url_result(
episode['url'], video_title=episode['title']
episode['url'], video_title=episode['title'],
) for episode in episodes_detail['videos']]
return self.playlist_result(


@@ -1,15 +1,16 @@
import math
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
InAdvancePagedList,
determine_ext,
format_field,
int_or_none,
join_nonempty,
traverse_obj,
unified_timestamp,
url_or_none,
)
@@ -20,8 +21,8 @@ class BanByeBaseIE(InfoExtractor):
@staticmethod
def _extract_playlist_id(url, param='playlist'):
return compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get(param, [None])[0]
return urllib.parse.parse_qs(
urllib.parse.urlparse(url).query).get(param, [None])[0]
def _extract_playlist(self, playlist_id):
data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
@@ -33,6 +34,7 @@ class BanByeBaseIE(InfoExtractor):
class BanByeIE(BanByeBaseIE):
_VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
_TESTS = [{
# ['src']['mp4']['levels'] direct mp4 urls only
'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
'info_dict': {
@@ -61,6 +63,7 @@ class BanByeIE(BanByeBaseIE):
},
'playlist_mincount': 9,
}, {
# ['src']['mp4']['levels'] direct mp4 urls only
'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
'info_dict': {
'id': 'v_kb6_o1Kyq-CD',
@@ -80,6 +83,48 @@ class BanByeIE(BanByeBaseIE):
'view_count': int,
'comment_count': int,
},
}, {
# ['src']['hls']['levels'] variant m3u8 urls only; master m3u8 is 404
'url': 'https://banbye.com/watch/v_a_gPFuC9LoW5',
'info_dict': {
'id': 'v_a_gPFuC9LoW5',
'ext': 'mp4',
'title': 'md5:183524056bebdfa245fd6d214f63c0fe',
'description': 'md5:943ac87287ca98d28d8b8797719827c6',
'uploader': 'wRealu24',
'channel_id': 'ch_wrealu24',
'channel_url': 'https://banbye.com/channel/ch_wrealu24',
'upload_date': '20231113',
'timestamp': 1699874062,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'thumbnail': 'https://cdn.banbye.com/video/v_a_gPFuC9LoW5/96.webp',
'tags': ['jaszczur', 'sejm', 'lewica', 'polska', 'ukrainizacja', 'pierwszeposiedzeniesejmu'],
},
'expected_warnings': ['Failed to download m3u8'],
}, {
# ['src']['hls']['masterPlaylist'] m3u8 only
'url': 'https://banbye.com/watch/v_B0rsKWsr-aaa',
'info_dict': {
'id': 'v_B0rsKWsr-aaa',
'ext': 'mp4',
'title': 'md5:00b254164b82101b3f9e5326037447ed',
'description': 'md5:3fd8b48aa81954ba024bc60f5de6e167',
'uploader': 'PSTV Piotr Szlachtowicz ',
'channel_id': 'ch_KV9EVObkB9wB',
'channel_url': 'https://banbye.com/channel/ch_KV9EVObkB9wB',
'upload_date': '20240629',
'timestamp': 1719646816,
'duration': 2377,
'view_count': int,
'like_count': int,
'dislike_count': int,
'comment_count': int,
'thumbnail': 'https://cdn.banbye.com/video/v_B0rsKWsr-aaa/96.webp',
'tags': ['Biden', 'Trump', 'Wybory', 'USA'],
},
}]
def _real_extract(self, url):
@@ -94,11 +139,24 @@ class BanByeIE(BanByeBaseIE):
'id': f'{quality}p',
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
} for quality in [48, 96, 144, 240, 512, 1080]]
formats = [{
'format_id': f'http-{quality}p',
'quality': quality,
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
} for quality in data['quality']]
formats = []
url_data = self._download_json(f'{self._API_BASE}/videos/{video_id}/url', video_id, data=b'')
if master_url := traverse_obj(url_data, ('src', 'hls', 'masterPlaylist', {url_or_none})):
formats = self._extract_m3u8_formats(master_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
for format_id, format_url in traverse_obj(url_data, (
'src', ('mp4', 'hls'), 'levels', {dict.items}, lambda _, v: url_or_none(v[1]))):
ext = determine_ext(format_url)
is_hls = ext == 'm3u8'
formats.append({
'url': format_url,
'ext': 'mp4' if is_hls else ext,
'format_id': join_nonempty(is_hls and 'hls', format_id),
'protocol': 'm3u8_native' if is_hls else 'https',
'height': int_or_none(format_id),
})
self._remove_duplicate_formats(formats)
return {
'id': video_id,


@@ -3,7 +3,6 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
@@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor):
'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
},
'_skip': 'There is a limit of 200 free downloads / month for the test song'
'skip': 'There is a limit of 200 free downloads / month for the test song',
}, {
# free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
@@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor):
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
return self._parse_json(self._html_search_regex(
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
rf'data-{attr}=(["\'])({{.+?}})\1', webpage,
attr + ' data', group=2), video_id, fatal=fatal)
def _real_extract(self, url):
@@ -167,7 +166,7 @@ class BandcampIE(InfoExtractor):
download_link = tralbum.get('freeDownloadPage')
if download_link:
track_id = compat_str(tralbum['id'])
track_id = str(tralbum['id'])
download_webpage = self._download_webpage(
download_link, track_id, 'Downloading free downloads page')
@@ -192,7 +191,7 @@ class BandcampIE(InfoExtractor):
if isinstance(download_formats_list, list):
for f in blob['download_formats']:
name, ext = f.get('name'), f.get('file_extension')
if all(isinstance(x, compat_str) for x in (name, ext)):
if all(isinstance(x, str) for x in (name, ext)):
download_formats[name] = ext.strip('.')
for format_id, f in downloads.items():
@@ -207,7 +206,7 @@ class BandcampIE(InfoExtractor):
})
format_id = f.get('encoding_name') or format_id
stat = self._download_json(
stat_url, track_id, 'Downloading %s JSON' % format_id,
stat_url, track_id, f'Downloading {format_id} JSON',
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
fatal=False)
if not stat:
@@ -225,7 +224,7 @@ class BandcampIE(InfoExtractor):
'acodec': format_id.split('-')[0],
})
title = '%s - %s' % (artist, track) if artist else track
title = f'{artist} - {track}' if artist else track
if not duration:
duration = float_or_none(self._html_search_meta(
@@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311756226,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
},
{
'md5': '1a2c32e2691474643e912cc6cd4bffaa',
@@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311757238,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
},
],
'info_dict': {
@@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'uploader_id': 'blazo',
},
'params': {
'playlistend': 2
'playlistend': 2,
},
'skip': 'Bandcamp imposes download limits.'
'skip': 'Bandcamp imposes download limits.',
}, {
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
'info_dict': {
@@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
def suitable(cls, url):
return (False
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
else super(BandcampAlbumIE, cls).suitable(url))
else super().suitable(url))
def _real_extract(self, url):
uploader_id, album_id = self._match_valid_url(url).groups()
@@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
},
}, {
'url': 'https://bandcamp.com/?blah/blah@&show=228',
'only_matching': True
'only_matching': True,
}]
def _real_extract(self, url):
@@ -407,7 +406,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
title = show.get('audio_title') or 'Bandcamp Weekly'
subtitle = show.get('subtitle')
if subtitle:
title += ' - %s' % subtitle
title += f' - {subtitle}'
return {
'id': show_id,
@@ -419,7 +418,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
'series': 'Bandcamp Weekly',
'episode': show.get('subtitle'),
'episode_id': show_id,
'formats': formats
'formats': formats,
}
@@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor):
'url': 'http://dotscale.bandcamp.com',
'info_dict': {
'id': 'dotscale',
'title': 'Discography of dotscale'
'title': 'Discography of dotscale',
},
'playlist_count': 1,
}, {


@@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor):
'description': 'md5:560d96f02abbebe6c6b78b47465f6b28',
'upload_date': '20200324',
'timestamp': 1585087895,
}
},
}]
_GRAPHQL_GETMETADATA_QUERY = '''
@@ -84,15 +84,15 @@ query GetCommentReplies($id: String!) {
'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY,
}
def _call_api(self, video_id, id, operation, note):
def _call_api(self, video_id, id_var, operation, note):
return self._download_json(
'https://api.infowarsmedia.com/graphql', video_id, note=note,
headers={
'Content-Type': 'application/json; charset=utf-8'
'Content-Type': 'application/json; charset=utf-8',
}, data=json.dumps({
'variables': {'id': id},
'variables': {'id': id_var},
'operationName': operation,
'query': self._GRAPHQL_QUERIES[operation]
'query': self._GRAPHQL_QUERIES[operation],
}).encode('utf8')).get('data')
def _get_comments(self, video_id, comments, comment_data):
@@ -151,5 +151,5 @@ query GetCommentReplies($id: String!) {
'tags': [tag.get('name') for tag in video_info.get('tags')],
'availability': self._availability(is_unlisted=video_info.get('unlisted')),
'comments': comments,
'__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments'))
'__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')),
}


@@ -2,10 +2,10 @@ import functools
import itertools
import json
import re
import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor
from ..compat import compat_str, compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
@@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
_VALID_URL = r'''(?x)
_VALID_URL = rf'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
(?:
@@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor):
radio/player/|
events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
(?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips))
'''
_EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
_LOGIN_URL = 'https://account.bbc.com/signin'
@@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
}
},
},
{
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
@@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
}
},
}, {
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
'note': 'Video',
@@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
}
},
}, {
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
'info_dict': {
@@ -268,19 +268,19 @@ class BBCCoUkIE(InfoExtractor):
error = clean_html(get_element_by_class('form-message', response))
if error:
raise ExtractorError(
'Unable to login: %s' % error, expected=True)
f'Unable to login: {error}', expected=True)
raise ExtractorError('Unable to log in')
class MediaSelectionError(Exception):
def __init__(self, id):
self.id = id
def __init__(self, error_id):
self.id = error_id
def _extract_asx_playlist(self, connection, programme_id):
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
def _extract_items(self, playlist):
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item')
def _extract_medias(self, media_selection):
error = media_selection.get('result')
@@ -312,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
def _raise_extractor_error(self, media_selection_error):
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
f'{self.IE_NAME} returned error: {media_selection_error.id}',
expected=True)
def _download_media_selector(self, programme_id):
@@ -372,7 +372,7 @@ class BBCCoUkIE(InfoExtractor):
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
formats.append({
'url': ref,
'format_id': 'ref%s_%s' % (i, format_id),
'format_id': f'ref{i}_{format_id}',
})
elif transfer_format == 'dash':
formats.extend(self._extract_mpd_formats(
@@ -394,7 +394,7 @@ class BBCCoUkIE(InfoExtractor):
href, programme_id, f4m_id=format_id, fatal=False))
else:
if not supplier and bitrate:
format_id += '-%d' % bitrate
format_id += f'-{bitrate}'
fmt = {
'format_id': format_id,
'filesize': file_size,
@@ -423,9 +423,9 @@ class BBCCoUkIE(InfoExtractor):
identifier = connection.get('identifier')
server = connection.get('server')
fmt.update({
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
'url': f'{protocol}://{server}/{application}?{auth_string}',
'play_path': identifier,
'app': '%s?%s' % (application, auth_string),
'app': f'{application}?{auth_string}',
'page_url': 'http://www.bbc.co.uk',
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
'rtmp_live': False,
@@ -441,7 +441,7 @@ class BBCCoUkIE(InfoExtractor):
def _download_playlist(self, playlist_id):
try:
playlist = self._download_json(
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json',
playlist_id, 'Downloading playlist JSON')
formats = []
subtitles = {}
@@ -480,32 +480,32 @@ class BBCCoUkIE(InfoExtractor):
def _process_legacy_playlist(self, playlist_id):
return self._process_legacy_playlist_url(
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id)
def _download_legacy_playlist_url(self, url, playlist_id=None):
return self._download_xml(
url, playlist_id, 'Downloading legacy playlist XML')
def _extract_from_legacy_playlist(self, playlist, playlist_id):
no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems')
if no_items is not None:
reason = no_items.get('reason')
if reason == 'preAvailability':
msg = 'Episode %s is not yet available' % playlist_id
msg = f'Episode {playlist_id} is not yet available'
elif reason == 'postAvailability':
msg = 'Episode %s is no longer available' % playlist_id
msg = f'Episode {playlist_id} is no longer available'
elif reason == 'noMedia':
msg = 'Episode %s is not currently available' % playlist_id
msg = f'Episode {playlist_id} is not currently available'
else:
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
msg = f'Episode {playlist_id} is not available: {reason}'
raise ExtractorError(msg, expected=True)
for item in self._extract_items(playlist):
kind = item.get('kind')
if kind not in ('programme', 'radioProgramme'):
continue
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text
description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary')
description = description_el.text if description_el is not None else None
def get_programme_id(item):
@@ -515,7 +515,7 @@ class BBCCoUkIE(InfoExtractor):
if value and re.match(r'^[pb][\da-z]{7}$', value):
return value
get_from_attributes(item)
mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator')
if mediator is not None:
return get_from_attributes(mediator)
@@ -555,7 +555,7 @@ class BBCCoUkIE(InfoExtractor):
if not programme_id:
programme_id = self._search_regex(
r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None)
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)
@@ -641,7 +641,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'params': {
'skip_download': True,
}
},
}, {
# article with single video embedded with data-playable containing XML playlist
# with direct video links as progressiveDownloadUrl (for now these are extracted)
@@ -884,7 +884,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'uploader_id': 'bbc_world_service',
'series': 'CrowdScience',
'chapters': [],
}
},
}, { # onion routes
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
'only_matching': True,
@@ -897,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
def suitable(cls, url):
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
else super(BBCIE, cls).suitable(url))
else super().suitable(url))
def _extract_from_media_meta(self, media_meta, video_id):
# Direct links to media in media metadata (e.g.
@@ -1009,7 +1009,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if playlist:
entry = None
for key in ('streaming', 'progressiveDownload'):
playlist_url = playlist.get('%sUrl' % key)
playlist_url = playlist.get(f'{key}Url')
if not playlist_url:
continue
try:
@@ -1035,7 +1035,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
group_id = self._search_regex(
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})',
webpage, 'group id', default=None)
if group_id:
return self.url_result(
@@ -1043,9 +1043,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
programme_id = self._search_regex(
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
[rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"',
rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"',
rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'],
webpage, 'vpid', default=None)
if programme_id:
@@ -1142,7 +1142,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
video_id, url_transparent=True)
entry.update({
'timestamp': traverse_obj(morph_payload, (
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}),
),
**traverse_obj(video_data, {
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
@@ -1189,7 +1189,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
'start_time': ('offset', 'start', {float_or_none}),
'end_time': ('offset', 'end', {float_or_none}),
})
}),
),
}
@@ -1287,7 +1287,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
'duration': ('versions', 0, 'duration', {int}),
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
})
}),
}
def is_type(*types):
@@ -1331,7 +1331,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if blocks:
summary = []
for block in blocks:
text = try_get(block, lambda x: x['model']['text'], compat_str)
text = try_get(block, lambda x: x['model']['text'], str)
if text:
summary.append(text)
if summary:
@@ -1411,9 +1411,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
entries, playlist_id, playlist_title, playlist_description)
def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),
re.findall(pattern, webpage))))
return list(filter(None, (
self._parse_json(s, playlist_id, fatal=False)
for s in re.findall(pattern, webpage))))
# US accessed article with single embedded video (e.g.
# https://www.bbc.com/news/uk-68546268)
@@ -1435,14 +1435,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
# Multiple video article (e.g.
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?'
entries = []
for match in extract_all(r'new\s+SMP\(({.+?})\)'):
embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
if embed_url and re.match(EMBED_URL, embed_url):
entries.append(embed_url)
entries.extend(re.findall(
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
rf'setPlaylist\("({EMBED_URL})"\)', webpage))
if entries:
return self.playlist_result(
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
@@ -1492,11 +1492,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
video_id = media_meta.get('externalId')
if not video_id:
video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}'
title = media_meta.get('caption')
if not title:
title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}'
duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
@@ -1557,8 +1557,8 @@ class BBCCoUkArticleIE(InfoExtractor):
class BBCCoUkPlaylistBaseIE(InfoExtractor):
def _entries(self, webpage, url, playlist_id):
single_page = 'page' in compat_urlparse.parse_qs(
compat_urlparse.urlparse(url).query)
single_page = 'page' in urllib.parse.parse_qs(
urllib.parse.urlparse(url).query)
for page_num in itertools.count(2):
for video_id in re.findall(
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
@@ -1572,8 +1572,8 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
if not next_page:
break
webpage = self._download_webpage(
compat_urlparse.urljoin(url, next_page), playlist_id,
'Downloading page %d' % page_num, page_num)
urllib.parse.urljoin(url, next_page), playlist_id,
f'Downloading page {page_num}', page_num)
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -1588,7 +1588,7 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
_VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
_VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})'
@staticmethod
def _get_default(episode, key, default_key='default'):
@@ -1712,11 +1712,11 @@ class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
variables['sliceId'] = series_id
return self._download_json(
'https://graph.ibl.api.bbc.co.uk/', pid, headers={
'Content-Type': 'application/json'
'Content-Type': 'application/json',
}, data=json.dumps({
'id': '5692d93d5aac8d796a0305e895e61551',
'variables': variables,
}).encode('utf-8'))['data']['programme']
}).encode())['data']['programme']
@staticmethod
def _get_playlist_data(data):
@@ -1776,7 +1776,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
def _call_api(self, pid, per_page, page=1, series_id=None):
return self._download_json(
'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes',
pid, query={
'page': page,
'per_page': per_page,
@@ -1792,7 +1792,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
IE_NAME = 'bbc.co.uk:playlist'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
_VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)'
_URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
_VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
_TESTS = [{


@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none
@@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor):
'display_id': 'birds-original-mix',
'ext': 'mp4',
'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
}
},
}]
def _real_extract(self, url):
@@ -51,7 +50,7 @@ class BeatportIE(InfoExtractor):
track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
if track['mix']:
title += ' (' + track['mix'] + ')'
@@ -89,7 +88,7 @@ class BeatportIE(InfoExtractor):
images.append(image)
return {
'id': compat_str(track.get('id')) or track_id,
'id': str(track.get('id')) or track_id,
'display_id': track.get('slug') or display_id,
'title': title,
'formats': formats,


@@ -23,7 +23,7 @@ class BeegIE(InfoExtractor):
'upload_date': '20220131',
'timestamp': 1643656455,
'display_id': '2540839',
}
},
}, {
'url': 'https://beeg.com/-0599050563103750?t=4-861',
'md5': 'bd8b5ea75134f7f07fad63008db2060e',
@@ -38,7 +38,7 @@ class BeegIE(InfoExtractor):
'timestamp': 1643623200,
'display_id': '2569965',
'upload_date': '20220131',
}
},
}, {
# api/v6 v2
'url': 'https://beeg.com/1941093077?t=911-1391',
@@ -55,8 +55,8 @@ class BeegIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video = self._download_json(
'https://store.externulls.com/facts/file/%s' % video_id,
video_id, 'Downloading JSON for %s' % video_id)
f'https://store.externulls.com/facts/file/{video_id}',
video_id, f'Downloading JSON for {video_id}')
fc_facts = video.get('fc_facts')
first_fact = {}


@@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor):
'upload_date': '20141205',
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
'age_limit': 18,
}
},
}
def _real_extract(self, url):


@@ -86,6 +86,6 @@ class BellMediaIE(InfoExtractor):
return {
'_type': 'url_transparent',
'id': video_id,
'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}',
'ie_key': 'NineCNineMedia',
}


@@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor):
'tags': ['Studienfilm'],
'duration': 602.440,
'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
}
},
}]
def _real_extract(self, url):
@@ -54,7 +54,7 @@ class BerufeTVIE(InfoExtractor):
subtitles.setdefault(track['language'], []).append({
'url': track['source'],
'name': track.get('label'),
'ext': 'vtt'
'ext': 'vtt',
})
return {


@@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor):
'thumbnail': r're:(?i)^https?://.*\.jpg$',
'subtitles': {
'en': 'mincount:2',
}
},
},
'params': {
# rtmp download
@@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor):
'thumbnail': r're:(?i)^https?://.*\.jpg$',
'subtitles': {
'en': 'mincount:2',
}
},
},
'params': {
# rtmp download
'skip_download': True,
},
}
},
]
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player'
def _get_feed_query(self, uri):
return {


@@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE):
'timestamp': 1673341692,
'duration': 109.269,
'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'],
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg'
}
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg',
},
}]
def _real_extract(self, url):


@@ -1,10 +1,8 @@
import base64
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
)
class BigflixIE(InfoExtractor):
@@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
}, {
# multiple formats
'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
@@ -38,7 +36,7 @@ class BigflixIE(InfoExtractor):
webpage, 'title')
def decode_url(quoted_b64_url):
return compat_b64decode(compat_urllib_parse_unquote(
return base64.b64decode(urllib.parse.unquote(
quoted_b64_url)).decode('utf-8')
formats = []
@@ -47,7 +45,7 @@ class BigflixIE(InfoExtractor):
video_url = decode_url(encoded_url)
f = {
'url': video_url,
'format_id': '%sp' % height,
'format_id': f'{height}p',
'height': int(height),
}
if video_url.startswith('rtmp'):
@@ -69,5 +67,5 @@ class BigflixIE(InfoExtractor):
'id': video_id,
'title': title,
'description': description,
'formats': formats
'formats': formats,
}


@@ -36,7 +36,7 @@ class BigoIE(InfoExtractor):
raise ExtractorError('Received invalid JSON data')
if info_raw.get('code'):
raise ExtractorError(
'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True)
info = info_raw.get('data') or {}
if not info.get('alive'):


@@ -20,7 +20,7 @@ class BildIE(InfoExtractor):
'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 196,
}
},
}, {
'note': 'static MP4 and HLS',
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
@@ -32,7 +32,7 @@ class BildIE(InfoExtractor):
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 69,
}
},
}]
def _real_extract(self, url):


@@ -31,12 +31,12 @@ from ..utils import (
mimetype2ext,
parse_count,
parse_qs,
parse_resolution,
qualities,
smuggle_url,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_call,
unified_timestamp,
unsmuggle_url,
url_or_none,
@@ -47,6 +47,23 @@ from ..utils import (
class BilibiliBaseIE(InfoExtractor):
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
_wbi_key_cache = {}
@property
def is_logged_in(self):
return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
def _check_missing_formats(self, play_info, formats):
parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
missing_formats = join_nonempty(*[
traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
for fmt in traverse_obj(play_info, (
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
if missing_formats:
self.to_screen(
f'Format(s) {missing_formats} are missing; you have to login or '
f'become a premium member to download them. {self._login_hint()}')
def extract_formats(self, play_info):
format_names = {
@@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor):
'format': format_names.get(video.get('id')),
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
if missing_formats:
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
f'you have to login or become premium member to download them. {self._login_hint()}')
if formats:
self._check_missing_formats(play_info, formats)
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}),
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
'filesize': ('size', {int_or_none}),
}))
if fragments:
formats.append({
'url': fragments[0]['url'],
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
**({
'fragments': fragments,
'protocol': 'http_dash_segments',
} if len(fragments) > 1 else {}),
**traverse_obj(play_info, {
'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}),
'format_note': ('quality', {lambda x: format_names.get(x)}),
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
}),
**parse_resolution(format_names.get(play_info.get('quality'))),
})
return formats
def _download_playinfo(self, video_id, cid, headers=None):
def _get_wbi_key(self, video_id):
if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
return self._wbi_key_cache['key']
session_data = self._download_json(
'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
lookup = ''.join(traverse_obj(session_data, (
'data', 'wbi_img', ('img_url', 'sub_url'),
{lambda x: x.rpartition('/')[2].partition('.')[0]})))
# from getMixinKey() in the vendor js
mixin_key_enc_tab = [
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
36, 20, 34, 44, 52,
]
self._wbi_key_cache.update({
'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
'ts': time.time(),
})
return self._wbi_key_cache['key']
def _sign_wbi(self, params, video_id):
params['wts'] = round(time.time())
params = {
k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
for k, v in sorted(params.items())
}
query = urllib.parse.urlencode(params)
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
return params
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
if qn:
params['qn'] = qn
return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query=self._sign_wbi(params, bvid), headers=headers,
note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
def json2srt(self, json_data):
srt_data = ''
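
The WBI signing introduced in the hunk above works as follows: the two key fragments returned by the nav endpoint are concatenated, shuffled through a fixed permutation table, truncated to 32 characters, and the MD5 of the sorted, sanitized query string plus that key becomes the w_rid parameter. A condensed sketch of the same computation outside the extractor class; the helper names are illustrative only, while the table and endpoint are taken from the hunk itself.

import hashlib
import time
import urllib.parse

# Permutation table from the hunk above (from getMixinKey() in Bilibili's player JS)
MIXIN_KEY_ENC_TAB = [
    46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
    33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
    61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
    36, 20, 34, 44, 52,
]

def make_wbi_key(img_key, sub_key):
    # img_key/sub_key are the img_url/sub_url filenames (without extension)
    # returned by https://api.bilibili.com/x/web-interface/nav
    lookup = img_key + sub_key
    return ''.join(lookup[i] for i in MIXIN_KEY_ENC_TAB)[:32]

def sign_wbi(params, wbi_key):
    params = {**params, 'wts': round(time.time())}
    # Stringify values, strip the characters !'()* and sort by key before encoding
    params = {k: ''.join(c for c in str(v) if c not in "!'()*") for k, v in sorted(params.items())}
    query = urllib.parse.urlencode(params)
    params['w_rid'] = hashlib.md5((query + wbi_key).encode()).hexdigest()
    return params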
@@ -112,21 +186,21 @@ class BilibiliBaseIE(InfoExtractor):
'danmaku': [{
'ext': 'xml',
'url': f'https://comment.bilibili.com/{cid}.xml',
}]
}],
}
subtitle_info = traverse_obj(self._download_json(
video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
for s in subs_list:
note=f'Extracting subtitle info {cid}')
if traverse_obj(video_info, ('data', 'need_login_subtitle')):
self.report_warning(
f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
for s in traverse_obj(video_info, (
'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
subtitles.setdefault(s['lan'], []).append({
'ext': 'srt',
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
})
return subtitles
@@ -203,19 +277,19 @@ class BilibiliBaseIE(InfoExtractor):
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
return cid_edges
def _get_interactive_entries(self, video_id, cid, metainfo):
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
graph_version = traverse_obj(
self._download_json(
'https://api.bilibili.com/x/player/wbi/v2', video_id,
'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid)
play_info = self._download_playinfo(video_id, cid, headers=headers)
yield {
**metainfo,
'id': f'{video_id}_{cid}',
'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
'formats': self.extract_formats(play_info),
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
'duration': float_or_none(play_info.get('timelength'), scale=1000),
@@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE):
'timestamp': 1488353834,
'like_count': int,
'view_count': int,
'_old_archive_ids': ['bilibili 8903802_part1'],
},
}, {
'note': 'old av URL version',
'url': 'http://www.bilibili.com/video/av1074402/',
'info_dict': {
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
'id': 'BV11x411K7CN',
'ext': 'mp4',
'title': '【金坷垃】金泡沫',
'uploader': '菊子桑',
'uploader_id': '156160',
'id': 'BV11x411K7CN',
'title': '【金坷垃】金泡沫',
'duration': 308.36,
'upload_date': '20140420',
'timestamp': 1397983878,
@@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int,
'view_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
'_old_archive_ids': ['bilibili 1074402_part1'],
},
'params': {'skip_download': True},
}, {
@@ -269,7 +345,7 @@ class BiliBiliIE(BilibiliBaseIE):
'url': 'https://www.bilibili.com/video/BV1bK411W797',
'info_dict': {
'id': 'BV1bK411W797',
'title': '物语中的人物是如何吐槽自己的OP的'
'title': '物语中的人物是如何吐槽自己的OP的',
},
'playlist_count': 18,
'playlist': [{
@@ -288,8 +364,9 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
}
}]
'_old_archive_ids': ['bilibili 498159642_part1'],
},
}],
}, {
'note': 'Specific page of Anthology',
'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
@@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
}
}, {
'note': 'video has subtitles',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
'id': 'BV12N4y1M7rh',
'ext': 'mp4',
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
'tags': list,
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2'
'_old_archive_ids': ['bilibili 498159642_part1'],
},
'params': {'listsubtitles': True},
}, {
'url': 'https://www.bilibili.com/video/av8903802/',
'info_dict': {
@@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int,
'view_count': int,
'like_count': int,
'_old_archive_ids': ['bilibili 8903802_part1'],
},
'params': {
'skip_download': True,
@@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 463665680_part1'],
},
'params': {'skip_download': True},
}, {
@@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 893839363_part1'],
},
'params': {'skip_download': True},
}, {
'note': 'newer festival video',
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
@@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 778246196_part1'],
},
}, {
'note': 'legacy flv/mp4 video',
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
'info_dict': {
'id': 'BV1ms411Q7vw_p4',
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
'timestamp': 1458222815,
'upload_date': '20160317',
'description': '云南方言快乐生产线出品',
'duration': float,
'uploader': '一笑颠天',
'uploader_id': '3916081',
'view_count': int,
'comment_count': int,
'like_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 4120229_part4'],
},
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
'playlist_count': 19,
'playlist': [{
'info_dict': {
'id': 'BV1ms411Q7vw_p4_0',
'ext': 'flv',
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
'duration': 399.102,
},
}],
}, {
'note': 'legacy mp4-only video',
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
'info_dict': {
'id': 'BV1nx411u79K',
'ext': 'mp4',
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
'timestamp': 1508893551,
'upload_date': '20171025',
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
'duration': 80.384,
'uploader': '伯远',
'uploader_id': '10584494',
'comment_count': int,
'view_count': int,
'like_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 15700301_part1'],
},
'params': {'skip_download': True},
}, {
'note': 'interactive/split-path video',
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
@@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 292734508_part1'],
},
'playlist_count': 33,
'playlist': [{
@@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 292734508_part1'],
},
}],
}, {
@@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE):
'upload_date': '20191021',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}, {
'note': 'video has subtitles, which requires login',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
'id': 'BV12N4y1M7rh',
'ext': 'mp4',
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
'tags': list,
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2', # login required for CC subtitle
'_old_archive_ids': ['bilibili 898179753_part1'],
},
'params': {'listsubtitles': True},
'skip': 'login required for subtitle',
}, {
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
'info_dict': {
@@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE):
if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
headers['Referer'] = url
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
is_festival = 'videoData' not in initial_state
if is_festival:
video_data = initial_state['videoInfo']
@@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE):
aid = video_data.get('aid')
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
festival_info = {}
@@ -586,19 +719,65 @@ class BiliBiliIE(BilibiliBaseIE):
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
if is_interactive:
return self.playlist_result(
self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
'__post_extractor': self.extract_comments(aid),
})
self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
__post_extractor=self.extract_comments(aid))
else:
return {
**metainfo,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'formats': self.extract_formats(play_info),
'__post_extractor': self.extract_comments(aid),
}
formats = self.extract_formats(play_info)
if not traverse_obj(play_info, ('dash')):
# we only have legacy formats and need additional work
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
formats.extend(traverse_obj(
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
lambda _, v: not has_qn(v['quality'])))
self._check_missing_formats(play_info, formats)
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
if flv_formats and len(flv_formats) < len(formats):
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
if not self._configuration_arg('prefer_multi_flv'):
dropped_fmts = ', '.join(
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
if dropped_fmts:
self.to_screen(
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
else:
formats = traverse_obj(
# XXX: Filtering by extractor-arg is for testing purposes
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
) or [max(flv_formats, key=lambda x: x['quality'])]
if traverse_obj(formats, (0, 'fragments')):
# We have flv formats, which are individual short videos with their own timestamps and metainfo
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
return {
**metainfo,
'_type': 'multi_video',
'entries': [{
'id': f'{metainfo["id"]}_{idx}',
'title': metainfo['title'],
'http_headers': metainfo['http_headers'],
'formats': [{
**fragment,
'format_id': formats[0].get('format_id'),
}],
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
} for idx, fragment in enumerate(formats[0]['fragments'])],
'duration': float_or_none(play_info.get('timelength'), scale=1000),
}
else:
return {
**metainfo,
'formats': formats,
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid),
'__post_extractor': self.extract_comments(aid),
}
class BiliBiliBangumiIE(BilibiliBaseIE):
@@ -640,7 +819,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': 1425.256,
'timestamp': 1554566400,
'upload_date': '20190406',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
'skip': 'Geo-restricted',
}, {
@@ -661,7 +840,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': 1922.129,
'timestamp': 1602853860,
'upload_date': '20201016',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}]
@@ -764,7 +943,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
'duration': 1525.777,
'timestamp': 1425074413,
'upload_date': '20150227',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}],
}]
@@ -794,7 +973,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
'title': '鬼灭之刃',
'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
},
'playlist_mincount': 26
'playlist_mincount': 26,
}, {
'url': 'https://www.bilibili.com/bangumi/play/ss2251',
'info_dict': {
@@ -819,7 +998,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
'duration': 1436.992,
'timestamp': 1343185080,
'upload_date': '20120725',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}],
}]
@@ -906,7 +1085,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE):
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
}
},
}]
def _real_extract(self, url):
@@ -939,7 +1118,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
}
},
}],
'params': {'playlist_items': '1'},
}, {
@@ -969,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
}))
class BilibiliSpaceBaseIE(InfoExtractor):
class BilibiliSpaceBaseIE(BilibiliBaseIE):
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
first_page = fetch_page(0)
metadata = get_metadata(first_page)
@@ -989,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
'id': '3985676',
},
'playlist_mincount': 178,
'skip': 'login required',
}, {
'url': 'https://space.bilibili.com/313580179/video',
'info_dict': {
'id': '313580179',
},
'playlist_mincount': 92,
'skip': 'login required',
}]
def _extract_signature(self, playlist_id):
session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
img_key = traverse_obj(
session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
sub_key = traverse_obj(
session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
session_key = img_key + sub_key
signature_values = []
for position in (
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
57, 62, 11, 36, 20, 34, 44, 52
):
char_at_position = try_call(lambda: session_key[position])
if char_at_position:
signature_values.append(char_at_position)
return ''.join(signature_values)[:32]
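            # The inline signing above is removed; the new fetch_page below calls self._sign_wbi()
            # instead (BilibiliSpaceBaseIE now inherits from BilibiliBaseIE, where that shared helper
            # presumably lives). A rough, self-contained sketch of the same WBI scheme follows; the
            # helper name, argument layout and standalone form are illustrative assumptions, not the
            # actual _sign_wbi implementation:
            import hashlib
            import time
            import urllib.parse

            _WBI_KEY_REORDER = (
                46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
                12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
                57, 62, 11, 36, 20, 34, 44, 52)

            def sign_wbi_query(query, img_key, sub_key):
                # Build the 32-char mixing key by permuting img_key + sub_key (both taken from the
                # /x/web-interface/nav response), then append `wts` (current Unix time) and
                # `w_rid` (md5 of the url-encoded query followed by the mixing key)
                session_key = img_key + sub_key
                mixing_key = ''.join(
                    session_key[pos] for pos in _WBI_KEY_REORDER if pos < len(session_key))[:32]
                signed = dict(query, wts=int(time.time()))
                signed['w_rid'] = hashlib.md5(
                    f'{urllib.parse.urlencode(signed)}{mixing_key}'.encode()).hexdigest()
                return signed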
def _real_extract(self, url):
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
if not is_video_url:
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
'To download audios, add a "/audio" to the URL')
signature = self._extract_signature(playlist_id)
def fetch_page(page_idx):
query = {
'keyword': '',
'mid': playlist_id,
'order': 'pubdate',
'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
'order_avoided': 'true',
'platform': 'web',
'pn': page_idx + 1,
'ps': 30,
'tid': 0,
'web_location': 1550101,
'wts': int(time.time()),
}
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
try:
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
playlist_id, note=f'Downloading page {page_idx}', query=query,
headers={'referer': url})
response = self._download_json(
'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
query=self._sign_wbi(query, playlist_id),
note=f'Downloading space page {page_idx}', headers={'Referer': url})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise
if response['code'] in (-352, -401):
status_code = response['code']
if status_code == -401:
raise ExtractorError(
f'Request is blocked by server ({-response["code"]}), '
'please add cookies, wait and try later.', expected=True)
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
elif status_code == -352 and not self.is_logged_in:
self.raise_login_required('Request is rejected, you need to login to access playlist')
elif status_code != 0:
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
return response['data']
def get_metadata(page_data):
@@ -1163,7 +1322,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
'uploader_id': ('meta', 'mid', {str_or_none}),
'timestamp': ('meta', 'ptime', {int_or_none}),
'thumbnail': ('meta', 'cover', {url_or_none}),
})
}),
}
def get_entries(page_data):
@@ -1195,7 +1354,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
mid, sid = self._match_valid_url(url).group('mid', 'sid')
playlist_id = f'{mid}_{sid}'
playlist_meta = traverse_obj(self._download_json(
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
), {
'title': ('data', 'meta', 'name', {str}),
'description': ('data', 'meta', 'description', {str}),
@@ -1217,7 +1376,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
'page_count': math.ceil(entry_count / page_size),
'page_size': page_size,
'uploader': self._get_uploader(mid, playlist_id),
**playlist_meta
**playlist_meta,
}
def get_entries(page_data):
@@ -1241,7 +1400,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
'upload_date': '20201109',
'modified_timestamp': int,
'modified_date': str,
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
'view_count': int,
'like_count': int,
},
@@ -1281,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.bilibili.com/watchlater/#/list',
'info_dict': {'id': 'watchlater'},
'info_dict': {
'id': r're:\d+',
'title': '稍后再看',
},
'playlist_mincount': 0,
'skip': 'login required',
}]
@@ -1345,7 +1507,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'uploader_id': '84912',
'timestamp': 1604905176,
'upload_date': '20201109',
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
},
'playlist_mincount': 22,
}, {
@@ -1357,21 +1519,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'skip': 'redirect url',
}, {
'url': 'https://www.bilibili.com/list/watchlater',
'info_dict': {'id': 'watchlater'},
'info_dict': {
'id': r're:2_\d+',
'title': '稍后再看',
'uploader': str,
'uploader_id': str,
},
'playlist_mincount': 0,
'skip': 'login required',
}, {
'url': 'https://www.bilibili.com/medialist/play/watchlater',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
'skip': 'login required',
'skip': 'redirect url & login required',
}]
def _extract_medialist(self, query, list_id):
for page_num in itertools.count(1):
page_data = self._download_json(
'https://api.bilibili.com/x/v2/medialist/resource/list',
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
)['data']
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
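            # Cursor-style pagination: `oid` is advanced to the id of the last entry in the page
            # just fetched, so the next request continues from where the previous one left off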
@@ -1407,7 +1574,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'tid': ('tid', {int_or_none}),
'sort_field': ('sortFiled', {int_or_none}),
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
})
}),
}
metadata = {
'id': f'{query["type"]}_{query["biz_id"]}',
@@ -1415,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'title': ('title', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}),
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
'thumbnail': ('cover', {url_or_none}),
})),
}
@@ -1430,26 +1597,26 @@ class BilibiliCategoryIE(InfoExtractor):
'url': 'https://www.bilibili.com/v/kichiku/mad',
'info_dict': {
'id': 'kichiku: mad',
'title': 'kichiku: mad'
'title': 'kichiku: mad',
},
'playlist_mincount': 45,
'params': {
'playlistend': 45
}
'playlistend': 45,
},
}]
def _fetch_page(self, api_url, num_pages, query, page_num):
parsed_json = self._download_json(
api_url, query, query={'Search_key': query, 'pn': page_num},
note='Extracting results from page %s of %s' % (page_num, num_pages))
note=f'Extracting results from page {page_num} of {num_pages}')
video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
if not video_list:
raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
for video in video_list:
yield self.url_result(
'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
def _entries(self, category, subcategory, query):
# map of categories : subcategories : RIDs
@@ -1459,7 +1626,7 @@ class BilibiliCategoryIE(InfoExtractor):
'manual_vocaloid': 126,
'guide': 22,
'theatre': 216,
'course': 127
'course': 127,
},
}
@@ -1485,7 +1652,7 @@ class BilibiliCategoryIE(InfoExtractor):
def _real_extract(self, url):
category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
query = '%s: %s' % (category, subcategory)
query = f'{category}: {subcategory}'
return self.playlist_result(self._entries(category, subcategory, query), query, query)
@@ -1588,7 +1755,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
formats = [{
'url': play_data['cdns'][0],
'filesize': int_or_none(play_data.get('size')),
'vcodec': 'none'
'vcodec': 'none',
}]
for a_format in formats:
@@ -1606,7 +1773,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
subtitles = {
'origin': [{
'url': lyric,
}]
}],
}
return {
@@ -1674,7 +1841,7 @@ class BiliBiliPlayerIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'http://www.bilibili.tv/video/av%s/' % video_id,
f'http://www.bilibili.tv/video/av{video_id}/',
ie=BiliBiliIE.ie_key(), video_id=video_id)
@@ -1702,11 +1869,10 @@ class BiliIntlBaseIE(InfoExtractor):
return json.get('data')
def json2srt(self, json):
data = '\n\n'.join(
return '\n\n'.join(
f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
for i, line in enumerate(traverse_obj(json, (
'body', lambda _, l: l['content'] and l['from'] and l['to']))))
return data
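        # Rough shape of the conversion, with hypothetical values (field names are the ones the
        # filter above expects):
        #   json = {'body': [{'from': 1.0, 'to': 2.5, 'content': 'Hello'}]}
        #   json2srt(json) -> '1\n00:00:01,000 --> 00:00:02,500\nHello'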
def _get_subtitles(self, *, ep_id=None, aid=None):
sub_json = self._call_api(
@@ -1808,14 +1974,15 @@ class BiliIntlBaseIE(InfoExtractor):
note='Downloading login key', errnote='Unable to download login key')['data']
public_key = Cryptodome.RSA.importKey(key_data['key'])
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
login_post = self._download_json(
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
data=urlencode_postdata({
'username': username,
'password': base64.b64encode(password_hash).decode('ascii'),
'keep_me': 'true',
's_locale': 'en_US',
'isTrusted': 'true'
'isTrusted': 'true',
}), note='Logging in', errnote='Unable to log in')
if login_post.get('code'):
if login_post.get('message'):
@@ -1842,17 +2009,17 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 76.242,
'title': '<Untitled Chapter 1>'
'title': '<Untitled Chapter 1>',
}, {
'start_time': 76.242,
'end_time': 161.161,
'title': 'Intro'
'title': 'Intro',
}, {
'start_time': 1325.742,
'end_time': 1403.903,
'title': 'Outro'
'title': 'Outro',
}],
}
},
}, {
# Non-Bstation page
'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
@@ -1869,17 +2036,17 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 88.0,
'title': '<Untitled Chapter 1>'
'title': '<Untitled Chapter 1>',
}, {
'start_time': 88.0,
'end_time': 156.0,
'title': 'Intro'
'title': 'Intro',
}, {
'start_time': 1173.0,
'end_time': 1259.535,
'title': 'Outro'
'title': 'Outro',
}],
}
},
}, {
# Subtitle with empty content
'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
@@ -1890,7 +2057,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
'episode_number': 140,
},
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
}, {
# episode comment extraction
'url': 'https://www.bilibili.tv/en/play/34580/340317',
@@ -1908,20 +2075,20 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 61.0,
'title': '<Untitled Chapter 1>'
'title': '<Untitled Chapter 1>',
}, {
'start_time': 61.0,
'end_time': 134.0,
'title': 'Intro'
'title': 'Intro',
}, {
'start_time': 1290.0,
'end_time': 1379.0,
'title': 'Outro'
'title': 'Outro',
}],
},
'params': {
'getcomments': True
}
'getcomments': True,
},
}, {
# user generated content comment extraction
'url': 'https://www.bilibili.tv/en/video/2045730385',
@@ -1936,8 +2103,8 @@ class BiliIntlIE(BiliIntlBaseIE):
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
},
'params': {
'getcomments': True
}
'getcomments': True,
},
}, {
# episode id without intro and outro
'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
@@ -1992,7 +2159,7 @@ class BiliIntlIE(BiliIntlBaseIE):
# Non-Bstation layout, read through episode list
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
video_data = traverse_obj(season_json, (
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
), expected_type=dict, get_all=False)
        # XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
@@ -2024,7 +2191,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'id': replies.get('rpid'),
'like_count': int_or_none(replies.get('like_count')),
'parent': replies.get('parent'),
'timestamp': unified_timestamp(replies.get('ctime_text'))
'timestamp': unified_timestamp(replies.get('ctime_text')),
}
if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
@@ -2077,11 +2244,11 @@ class BiliIntlIE(BiliIntlBaseIE):
chapters = [{
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
'title': 'Intro'
'title': 'Intro',
}, {
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
'title': 'Outro'
'title': 'Outro',
}]
return {
@@ -2137,12 +2304,13 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
episode_id = str(episode['episode_id'])
yield self.url_result(smuggle_url(
BiliIntlIE._make_url(episode_id, series_id),
self._parse_video_metadata(episode)
self._parse_video_metadata(episode),
), BiliIntlIE, episode_id)
def _real_extract(self, url):
series_id = self._match_id(url)
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
series_info = self._call_api(
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
return self.playlist_result(
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
@@ -2156,19 +2324,19 @@ class BiliLiveIE(InfoExtractor):
'url': 'https://live.bilibili.com/196',
'info_dict': {
'id': '33989',
'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群全员禁言66690667090209258459971⑧481 (功能一样,别多加)",
'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群全员禁言66690667090209258459971⑧481 (功能一样,别多加)',
'ext': 'flv',
'title': "太空狼人杀联动,不被爆杀就算赢",
'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
'title': '太空狼人杀联动,不被爆杀就算赢',
'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
'timestamp': 1650802769,
},
'skip': 'not live'
'skip': 'not live',
}, {
'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://live.bilibili.com/blanc/196',
'only_matching': True
'only_matching': True,
}]
_FORMATS = {
@@ -2209,7 +2377,7 @@ class BiliLiveIE(InfoExtractor):
raise ExtractorError('Streamer is not live', expected=True)
formats = []
for qn in self._FORMATS.keys():
for qn in self._FORMATS:
stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
'room_id': room_id,
'qn': qn,


@@ -24,7 +24,7 @@ from ..utils import (
class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
@@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor):
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
},
}, {
# test case: video with different channel and uploader
@@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor):
'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
},
}, {
# video not downloadable in browser, but we can recover it
@@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor):
'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
},
'params': {'check_formats': None},
}, {
@@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
}, {
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
'only_matching': True,
}, {
'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
'only_matching': True,
}]
_GEO_BYPASS = False
@@ -115,7 +118,7 @@ class BitChuteIE(InfoExtractor):
continue
return {
'url': url,
'filesize': int_or_none(response.headers.get('Content-Length'))
'filesize': int_or_none(response.headers.get('Content-Length')),
}
def _raise_if_restricted(self, webpage):
@@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
self._raise_if_restricted(webpage)
publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
@@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor):
class BitChuteChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bitchute.com/channel/bitchute/',
'info_dict': {
'id': 'bitchute',
'title': 'BitChute',
'description': 'md5:5329fb3866125afa9446835594a9b138',
'description': 'md5:2134c37d64fc3a4846787c402956adac',
},
'playlist': [
{
@@ -196,7 +199,7 @@ class BitChuteChannelIE(InfoExtractor):
'duration': 16,
'view_count': int,
},
}
},
],
'params': {
'skip_download': True,
@@ -209,7 +212,10 @@ class BitChuteChannelIE(InfoExtractor):
'id': 'wV9Imujxasw9',
'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e',
}
},
}, {
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
'only_matching': True,
}]
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
@@ -224,13 +230,13 @@ class BitChuteChannelIE(InfoExtractor):
'container': 'playlist-video',
'title': 'title',
'description': 'description',
}
},
}
@staticmethod
def _make_url(playlist_id, playlist_type):
return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/'
return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
def _fetch_page(self, playlist_id, playlist_type, page_num):
playlist_url = self._make_url(playlist_id, playlist_type)


@@ -47,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor):
region = mobj.group('region')
video_id = mobj.group('id')
info = self._download_json(
'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id)
f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
duration = info.get('duration')
title = info['name']
upload_date = info.get('created')


@@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor):
def _real_extract(self, url):
article_id = self._match_id(url)
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article']
thumbnails = []
primary_photo = article_data.get('primaryPhoto')
@@ -71,11 +71,11 @@ class BleacherReportIE(InfoExtractor):
if video:
video_type = video['type']
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id'])
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':
info['url'] = 'https://vine.co/v/%s' % video['id']
info['url'] = 'https://vine.co/v/{}'.format(video['id'])
else:
info['url'] = video_type + video['id']
return info
@@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE):
},
'expected_warnings': [
'Unable to download f4m manifest'
]
'Unable to download f4m manifest',
],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai')
info['id'] = video_id
return info


@@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor):
'uploader_id': '5fb81e51aa66ae000c395478',
'ext': 'mp3',
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
}
},
}, {
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
'info_dict': {
@@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor):
'uploader': '179617322678353920',
'uploader_id': '5ba99cf71386730004552c42',
'ext': 'mp3',
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
}
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'],
},
}]
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
_GRAPHQL_OPERATIONNAME = 'webBitePageGetBite'
_GRAPHQL_QUERY = (
'''query webBitePageGetBite($_id: MongoID!) {
web {
@@ -141,27 +141,26 @@ class BlerpIE(InfoExtractor):
'operationName': self._GRAPHQL_OPERATIONNAME,
'query': self._GRAPHQL_QUERY,
'variables': {
'_id': audio_id
}
'_id': audio_id,
},
}
headers = {
'Content-Type': 'application/json'
'Content-Type': 'application/json',
}
json_result = self._download_json('https://api.blerp.com/graphql',
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
json_result = self._download_json(
'https://api.blerp.com/graphql', audio_id,
data=json.dumps(data).encode(), headers=headers)
bite_json = json_result['data']['web']['biteById']
info_dict = {
return {
'id': bite_json['_id'],
'url': bite_json['audio']['mp3']['url'],
'title': bite_json['title'],
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
'ext': 'mp3',
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None),
}
return info_dict


@@ -21,14 +21,14 @@ class BloggerIE(InfoExtractor):
'ext': 'mp4',
'thumbnail': r're:^https?://.*',
'duration': 76.068,
}
},
}]
def _real_extract(self, url):
token_id = self._match_id(url)
webpage = self._download_webpage(url, token_id)
data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data')
data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id)
data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id)
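        # The captured VIDEO_CONFIG blob may contain backslash escape sequences (\xNN, \uNNNN);
        # decoding via 'unicode_escape' expands them before the JSON is parsed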
streams = data['streams']
formats = [{
'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))),


@@ -55,7 +55,7 @@ class BloombergIE(InfoExtractor):
title = re.sub(': Video$', '', self._og_search_title(webpage))
embed_info = self._download_json(
'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id)
formats = []
for stream in embed_info['streams']:
stream_url = stream.get('url')

Some files were not shown because too many files have changed in this diff.