1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-01-07 15:31:21 +00:00

Merged parent

This commit is contained in:
Kieran Eglin
2024-06-14 12:38:09 -07:00
977 changed files with 13496 additions and 10886 deletions

View File

@@ -109,7 +109,6 @@ from .utils import (
determine_protocol,
encode_compat_str,
encodeFilename,
error_to_compat_str,
escapeHTML,
expand_path,
extract_basic_auth,
@@ -583,7 +582,7 @@ class YoutubeDL:
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}
_deprecated_multivalue_fields = {
'album_artist': 'album_artists',
@@ -594,7 +593,7 @@ class YoutubeDL:
}
_format_selection_exts = {
'audio': set(MEDIA_EXTENSIONS.common_audio),
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
'storyboards': set(MEDIA_EXTENSIONS.storyboards),
}
@@ -628,7 +627,7 @@ class YoutubeDL:
error=sys.stderr,
screen=sys.stderr if self.params.get('quiet') else stdout,
console=None if compat_os_name == 'nt' else next(
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
)
try:
@@ -679,9 +678,9 @@ class YoutubeDL:
width_args = [] if width is None else ['-w', str(width)]
sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
try:
self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
except OSError:
self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == errno.ENOENT:
@@ -822,8 +821,7 @@ class YoutubeDL:
)
self.report_warning(
'Long argument string detected. '
'Use -- to separate parameters and URLs, like this:\n%s' %
shell_quote(correct_argv))
f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
@@ -922,7 +920,7 @@ class YoutubeDL:
if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
return
self._write_string(
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
'{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')),
self._out_files.screen, only_once=only_once)
def to_stderr(self, message, only_once=False):
@@ -1045,10 +1043,10 @@ class YoutubeDL:
return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
"""
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
'''
"""
if self.params.get('logger') is not None:
self.params['logger'].warning(message)
else:
@@ -1066,14 +1064,14 @@ class YoutubeDL:
self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
def report_error(self, message, *args, **kwargs):
'''
"""
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
"""
self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
def write_debug(self, message, only_once=False):
'''Log debug message or Print message to stderr'''
"""Log debug message or Print message to stderr"""
if not self.params.get('verbose', False):
return
message = f'[debug] {message}'
@@ -1085,14 +1083,14 @@ class YoutubeDL:
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
self.to_screen('[download] %s has already been downloaded' % file_name)
self.to_screen(f'[download] {file_name} has already been downloaded')
except UnicodeEncodeError:
self.to_screen('[download] The file has already been downloaded')
def report_file_delete(self, file_name):
"""Report that existing file will be deleted."""
try:
self.to_screen('Deleting existing file %s' % file_name)
self.to_screen(f'Deleting existing file {file_name}')
except UnicodeEncodeError:
self.to_screen('Deleting existing file')
@@ -1147,7 +1145,7 @@ class YoutubeDL:
@staticmethod
def escape_outtmpl(outtmpl):
''' Escape any remaining strings like %s, %abc% etc. '''
""" Escape any remaining strings like %s, %abc% etc. """
return re.sub(
STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
@@ -1155,7 +1153,7 @@ class YoutubeDL:
@classmethod
def validate_outtmpl(cls, outtmpl):
''' @return None or Exception object '''
""" @return None or Exception object """
outtmpl = re.sub(
STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
lambda mobj: f'{mobj.group(0)[:-1]}s',
@@ -1208,13 +1206,13 @@ class YoutubeDL:
}
# Field is of the form key1.key2...
# where keys (except first) can be string, int, slice or "{field, ...}"
FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} # noqa: UP031
FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { # noqa: UP031
'inner': FIELD_INNER_RE,
'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
}
MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
(?P<negate>-)?
(?P<fields>{FIELD_RE})
@@ -1337,7 +1335,7 @@ class YoutubeDL:
value, default = None, na
fmt = outer_mobj.group('format')
if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
fmt = f'0{field_size_compat_map[last_field]:d}d'
flags = outer_mobj.group('conversion') or ''
@@ -1362,7 +1360,7 @@ class YoutubeDL:
elif fmt[-1] == 'U': # unicode normalized
value, fmt = unicodedata.normalize(
# "+" = compatibility equivalence, "#" = NFD
'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
value), str_fmt
elif fmt[-1] == 'D': # decimal suffix
num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
@@ -1390,7 +1388,7 @@ class YoutubeDL:
if fmt[-1] in 'csra':
value = sanitizer(last_field, value)
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
TMPL_DICT[key] = value
return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
@@ -1479,9 +1477,9 @@ class YoutubeDL:
date = info_dict.get('upload_date')
if date is not None:
dateRange = self.params.get('daterange', DateRange())
if date not in dateRange:
return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
date_range = self.params.get('daterange', DateRange())
if date not in date_range:
return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
view_count = info_dict.get('view_count')
if view_count is not None:
min_views = self.params.get('min_views')
@@ -1491,7 +1489,7 @@ class YoutubeDL:
if max_views is not None and view_count > max_views:
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
return 'Skipping "%s" because it is age restricted' % video_title
return f'Skipping "{video_title}" because it is age restricted'
match_filter = self.params.get('match_filter')
if match_filter is None:
@@ -1544,7 +1542,7 @@ class YoutubeDL:
@staticmethod
def add_extra_info(info_dict, extra_info):
'''Set the keys from extra_info in info dict if they are missing'''
"""Set the keys from extra_info in info dict if they are missing"""
for key, value in extra_info.items():
info_dict.setdefault(key, value)
@@ -1590,7 +1588,7 @@ class YoutubeDL:
self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
'has already been recorded in the archive')
if self.params.get('break_on_existing', False):
raise ExistingVideoReached()
raise ExistingVideoReached
break
return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
else:
@@ -1616,8 +1614,8 @@ class YoutubeDL:
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
msg += '\nThis video is available in %s.' % ', '.join(
map(ISO3166Utils.short2full, e.countries))
msg += '\nThis video is available in {}.'.format(', '.join(
map(ISO3166Utils.short2full, e.countries)))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected
@@ -1826,8 +1824,8 @@ class YoutubeDL:
if isinstance(additional_urls, str):
additional_urls = [additional_urls]
self.to_screen(
'[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
'[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
ie_result['additional_entries'] = [
self.extract_info(
url, download, extra_info=extra_info,
@@ -1879,8 +1877,8 @@ class YoutubeDL:
webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url
if webpage_url and webpage_url in self._playlist_urls:
self.to_screen(
'[download] Skipping already downloaded playlist: %s'
% ie_result.get('title') or ie_result.get('id'))
'[download] Skipping already downloaded playlist: {}'.format(
ie_result.get('title')) or ie_result.get('id'))
return
self._playlist_level += 1
@@ -1895,8 +1893,8 @@ class YoutubeDL:
self._playlist_urls.clear()
elif result_type == 'compat_list':
self.report_warning(
'Extractor %s returned a compat_list result. '
'It needs to be updated.' % ie_result.get('extractor'))
'Extractor {} returned a compat_list result. '
'It needs to be updated.'.format(ie_result.get('extractor')))
def _fixup(r):
self.add_extra_info(r, {
@@ -1913,7 +1911,7 @@ class YoutubeDL:
]
return ie_result
else:
raise Exception('Invalid result type: %s' % result_type)
raise Exception(f'Invalid result type: {result_type}')
def _ensure_dir_exists(self, path):
return make_dir(path, self.report_error)
@@ -2029,8 +2027,9 @@ class YoutubeDL:
resolved_entries[i] = (playlist_index, NO_DEFAULT)
continue
self.to_screen('[download] Downloading item %s of %s' % (
self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
self.to_screen(
f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
'playlist_index': playlist_index,
@@ -2080,9 +2079,9 @@ class YoutubeDL:
}
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[\w.-]+)\s*
(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?P<op>{})(?P<none_inclusive>\s*\?)?\s*
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
''' % '|'.join(map(re.escape, OPERATORS.keys())))
'''.format('|'.join(map(re.escape, OPERATORS.keys()))))
m = operator_rex.fullmatch(filter_spec)
if m:
try:
@@ -2093,7 +2092,7 @@ class YoutubeDL:
comparison_value = parse_filesize(m.group('value') + 'B')
if comparison_value is None:
raise ValueError(
'Invalid value %r in format specification %r' % (
'Invalid value {!r} in format specification {!r}'.format(
m.group('value'), filter_spec))
op = OPERATORS[m.group('op')]
@@ -2103,15 +2102,15 @@ class YoutubeDL:
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
'~=': lambda attr, value: value.search(attr) is not None
'~=': lambda attr, value: value.search(attr) is not None,
}
str_operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-zA-Z0-9._-]+)\s*
(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
(?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
(?P<quote>["'])?
(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
(?(quote)(?P=quote))\s*
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
'''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))
m = str_operator_rex.fullmatch(filter_spec)
if m:
if m.group('op') == '~=':
@@ -2125,7 +2124,7 @@ class YoutubeDL:
op = str_op
if not m:
raise SyntaxError('Invalid filter specification %r' % filter_spec)
raise SyntaxError(f'Invalid filter specification {filter_spec!r}')
def _filter(f):
actual_value = f.get(m.group('key'))
@@ -2136,7 +2135,12 @@ class YoutubeDL:
def _check_formats(self, formats):
for f in formats:
self.to_screen('[info] Testing format %s' % f['format_id'])
working = f.get('__working')
if working is not None:
if working:
yield f
continue
self.to_screen('[info] Testing format {}'.format(f['format_id']))
path = self.get_output_path('temp')
if not self._ensure_dir_exists(f'{path}/'):
continue
@@ -2144,41 +2148,52 @@ class YoutubeDL:
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
except (DownloadError, OSError, ValueError) + network_exceptions:
except (DownloadError, OSError, ValueError, *network_exceptions):
success = False
finally:
if os.path.exists(temp_file.name):
try:
os.remove(temp_file.name)
except OSError:
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
f['__working'] = success
if success:
yield f
else:
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
def _select_formats(self, formats, selector):
return list(selector({
'formats': formats,
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
}))
def _default_format_spec(self, info_dict, download=True):
download = download and not self.params.get('simulate')
prefer_best = download and (
self.params['outtmpl']['default'] == '-'
or info_dict.get('is_live') and not self.params.get('live_from_start'))
def can_merge():
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
prefer_best = (
not self.params.get('simulate')
and download
and (
not can_merge()
or info_dict.get('is_live') and not self.params.get('live_from_start')
or self.params['outtmpl']['default'] == '-'))
compat = (
prefer_best
or self.params.get('allow_multiple_audio_streams', False)
or 'format-spec' in self.params['compat_opts'])
if not prefer_best and download and not can_merge():
prefer_best = True
formats = self._get_formats(info_dict)
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
return (
'best/bestvideo+bestaudio' if prefer_best
else 'bestvideo*+bestaudio/best' if not compat
else 'bestvideo+bestaudio/best')
compat = (self.params.get('allow_multiple_audio_streams')
or 'format-spec' in self.params['compat_opts'])
return ('best/bestvideo+bestaudio' if prefer_best
else 'bestvideo+bestaudio/best' if compat
else 'bestvideo*+bestaudio/best')
def build_format_selector(self, format_spec):
def syntax_error(note, start):
@@ -2198,8 +2213,8 @@ class YoutubeDL:
def _parse_filter(tokens):
filter_parts = []
for type, string_, start, _, _ in tokens:
if type == tokenize.OP and string_ == ']':
for type_, string_, _start, _, _ in tokens:
if type_ == tokenize.OP and string_ == ']':
return ''.join(filter_parts)
else:
filter_parts.append(string_)
@@ -2209,23 +2224,23 @@ class YoutubeDL:
# E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None
for type, string_, start, end, line in tokens:
if type == tokenize.OP and string_ == '[':
for type_, string_, start, end, line in tokens:
if type_ == tokenize.OP and string_ == '[':
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
last_string = None
yield type, string_, start, end, line
yield type_, string_, start, end, line
# everything inside brackets will be handled by _parse_filter
for type, string_, start, end, line in tokens:
yield type, string_, start, end, line
if type == tokenize.OP and string_ == ']':
for type_, string_, start, end, line in tokens:
yield type_, string_, start, end, line
if type_ == tokenize.OP and string_ == ']':
break
elif type == tokenize.OP and string_ in ALLOWED_OPS:
elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
last_string = None
yield type, string_, start, end, line
elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
yield type_, string_, start, end, line
elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
if not last_string:
last_string = string_
last_start = start
@@ -2238,13 +2253,13 @@ class YoutubeDL:
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
selectors = []
current_selector = None
for type, string_, start, _, _ in tokens:
for type_, string_, start, _, _ in tokens:
# ENCODING is only defined in Python 3.x
if type == getattr(tokenize, 'ENCODING', None):
if type_ == getattr(tokenize, 'ENCODING', None):
continue
elif type in [tokenize.NAME, tokenize.NUMBER]:
elif type_ in [tokenize.NAME, tokenize.NUMBER]:
current_selector = FormatSelector(SINGLE, string_, [])
elif type == tokenize.OP:
elif type_ == tokenize.OP:
if string_ == ')':
if not inside_group:
# ')' will be handled by the parentheses group
@@ -2287,7 +2302,7 @@ class YoutubeDL:
current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
else:
raise syntax_error(f'Operator not recognized: "{string_}"', start)
elif type == tokenize.ENDMARKER:
elif type_ == tokenize.ENDMARKER:
break
if current_selector:
selectors.append(current_selector)
@@ -2362,7 +2377,7 @@ class YoutubeDL:
'acodec': the_only_audio.get('acodec'),
'abr': the_only_audio.get('abr'),
'asr': the_only_audio.get('asr'),
'audio_channels': the_only_audio.get('audio_channels')
'audio_channels': the_only_audio.get('audio_channels'),
})
return new_dict
@@ -2443,9 +2458,9 @@ class YoutubeDL:
format_fallback = not format_type and not format_modified # for b, w
_filter_f = (
(lambda f: f.get('%scodec' % format_type) != 'none')
(lambda f: f.get(f'{format_type}codec') != 'none')
if format_type and format_modified # bv*, ba*, wv*, wa*
else (lambda f: f.get('%scodec' % not_format_type) == 'none')
else (lambda f: f.get(f'{not_format_type}codec') == 'none')
if format_type # bv, ba, wv, wa
else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
if not format_modified # b, w
@@ -2513,7 +2528,7 @@ class YoutubeDL:
def __next__(self):
if self.counter >= len(self.tokens):
raise StopIteration()
raise StopIteration
value = self.tokens[self.counter]
self.counter += 1
return value
@@ -2596,7 +2611,7 @@ class YoutubeDL:
self._sort_thumbnails(thumbnails)
for i, t in enumerate(thumbnails):
if t.get('id') is None:
t['id'] = '%d' % i
t['id'] = str(i)
if t.get('width') and t.get('height'):
t['resolution'] = '%dx%d' % (t['width'], t['height'])
t['url'] = sanitize_url(t['url'])
@@ -2657,8 +2672,8 @@ class YoutubeDL:
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
for field in ('chapter', 'season', 'episode'):
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number'])
for old_key, new_key in self._deprecated_multivalue_fields.items():
if new_key in info_dict and old_key in info_dict:
@@ -2690,8 +2705,8 @@ class YoutubeDL:
def report_force_conversion(field, field_not, conversion):
self.report_warning(
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
% (field, field_not, conversion))
f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
'there is an error in extractor')
def sanitize_string_field(info, string_field):
field = info.get(string_field)
@@ -2808,28 +2823,28 @@ class YoutubeDL:
if not formats:
self.raise_no_formats(info_dict)
for format in formats:
sanitize_string_field(format, 'format_id')
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
if format.get('ext') is None:
format['ext'] = determine_ext(format['url']).lower()
if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if format.get('acodec') is None:
format['acodec'] = format['ext']
if format.get('protocol') is None:
format['protocol'] = determine_protocol(format)
if format.get('resolution') is None:
format['resolution'] = self.format_resolution(format, default=None)
if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
format['dynamic_range'] = 'SDR'
if format.get('aspect_ratio') is None:
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
for fmt in formats:
sanitize_string_field(fmt, 'format_id')
sanitize_numeric_fields(fmt)
fmt['url'] = sanitize_url(fmt['url'])
if fmt.get('ext') is None:
fmt['ext'] = determine_ext(fmt['url']).lower()
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if fmt.get('acodec') is None:
fmt['acodec'] = fmt['ext']
if fmt.get('protocol') is None:
fmt['protocol'] = determine_protocol(fmt)
if fmt.get('resolution') is None:
fmt['resolution'] = self.format_resolution(fmt, default=None)
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
fmt['dynamic_range'] = 'SDR'
if fmt.get('aspect_ratio') is None:
fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
# For fragmented formats, "tbr" is often max bitrate and not average
if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
and not format.get('filesize') and not format.get('filesize_approx')):
format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration'))
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
and not fmt.get('filesize') and not fmt.get('filesize_approx')):
fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)
# Safeguard against old/insecure infojson when using --load-info-json
if info_dict.get('http_headers'):
@@ -2842,36 +2857,36 @@ class YoutubeDL:
self.sort_formats({
'formats': formats,
'_format_sort_fields': info_dict.get('_format_sort_fields')
'_format_sort_fields': info_dict.get('_format_sort_fields'),
})
# Sanitize and group by format_id
formats_dict = {}
for i, format in enumerate(formats):
if not format.get('format_id'):
format['format_id'] = str(i)
for i, fmt in enumerate(formats):
if not fmt.get('format_id'):
fmt['format_id'] = str(i)
else:
# Sanitize format_id from characters used in format selector expression
format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
formats_dict.setdefault(format['format_id'], []).append(format)
fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
formats_dict.setdefault(fmt['format_id'], []).append(fmt)
# Make sure all formats have unique format_id
common_exts = set(itertools.chain(*self._format_selection_exts.values()))
for format_id, ambiguous_formats in formats_dict.items():
ambigious_id = len(ambiguous_formats) > 1
for i, format in enumerate(ambiguous_formats):
for i, fmt in enumerate(ambiguous_formats):
if ambigious_id:
format['format_id'] = '%s-%d' % (format_id, i)
fmt['format_id'] = f'{format_id}-{i}'
# Ensure there is no conflict between id and ext in format selection
# See https://github.com/yt-dlp/yt-dlp/issues/1282
if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
format['format_id'] = 'f%s' % format['format_id']
if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
fmt['format_id'] = 'f{}'.format(fmt['format_id'])
if format.get('format') is None:
format['format'] = '{id} - {res}{note}'.format(
id=format['format_id'],
res=self.format_resolution(format),
note=format_field(format, 'format_note', ' (%s)'),
if fmt.get('format') is None:
fmt['format'] = '{id} - {res}{note}'.format(
id=fmt['format_id'],
res=self.format_resolution(fmt),
note=format_field(fmt, 'format_note', ' (%s)'),
)
if self.params.get('check_formats') is True:
@@ -2928,12 +2943,7 @@ class YoutubeDL:
self.write_debug(f'Default format spec: {req_format}')
format_selector = self.build_format_selector(req_format)
formats_to_download = list(format_selector({
'formats': formats,
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
}))
formats_to_download = self._select_formats(formats, format_selector)
if interactive_format_selection and not formats_to_download:
self.report_error('Requested format is not available', tb=False, is_error=False)
continue
@@ -2998,7 +3008,7 @@ class YoutubeDL:
info_dict['requested_downloads'] = downloaded_formats
info_dict = self.run_all_pps('after_video', info_dict)
if max_downloads_reached:
raise MaxDownloadsReached()
raise MaxDownloadsReached
# We update the info dict with the selected best quality format (backwards compatibility)
info_dict.update(best_format)
@@ -3059,8 +3069,8 @@ class YoutubeDL:
else:
f = formats[-1]
self.report_warning(
'No subtitle format found matching "%s" for language %s, '
'using %s' % (formats_query, lang, f['ext']))
'No subtitle format found matching "{}" for language {}, '
'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext']))
subs[lang] = f
return subs
@@ -3214,7 +3224,7 @@ class YoutubeDL:
def check_max_downloads():
if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
raise MaxDownloadsReached()
raise MaxDownloadsReached
if self.params.get('simulate'):
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
@@ -3385,7 +3395,7 @@ class YoutubeDL:
for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
f['filepath'] = fname = prepend_extension(
correct_ext(temp_filename, info_dict['ext']),
'f%s' % f['format_id'], info_dict['ext'])
'f{}'.format(f['format_id']), info_dict['ext'])
downloaded.append(fname)
info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
success, real_download = self.dl(temp_filename, info_dict)
@@ -3418,7 +3428,7 @@ class YoutubeDL:
if temp_filename != '-':
fname = prepend_extension(
correct_ext(temp_filename, new_info['ext']),
'f%s' % f['format_id'], new_info['ext'])
'f{}'.format(f['format_id']), new_info['ext'])
if not self._ensure_dir_exists(fname):
return
f['filepath'] = fname
@@ -3446,11 +3456,11 @@ class YoutubeDL:
dl_filename = dl_filename or temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
except network_exceptions as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
self.report_error(f'unable to download video data: {err}')
return
except OSError as err:
raise UnavailableVideoError(err)
except (ContentTooShortError, ) as err:
except ContentTooShortError as err:
self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
return
@@ -3517,13 +3527,13 @@ class YoutubeDL:
try:
replace_info_dict(self.post_process(dl_filename, info_dict))
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
self.report_error(f'Postprocessing: {err}')
return
try:
for ph in self._post_hooks:
ph(info_dict['filepath'])
except Exception as err:
self.report_error('post hooks: %s' % str(err))
self.report_error(f'post hooks: {err}')
return
info_dict['__write_download_archive'] = True
@@ -3590,7 +3600,7 @@ class YoutubeDL:
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
''' Sanitize the infodict for converting to json '''
""" Sanitize the infodict for converting to json """
if info_dict is None:
return info_dict
info_dict.setdefault('epoch', int(time.time()))
@@ -3625,7 +3635,7 @@ class YoutubeDL:
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
''' Alias of sanitize_info for backward compatibility '''
""" Alias of sanitize_info for backward compatibility """
return YoutubeDL.sanitize_info(info_dict, actually_filter)
def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
@@ -3645,7 +3655,7 @@ class YoutubeDL:
actual_post_extract(video_dict or {})
return
post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
post_extractor = info_dict.pop('__post_extractor', None) or dict
info_dict.update(post_extractor())
actual_post_extract(info_dict or {})
@@ -3750,7 +3760,7 @@ class YoutubeDL:
if format.get('width') and format.get('height'):
return '%dx%d' % (format['width'], format['height'])
elif format.get('height'):
return '%sp' % format['height']
return '{}p'.format(format['height'])
elif format.get('width'):
return '%dx?' % format['width']
return default
@@ -3767,7 +3777,7 @@ class YoutubeDL:
if fdict.get('language'):
if res:
res += ' '
res += '[%s]' % fdict['language']
res += '[{}]'.format(fdict['language'])
if fdict.get('format_note') is not None:
if res:
res += ' '
@@ -3779,7 +3789,7 @@ class YoutubeDL:
if fdict.get('container') is not None:
if res:
res += ', '
res += '%s container' % fdict['container']
res += '{} container'.format(fdict['container'])
if (fdict.get('vcodec') is not None
and fdict.get('vcodec') != 'none'):
if res:
@@ -3794,7 +3804,7 @@ class YoutubeDL:
if fdict.get('fps') is not None:
if res:
res += ', '
res += '%sfps' % fdict['fps']
res += '{}fps'.format(fdict['fps'])
if fdict.get('acodec') is not None:
if res:
res += ', '
@@ -3837,7 +3847,7 @@ class YoutubeDL:
format_field(f, 'format_id'),
format_field(f, 'ext'),
self.format_resolution(f),
self._format_note(f)
self._format_note(f),
] for f in formats if (f.get('preference') or 0) >= -1000]
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
@@ -3943,11 +3953,11 @@ class YoutubeDL:
from .extractor.extractors import _LAZY_LOADER
from .extractor.extractors import (
_PLUGIN_CLASSES as plugin_ies,
_PLUGIN_OVERRIDES as plugin_ie_overrides
_PLUGIN_OVERRIDES as plugin_ie_overrides,
)
def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
additional_info = []
if os.environ.get('TERM', '').lower() == 'dumb':
additional_info.append('dumb')
@@ -3958,13 +3968,13 @@ class YoutubeDL:
ret = f'{ret} ({",".join(additional_info)})'
return ret
encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
self.get_encoding(),
', '.join(
f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
if stream is not None and key != 'console')
if stream is not None and key != 'console'),
)
logger = self.params.get('logger')
@@ -3996,7 +4006,7 @@ class YoutubeDL:
else:
write_debug('Lazy loading extractors is disabled')
if self.params['compat_opts']:
write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))
if current_git_head():
write_debug(f'Git HEAD: {current_git_head()}')
@@ -4005,14 +4015,14 @@ class YoutubeDL:
exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
if ffmpeg_features:
exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features)))
exe_versions['rtmpdump'] = rtmpdump_version()
exe_versions['phantomjs'] = PhantomJSwrapper._version()
exe_str = ', '.join(
f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
) or 'none'
write_debug('exe versions: %s' % exe_str)
write_debug(f'exe versions: {exe_str}')
from .compat.compat_utils import get_package_info
from .dependencies import available_dependencies
@@ -4024,7 +4034,7 @@ class YoutubeDL:
write_debug(f'Proxy map: {self.proxies}')
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['%s%s' % (
display_list = ['{}{}'.format(
klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in plugins.items()]
if plugin_type == 'Extractor':
@@ -4041,14 +4051,13 @@ class YoutubeDL:
# Not implemented
if False and self.params.get('call_home'):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
write_debug('Public IP address: %s' % ipaddr)
write_debug(f'Public IP address: {ipaddr}')
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode()
if version_tuple(latest_version) > version_tuple(__version__):
self.report_warning(
'You are using an outdated version (newest version: %s)! '
'See https://yt-dl.org/update if you need help updating.' %
latest_version)
f'You are using an outdated version (newest version: {latest_version})! '
'See https://yt-dl.org/update if you need help updating.')
@functools.cached_property
def proxies(self):
@@ -4082,7 +4091,7 @@ class YoutubeDL:
return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
def _get_available_impersonate_targets(self):
# todo(future): make available as public API
# TODO(future): make available as public API
return [
(target, rh.RH_NAME)
for rh in self._request_director.handlers.values()
@@ -4091,7 +4100,7 @@ class YoutubeDL:
]
def _impersonate_target_available(self, target):
# todo(future): make available as public API
# TODO(future): make available as public API
return any(
rh.is_supported_target(target)
for rh in self._request_director.handlers.values()
@@ -4217,7 +4226,7 @@ class YoutubeDL:
return encoding
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
""" Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """
if overwrite is None:
overwrite = self.params.get('overwrites', True)
if not self.params.get('writeinfojson'):
@@ -4240,7 +4249,7 @@ class YoutubeDL:
return None
def _write_description(self, label, ie_result, descfn):
''' Write description and returns True = written, False = skip, None = error '''
""" Write description and returns True = written, False = skip, None = error """
if not self.params.get('writedescription'):
return False
elif not descfn:
@@ -4264,7 +4273,7 @@ class YoutubeDL:
return True
def _write_subtitles(self, info_dict, filename):
''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
""" Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error"""
ret = []
subtitles = info_dict.get('requested_subtitles')
if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
@@ -4311,7 +4320,7 @@ class YoutubeDL:
self.dl(sub_filename, sub_copy, subtitle=True)
sub_info['filepath'] = sub_filename
ret.append(sub_filename)
except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
if self.params.get('ignoreerrors') is not True: # False or 'only_download'
if not self.params.get('ignoreerrors'):
@@ -4321,7 +4330,7 @@ class YoutubeDL:
return ret
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
""" Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """
write_all = self.params.get('write_all_thumbnails', False)
thumbnails, ret = [], []
if write_all or self.params.get('writethumbnail', False):
@@ -4349,8 +4358,8 @@ class YoutubeDL:
existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
if existing_thumb:
self.to_screen('[info] %s is already present' % (
thumb_display_id if multiple else f'{label} thumbnail').capitalize())
self.to_screen('[info] {} is already present'.format((
thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
t['filepath'] = existing_thumb
ret.append(existing_thumb)
else:

View File

@@ -14,7 +14,7 @@ import os
import re
import traceback
from .compat import compat_os_name, compat_shlex_quote
from .compat import compat_os_name
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes
@@ -58,6 +58,7 @@ from .utils import (
read_stdin,
render_table,
setproctitle,
shell_quote,
traverse_obj,
variadic,
write_string,
@@ -115,9 +116,9 @@ def print_extractor_information(opts, urls):
ie.description(markdown=False, search_examples=_SEARCHES)
for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
elif opts.ap_list_mso:
out = 'Supported TV Providers:\n%s\n' % render_table(
out = 'Supported TV Providers:\n{}\n'.format(render_table(
['mso', 'mso name'],
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
else:
return False
write_string(out, out=sys.stdout)
@@ -129,7 +130,7 @@ def set_compat_opts(opts):
if name not in opts.compat_opts:
return False
opts.compat_opts.discard(name)
opts.compat_opts.update(['*%s' % name])
opts.compat_opts.update([f'*{name}'])
return True
def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
@@ -222,7 +223,7 @@ def validate_options(opts):
validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
if opts.wait_for_video is not None:
min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None])
validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
'time range to wait for video', opts.wait_for_video)
validate_minmax(min_wait, max_wait, 'time range to wait for video')
@@ -264,9 +265,9 @@ def validate_options(opts):
# Retry sleep function
def parse_sleep_func(expr):
NUMBER_RE = r'\d+(?:\.\d+)?'
op, start, limit, step, *_ = tuple(re.fullmatch(
op, start, limit, step, *_ = (*tuple(re.fullmatch(
rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
expr.strip()).groups()) + (None, None)
expr.strip()).groups()), None, None)
if op == 'exp':
return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
@@ -396,13 +397,13 @@ def validate_options(opts):
# MetadataParser
def metadataparser_actions(f):
if isinstance(f, str):
cmd = '--parse-metadata %s' % compat_shlex_quote(f)
cmd = f'--parse-metadata {shell_quote(f)}'
try:
actions = [MetadataFromFieldPP.to_action(f)]
except Exception as err:
raise ValueError(f'{cmd} is invalid; {err}')
else:
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
cmd = f'--replace-in-metadata {shell_quote(f)}'
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
for action in actions:
@@ -413,7 +414,7 @@ def validate_options(opts):
yield action
if opts.metafromtitle is not None:
opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle)
opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}')
opts.parse_metadata = {
k: list(itertools.chain(*map(metadataparser_actions, v)))
for k, v in opts.parse_metadata.items()
@@ -602,7 +603,7 @@ def get_postprocessors(opts):
yield {
'key': 'MetadataParser',
'actions': actions,
'when': when
'when': when,
}
sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
if sponsorblock_query:
@@ -610,19 +611,19 @@ def get_postprocessors(opts):
'key': 'SponsorBlock',
'categories': sponsorblock_query,
'api': opts.sponsorblock_api,
'when': 'after_filter'
'when': 'after_filter',
}
if opts.convertsubtitles:
yield {
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
'when': 'before_dl'
'when': 'before_dl',
}
if opts.convertthumbnails:
yield {
'key': 'FFmpegThumbnailsConvertor',
'format': opts.convertthumbnails,
'when': 'before_dl'
'when': 'before_dl',
}
if opts.extractaudio:
yield {
@@ -647,7 +648,7 @@ def get_postprocessors(opts):
yield {
'key': 'FFmpegEmbedSubtitle',
# already_have_subtitle = True prevents the file from being deleted after embedding
'already_have_subtitle': opts.writesubtitles and keep_subs
'already_have_subtitle': opts.writesubtitles and keep_subs,
}
if not opts.writeautomaticsub and keep_subs:
opts.writesubtitles = True
@@ -660,7 +661,7 @@ def get_postprocessors(opts):
'remove_sponsor_segments': opts.sponsorblock_remove,
'remove_ranges': opts.remove_ranges,
'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
'force_keyframes': opts.force_keyframes_at_cuts
'force_keyframes': opts.force_keyframes_at_cuts,
}
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
# FFmpegExtractAudioPP as containers before conversion may not support
@@ -694,7 +695,7 @@ def get_postprocessors(opts):
yield {
'key': 'EmbedThumbnail',
# already_have_thumbnail = True prevents the file from being deleted after embedding
'already_have_thumbnail': opts.writethumbnail
'already_have_thumbnail': opts.writethumbnail,
}
if not opts.writethumbnail:
opts.writethumbnail = True
@@ -741,7 +742,7 @@ def parse_options(argv=None):
print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
any_getting = any(getattr(opts, k) for k in (
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl',
))
if opts.quiet is None:
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
@@ -1002,7 +1003,7 @@ def _real_main(argv=None):
def make_row(target, handler):
return [
join_nonempty(target.client.title(), target.version, delim='-') or '-',
join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-',
join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-',
handler,
]

View File

@@ -68,7 +68,7 @@ def pad_block(block, padding_mode):
raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
if padding_mode == 'iso7816' and padding_size:
block = block + [0x80] # NB: += mutates list
block = [*block, 0x80] # NB: += mutates list
padding_size -= 1
return block + [PADDING_BYTE[padding_mode]] * padding_size
@@ -110,9 +110,7 @@ def aes_ecb_decrypt(data, key, iv=None):
for i in range(block_count):
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
encrypted_data += aes_decrypt(block, expanded_key)
encrypted_data = encrypted_data[:len(data)]
return encrypted_data
return encrypted_data[:len(data)]
def aes_ctr_decrypt(data, key, iv):
@@ -148,9 +146,7 @@ def aes_ctr_encrypt(data, key, iv):
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
encrypted_data += xor(block, cipher_counter_block)
encrypted_data = encrypted_data[:len(data)]
return encrypted_data
return encrypted_data[:len(data)]
def aes_cbc_decrypt(data, key, iv):
@@ -174,9 +170,7 @@ def aes_cbc_decrypt(data, key, iv):
decrypted_block = aes_decrypt(block, expanded_key)
decrypted_data += xor(decrypted_block, previous_cipher_block)
previous_cipher_block = block
decrypted_data = decrypted_data[:len(data)]
return decrypted_data
return decrypted_data[:len(data)]
def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
@@ -224,7 +218,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
if len(nonce) == 12:
j0 = nonce + [0, 0, 0, 1]
j0 = [*nonce, 0, 0, 0, 1]
else:
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
@@ -242,11 +236,11 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
data
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
+ ((len(data) * 8).to_bytes(8, 'big'))) # length of data
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
)
if tag != aes_ctr_encrypt(s_tag, key, j0):
raise ValueError("Mismatching authentication tag")
raise ValueError('Mismatching authentication tag')
return decrypted_data
@@ -288,9 +282,7 @@ def aes_decrypt(data, expanded_key):
data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
data = shift_rows_inv(data)
data = sub_bytes_inv(data)
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
return data
return xor(data, expanded_key[:BLOCK_SIZE_BYTES])
def aes_decrypt_text(data, password, key_size_bytes):
@@ -318,9 +310,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
cipher = data[NONCE_LENGTH_BYTES:]
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
plaintext = intlist_to_bytes(decrypted_data)
return plaintext
return intlist_to_bytes(decrypted_data)
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
@@ -428,9 +418,7 @@ def key_expansion(data):
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
temp = data[-4:]
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
data = data[:expanded_key_size_bytes]
return data
return data[:expanded_key_size_bytes]
def iter_vector(iv):
@@ -511,7 +499,7 @@ def block_product(block_x, block_y):
# NIST SP 800-38D, Algorithm 1
if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes')
block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
block_v = block_y[:]
@@ -534,7 +522,7 @@ def ghash(subkey, data):
# NIST SP 800-38D, Algorithm 2
if len(data) % BLOCK_SIZE_BYTES:
raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes')
last_y = [0] * BLOCK_SIZE_BYTES
for i in range(0, len(data), BLOCK_SIZE_BYTES):

View File

@@ -81,10 +81,10 @@ class Cache:
cachedir = self._get_root_dir()
if not any((term in cachedir) for term in ('cache', 'tmp')):
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')
self._ydl.to_screen(
'Removing cache dir %s .' % cachedir, skip_eol=True)
f'Removing cache dir {cachedir} .', skip_eol=True)
if os.path.exists(cachedir):
self._ydl.to_screen('.', skip_eol=True)
shutil.rmtree(cachedir)

View File

@@ -35,7 +35,7 @@ from .compat_utils import passthrough_module
from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))

View File

@@ -7,6 +7,6 @@ passthrough_module(__name__, 'functools')
del passthrough_module
try:
cache # >= 3.9
_ = cache # >= 3.9
except NameError:
cache = lru_cache(maxsize=None)

View File

@@ -46,7 +46,7 @@ from .utils import (
from .utils._utils import _YDLLogger
from .utils.networking import normalize_url
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
@@ -146,7 +146,7 @@ def _extract_firefox_cookies(profile, container, logger):
identities = json.load(containers).get('identities', [])
container_id = next((context.get('userContextId') for context in identities if container in (
context.get('name'),
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()),
)), None)
if not isinstance(container_id, int):
raise ValueError(f'could not find firefox container "{container}" in containers.json')
@@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
}[browser_name]
elif sys.platform == 'darwin':
@@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(appdata, 'Microsoft Edge'),
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
'vivaldi': os.path.join(appdata, 'Vivaldi'),
'whale': os.path.join(appdata, 'Naver/Whale'),
}[browser_name]
else:
@@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': os.path.join(config, 'microsoft-edge'),
'opera': os.path.join(config, 'opera'),
'vivaldi': os.path.join(config, 'vivaldi'),
'whale': os.path.join(config, 'naver-whale'),
}[browser_name]
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
@@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
'whale': 'Whale',
}[browser_name]
browsers_without_profiles = {'opera'}
@@ -259,7 +263,7 @@ def _get_chromium_based_browser_settings(browser_name):
return {
'browser_dir': browser_dir,
'keyring_name': keyring_name,
'supports_profiles': browser_name not in browsers_without_profiles
'supports_profiles': browser_name not in browsers_without_profiles,
}
@@ -347,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
if value is None:
return is_encrypted, None
# In chrome, session cookies have expires_utc set to 0
# In our cookie-store, cookies that do not expire should have expires set to None
if not expires_utc:
expires_utc = None
return is_encrypted, http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
@@ -817,7 +826,7 @@ def _choose_linux_keyring(logger):
elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
linux_keyring = _LinuxKeyring.KWALLET6
elif desktop_environment in (
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER,
):
linux_keyring = _LinuxKeyring.BASICTEXT
else:
@@ -852,7 +861,7 @@ def _get_kwallet_network_wallet(keyring, logger):
'dbus-send', '--session', '--print-reply=literal',
f'--dest={service_name}',
wallet_path,
'org.kde.KWallet.networkWallet'
'org.kde.KWallet.networkWallet',
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
if returncode:
@@ -882,7 +891,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger):
'kwallet-query',
'--read-password', f'{browser_keyring_name} Safe Storage',
'--folder', f'{browser_keyring_name} Keys',
network_wallet
network_wallet,
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
if returncode:
@@ -922,9 +931,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger):
for item in col.get_all_items():
if item.get_label() == f'{browser_keyring_name} Safe Storage':
return item.get_secret()
else:
logger.error('failed to read from keyring')
return b''
logger.error('failed to read from keyring')
return b''
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
@@ -1044,7 +1052,7 @@ def _decrypt_windows_dpapi(ciphertext, logger):
None, # pvReserved: must be NULL
None, # pPromptStruct: information about prompts to display
0, # dwFlags
ctypes.byref(blob_out) # pDataOut
ctypes.byref(blob_out), # pDataOut
)
if not ret:
logger.warning('failed to decrypt with DPAPI', only_once=True)
@@ -1120,24 +1128,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
_RESERVED = {
"expires",
"path",
"comment",
"domain",
"max-age",
"secure",
"httponly",
"version",
"samesite",
'expires',
'path',
'comment',
'domain',
'max-age',
'secure',
'httponly',
'version',
'samesite',
}
_FLAGS = {"secure", "httponly"}
_FLAGS = {'secure', 'httponly'}
# Added 'bad' group to catch the remaining value
_COOKIE_PATTERN = re.compile(r"""
_COOKIE_PATTERN = re.compile(r'''
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
@@ -1147,7 +1155,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
| # or
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
| # or
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
) # End of group 'val'
| # or
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
@@ -1155,7 +1163,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
""", re.ASCII | re.VERBOSE)
''', re.ASCII | re.VERBOSE)
def load(self, data):
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
@@ -1251,14 +1259,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
# with no name, whereas http.cookiejar regards it as a
# cookie with no value.
name, value = '', name
f.write('%s\n' % '\t'.join((
f.write('{}\n'.format('\t'.join((
cookie.domain,
self._true_or_false(cookie.domain.startswith('.')),
cookie.path,
self._true_or_false(cookie.secure),
str_or_none(cookie.expires, default=''),
name, value
)))
name, value,
))))
def save(self, filename=None, ignore_discard=True, ignore_expires=True):
"""
@@ -1297,10 +1305,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
return line
cf = io.StringIO()

View File

@@ -404,7 +404,7 @@ class FileDownloader:
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
self.to_screen(f'[download] Resuming download at byte {resume_len}')
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
"""Report retry"""

View File

@@ -55,7 +55,7 @@ class ExternalFD(FragmentFD):
# correct and expected termination thus all postprocessing
# should take place
retval = 0
self.to_screen('[%s] Interrupted by user' % self.get_basename())
self.to_screen(f'[{self.get_basename()}] Interrupted by user')
finally:
if self._cookies_tempfile:
self.try_remove(self._cookies_tempfile)
@@ -172,7 +172,7 @@ class ExternalFD(FragmentFD):
decrypt_fragment = self.decrypter(info_dict)
dest, _ = self.sanitize_open(tmpfilename, 'wb')
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
fragment_filename = f'{tmpfilename}-Frag{frag_index}'
try:
src, _ = self.sanitize_open(fragment_filename, 'rb')
except OSError as err:
@@ -186,7 +186,7 @@ class ExternalFD(FragmentFD):
if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(fragment_filename))
dest.close()
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls'))
return 0
def _call_process(self, cmd, info_dict):
@@ -336,11 +336,11 @@ class Aria2cFD(ExternalFD):
if 'fragments' in info_dict:
cmd += ['--uri-selector=inorder']
url_list_file = '%s.frag.urls' % tmpfilename
url_list_file = f'{tmpfilename}.frag.urls'
url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}'
url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode())
stream.close()
@@ -357,7 +357,7 @@ class Aria2cFD(ExternalFD):
'id': sanitycheck,
'method': method,
'params': [f'token:{rpc_secret}', *params],
}).encode('utf-8')
}).encode()
request = Request(
f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={
@@ -416,7 +416,7 @@ class Aria2cFD(ExternalFD):
'total_bytes_estimate': total,
'eta': (total - downloaded) / (speed or 1),
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
'elapsed': time.time() - started
'elapsed': time.time() - started,
})
self._hook_progress(status, info_dict)
@@ -509,12 +509,12 @@ class FFmpegFD(ExternalFD):
proxy = self.params.get('proxy')
if proxy:
if not re.match(r'^[\da-zA-Z]+://', proxy):
proxy = 'http://%s' % proxy
proxy = f'http://{proxy}'
if proxy.startswith('socks'):
self.report_warning(
'%s does not support SOCKS proxies. Downloading is likely to fail. '
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. '
'Consider adding --hls-prefer-native to your command.')
# Since December 2015 ffmpeg supports -http_proxy option (see
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
@@ -575,7 +575,7 @@ class FFmpegFD(ExternalFD):
if end_time:
args += ['-t', str(end_time - start_time)]
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy']

View File

@@ -67,12 +67,12 @@ class FlvReader(io.BytesIO):
self.read_bytes(3)
quality_entry_count = self.read_unsigned_char()
# QualityEntryCount
for i in range(quality_entry_count):
for _ in range(quality_entry_count):
self.read_string()
segment_run_count = self.read_unsigned_int()
segments = []
for i in range(segment_run_count):
for _ in range(segment_run_count):
first_segment = self.read_unsigned_int()
fragments_per_segment = self.read_unsigned_int()
segments.append((first_segment, fragments_per_segment))
@@ -91,12 +91,12 @@ class FlvReader(io.BytesIO):
quality_entry_count = self.read_unsigned_char()
# QualitySegmentUrlModifiers
for i in range(quality_entry_count):
for _ in range(quality_entry_count):
self.read_string()
fragments_count = self.read_unsigned_int()
fragments = []
for i in range(fragments_count):
for _ in range(fragments_count):
first = self.read_unsigned_int()
first_ts = self.read_unsigned_long_long()
duration = self.read_unsigned_int()
@@ -135,11 +135,11 @@ class FlvReader(io.BytesIO):
self.read_string() # MovieIdentifier
server_count = self.read_unsigned_char()
# ServerEntryTable
for i in range(server_count):
for _ in range(server_count):
self.read_string()
quality_count = self.read_unsigned_char()
# QualityEntryTable
for i in range(quality_count):
for _ in range(quality_count):
self.read_string()
# DrmData
self.read_string()
@@ -148,14 +148,14 @@ class FlvReader(io.BytesIO):
segments_count = self.read_unsigned_char()
segments = []
for i in range(segments_count):
for _ in range(segments_count):
box_size, box_type, box_data = self.read_box_info()
assert box_type == b'asrt'
segment = FlvReader(box_data).read_asrt()
segments.append(segment)
fragments_run_count = self.read_unsigned_char()
fragments = []
for i in range(fragments_run_count):
for _ in range(fragments_run_count):
box_size, box_type, box_data = self.read_box_info()
assert box_type == b'afrt'
fragments.append(FlvReader(box_data).read_afrt())
@@ -309,7 +309,7 @@ class F4mFD(FragmentFD):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url
@@ -326,8 +326,8 @@ class F4mFD(FragmentFD):
formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1]
else:
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
rate, media = next(filter(
lambda f: int(f[0]) == requested_bitrate, formats))
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url

View File

@@ -199,7 +199,7 @@ class FragmentFD(FileDownloader):
'.ytdl file is corrupt' if is_corrupt else
'Inconsistent state of incomplete fragment download')
self.report_warning(
'%s. Restarting from the beginning ...' % message)
f'{message}. Restarting from the beginning ...')
ctx['fragment_index'] = resume_len = 0
if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt']
@@ -366,10 +366,10 @@ class FragmentFD(FileDownloader):
return decrypt_fragment
def download_and_append_fragments_multiple(self, *args, **kwargs):
'''
"""
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
all args must be either tuple or list
'''
"""
interrupt_trigger = [True]
max_progress = len(args)
if max_progress == 1:
@@ -424,7 +424,7 @@ class FragmentFD(FileDownloader):
finally:
tpe.shutdown(wait=True)
if not interrupt_trigger[0] and not is_live:
raise KeyboardInterrupt()
raise KeyboardInterrupt
# we expect the user wants to stop and DO WANT the preceding postprocessors to run;
# so returning a intermediate result here instead of KeyboardInterrupt on live
return result

View File

@@ -72,7 +72,7 @@ class HlsFD(FragmentFD):
def real_download(self, filename, info_dict):
man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.url
@@ -228,7 +228,7 @@ class HlsFD(FragmentFD):
'url': frag_url,
'decrypt_info': decrypt_info,
'byte_range': byte_range,
'media_sequence': media_sequence
'media_sequence': media_sequence,
})
media_sequence += 1
@@ -350,9 +350,8 @@ class HlsFD(FragmentFD):
# XXX: this should probably be silent as well
# or verify that all segments contain the same data
self.report_warning(bug_reports_message(
'Discarding a %s block found in the middle of the stream; '
'if the subtitles display incorrectly,'
% (type(block).__name__)))
f'Discarding a {type(block).__name__} block found in the middle of the stream; '
'if the subtitles display incorrectly,'))
continue
block.write_into(output)

View File

@@ -176,7 +176,7 @@ class HttpFD(FileDownloader):
'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len,
}, info_dict)
raise SucceedDownload()
raise SucceedDownload
else:
# The length does not match, we start the download over
self.report_unable_to_resume()
@@ -194,7 +194,7 @@ class HttpFD(FileDownloader):
def close_stream():
if ctx.stream is not None:
if not ctx.tmpfilename == '-':
if ctx.tmpfilename != '-':
ctx.stream.close()
ctx.stream = None
@@ -268,20 +268,20 @@ class HttpFD(FileDownloader):
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
except OSError as err:
self.report_error('unable to open for writing: %s' % str(err))
self.report_error(f'unable to open for writing: {err}')
return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
self.report_error(f'unable to set filesize xattr: {err}')
try:
ctx.stream.write(data_block)
except OSError as err:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
self.report_error(f'unable to write data: {err}')
return False
# Apply rate limit
@@ -327,7 +327,7 @@ class HttpFD(FileDownloader):
elif now - ctx.throttle_start > 3:
if ctx.stream is not None and ctx.tmpfilename != '-':
ctx.stream.close()
raise ThrottledDownload()
raise ThrottledDownload
elif speed:
ctx.throttle_start = None
@@ -338,7 +338,7 @@ class HttpFD(FileDownloader):
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
raise NextFragment()
raise NextFragment
if ctx.tmpfilename != '-':
ctx.stream.close()

View File

@@ -251,7 +251,7 @@ class IsmFD(FragmentFD):
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
for i, segment in enumerate(segments):
for segment in segments:
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue

View File

@@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION
class MhtmlFD(FragmentFD):
_STYLESHEET = """\
_STYLESHEET = '''\
html, body {
margin: 0;
padding: 0;
@@ -45,7 +45,7 @@ body > figure > img {
max-width: 100%;
max-height: calc(100vh - 5em);
}
"""
'''
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
_STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
@@ -57,24 +57,19 @@ body > figure > img {
)).decode('us-ascii') + '?='
def _gen_cid(self, i, fragment, frag_boundary):
return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid'
def _gen_stub(self, *, fragments, frag_boundary, title):
output = io.StringIO()
output.write((
output.write(
'<!DOCTYPE html>'
'<html>'
'<head>'
'' '<meta name="generator" content="yt-dlp {version}">'
'' '<title>{title}</title>'
'' '<style>{styles}</style>'
'<body>'
).format(
version=escapeHTML(YT_DLP_VERSION),
styles=self._STYLESHEET,
title=escapeHTML(title)
))
f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">'
f'<title>{escapeHTML(title)}</title>'
f'<style>{self._STYLESHEET}</style>'
'<body>')
t0 = 0
for i, frag in enumerate(fragments):
@@ -87,15 +82,12 @@ body > figure > img {
num=i + 1,
t0=srt_subtitles_timecode(t0),
t1=srt_subtitles_timecode(t1),
duration=formatSeconds(frag['duration'], msec=True)
duration=formatSeconds(frag['duration'], msec=True),
))
except (KeyError, ValueError, TypeError):
t1 = None
output.write((
'<figcaption>Slide #{num}</figcaption>'
).format(num=i + 1))
output.write('<img src="cid:{cid}">'.format(
cid=self._gen_cid(i, frag, frag_boundary)))
output.write(f'<figcaption>Slide #{i + 1}</figcaption>')
output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">')
output.write('</figure>')
t0 = t1
@@ -126,31 +118,24 @@ body > figure > img {
stub = self._gen_stub(
fragments=fragments,
frag_boundary=frag_boundary,
title=title
title=title,
)
ctx['dest_stream'].write((
'MIME-Version: 1.0\r\n'
'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
'Subject: {title}\r\n'
f'Subject: {self._escape_mime(title)}\r\n'
'Content-type: multipart/related; '
'' 'boundary="{boundary}"; '
'' 'type="text/html"\r\n'
'X.yt-dlp.Origin: {origin}\r\n'
f'boundary="{frag_boundary}"; '
'type="text/html"\r\n'
f'X.yt-dlp.Origin: {origin}\r\n'
'\r\n'
'--{boundary}\r\n'
f'--{frag_boundary}\r\n'
'Content-Type: text/html; charset=utf-8\r\n'
'Content-Length: {length}\r\n'
f'Content-Length: {len(stub)}\r\n'
'\r\n'
'{stub}\r\n'
).format(
origin=origin,
boundary=frag_boundary,
length=len(stub),
title=self._escape_mime(title),
stub=stub
).encode())
f'{stub}\r\n').encode())
extra_state['header_written'] = True
for i, fragment in enumerate(fragments):

View File

@@ -15,7 +15,7 @@ class NiconicoDmcFD(FileDownloader):
def real_download(self, filename, info_dict):
from ..extractor.niconico import NiconicoIE
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Downloading from DMC')
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
@@ -34,7 +34,7 @@ class NiconicoDmcFD(FileDownloader):
try:
self.ydl.urlopen(request).read()
except Exception:
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Heartbeat failed')
with heartbeat_lock:
if not download_complete:
@@ -85,14 +85,14 @@ class NiconicoLiveFD(FileDownloader):
'quality': live_quality,
'protocol': 'hls+fmp4',
'latency': live_latency,
'chasePlay': False
'chasePlay': False,
},
'room': {
'protocol': 'webSocket',
'commentable': True
'commentable': True,
},
'reconnect': True,
}
},
}))
else:
ws = ws_extractor
@@ -118,7 +118,7 @@ class NiconicoLiveFD(FileDownloader):
elif self.ydl.params.get('verbose', False):
if len(recv) > 100:
recv = recv[:100] + '...'
self.to_screen('[debug] Server said: %s' % recv)
self.to_screen(f'[debug] Server said: {recv}')
def ws_main():
reconnect = False
@@ -128,7 +128,7 @@ class NiconicoLiveFD(FileDownloader):
if ret is True:
return
except BaseException as e:
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
time.sleep(10)
continue
finally:

View File

@@ -180,9 +180,9 @@ class RtmpFD(FileDownloader):
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
time.sleep(5.0) # This seems to be needed
args = basic_args + ['--resume']
args = [*basic_args, '--resume']
if retval == RD_FAILED:
args += ['--skip', '1']
args = [encodeArgument(a) for a in args]
@@ -197,7 +197,7 @@ class RtmpFD(FileDownloader):
break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,

View File

@@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD):
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
self.report_warning('Live chat download runs until the livestream ends. '
'If you wish to download the video simultaneously, run a separate yt-dlp instance')

File diff suppressed because it is too large Load Diff

View File

@@ -4,12 +4,11 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
dict_get,
ExtractorError,
js_to_json,
dict_get,
int_or_none,
js_to_json,
parse_iso8601,
str_or_none,
traverse_obj,
@@ -67,7 +66,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4',
'title': 'WWI Centenary',
'description': 'md5:c2379ec0ca84072e86b446e536954546',
}
},
}, {
'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
'info_dict': {
@@ -75,7 +74,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4',
'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
}
},
}, {
'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
'info_dict': {
@@ -86,7 +85,7 @@ class ABCIE(InfoExtractor):
'upload_date': '20200813',
'uploader': 'Behind the News',
'uploader_id': 'behindthenews',
}
},
}, {
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
'info_dict': {
@@ -95,7 +94,7 @@ class ABCIE(InfoExtractor):
'ext': 'mp4',
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
}
},
}]
def _real_extract(self, url):
@@ -126,7 +125,7 @@ class ABCIE(InfoExtractor):
if mobj is None:
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
if expired:
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True)
raise ExtractorError('Unable to extract video urls')
urls_info = self._parse_json(
@@ -164,7 +163,7 @@ class ABCIE(InfoExtractor):
'height': height,
'tbr': bitrate,
'filesize': int_or_none(url_info.get('filesize')),
'format_id': format_id
'format_id': format_id,
})
return {
@@ -288,13 +287,12 @@ class ABCIViewIE(InfoExtractor):
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
house_number = video_params.get('episodeHouseNumber') or video_id
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
int(time.time()), house_number)
path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
sig = hmac.new(
b'android.content.res.Resources',
path.encode('utf-8'), hashlib.sha256).hexdigest()
path.encode(), hashlib.sha256).hexdigest()
token = self._download_webpage(
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
f'http://iview.abc.net.au{path}&sig={sig}', video_id)
def tokenize_url(url, token):
return update_url_query(url, {
@@ -303,7 +301,7 @@ class ABCIViewIE(InfoExtractor):
for sd in ('1080', '720', 'sd', 'sd-low'):
sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str)
stream, lambda x: x['streams']['hls'][sd], str)
if not sd_url:
continue
formats = self._extract_m3u8_formats(
@@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'description': 'md5:93119346c24a7c322d446d8eece430ff',
'series': 'Upper Middle Bogan',
'season': 'Series 1',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
},
'playlist_count': 8,
}, {
@@ -386,7 +384,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
'series': '7.30 Mark Humphries Satire',
'season': 'Episodes',
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
},
'playlist_count': 15,
}]
@@ -398,7 +396,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
webpage, 'initial state')
video_data = self._parse_json(
unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
video_data = video_data['route']['pageData']['_embedded']
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])

View File

@@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
display_id = mobj.group('display_id')
video_id = mobj.group('id')
info_dict = self._extract_feed_info(
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
f'http://abcnews.go.com/video/itemfeed?id={video_id}')
info_dict.update({
'id': video_id,
'display_id': display_id,

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
dict_get,
int_or_none,
@@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor):
data = self._download_json(
'https://api.abcotvs.com/v2/content', display_id, query={
'id': video_id,
'key': 'otv.web.%s.story' % station,
'key': f'otv.web.{station}.story',
'station': station,
})['data']
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
title = video.get('title') or video['linkText']
formats = []

View File

@@ -12,20 +12,21 @@ import urllib.parse
import urllib.request
import urllib.response
import uuid
from ..utils.networking import clean_proxies
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..utils import (
ExtractorError,
OnDemandPagedList,
bytes_to_intlist,
decode_base_n,
int_or_none,
intlist_to_bytes,
OnDemandPagedList,
time_seconds,
traverse_obj,
update_url_query,
)
from ..utils.networking import clean_proxies
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
@@ -65,8 +66,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
query={'t': media_token},
data=json.dumps({
'kv': 'a',
'lt': ticket
}).encode('utf-8'),
'lt': ticket,
}).encode(),
headers={
'Content-Type': 'application/json',
})
@@ -76,7 +77,7 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
h = hmac.new(
binascii.unhexlify(self.HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest())
@@ -102,11 +103,11 @@ class AbemaTVBaseIE(InfoExtractor):
@classmethod
def _generate_aks(cls, deviceid):
deviceid = deviceid.encode('utf-8')
deviceid = deviceid.encode()
# add 1 hour and then drop minute and secs
ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
time_struct = time.gmtime(ts_1hour)
ts_1hour_str = str(ts_1hour).encode('utf-8')
ts_1hour_str = str(ts_1hour).encode()
tmp = None
@@ -118,7 +119,7 @@ class AbemaTVBaseIE(InfoExtractor):
def mix_tmp(count):
nonlocal tmp
for i in range(count):
for _ in range(count):
mix_once(tmp)
def mix_twist(nonce):
@@ -159,7 +160,7 @@ class AbemaTVBaseIE(InfoExtractor):
data=json.dumps({
'deviceId': self._DEVICE_ID,
'applicationKeySecret': aks,
}).encode('utf-8'),
}).encode(),
headers={
'Content-Type': 'application/json',
})
@@ -179,7 +180,7 @@ class AbemaTVBaseIE(InfoExtractor):
'osLang': 'ja_JP',
'osTimezone': 'Asia/Tokyo',
'appId': 'tv.abema',
'appVersion': '3.27.1'
'appVersion': '3.27.1',
}, headers={
'Authorization': f'bearer {self._get_device_token()}',
})['token']
@@ -201,8 +202,8 @@ class AbemaTVBaseIE(InfoExtractor):
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
data=json.dumps({
method: username,
'password': password
}).encode('utf-8'), headers={
'password': password,
}).encode(), headers={
'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/',
@@ -343,7 +344,7 @@ class AbemaTVIE(AbemaTVBaseIE):
description = self._html_search_regex(
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'),
webpage, 'description', default=None, group=1)
if not description:
og_desc = self._html_search_meta(

View File

@@ -67,7 +67,7 @@ class ACastIE(ACastBaseIE):
'display_id': '2.raggarmordet-rosterurdetforflutna',
'season_number': 4,
'season': 'Season 4',
}
},
}, {
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
'only_matching': True,
@@ -93,13 +93,13 @@ class ACastIE(ACastBaseIE):
'series': 'Democracy Sausage with Mark Kenny',
'timestamp': 1684826362,
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
}
},
}]
def _real_extract(self, url):
channel, display_id = self._match_valid_url(url).groups()
episode = self._call_api(
'%s/episodes/%s' % (channel, display_id),
f'{channel}/episodes/{display_id}',
display_id, {'showInfo': 'true'})
return self._extract_episode(
episode, self._extract_show_info(episode.get('show') or {}))
@@ -130,7 +130,7 @@ class ACastChannelIE(ACastBaseIE):
@classmethod
def suitable(cls, url):
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
return False if ACastIE.suitable(url) else super().suitable(url)
def _real_extract(self, url):
show_slug = self._match_id(url)

View File

@@ -3,10 +3,10 @@ from ..utils import (
float_or_none,
format_field,
int_or_none,
str_or_none,
traverse_obj,
parse_codecs,
parse_qs,
str_or_none,
traverse_obj,
)
@@ -25,7 +25,7 @@ class AcFunVideoBaseIE(InfoExtractor):
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
'tbr': float_or_none(video.get('avgBitrate')),
**parse_codecs(video.get('codecs', ''))
**parse_codecs(video.get('codecs', '')),
})
return {
@@ -77,7 +77,7 @@ class AcFunVideoIE(AcFunVideoBaseIE):
'comment_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
}
},
}]
def _real_extract(self, url):

View File

@@ -7,21 +7,20 @@ import time
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_b64decode
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
ass_subtitles_timecode,
bytes_to_intlist,
bytes_to_long,
ExtractorError,
float_or_none,
int_or_none,
intlist_to_bytes,
long_to_bytes,
parse_iso8601,
pkcs1pad,
strip_or_none,
str_or_none,
strip_or_none,
try_get,
unified_strdate,
urlencode_postdata,
@@ -111,9 +110,9 @@ class ADNIE(ADNBaseIE):
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
compat_b64decode(enc_subtitles[24:]),
base64.b64decode(enc_subtitles[24:]),
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
compat_b64decode(enc_subtitles[:24])))
base64.b64decode(enc_subtitles[:24])))
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
if not subtitles_json:
return None
@@ -136,7 +135,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
if start is None or end is None or text is None:
continue
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format(
ass_subtitles_timecode(start),
ass_subtitles_timecode(end),
'{\\a%d}' % alignment if alignment != 2 else '',
@@ -178,7 +177,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
def _real_extract(self, url):
lang, video_id = self._match_valid_url(url).group('lang', 'id')
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
player = self._download_json(
video_base_url + 'configuration', video_id,
'Downloading player config JSON metadata',
@@ -219,12 +218,12 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization,
'X-Target-Distribution': lang,
**self._HEADERS
**self._HEADERS,
}, query={
'freeWithAds': 'true',
'adaptive': 'false',
'withMetadata': 'true',
'source': 'Web'
'source': 'Web',
})
break
except ExtractorError as e:
@@ -256,7 +255,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
for quality, load_balancer_url in qualities.items():
load_balancer_data = self._download_json(
load_balancer_url, video_id,
'Downloading %s %s JSON metadata' % (format_id, quality),
f'Downloading {format_id} {quality} JSON metadata',
fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
@@ -276,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
self.raise_login_required('This video requires a subscription', method='password')
video = (self._download_json(
self._API_BASE_URL + 'video/%s' % video_id, video_id,
self._API_BASE_URL + f'video/{video_id}', video_id,
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
show = video.get('show') or {}
@@ -320,7 +319,7 @@ class ADNSeasonIE(ADNBaseIE):
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
'Downloading episode list', headers={
'X-Target-Distribution': lang,
**self._HEADERS
**self._HEADERS,
}, query={
'order': 'asc',
'limit': '-1',

View File

@@ -1,8 +1,6 @@
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
class AdobeConnectIE(InfoExtractor):
@@ -12,13 +10,13 @@ class AdobeConnectIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._html_extract_title(webpage)
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
is_live = qs.get('isLive', ['false'])[0] == 'true'
formats = []
for con_string in qs['conStrings'][0].split(','):
formats.append({
'format_id': con_string.split('://')[0],
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
'app': urllib.parse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
'ext': 'flv',
'play_path': 'mp4:' + qs['streamName'][0],
'rtmp_conn': 'S:' + qs['ticket'][0],

File diff suppressed because it is too large Load Diff

View File

@@ -2,13 +2,12 @@ import functools
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ISO639Utils,
OnDemandPagedList,
float_or_none,
int_or_none,
ISO639Utils,
join_nonempty,
OnDemandPagedList,
parse_duration,
str_or_none,
str_to_int,
@@ -36,7 +35,7 @@ class AdobeTVBaseIE(InfoExtractor):
return subtitles
def _parse_video_data(self, video_data):
video_id = compat_str(video_data['id'])
video_id = str(video_data['id'])
title = video_data['title']
s3_extracted = False
@@ -151,7 +150,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
page += 1
query['page'] = page
for element_data in self._call_api(
self._RESOURCE, display_id, query, 'Download Page %d' % page):
self._RESOURCE, display_id, query, f'Download Page {page}'):
yield self._process_data(element_data)
def _extract_playlist_entries(self, display_id, query):

View File

@@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE):
getShowBySlug(slug:"%s") {
%%s
}
}''' % show_path
}''' % show_path # noqa: UP031
if episode_path:
query = query % '''title
getVideoBySlug(slug:"%s") {
@@ -128,7 +128,7 @@ class AdultSwimIE(TurnerBaseIE):
episode_title = title = video_data['title']
series = show_data.get('title')
if series:
title = '%s - %s' % (series, title)
title = f'{series} - {title}'
info = {
'id': video_id,
'title': title,
@@ -191,7 +191,7 @@ class AdultSwimIE(TurnerBaseIE):
if not slug:
continue
entries.append(self.url_result(
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
f'http://adultswim.com/videos/{show_path}/{slug}',
'AdultSwim', video.get('_id')))
return self.playlist_result(
entries, show_path, show_data.get('title'),

View File

@@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
requestor_id, brand = self._DOMAIN_MAP[domain]
result = self._download_json(
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
filter_value, query={'filter[%s]' % filter_key: filter_value})
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
filter_value, query={f'filter[{filter_key}]': filter_value})
result = traverse_obj(
result, ('results',
lambda k, v: k == 0 and v[filter_key] == filter_value),
@@ -142,7 +142,7 @@ class AENetworksIE(AENetworksBaseIE):
'skip_download': True,
},
'add_ie': ['ThePlatform'],
'skip': 'Geo-restricted - This content is not available in your location.'
'skip': 'Geo-restricted - This content is not available in your location.',
}, {
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
'info_dict': {
@@ -171,28 +171,28 @@ class AENetworksIE(AENetworksBaseIE):
'skip': 'This video is only available for users of participating TV providers.',
}, {
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
'only_matching': True
'only_matching': True,
}, {
'url': 'http://www.history.com/videos/history-of-valentines-day',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
'only_matching': True
'only_matching': True,
}]
def _real_extract(self, url):
@@ -209,14 +209,14 @@ class AENetworksListBaseIE(AENetworksBaseIE):
%s(slug: "%s") {
%s
}
}''' % (resource, slug, fields),
}''' % (resource, slug, fields), # noqa: UP031
}))['data'][resource]
def _real_extract(self, url):
domain, slug = self._match_valid_url(url).groups()
_, brand = self._DOMAIN_MAP[domain]
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
base_url = 'http://watch.%s' % domain
base_url = f'http://watch.{domain}'
entries = []
for item in (playlist.get(self._ITEMS_KEY) or []):
@@ -248,10 +248,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE):
'playlist_mincount': 12,
}, {
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://www.historyvault.com/collections/mysteryquest',
'only_matching': True
'only_matching': True,
}]
_RESOURCE = 'list'
_ITEMS_KEY = 'items'
@@ -309,7 +309,7 @@ class HistoryTopicIE(AENetworksBaseIE):
'info_dict': {
'id': '40700995724',
'ext': 'mp4',
'title': "History of Valentines Day",
'title': 'History of Valentines Day',
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
'timestamp': 1375819729,
'upload_date': '20130806',
@@ -364,6 +364,6 @@ class BiographyIE(AENetworksBaseIE):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
player_url = self._search_regex(
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
webpage, 'player URL')
return self.url_result(player_url, HistoryPlayerIE.ie_key())

View File

@@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor):
'uploader': 'Semiconductor',
'uploader_id': 'semiconductor',
'uploader_url': 'https://vimeo.com/semiconductor',
'duration': 348
}
'duration': 348,
},
}, {
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
'md5': '03582d795382e49f2fd0b427b55de409',
@@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor):
'uploader': 'Aeon Video',
'uploader_id': 'aeonvideo',
'uploader_url': 'https://vimeo.com/aeonvideo',
'duration': 1344
}
'duration': 1344,
},
}, {
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',

View File

@@ -55,7 +55,7 @@ class AfreecaTVBaseIE(InfoExtractor):
if result != 1:
error = _ERRORS.get(result, 'You have failed to log in.')
raise ExtractorError(
'Unable to login: %s said: %s' % (self.IE_NAME, error),
f'Unable to login: {self.IE_NAME} said: {error}',
expected=True)
@@ -227,7 +227,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
**traverse_obj(file_element, {
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
'timestamp': ('file_start', {unified_timestamp}),
})
}),
})
if traverse_obj(data, ('adult_status', {str})) == 'notLogin':

View File

@@ -168,7 +168,7 @@ class TokFMPodcastIE(InfoExtractor):
for ext in ('aac', 'mp3'):
url_data = self._download_json(
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
media_id, 'Downloading podcast %s URL' % ext)
media_id, f'Downloading podcast {ext} URL')
# prevents inserting the mp3 (default) multiple times
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
formats.append({
@@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor):
}
@staticmethod
def _create_url(id):
return f'https://audycje.tokfm.pl/audycja/{id}'
def _create_url(video_id):
return f'https://audycje.tokfm.pl/audycja/{video_id}'
def _real_extract(self, url):
audition_id = self._match_id(url)

View File

@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
traverse_obj
traverse_obj,
)
@@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor):
'view_count': int,
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
'timestamp': 1664792603,
}
},
}, {
# with youtube_id
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
@@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor):
'channel': 'Newsflare',
'duration': 37,
'upload_date': '20180511',
}
},
}]
def _get_formats_and_subtitle(self, json_data, video_id):

View File

@@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor):
'timestamp': 1667370519,
'title': 'Ангел хранитель 1 серия',
'channel_follower_count': int,
}
},
}, {
# embed url
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
try_get,
@@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor):
'title': title,
'thumbnail': data.get('coverUrl'),
'uploader': try_get(
data, lambda x: x['followBar']['name'], compat_str),
data, lambda x: x['followBar']['name'], str),
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
'formats': formats,
}

View File

@@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor):
'timestamp': 1636219149,
'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
'upload_date': '20211106',
}
},
}, {
'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
'info_dict': {
@@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor):
BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
def _real_extract(self, url):
base, post_type, id = self._match_valid_url(url).groups()
base, post_type, display_id = self._match_valid_url(url).groups()
wp = {
'balkans.aljazeera.net': 'ajb',
'chinese.aljazeera.net': 'chinese',
@@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor):
'news': 'news',
}[post_type.split('/')[0]]
video = self._download_json(
f'https://{base}/graphql', id, query={
f'https://{base}/graphql', display_id, query={
'wp-site': wp,
'operationName': 'ArchipelagoSingleArticleQuery',
'variables': json.dumps({
'name': id,
'name': display_id,
'postType': post_type,
}),
}, headers={
@@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor):
embed = 'default'
if video_id is None:
webpage = self._download_webpage(url, id)
webpage = self._download_webpage(url, display_id)
account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
group=(1, 2, 3, 4), default=(None, None, None, None))
@@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor):
return {
'_type': 'url_transparent',
'url': url,
'ie_key': 'Generic'
'ie_key': 'Generic',
}
return {
'_type': 'url_transparent',
'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
'ie_key': 'BrightcoveNew'
'ie_key': 'BrightcoveNew',
}

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
qualities,
@@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor):
duration = int_or_none(video.get('duration'))
view_count = int_or_none(video.get('view_count'))
timestamp = unified_timestamp(try_get(
video, lambda x: x['added_at']['date'], compat_str))
video, lambda x: x['added_at']['date'], str))
else:
video_id = display_id
media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id)
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
for key, value in media_data['video'].items():
if not key.endswith('Path'):

View File

@@ -12,7 +12,6 @@ from ..utils import (
)
from ..utils.traversal import traverse_obj
_FIELDS = '''
_id
clipImageSource
@@ -34,27 +33,27 @@ _QUERIES = {
video: getClip(clipIdentifier: $id) {
%s %s
}
}''' % (_FIELDS, _EXTRA_FIELDS),
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
'montage': '''query ($id: String!) {
video: getMontage(clipIdentifier: $id) {
%s
}
}''' % _FIELDS,
}''' % _FIELDS, # noqa: UP031
'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS),
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
'Montages': '''query ($page: Int!, $user: String!) {
videos: montages(search: createdDate, page: $page, user: $user) {
data { %s }
}
}''' % _FIELDS,
}''' % _FIELDS, # noqa: UP031
'Mobile Clips': '''query ($page: Int!, $user: String!) {
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
data { %s %s }
}
}''' % (_FIELDS, _EXTRA_FIELDS),
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
}
@@ -122,7 +121,7 @@ class AllstarIE(AllstarBaseIE):
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230425',
'view_count': int,
}
},
}, {
'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
'info_dict': {
@@ -140,7 +139,7 @@ class AllstarIE(AllstarBaseIE):
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230702',
'view_count': int,
}
},
}, {
'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
'info_dict': {
@@ -156,7 +155,7 @@ class AllstarIE(AllstarBaseIE):
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230418',
'view_count': int,
}
},
}, {
'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
'info_dict': {
@@ -172,7 +171,7 @@ class AllstarIE(AllstarBaseIE):
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
'upload_date': '20230703',
'view_count': int,
}
},
}]
def _real_extract(self, url):
@@ -192,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE):
'id': '62b8bdfc9021052f7905882d-clips',
'title': 'cherokee - Clips',
},
'playlist_mincount': 15
'playlist_mincount': 15,
}, {
'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-clips-730',
'title': 'cherokee - Clips - 730',
},
'playlist_mincount': 15
'playlist_mincount': 15,
}, {
'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-montages',
'title': 'cherokee - Montages',
},
'playlist_mincount': 4
'playlist_mincount': 4,
}, {
'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
'info_dict': {
'id': '62b8bdfc9021052f7905882d-mobile',
'title': 'cherokee - Mobile Clips',
},
'playlist_mincount': 1
'playlist_mincount': 1,
}]
_PAGE_SIZE = 10

View File

@@ -1,9 +1,9 @@
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
int_or_none,
parse_duration,
parse_filesize,
int_or_none,
parse_iso8601,
)
@@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor):
'tbr': 1145,
'categories': list,
'age_limit': 18,
}
},
}
def _real_extract(self, url):

View File

@@ -12,7 +12,7 @@ from ..utils import (
class Alsace20TVBaseIE(InfoExtractor):
def _extract_video(self, video_id, url=None):
info = self._download_json(
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
video_id) or {}
title = info.get('titre')
@@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor):
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
return {
'id': video_id,
'title': title,

View File

@@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor):
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
'view_count': int,
'categories': ['News & Politics'],
}
},
}]
def _real_extract(self, url):

View File

@@ -1,17 +1,13 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_urlparse,
)
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
urlencode_postdata,
urljoin,
int_or_none,
clean_html,
ExtractorError
)
@@ -25,7 +21,7 @@ class AluraIE(InfoExtractor):
'info_dict': {
'id': '60095',
'ext': 'mp4',
'title': 'Referências, ref-set e alter'
'title': 'Referências, ref-set e alter',
},
'skip': 'Requires alura account credentials'},
{
@@ -34,12 +30,12 @@ class AluraIE(InfoExtractor):
'only_matching': True},
{
'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
'only_matching': True}
'only_matching': True},
]
def _real_extract(self, url):
course, video_id = self._match_valid_url(url)
course, video_id = self._match_valid_url(url).group('course_name', 'id')
video_url = self._VIDEO_URL % (course, video_id)
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
@@ -52,7 +48,7 @@ class AluraIE(InfoExtractor):
formats = []
for video_obj in video_dict:
video_url_m3u8 = video_obj.get('link')
video_url_m3u8 = video_obj.get('mp4')
video_format = self._extract_m3u8_formats(
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
@@ -66,7 +62,7 @@ class AluraIE(InfoExtractor):
return {
'id': video_id,
'title': video_title,
"formats": formats
'formats': formats,
}
def _perform_login(self, username, password):
@@ -95,7 +91,7 @@ class AluraIE(InfoExtractor):
'post url', default=self._LOGIN_URL, group='url')
if not post_url.startswith('http'):
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
response = self._download_webpage(
post_url, None, 'Logging in',
@@ -107,7 +103,7 @@ class AluraIE(InfoExtractor):
r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
response, 'error message', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError(f'Unable to login: {error}', expected=True)
raise ExtractorError('Unable to log in')
@@ -123,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
@classmethod
def suitable(cls, url):
return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url)
return False if AluraIE.suitable(url) else super().suitable(url)
def _real_extract(self, url):
@@ -161,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
'url': video_url,
'id_key': self.ie_key(),
'chapter': chapter,
'chapter_number': chapter_number
'chapter_number': chapter_number,
}
entries.append(entry)
return self.playlist_result(entries, course_path, course_title)

View File

@@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor):
'display_id': '65091a87ff85af59d9fc54c3',
'view_count': int,
'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
}
},
}]
def _real_extract(self, url):

View File

@@ -1,6 +1,6 @@
from .common import InfoExtractor
from .youtube import YoutubeIE
from .vimeo import VimeoIE
from .youtube import YoutubeIE
from ..utils import (
int_or_none,
parse_iso8601,
@@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor):
'uploader': 'PBS NewsHour',
'uploader_id': 'PBSNewsHour',
'timestamp': 1549639570,
}
},
}, {
# Vimeo
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
@@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor):
'timestamp': 1294763658,
'upload_date': '20110111',
'uploader': 'Sam Morrill',
'uploader_id': 'sammorrill'
}
'uploader_id': 'sammorrill',
},
}, {
# Direct Link
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
@@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor):
'subtitles': dict,
'upload_date': '20091007',
'timestamp': 1254942511,
}
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
meta = self._download_json(
'https://amara.org/api/videos/%s/' % video_id,
f'https://amara.org/api/videos/{video_id}/',
video_id, query={'format': 'json'})
title = meta['title']
video_url = meta['all_urls'][0]

View File

@@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor):
}]
def _real_extract(self, url):
id = self._match_id(url)
playlist_id = self._match_id(url)
for retry in self.RetryManager():
webpage = self._download_webpage(url, id)
webpage = self._download_webpage(url, playlist_id)
try:
data_json = self._search_json(
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
transform_source=js_to_json)
except ExtractorError as e:
retry.error = e
@@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor):
'height': int_or_none(video.get('videoHeight')),
'width': int_or_none(video.get('videoWidth')),
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
class AmazonReviewsIE(InfoExtractor):

View File

@@ -25,7 +25,7 @@ class AmazonMiniTVBaseIE(InfoExtractor):
asin, note=note, headers={
'Content-Type': 'application/json',
'currentpageurl': '/',
'currentplatform': 'dWeb'
'currentplatform': 'dWeb',
}, data=json.dumps(data).encode() if data else None,
query=None if data else {
'deviceType': 'A1WMMUXPCUJL4N',

View File

@@ -64,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
site, display_id = self._match_valid_url(url).groups()
requestor_id = self._REQUESTOR_ID_MAP[site]
page_data = self._download_json(
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
% (requestor_id.lower(), display_id), display_id)['data']
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
display_id)['data']
properties = page_data.get('properties') or {}
query = {
'mbr': 'true',
@@ -76,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
try:
for v in page_data['children']:
if v.get('type') == 'video-player':
releasePid = v['properties']['currentVideo']['meta']['releasePid']
tp_path = 'M_UwQC/' + releasePid
release_pid = v['properties']['currentVideo']['meta']['releasePid']
tp_path = 'M_UwQC/' + release_pid
media_url = 'https://link.theplatform.com/s/' + tp_path
video_player_count += 1
except KeyError:
pass
if video_player_count > 1:
self.report_warning(
'The JSON data has %d video players. Only one will be extracted' % video_player_count)
f'The JSON data has {video_player_count} video players. Only one will be extracted')
# Fall back to videoPid if releasePid not found.
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
@@ -131,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
})
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
if ns_keys:
ns = list(ns_keys)[0]
ns = next(iter(ns_keys))
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
episode_number = int_or_none(
theplatform_metadata.get(ns + '$episode'))

View File

@@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor):
resource_type = 'episodes'
resource = self._download_json(
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id)
video = resource['video'] if is_episode else resource
episode = resource if is_episode else resource.get('episode') or {}
return {
'_type': 'url_transparent',
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']),
'ie_key': 'Zype',
'description': clean_html(video.get('description')),
'timestamp': unified_timestamp(video.get('publishDate')),
@@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
]
if season_number:
playlist_id = 'season_%d' % season_number
playlist_title = 'Season %d' % season_number
playlist_id = f'season_{season_number}'
playlist_title = f'Season {season_number}'
facet_filters.append('search_season_list:' + playlist_title)
else:
playlist_id = show
playlist_title = title
season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production',
playlist_id, headers={
'Origin': 'https://www.americastestkitchen.com',
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={
'facetFilters': json.dumps(facet_filters),
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
'attributesToHighlight': '',
'hitsPerPage': 1000,
})
@@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
'description': episode.get('description'),
'timestamp': unified_timestamp(episode.get('search_document_date')),
'season_number': season_number,
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')),
'ie_key': AmericasTestKitchenIE.ie_key(),
}

View File

@@ -1,7 +1,7 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
ExtractorError,
determine_ext,
int_or_none,
mimetype2ext,
parse_iso8601,
@@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
item = feed.get('channel', {}).get('item')
if not item:
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error']))
video_id = item['guid']
def get_media_node(name, default=None):
media_name = 'media-%s' % name
media_name = f'media-{name}'
media_group = item.get('media-group') or item
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)

View File

@@ -5,7 +5,7 @@ from ..utils import (
int_or_none,
str_or_none,
traverse_obj,
unified_timestamp
unified_timestamp,
)
@@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'release_date': '20230121',
'release_timestamp': 1674285179,
'episode_id': 'e1tpt3d',
}
},
}, {
# embed url
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
@@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'season': 'Season 2',
'season_number': 2,
'episode_id': 'e1shjqd',
}
},
}]
_WEBPAGE_TESTS = [{
@@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
'uploader': 'Podcast Tempo',
'channel': 'apakatatempo',
}
},
}]
def _real_extract(self, url):

View File

@@ -1,7 +1,7 @@
import re
from .common import InfoExtractor
from ..utils import url_or_none, merge_dicts
from ..utils import merge_dicts, url_or_none
class AngelIE(InfoExtractor):
@@ -15,8 +15,8 @@ class AngelIE(InfoExtractor):
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
'duration': 1359.0
}
'duration': 1359.0,
},
}, {
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
@@ -26,8 +26,8 @@ class AngelIE(InfoExtractor):
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
'duration': 3276.0
}
'duration': 3276.0,
},
}]
def _real_extract(self, url):
@@ -44,7 +44,7 @@ class AngelIE(InfoExtractor):
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
'formats': formats,
'subtitles': subtitles
'subtitles': subtitles,
}
# Angel uses cloudinary in the background and supports image transformations.

View File

@@ -105,7 +105,7 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True)
raise ExtractorError(f'no videos found for {video_id}', expected=True)
return self.playlist_from_matches(
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})

View File

@@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor):
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
}
def _generate_nfl_token(self, anvack, mcp_id):
@@ -255,7 +255,7 @@ class AnvatoIE(InfoExtractor):
token
}
}
}''' % (anvack, mcp_id),
}''' % (anvack, mcp_id), # noqa: UP031
}).encode(), headers={
'Authorization': auth_token,
'Content-Type': 'application/json',
@@ -299,7 +299,7 @@ class AnvatoIE(InfoExtractor):
return self._download_json(
video_data_url, video_id, transform_source=strip_jsonp, query=query,
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
data=json.dumps({'api': api}, separators=(',', ':')).encode())
def _get_anvato_videos(self, access_key, video_id, token):
video_data = self._get_video_json(access_key, video_id, token)
@@ -358,7 +358,7 @@ class AnvatoIE(InfoExtractor):
for caption in video_data.get('captions', []):
a_caption = {
'url': caption['url'],
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
}
subtitles.setdefault(caption['language'], []).append(a_caption)
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)

View File

@@ -30,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
'params': {
# m3u8 download
'skip_download': True,
}
},
}, {
# video with vidible ID
'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
@@ -46,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
'params': {
# m3u8 download
'skip_download': True,
}
},
}, {
'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
'only_matching': True,
@@ -83,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
return self._extract_yahoo_video(video_id, 'us')
response = self._download_json(
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details',
video_id)['response']
if response['statusText'] != 'Ok':
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True)
video_data = response['data']
formats = []

View File

@@ -34,7 +34,7 @@ class APAIE(InfoExtractor):
video_id, base_url = mobj.group('id', 'base_url')
webpage = self._download_webpage(
'%s/player/%s' % (base_url, video_id), video_id)
f'{base_url}/player/{video_id}', video_id)
jwplatform_id = self._search_regex(
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@@ -47,7 +47,7 @@ class APAIE(InfoExtractor):
def extract(field, name=None):
return self._search_regex(
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage, name or field, default=None, group='value')
title = extract('title') or video_id

View File

@@ -1,8 +1,5 @@
from .common import InfoExtractor
from ..utils import (
str_to_int,
ExtractorError
)
from ..utils import ExtractorError, str_to_int
class AppleConnectIE(InfoExtractor):

View File

@@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor):
'duration': 6454,
'series': 'The Tim Dillon Show',
'thumbnail': 're:.+[.](png|jpe?g|webp)',
}
},
}, {
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
'only_matching': True,

View File

@@ -1,8 +1,8 @@
import re
import json
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
parse_duration,
@@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor):
'uploader_id': 'wb',
},
},
]
],
}, {
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
'info_dict': {
@@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor):
webpage = self._download_webpage(url, movie)
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
film_data = self._download_json(
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
film_id, fatal=False)
if film_data:
@@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor):
if not src:
continue
formats.append({
'format_id': '%s-%s' % (version, size),
'format_id': f'{version}-{size}',
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
'width': int_or_none(size_data.get('width')),
'height': int_or_none(size_data.get('height')),
@@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor):
page_data = film_data.get('page', {})
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')
def fix_html(s):
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
@@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor):
# like: http://trailers.apple.com/trailers/wb/gravity/
def _clean_json(m):
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '&#39;'))
s = re.sub(self._JSON_RE, _clean_json, s)
s = '<html>%s</html>' % s
return s
return f'<html>{s}</html>'
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
playlist = []
@@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor):
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
formats = []
for format in settings['metadata']['sizes']:
for fmt in settings['metadata']['sizes']:
# The src is a file pointing to the real video file
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
formats.append({
'url': format_url,
'format': format['type'],
'width': int_or_none(format['width']),
'height': int_or_none(format['height']),
'format': fmt['type'],
'width': int_or_none(fmt['width']),
'height': int_or_none(fmt['height']),
})
playlist.append({
@@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor):
'title': 'Movie Studios',
},
}
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
_TESTS = [{
'url': 'http://trailers.apple.com/#section=justadded',
'info_dict': {
@@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor):
def _real_extract(self, url):
section = self._match_id(url)
section_data = self._download_json(
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']),
section)
entries = [
self.url_result('http://trailers.apple.com' + e['location'])

View File

@@ -1,10 +1,11 @@
from __future__ import annotations
import json
import re
import urllib.parse
from .common import InfoExtractor
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import (
@@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor):
'title': 'Bells Of Rostov',
'ext': 'mp3',
},
'skip': 'restricted'
'skip': 'restricted',
}, {
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
@@ -158,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor):
'description': 'md5:012b2d668ae753be36896f343d12a236',
'upload_date': '20190928',
},
'skip': 'restricted'
'skip': 'restricted',
}, {
# Original formats are private
'url': 'https://archive.org/details/irelandthemakingofarepublic',
@@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor):
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
'display_id': 'irelandthemakingofarepublicreel2.mov',
},
}
]
},
],
}]
@staticmethod
@@ -220,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor):
def _real_extract(self, url):
video_id = urllib.parse.unquote_plus(self._match_id(url))
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
identifier, _, entry_id = video_id.partition('/')
# Archive.org metadata API doesn't clearly demarcate playlist entries
# or subtitle tracks, so we get them from the embeddable player.
@@ -246,7 +247,7 @@ class ArchiveOrgIE(InfoExtractor):
if track['kind'] != 'subtitles':
continue
entries[p['orig']][track['label']] = {
'url': 'https://archive.org/' + track['file'].lstrip('/')
'url': 'https://archive.org/' + track['file'].lstrip('/'),
}
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
@@ -293,7 +294,9 @@ class ArchiveOrgIE(InfoExtractor):
'height': int_or_none(f.get('width')),
'filesize': int_or_none(f.get('size'))})
extension = (f['name'].rsplit('.', 1) + [None])[1]
_, has_ext, extension = f['name'].rpartition('.')
if not has_ext:
extension = None
# We don't want to skip private formats if the user has access to them,
# however without access to an account with such privileges we can't implement/test this.
@@ -308,7 +311,7 @@ class ArchiveOrgIE(InfoExtractor):
'filesize': int_or_none(f.get('size')),
'protocol': 'https',
'source_preference': 0 if f.get('source') == 'original' else -1,
'format_note': f.get('source')
'format_note': f.get('source'),
})
for entry in entries.values():
@@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/Zeurel',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
}
},
}, {
# Internal link
'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
@@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/1veritasium',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
}
},
}, {
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
# Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
@@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_id': 'machinima',
'uploader_url': 'https://www.youtube.com/user/machinima',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'machinima'
}
'uploader': 'machinima',
},
}, {
# FLV video. Video file URL does not provide itag information
'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
@@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'jawed',
}
},
}, {
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
'info_dict': {
@@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/user/itsmadeon',
'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
'thumbnail': r're:https?://.*\.(jpg|webp)',
}
},
}, {
# First capture is of dead video, second is the oldest from CDX response.
'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
@@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'ETC News',
}
},
}, {
# First capture of dead video, capture date in link links to dead capture.
'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
@@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader': 'ETC News',
},
'expected_warnings': [
r'unable to download capture webpage \(it may not be archived\)'
]
r'unable to download capture webpage \(it may not be archived\)',
],
}, { # Very old YouTube page, has - YouTube in title.
'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
'info_dict': {
'id': '-06-KB9XTzg',
'ext': 'flv',
'title': 'New Coin Hack!! 100% Safe!!'
}
'title': 'New Coin Hack!! 100% Safe!!',
},
}, {
'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
'info_dict': {
@@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader': 'DankPods',
}
},
}, {
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
@@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader_id': 'PewDiePie',
'uploader_url': 'https://www.youtube.com/user/PewDiePie',
'thumbnail': r're:https?://.*\.(jpg|webp)',
}
},
}, {
# ~June 2010 Capture. swfconfig
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
@@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)',
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
'upload_date': '20090520',
}
},
}, {
# Jan 2011: watch-video-date/eow-date surrounded by whitespace
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
@@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)',
'duration': 132,
'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
}
},
}, {
# ~May 2009 swfArgs. ytcfg is spread out over various vars
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
@@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
'thumbnail': r're:https?://.*\.(jpg|webp)',
'duration': 754,
}
},
}, {
# ~June 2012. Upload date is in another lang so cannot extract.
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
@@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'uploader': 'BlackNerdComedy',
'duration': 182,
'thumbnail': r're:https?://.*\.(jpg|webp)',
}
},
}, {
# ~July 2013
'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
@@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
'upload_date': '20060428',
'uploader': 'punkybird',
}
},
}, {
# April 2020: Player response in player config
'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
@@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)',
'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
'uploader_url': 'https://www.youtube.com/user/GameGrumps',
}
},
}, {
# watch7-user-header with yt-user-info
'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
@@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
'thumbnail': r're:https?://.*\.(jpg|webp)',
'upload_date': '20150503',
'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
}
},
}, {
# April 2012
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
@@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor):
'duration': 200,
'upload_date': '20120407',
'uploader_id': 'thecomputernerd01',
}
},
}, {
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
'only_matching': True
'only_matching': True,
}, {
# Video not archived, only capture is unavailable video page
'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
'only_matching': True
'only_matching': True,
}, { # Encoded url
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&amp;search=soccer',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
'only_matching': True
'only_matching': True,
}, {
'url': 'ytarchive:BaW_jenozKc:20050214000000',
'only_matching': True
'only_matching': True,
}, {
'url': 'ytarchive:BaW_jenozKc',
'only_matching': True
'only_matching': True,
},
]
_YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
@@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
_YT_ALL_THUMB_SERVERS = orderedSet(
_YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
[*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]])
_WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
_OLDEST_CAPTURE_DATE = 20050214000000
_NEWEST_CAPTURE_DATE = 20500101000000
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
query = {
'url': url,
@@ -688,14 +691,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
'limit': 500,
'filter': ['statuscode:200'] + (filters or []),
'collapse': collapse or [],
**(query or {})
**(query or {}),
}
res = self._download_json(
'https://web.archive.org/cdx/search/cdx', item_id,
note or 'Downloading CDX API JSON', query=query, fatal=fatal)
if isinstance(res, list) and len(res) >= 2:
# format response to make it easier to use
return list(dict(zip(res[0], v)) for v in res[1:])
return [dict(zip(res[0], v)) for v in res[1:]]
elif not isinstance(res, list) or len(res) != 0:
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
@@ -852,7 +855,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
{
'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
'filesize': int_or_none(thumbnail_dict.get('length')),
'preference': int_or_none(thumbnail_dict.get('length'))
'preference': int_or_none(thumbnail_dict.get('length')),
} for thumbnail_dict in response)
if not try_all:
break
@@ -893,7 +896,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
for retry in retry_manager:
try:
urlh = self._request_webpage(
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
video_id, note='Fetching archived video file url', expected_status=True)
except ExtractorError as e:
# HTTP Error 404 is expected if the video is not saved.
@@ -924,21 +927,21 @@ class YoutubeWebArchiveIE(InfoExtractor):
info['thumbnails'] = self._extract_thumbnails(video_id)
if urlh:
url = compat_urllib_parse_unquote(urlh.url)
url = urllib.parse.unquote(urlh.url)
video_file_url_qs = parse_qs(url)
# Attempt to recover any ext & format info from playback url & response headers
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
if itag and itag in YoutubeIE._formats:
format.update(YoutubeIE._formats[itag])
format.update({'format_id': itag})
fmt.update(YoutubeIE._formats[itag])
fmt.update({'format_id': itag})
else:
mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
ext = (mimetype2ext(mime)
or urlhandle_detect_ext(urlh)
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
format.update({'ext': ext})
info['formats'] = [format]
fmt.update({'ext': ext})
info['formats'] = [fmt]
if not info.get('duration'):
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))

View File

@@ -11,7 +11,7 @@ from ..utils import (
class ArcPublishingIE(InfoExtractor):
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
_VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
_TESTS = [{
# https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
@@ -74,12 +74,12 @@ class ArcPublishingIE(InfoExtractor):
def _extract_embed_urls(cls, url, webpage):
entries = []
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage):
powa = extract_attributes(powa_el) or {}
org = powa.get('data-org')
uuid = powa.get('data-uuid')
if org and uuid:
entries.append('arcpublishing:%s:%s' % (org, uuid))
entries.append(f'arcpublishing:{org}:{uuid}')
return entries
def _real_extract(self, url):
@@ -122,7 +122,7 @@ class ArcPublishingIE(InfoExtractor):
elif stream_type in ('ts', 'hls'):
m3u8_formats = self._extract_m3u8_formats(
s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
if all(f.get('acodec') == 'none' for f in m3u8_formats):
continue
for f in m3u8_formats:
height = f.get('height')
@@ -136,7 +136,7 @@ class ArcPublishingIE(InfoExtractor):
else:
vbr = int_or_none(s.get('bitrate'))
formats.append({
'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
'format_id': f'{stream_type}-{vbr}' if vbr else stream_type,
'vbr': vbr,
'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')),

View File

@@ -85,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
update_url_query(stream_url, {
'hdcore': '3.1.1',
'plugin': 'aasp-3.1.1.69.124'
'plugin': 'aasp-3.1.1.69.124',
}), video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
@@ -96,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor):
f = {
'url': server,
'play_path': stream_url,
'format_id': 'a%s-rtmp-%s' % (num, quality),
'format_id': f'a{num}-rtmp-{quality}',
}
else:
f = {
'url': stream_url,
'format_id': 'a%s-%s-%s' % (num, ext, quality)
'format_id': f'a{num}-{ext}-{quality}',
}
m = re.search(
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',

View File

@@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor):
raise ExtractorError('Invalid URL', expected=True)
media = self._download_json(
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}',
video_id, query={
# https://video.qbrick.com/docs/api/examples/library-api.html
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',

View File

@@ -1,11 +1,9 @@
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
format_field,
float_or_none,
format_field,
int_or_none,
parse_iso8601,
remove_start,
@@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor):
'view_count': int,
'tags': ['linearna_algebra'],
'start_time': 10,
}
},
}, {
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
'only_matching': True,
@@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor):
'duration': float_or_none(video.get('duration'), 1000),
'view_count': int_or_none(video.get('views')),
'tags': video.get('hashtags'),
'start_time': int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
'start_time': int_or_none(urllib.parse.parse_qs(
urllib.parse.urlparse(url).query).get('t', [None])[0]),
}

View File

@@ -153,7 +153,7 @@ class Art19IE(InfoExtractor):
'series_id': ('series_id', {str}),
'timestamp': ('created_at', {parse_iso8601}),
'release_timestamp': ('released_at', {parse_iso8601}),
'modified_timestamp': ('updated_at', {parse_iso8601})
'modified_timestamp': ('updated_at', {parse_iso8601}),
})),
**traverse_obj(rss_metadata, ('content', {
'title': ('episode_title', {str}),

View File

@@ -5,6 +5,7 @@ from ..utils import (
ExtractorError,
GeoRestrictedError,
int_or_none,
join_nonempty,
parse_iso8601,
parse_qs,
strip_or_none,
@@ -19,32 +20,18 @@ class ArteTVBaseIE(InfoExtractor):
class ArteTVIE(ArteTVBaseIE):
_VALID_URL = r'''(?x)
_VALID_URL = rf'''(?x)
(?:https?://
(?:
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES})
)
|arte://program)
/(?P<id>\d{6}-\d{3}-[AF]|LIVE)
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
/(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)
'''
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'only_matching': True,
}, {
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
'info_dict': {
'id': '100103-000-A',
'title': 'USA: Dyskryminacja na porodówce',
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
'alt_title': 'ARTE Reportage',
'upload_date': '20201103',
'duration': 554,
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
'timestamp': 1604417980,
'ext': 'mp4',
},
'params': {'skip_download': 'm3u8'}
}, {
'note': 'No alt_title',
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
@@ -58,6 +45,23 @@ class ArteTVIE(ArteTVBaseIE):
}, {
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
'only_matching': True,
}, {
'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
'info_dict': {
'id': '109067-000-A',
'ext': 'mp4',
'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
'timestamp': 1713927600,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
'duration': 7599,
'title': 'La loi de Téhéran',
'upload_date': '20240424',
'subtitles': {
'fr': 'mincount:1',
'fr-acc': 'mincount:1',
'fr-forced': 'mincount:1',
},
},
}, {
'note': 'age-restricted',
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
@@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE):
'upload_date': '20230930',
'ext': 'mp4',
},
}, {
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
'info_dict': {
'id': '085374-003-A',
'ext': 'mp4',
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
'timestamp': 1702872000,
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
'duration': 2594,
'title': 'Die kurze Zeit der Jugend',
'alt_title': 'Im hohen Norden geboren',
'upload_date': '20231218',
'subtitles': {
'fr': 'mincount:1',
'fr-acc': 'mincount:1',
},
},
'skip': '404 Not Found',
}]
_GEO_BYPASS = True
@@ -143,19 +131,21 @@ class ArteTVIE(ArteTVBaseIE):
updated_subs = {}
for lang, sub_formats in subs.items():
for fmt in sub_formats:
if fmt.get('url', '').endswith('-MAL.m3u8'):
lang += '-acc'
updated_subs.setdefault(lang, []).append(fmt)
url = fmt.get('url') or ''
suffix = ('acc' if url.endswith('-MAL.m3u8')
else 'forced' if '_VO' not in url
else None)
updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt)
return updated_subs
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2')
langauge_code = self._LANG_MAP.get(lang)
language_code = self._LANG_MAP.get(lang)
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
'x-validated-age': '18'
'x-validated-age': '18',
})
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
@@ -180,10 +170,10 @@ class ArteTVIE(ArteTVBaseIE):
m = self._VERSION_CODE_RE.match(stream_version_code)
if m:
lang_pref = int(''.join('01'[x] for x in (
m.group('vlang') == langauge_code, # we prefer voice in the requested language
m.group('vlang') == language_code, # we prefer voice in the requested language
not m.group('audio_desc'), # and not the audio description version
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
not m.group('has_sub'), # but we prefer no subtitles otherwise
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
)))
@@ -257,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor):
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
'upload_date': '20201116',
},
'skip': 'No video available'
'skip': 'No video available',
}, {
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
'only_matching': True,
@@ -272,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor):
class ArteTVPlaylistIE(ArteTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})'
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
'only_matching': True,
@@ -308,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
class ArteTVCategoryIE(ArteTVBaseIE):
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$'
_TESTS = [{
'url': 'https://www.arte.tv/en/videos/politics-and-society/',
'info_dict': {
@@ -322,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
@classmethod
def suitable(cls, url):
return (
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE))
and super().suitable(url))
def _real_extract(self, url):
@@ -331,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE):
items = []
for video in re.finditer(
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)',
webpage):
video = video.group('url')
if video == url:
continue
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)):
items.append(video)
title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None

View File

@@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor):
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
'duration': 3413,
},
'skip': 'This video is only available for registered users'
'skip': 'This video is only available for registered users',
},
{
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
@@ -49,7 +49,7 @@ class AtresPlayerIE(InfoExtractor):
target_url = self._download_json(
'https://account.atresmedia.com/api/login', None,
'Logging in', headers={
'Content-Type': 'application/x-www-form-urlencoded'
'Content-Type': 'application/x-www-form-urlencoded',
}, data=urlencode_postdata({
'username': username,
'password': password,

View File

@@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor):
'info_dict': {
'id': 'data-scale-spring-2022',
'title': 'Data @Scale Spring 2022',
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
},
}, {
'url': 'https://atscaleconference.com/events/video-scale-2021/',
@@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor):
'info_dict': {
'id': 'video-scale-2021',
'title': 'Video @Scale 2021',
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
},
}]
def _real_extract(self, url):
id = self._match_id(url)
webpage = self._download_webpage(url, id)
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
return self.playlist_from_matches(
re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
ie='Generic', playlist_id=id,
ie='Generic', playlist_id=playlist_id,
title=self._og_search_title(webpage), description=self._og_search_description(webpage))

View File

@@ -2,10 +2,10 @@ import datetime as dt
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
jwt_encode_hs256,
try_get,
ExtractorError,
)
@@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor):
'id': 'v-ce9cgn1e70n5-1',
'ext': 'mp4',
'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
}
},
}, {
'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
'only_matching': True,
@@ -66,10 +66,10 @@ class ATVAtIE(InfoExtractor):
video_id=video_id)
video_title = json_data['views']['default']['page']['title']
contentResource = json_data['views']['default']['page']['contentResource']
content_id = contentResource[0]['id']
content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
for id, content in enumerate(contentResource)]
content_resource = json_data['views']['default']['page']['contentResource']
content_id = content_resource[0]['id']
content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
for id_, content in enumerate(content_resource)]
time_of_request = dt.datetime.now()
not_before = time_of_request - dt.timedelta(minutes=5)
@@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor):
videos = self._download_json(
'https://vas-v4.p7s1video.net/4.0/getsources',
content_id, 'Downloading videos JSON', query={
'token': jwt_token.decode('utf-8')
'token': jwt_token.decode('utf-8'),
})
video_id, videos_data = list(videos['data'].items())[0]
video_id, videos_data = next(iter(videos['data'].items()))
error_msg = try_get(videos_data, lambda x: x['error']['title'])
if error_msg == 'Geo check failed':
self.raise_geo_restricted(error_msg)
elif error_msg:
raise ExtractorError(error_msg)
entries = [
self._extract_video_info(url, contentResource[video['id']], video)
self._extract_video_info(url, content_resource[video['id']], video)
for video in videos_data]
return {

View File

@@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor):
'timestamp': 1448354940,
'duration': 74022,
'view_count': int,
}
},
}, {
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
'only_matching': True,
@@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor):
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
if bitrate:
f.update({
'format_id': 'http-%s' % bitrate,
'format_id': f'http-{bitrate}',
})
formats.append(f)

View File

@@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor):
'duration': 4000.99,
'uploader': 'Sue Perkins: An hour or so with...',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
}
},
}, { # Direct mp3-file link
'url': 'https://audioboom.com/posts/8128496.mp3',
'md5': 'e329edf304d450def95c7f86a9165ee1',
@@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor):
'duration': 1689.7,
'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
}
},
}, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
'only_matching': True,

View File

@@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor):
headers={
'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With': 'XMLHttpRequest',
}, data=f'id={player_entry_id}'.encode('utf-8'))
}, data=f'id={player_entry_id}'.encode())
return {
'id': str(data_json['entry_id']),
@@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE):
}]
def _real_extract(self, url):
id = self._match_id(url)
webpage = self._download_webpage(url, id)
player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id')
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_entry_id = self._search_regex(
r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id')
return self._audiodraft_extract_from_id(player_entry_id)
@@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE):
}]
def _real_extract(self, url):
id = self._match_id(url)
return self._audiodraft_extract_from_id(f'player_entry_{id}')
video_id = self._match_id(url)
return self._audiodraft_extract_from_id(f'player_entry_{video_id}')

View File

@@ -3,7 +3,6 @@ import time
from .common import InfoExtractor
from .soundcloud import SoundcloudIE
from ..compat import compat_str
from ..utils import (
ExtractorError,
url_basename,
@@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor):
'id': '310086',
'ext': 'mp3',
'uploader': 'Roosh Williams',
'title': 'Extraordinary'
}
'title': 'Extraordinary',
},
},
# audiomack wrapper around soundcloud song
# Needs new test URL.
@@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor):
# API is inconsistent with errors
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
raise ExtractorError('Invalid url %s' % url)
raise ExtractorError(f'Invalid url {url}')
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor
@@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor):
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
return {
'id': compat_str(api_response.get('id', album_url_tag)),
'id': str(api_response.get('id', album_url_tag)),
'uploader': api_response.get('artist'),
'title': api_response.get('title'),
'url': api_response['url'],
@@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor):
'info_dict':
{
'id': '812251',
'title': 'Tha Tour: Part 2 (Official Mixtape)'
}
'title': 'Tha Tour: Part 2 (Official Mixtape)',
},
},
# Album playlist ripped from fakeshoredrive with no metadata
{
@@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor):
'id': '837576',
'ext': 'mp3',
'uploader': 'Lil Herb a.k.a. G Herbo',
}
},
}, {
'info_dict': {
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
'id': '837580',
'ext': 'mp3',
'uploader': 'Lil Herb a.k.a. G Herbo',
}
},
}],
}
},
]
def _real_extract(self, url):
@@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor):
api_response = self._download_json(
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
% (album_url_tag, track_no, time.time()), album_url_tag,
note='Querying song information (%d)' % (track_no + 1))
note=f'Querying song information ({track_no + 1})')
# Total failure, only occurs when url is totally wrong
# Won't happen in middle of valid playlist (next case)
if 'url' not in api_response or 'error' in api_response:
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
raise ExtractorError(f'Invalid url for track {track_no} of album url {url}')
# URL is good but song id doesn't exist - usually means end of playlist
elif not api_response['url']:
break
@@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor):
# Pull out the album metadata and add to result (if it exists)
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
if apikey in api_response and resultkey not in result:
result[resultkey] = compat_str(api_response[apikey])
result[resultkey] = str(api_response[apikey])
song_id = url_basename(api_response['url']).rpartition('.')[0]
result['entries'].append({
'id': compat_str(api_response.get('id', song_id)),
'id': str(api_response.get('id', song_id)),
'uploader': api_response.get('artist'),
'title': api_response.get('title', song_id),
'url': api_response['url'],

View File

@@ -1,7 +1,7 @@
import random
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_str, compat_urllib_parse_unquote
from ..utils import ExtractorError, str_or_none, try_get
@@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor):
if response_data is not None:
return response_data
if len(response) == 1 and 'message' in response:
raise ExtractorError('API error: %s' % response['message'],
raise ExtractorError('API error: {}'.format(response['message']),
expected=True)
raise ExtractorError('Unexpected API response')
def _select_api_base(self):
"""Selecting one of the currently available API hosts"""
response = super(AudiusBaseIE, self)._download_json(
response = super()._download_json(
'https://api.audius.co/', None,
note='Requesting available API hosts',
errnote='Unable to request available API hosts')
@@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor):
anything from this link, since the Audius API won't be able to resolve
this url
"""
url = compat_urllib_parse_unquote(url)
title = compat_urllib_parse_unquote(title)
url = urllib.parse.unquote(url)
title = urllib.parse.unquote(title)
if '/' in title or '%2F' in title:
fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
return url.replace(title, fixed_title)
@@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor):
if self._API_BASE is None:
self._select_api_base()
try:
response = super(AudiusBaseIE, self)._download_json(
'%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
response = super()._download_json(
f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
errnote=errnote, expected_status=expected_status)
except ExtractorError as exc:
# some of Audius API hosts may not work as expected and return HTML
if 'Failed to parse JSON' in compat_str(exc):
if 'Failed to parse JSON' in str(exc):
raise ExtractorError('An error occurred while receiving data. Try again',
expected=True)
raise exc
return self._get_response_data(response)
def _resolve_url(self, url, item_id):
return self._api_request('/resolve?url=%s' % url, item_id,
return self._api_request(f'/resolve?url={url}', item_id,
expected_status=404)
@@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE):
'view_count': int,
'like_count': int,
'repost_count': int,
}
},
},
{
# Regular track
@@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE):
'view_count': int,
'like_count': int,
'repost_count': int,
}
},
},
]
_ARTWORK_MAP = {
"150x150": 150,
"480x480": 480,
"1000x1000": 1000
'150x150': 150,
'480x480': 480,
'1000x1000': 1000,
}
def _real_extract(self, url):
@@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE):
else: # API link
title = None
# uploader = None
track_data = self._api_request('/tracks/%s' % track_id, track_id)
track_data = self._api_request(f'/tracks/{track_id}', track_id)
if not isinstance(track_data, dict):
raise ExtractorError('Unexpected API response')
@@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE):
if isinstance(artworks_data, dict):
for quality_key, thumbnail_url in artworks_data.items():
thumbnail = {
"url": thumbnail_url
'url': thumbnail_url,
}
quality_code = self._ARTWORK_MAP.get(quality_key)
if quality_code is not None:
@@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE):
return {
'id': track_id,
'title': track_data.get('title', title),
'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream',
'ext': 'mp3',
'description': track_data.get('description'),
'duration': track_data.get('duration'),
'track': track_data.get('title'),
'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
'artist': try_get(track_data, lambda x: x['user']['name'], str),
'genre': track_data.get('genre'),
'thumbnails': thumbnails,
'view_count': track_data.get('play_count'),
@@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
_TESTS = [
{
'url': 'audius:9RWlo',
'only_matching': True
'only_matching': True,
},
{
'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
'only_matching': True
'only_matching': True,
},
]
@@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
if not track_id:
raise ExtractorError('Unable to get track ID from playlist')
entries.append(self.url_result(
'audius:%s' % track_id,
f'audius:{track_id}',
ie=AudiusTrackIE.ie_key(), video_id=track_id))
return entries
@@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
raise ExtractorError('Unable to get playlist ID')
playlist_tracks = self._api_request(
'/playlists/%s/tracks' % playlist_id,
f'/playlists/{playlist_id}/tracks',
title, note='Downloading playlist tracks metadata',
errnote='Unable to download playlist tracks metadata')
if not isinstance(playlist_tracks, list):
@@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I
profile_audius_id = _profile_data[0]['id']
profile_bio = _profile_data[0].get('bio')
api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)

View File

@@ -1,10 +1,7 @@
import base64
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlencode,
compat_str,
)
from ..utils import (
format_field,
int_or_none,
@@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor):
show_id, video_id, season_id = self._match_valid_url(url).groups()
if video_id and int(video_id) > 0:
return self.url_result(
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
f'http://awaan.ae/media/{video_id}', 'AWAANVideo')
elif season_id and int(season_id) > 0:
return self.url_result(smuggle_url(
'http://awaan.ae/program/season/%s' % season_id,
f'http://awaan.ae/program/season/{season_id}',
{'show_id': show_id}), 'AWAANSeason')
else:
return self.url_result(
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
f'http://awaan.ae/program/{show_id}', 'AWAANSeason')
class AWAANBaseIE(InfoExtractor):
@@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE):
video_id = self._match_id(url)
video_data = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}',
video_id, headers={'Origin': 'http://awaan.ae'})
info = self._parse_video_data(video_data, video_id, False)
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({
'id': video_data['id'],
'user_id': video_data['user_id'],
'signature': video_data['signature'],
@@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE):
channel_id = self._match_id(url)
channel_data = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}',
channel_id, headers={'Origin': 'http://awaan.ae'})
info = self._parse_video_data(channel_data, channel_id, True)
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
'signature': channel_data['signature'],
@@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor):
show_id = smuggled_data.get('show_id')
if show_id is None:
season = self._download_json(
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}',
season_id, headers={'Origin': 'http://awaan.ae'})
show_id = season['id']
data['show_id'] = show_id
@@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor):
'http://admin.mangomolo.com/analytics/index.php/plus/show',
show_id, data=urlencode_postdata(data), headers={
'Origin': 'http://awaan.ae',
'Content-Type': 'application/x-www-form-urlencoded'
'Content-Type': 'application/x-www-form-urlencoded',
})
if not season_id:
season_id = show['default_season']
@@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor):
entries = []
for video in show['videos']:
video_id = compat_str(video['id'])
video_id = str(video['id'])
entries.append(self.url_result(
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id))
return self.playlist_result(entries, season_id, title)

View File

@@ -1,9 +1,9 @@
import datetime as dt
import hashlib
import hmac
import urllib.parse
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
@@ -18,20 +18,20 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
'Accept': 'application/json',
'Host': self._AWS_PROXY_HOST,
'X-Amz-Date': amz_date,
'X-Api-Key': self._AWS_API_KEY
'X-Api-Key': self._AWS_API_KEY,
}
session_token = aws_dict.get('session_token')
if session_token:
headers['X-Amz-Security-Token'] = session_token
def aws_hash(s):
return hashlib.sha256(s.encode('utf-8')).hexdigest()
return hashlib.sha256(s.encode()).hexdigest()
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
canonical_querystring = compat_urllib_parse_urlencode(query)
canonical_querystring = urllib.parse.urlencode(query)
canonical_headers = ''
for header_name, header_value in sorted(headers.items()):
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
canonical_headers += f'{header_name.lower()}:{header_value}\n'
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
canonical_request = '\n'.join([
'GET',
@@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
canonical_querystring,
canonical_headers,
signed_headers,
aws_hash('')
aws_hash(''),
])
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
@@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
def aws_hmac(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
return hmac.new(key, msg.encode(), hashlib.sha256)
def aws_hmac_digest(key, msg):
return aws_hmac(key, msg).digest()
@@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
def aws_hmac_hexdigest(key, msg):
return aws_hmac(key, msg).hexdigest()
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
k_signing = ('AWS4' + aws_dict['secret_key']).encode()
for value in credential_scope_list:
k_signing = aws_hmac_digest(k_signing, value)
@@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
headers['Authorization'] = ', '.join([
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
'SignedHeaders=%s' % signed_headers,
'Signature=%s' % signature,
'{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
f'SignedHeaders={signed_headers}',
f'Signature={signature}',
])
return self._download_json(
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
video_id, headers=headers)

View File

@@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor):
'timestamp': 1538328802,
'view_count': int,
'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031',
'duration': 1930
'duration': 1930,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True
'only_matching': True,
}]
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
_PARTNER_ID = '1719221'
@@ -62,5 +62,5 @@ class AZMedienIE(InfoExtractor):
})['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
return self.url_result(
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
f'kaltura:{self._PARTNER_ID}:{entry_id}',
ie=KalturaIE.ie_key(), video_id=entry_id)

View File

@@ -24,8 +24,9 @@ class BaiduVideoIE(InfoExtractor):
}]
def _call_api(self, path, category, playlist_id, note):
return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
path, category, playlist_id), playlist_id, note)
return self._download_json(
f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}',
playlist_id, note)
def _real_extract(self, url):
category, playlist_id = self._match_valid_url(url).groups()
@@ -44,7 +45,7 @@ class BaiduVideoIE(InfoExtractor):
'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
entries = [self.url_result(
episode['url'], video_title=episode['title']
episode['url'], video_title=episode['title'],
) for episode in episodes_detail['videos']]
return self.playlist_result(

View File

@@ -1,13 +1,10 @@
import math
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_urlparse,
compat_parse_qs,
)
from ..utils import (
format_field,
InAdvancePagedList,
format_field,
traverse_obj,
unified_timestamp,
)
@@ -20,8 +17,8 @@ class BanByeBaseIE(InfoExtractor):
@staticmethod
def _extract_playlist_id(url, param='playlist'):
return compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get(param, [None])[0]
return urllib.parse.parse_qs(
urllib.parse.urlparse(url).query).get(param, [None])[0]
def _extract_playlist(self, playlist_id):
data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)

View File

@@ -3,7 +3,6 @@ import re
import time
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
@@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor):
'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
},
'_skip': 'There is a limit of 200 free downloads / month for the test song'
'_skip': 'There is a limit of 200 free downloads / month for the test song',
}, {
# free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
@@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor):
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
return self._parse_json(self._html_search_regex(
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
rf'data-{attr}=(["\'])({{.+?}})\1', webpage,
attr + ' data', group=2), video_id, fatal=fatal)
def _real_extract(self, url):
@@ -167,7 +166,7 @@ class BandcampIE(InfoExtractor):
download_link = tralbum.get('freeDownloadPage')
if download_link:
track_id = compat_str(tralbum['id'])
track_id = str(tralbum['id'])
download_webpage = self._download_webpage(
download_link, track_id, 'Downloading free downloads page')
@@ -192,7 +191,7 @@ class BandcampIE(InfoExtractor):
if isinstance(download_formats_list, list):
for f in blob['download_formats']:
name, ext = f.get('name'), f.get('file_extension')
if all(isinstance(x, compat_str) for x in (name, ext)):
if all(isinstance(x, str) for x in (name, ext)):
download_formats[name] = ext.strip('.')
for format_id, f in downloads.items():
@@ -207,7 +206,7 @@ class BandcampIE(InfoExtractor):
})
format_id = f.get('encoding_name') or format_id
stat = self._download_json(
stat_url, track_id, 'Downloading %s JSON' % format_id,
stat_url, track_id, f'Downloading {format_id} JSON',
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
fatal=False)
if not stat:
@@ -225,7 +224,7 @@ class BandcampIE(InfoExtractor):
'acodec': format_id.split('-')[0],
})
title = '%s - %s' % (artist, track) if artist else track
title = f'{artist} - {track}' if artist else track
if not duration:
duration = float_or_none(self._html_search_meta(
@@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311756226,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
},
{
'md5': '1a2c32e2691474643e912cc6cd4bffaa',
@@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311757238,
'upload_date': '20110727',
'uploader': 'Blazo',
}
},
},
],
'info_dict': {
@@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'uploader_id': 'blazo',
},
'params': {
'playlistend': 2
'playlistend': 2,
},
'skip': 'Bandcamp imposes download limits.'
'skip': 'Bandcamp imposes download limits.',
}, {
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
'info_dict': {
@@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
def suitable(cls, url):
return (False
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
else super(BandcampAlbumIE, cls).suitable(url))
else super().suitable(url))
def _real_extract(self, url):
uploader_id, album_id = self._match_valid_url(url).groups()
@@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
},
}, {
'url': 'https://bandcamp.com/?blah/blah@&show=228',
'only_matching': True
'only_matching': True,
}]
def _real_extract(self, url):
@@ -407,7 +406,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
title = show.get('audio_title') or 'Bandcamp Weekly'
subtitle = show.get('subtitle')
if subtitle:
title += ' - %s' % subtitle
title += f' - {subtitle}'
return {
'id': show_id,
@@ -419,7 +418,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
'series': 'Bandcamp Weekly',
'episode': show.get('subtitle'),
'episode_id': show_id,
'formats': formats
'formats': formats,
}
@@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor):
'url': 'http://dotscale.bandcamp.com',
'info_dict': {
'id': 'dotscale',
'title': 'Discography of dotscale'
'title': 'Discography of dotscale',
},
'playlist_count': 1,
}, {

View File

@@ -2,11 +2,11 @@ import json
from .common import InfoExtractor
from ..utils import (
try_get,
int_or_none,
url_or_none,
float_or_none,
int_or_none,
try_get,
unified_timestamp,
url_or_none,
)
@@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor):
'description': 'md5:560d96f02abbebe6c6b78b47465f6b28',
'upload_date': '20200324',
'timestamp': 1585087895,
}
},
}]
_GRAPHQL_GETMETADATA_QUERY = '''
@@ -84,15 +84,15 @@ query GetCommentReplies($id: String!) {
'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY,
}
def _call_api(self, video_id, id, operation, note):
def _call_api(self, video_id, id_var, operation, note):
return self._download_json(
'https://api.infowarsmedia.com/graphql', video_id, note=note,
headers={
'Content-Type': 'application/json; charset=utf-8'
'Content-Type': 'application/json; charset=utf-8',
}, data=json.dumps({
'variables': {'id': id},
'variables': {'id': id_var},
'operationName': operation,
'query': self._GRAPHQL_QUERIES[operation]
'query': self._GRAPHQL_QUERIES[operation],
}).encode('utf8')).get('data')
def _get_comments(self, video_id, comments, comment_data):
@@ -151,5 +151,5 @@ query GetCommentReplies($id: String!) {
'tags': [tag.get('name') for tag in video_info.get('tags')],
'availability': self._availability(is_unlisted=video_info.get('unlisted')),
'comments': comments,
'__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments'))
'__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')),
}

View File

@@ -2,10 +2,10 @@ import functools
import itertools
import json
import re
import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor
from ..compat import compat_str, compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
@@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor):
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
_VALID_URL = r'''(?x)
_VALID_URL = rf'''(?x)
https?://
(?:www\.)?bbc\.co\.uk/
(?:
@@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor):
radio/player/|
events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
''' % _ID_REGEX
(?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips))
'''
_EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
_LOGIN_URL = 'https://account.bbc.com/signin'
@@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
}
},
},
{
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
@@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
}
},
}, {
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
'note': 'Video',
@@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
}
},
}, {
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
'info_dict': {
@@ -268,19 +268,19 @@ class BBCCoUkIE(InfoExtractor):
error = clean_html(get_element_by_class('form-message', response))
if error:
raise ExtractorError(
'Unable to login: %s' % error, expected=True)
f'Unable to login: {error}', expected=True)
raise ExtractorError('Unable to log in')
class MediaSelectionError(Exception):
def __init__(self, id):
self.id = id
def __init__(self, error_id):
self.id = error_id
def _extract_asx_playlist(self, connection, programme_id):
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
def _extract_items(self, playlist):
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item')
def _extract_medias(self, media_selection):
error = media_selection.get('result')
@@ -312,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
def _raise_extractor_error(self, media_selection_error):
raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
f'{self.IE_NAME} returned error: {media_selection_error.id}',
expected=True)
def _download_media_selector(self, programme_id):
@@ -372,7 +372,7 @@ class BBCCoUkIE(InfoExtractor):
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
formats.append({
'url': ref,
'format_id': 'ref%s_%s' % (i, format_id),
'format_id': f'ref{i}_{format_id}',
})
elif transfer_format == 'dash':
formats.extend(self._extract_mpd_formats(
@@ -394,7 +394,7 @@ class BBCCoUkIE(InfoExtractor):
href, programme_id, f4m_id=format_id, fatal=False))
else:
if not supplier and bitrate:
format_id += '-%d' % bitrate
format_id += f'-{bitrate}'
fmt = {
'format_id': format_id,
'filesize': file_size,
@@ -423,9 +423,9 @@ class BBCCoUkIE(InfoExtractor):
identifier = connection.get('identifier')
server = connection.get('server')
fmt.update({
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
'url': f'{protocol}://{server}/{application}?{auth_string}',
'play_path': identifier,
'app': '%s?%s' % (application, auth_string),
'app': f'{application}?{auth_string}',
'page_url': 'http://www.bbc.co.uk',
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
'rtmp_live': False,
@@ -441,7 +441,7 @@ class BBCCoUkIE(InfoExtractor):
def _download_playlist(self, playlist_id):
try:
playlist = self._download_json(
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json',
playlist_id, 'Downloading playlist JSON')
formats = []
subtitles = {}
@@ -480,32 +480,32 @@ class BBCCoUkIE(InfoExtractor):
def _process_legacy_playlist(self, playlist_id):
return self._process_legacy_playlist_url(
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id)
def _download_legacy_playlist_url(self, url, playlist_id=None):
return self._download_xml(
url, playlist_id, 'Downloading legacy playlist XML')
def _extract_from_legacy_playlist(self, playlist, playlist_id):
no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems')
if no_items is not None:
reason = no_items.get('reason')
if reason == 'preAvailability':
msg = 'Episode %s is not yet available' % playlist_id
msg = f'Episode {playlist_id} is not yet available'
elif reason == 'postAvailability':
msg = 'Episode %s is no longer available' % playlist_id
msg = f'Episode {playlist_id} is no longer available'
elif reason == 'noMedia':
msg = 'Episode %s is not currently available' % playlist_id
msg = f'Episode {playlist_id} is not currently available'
else:
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
msg = f'Episode {playlist_id} is not available: {reason}'
raise ExtractorError(msg, expected=True)
for item in self._extract_items(playlist):
kind = item.get('kind')
if kind not in ('programme', 'radioProgramme'):
continue
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text
description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary')
description = description_el.text if description_el is not None else None
def get_programme_id(item):
@@ -515,7 +515,7 @@ class BBCCoUkIE(InfoExtractor):
if value and re.match(r'^[pb][\da-z]{7}$', value):
return value
get_from_attributes(item)
mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator')
if mediator is not None:
return get_from_attributes(mediator)
@@ -555,7 +555,7 @@ class BBCCoUkIE(InfoExtractor):
if not programme_id:
programme_id = self._search_regex(
r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None)
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)
@@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'url': 'http://www.bbc.com/news/world-europe-32668511',
'info_dict': {
'id': 'world-europe-32668511',
'title': 'Russia stages massive WW2 parade',
'title': 'Russia stages massive WW2 parade despite Western boycott',
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
},
'playlist_count': 2,
@@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': '3662a707-0af9-3149-963f-47bea720b460',
'title': 'BUGGER',
'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
},
'playlist_count': 18,
}, {
@@ -631,16 +632,16 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': 'p02mprgb',
'ext': 'mp4',
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
'description': 'md5:2868290467291b37feda7863f7a83f54',
'title': 'Germanwings crash site aerial video',
'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
'duration': 47,
'timestamp': 1427219242,
'upload_date': '20150324',
'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
},
'params': {
# rtmp download
'skip_download': True,
}
},
}, {
# article with single video embedded with data-playable containing XML playlist
# with direct video links as progressiveDownloadUrl (for now these are extracted)
@@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'params': {
'skip_download': True,
}
},
'skip': 'now SIMORGH_DATA with no video',
}, {
# single video embedded with data-playable containing XML playlists (regional section)
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
'info_dict': {
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
'id': '39275083',
'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
'ext': 'mp4',
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
'timestamp': 1434713142,
'upload_date': '20150619',
'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
},
'params': {
'skip_download': True,
}
},
}, {
# single video from video playlist embedded with vxp-playlist-data JSON
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
@@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
},
'params': {
'skip_download': True,
}
},
'skip': '404 Not Found',
}, {
# single video story with digitalData
# single video story with __PWA_PRELOADED_STATE__
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
'info_dict': {
'id': 'p02q6gc4',
'ext': 'flv',
'title': 'Sri Lankas spicy secret',
'description': 'As a new train line to Jaffna opens up the countrys north, travellers can experience a truly distinct slice of Tamil culture.',
'timestamp': 1437674293,
'upload_date': '20150723',
'ext': 'mp4',
'title': 'Tasting the spice of life in Jaffna',
'description': r're:(?s)BBC Travel Shows Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
'timestamp': 1646058397,
'upload_date': '20220228',
'duration': 255,
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
# single video story without digitalData
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
@@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'timestamp': 1415867444,
'upload_date': '20141113',
},
'params': {
# rtmp download
'skip_download': True,
}
'skip': 'redirects to TopGear home page',
}, {
# single video embedded with Morph
# TODO: replacement test page
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
'info_dict': {
'id': 'p041vhd0',
@@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'uploader': 'BBC Sport',
'uploader_id': 'bbc_sport',
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'Georestricted to UK',
'skip': 'Video no longer in page',
}, {
# single video with playlist.sxml URL in playlist param
# single video in __INITIAL_DATA__
'url': 'http://www.bbc.com/sport/0/football/33653409',
'info_dict': {
'id': 'p02xycnp',
'ext': 'mp4',
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
'title': 'Ronaldo to Man Utd, Arsenal to spend?',
'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
'timestamp': 1437750175,
'upload_date': '20150724',
'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
'duration': 140,
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
# article with multiple videos embedded with playlist.sxml in playlist param
# article with multiple videos embedded with Morph.setPayload
'url': 'http://www.bbc.com/sport/0/football/34475836',
'info_dict': {
'id': '34475836',
@@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
},
'playlist_count': 3,
}, {
# Testing noplaylist
'url': 'http://www.bbc.com/sport/0/football/34475836',
'info_dict': {
'id': 'p034ppnv',
'ext': 'mp4',
'title': 'All you need to know about Jurgen Klopp',
'timestamp': 1444335081,
'upload_date': '20151008',
'duration': 122.0,
'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
},
'params': {
'noplaylist': True,
},
}, {
# school report article with single video
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
@@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'title': 'School which breaks down barriers in Jerusalem',
},
'playlist_count': 1,
'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
}, {
# single video with playlist URL from weather section
'url': 'http://www.bbc.com/weather/features/33601775',
@@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1437785037,
'upload_date': '20150725',
'duration': 105,
},
}, {
# video with window.__INITIAL_DATA__ and value as JSON string
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
'info_dict': {
'id': 'p0b71qth',
'id': 'p0b779gc',
'ext': 'mp4',
'title': 'Why France is making this woman a national hero',
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1638230731,
'upload_date': '20211130',
'timestamp': 1638215626,
'upload_date': '20211129',
'duration': 125,
},
}, {
# video with script id __NEXT_DATA__ and value as JSON string
'url': 'https://www.bbc.com/news/uk-68546268',
'info_dict': {
'id': 'p0hj0lq7',
'ext': 'mp4',
'title': 'Nasser Hospital doctor describes his treatment by IDF',
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1710188248,
'upload_date': '20240311',
'duration': 104,
},
}, {
# single video article embedded with data-media-vpid
@@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'uploader': 'Radio 3',
'uploader_id': 'bbc_radio_three',
},
'skip': '404 Not Found',
}, {
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
'info_dict': {
@@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'ext': 'mp4',
'title': 'md5:2fabf12a726603193a2879a055f72514',
'description': 'Learn English words and phrases from this story',
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
},
'add_ie': [BBCCoUkIE.ie_key()],
}, {
@@ -832,27 +861,29 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'info_dict': {
'id': 'p07c6sb9',
'ext': 'mp4',
'title': 'How positive thinking is harming your happiness',
'alt_title': 'The downsides of positive thinking',
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
'title': 'The downsides of positive thinking',
'description': 'The downsides of positive thinking',
'duration': 235,
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
'upload_date': '20190604',
'categories': ['Psychology'],
'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
'upload_date': '20220223',
'timestamp': 1645632746,
},
}, {
# BBC Sounds
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
'info_dict': {
'id': 'm001q789',
'id': 'p0hrw4nr',
'ext': 'mp4',
'title': 'The Night Tracks Mix - Music for the darkling hour',
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
'chapters': 'count:8',
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
'uploader': 'Radio 3',
'duration': 1800,
'uploader_id': 'bbc_radio_three',
'title': 'Are our coastlines being washed away?',
'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
'timestamp': 1713556800,
'upload_date': '20240419',
'duration': 1588,
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
'uploader': 'World Service',
'uploader_id': 'bbc_world_service',
'series': 'CrowdScience',
'chapters': [],
},
}, { # onion routes
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
@@ -866,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
def suitable(cls, url):
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
else super(BBCIE, cls).suitable(url))
else super().suitable(url))
def _extract_from_media_meta(self, media_meta, video_id):
# Direct links to media in media metadata (e.g.
@@ -978,7 +1009,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if playlist:
entry = None
for key in ('streaming', 'progressiveDownload'):
playlist_url = playlist.get('%sUrl' % key)
playlist_url = playlist.get(f'{key}Url')
if not playlist_url:
continue
try:
@@ -1004,18 +1035,17 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
group_id = self._search_regex(
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})',
webpage, 'group id', default=None)
if group_id:
return self.url_result(
'https://www.bbc.co.uk/programmes/%s' % group_id,
ie=BBCCoUkIE.ie_key())
f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
programme_id = self._search_regex(
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
[rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"',
rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"',
rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'],
webpage, 'vpid', default=None)
if programme_id:
@@ -1069,83 +1099,133 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
}
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
# There are several setPayload calls may be present but the video
# seems to be always related to the first one
morph_payload = self._parse_json(
self._search_regex(
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
webpage, 'morph payload', default='{}'),
playlist_id, fatal=False)
# Several setPayload calls may be present but the video(s)
# should be in one that mentions leadMedia or videoData
morph_payload = self._search_json(
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
default={})
if morph_payload:
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
for component in components:
if not isinstance(component, dict):
continue
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
if not lead_media:
continue
identifiers = lead_media.get('identifiers')
if not identifiers or not isinstance(identifiers, dict):
continue
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
for lead_media in traverse_obj(morph_payload, (
'body', 'components', ..., 'props', 'leadMedia', {dict})):
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
if not programme_id:
continue
title = lead_media.get('title') or self._og_search_title(webpage)
formats, subtitles = self._download_media_selector(programme_id)
description = lead_media.get('summary')
uploader = lead_media.get('masterBrand')
uploader_id = lead_media.get('mid')
duration = None
duration_d = lead_media.get('duration')
if isinstance(duration_d, dict):
duration = parse_duration(dict_get(
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
return {
'id': programme_id,
'title': title,
'description': description,
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
'title': lead_media.get('title') or self._og_search_title(webpage),
**traverse_obj(lead_media, {
'description': ('summary', {str}),
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
'uploader': ('masterBrand', {str}),
'uploader_id': ('mid', {str}),
}),
'formats': formats,
'subtitles': subtitles,
}
body = self._parse_json(traverse_obj(morph_payload, (
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
if video_data.get('vpid'):
video_id = video_data['vpid']
formats, subtitles = self._download_media_selector(video_id)
entry = {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
}
else:
video_id = video_data['pid']
entry = self.url_result(
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
video_id, url_transparent=True)
entry.update({
'timestamp': traverse_obj(morph_payload, (
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}),
),
**traverse_obj(video_data, {
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
'title': (('title', 'caption'), {str}, any),
'duration': ('duration', {parse_duration}),
}),
})
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
return entry
entries.append(entry)
if entries:
playlist_title = traverse_obj(morph_payload, (
'body', 'content', 'article', 'headline', {str})) or playlist_title
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
preload_state = self._parse_json(self._search_regex(
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), playlist_id, fatal=False)
if preload_state:
current_programme = preload_state.get('programmes', {}).get('current') or {}
programme_id = current_programme.get('id')
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
formats, subtitles = self._download_media_selector(programme_id)
synopses = current_programme.get('synopses') or {}
network = current_programme.get('network') or {}
duration = int_or_none(
current_programme.get('duration', {}).get('value'))
thumbnail = None
image_url = current_programme.get('image_url')
if image_url:
thumbnail = image_url.replace('{recipe}', 'raw')
# various PRELOADED_STATE JSON
preload_state = self._search_json(
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
'preload state', playlist_id, transform_source=js_to_json, default={})
# PRELOADED_STATE with current programmme
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
programme_id = traverse_obj(current_programme, ('id', {str}))
if programme_id and current_programme.get('type') == 'playable_item':
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
formats, subtitles = self._download_media_selector(programme_id)
return {
'id': programme_id,
'title': title,
'formats': formats,
**traverse_obj(current_programme, {
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
'duration': ('duration', 'value', {int_or_none}),
'uploader': ('network', 'short_title', {str}),
'uploader_id': ('network', 'id', {str}),
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
'series': ('titles', 'primary', {str}),
}),
'subtitles': subtitles,
'chapters': traverse_obj(preload_state, (
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
'title': ('titles', {lambda x: join_nonempty(
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
'start_time': ('offset', 'start', {float_or_none}),
'end_time': ('offset', 'end', {float_or_none}),
}),
),
}
# PWA_PRELOADED_STATE with article video asset
asset_id = traverse_obj(preload_state, (
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
'assetVideo', 0, {str}, any))
if asset_id:
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
if video_id:
article = traverse_obj(preload_state, (
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
def image_url(image_id):
return traverse_obj(preload_state, (
'entities', 'images', image_id, 'url',
{lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
formats, subtitles = self._download_media_selector(video_id)
return {
'id': programme_id,
'title': title,
'description': dict_get(synopses, ('long', 'medium', 'short')),
'thumbnail': thumbnail,
'duration': duration,
'uploader': network.get('short_title'),
'uploader_id': network.get('id'),
'id': video_id,
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
'title': ('title', {str}),
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
'thumbnail': (0, {image_url}),
'duration': ('duration', {int_or_none}),
})),
'formats': formats,
'subtitles': subtitles,
'chapters': traverse_obj(preload_state, (
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
'title': ('titles', {lambda x: join_nonempty(
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
'start_time': ('offset', 'start', {float_or_none}),
'end_time': ('offset', 'end', {float_or_none}),
})) or None,
'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
}
else:
return self.url_result(
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
asset_id, playlist_title, display_id=playlist_id,
description=playlist_description)
bbc3_config = self._parse_json(
self._search_regex(
@@ -1191,6 +1271,28 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
def parse_model(model):
"""Extract single video from model structure"""
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
if not item_id:
return
formats, subtitles = self._download_media_selector(item_id)
return {
'id': item_id,
'formats': formats,
'subtitles': subtitles,
**traverse_obj(model, {
'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
'duration': ('versions', 0, 'duration', {int}),
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
}),
}
def is_type(*types):
return lambda _, v: v['type'] in types
initial_data = self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
'quoted preload state', default=None)
@@ -1202,6 +1304,19 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
if initial_data:
for video_data in traverse_obj(initial_data, (
'stores', 'article', 'articleBodyContent', is_type('video'))):
model = traverse_obj(video_data, (
'model', 'blocks', is_type('aresMedia'),
'model', 'blocks', is_type('aresMediaMetadata'),
'model', {dict}, any))
entry = parse_model(model)
if entry:
entries.append(entry)
if entries:
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
def parse_media(media):
if not media:
return
@@ -1216,7 +1331,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if blocks:
summary = []
for block in blocks:
text = try_get(block, lambda x: x['model']['text'], compat_str)
text = try_get(block, lambda x: x['model']['text'], str)
if text:
summary.append(text)
if summary:
@@ -1234,37 +1349,100 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'subtitles': subtitles,
'timestamp': item_time,
'description': strip_or_none(item_desc),
'duration': int_or_none(item.get('duration')),
})
for resp in (initial_data.get('data') or {}).values():
name = resp.get('name')
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
name = resp['name']
if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article':
for block in (try_get(resp,
(lambda x: x['data']['blocks'],
lambda x: x['data']['content']['model']['blocks'],),
list) or []):
if block.get('type') not in ['media', 'video']:
continue
parse_media(block.get('model'))
for block in traverse_obj(resp, (
'data', (None, ('content', 'model')), 'blocks',
is_type('media', 'video'), 'model', {dict})):
parse_media(block)
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
# extract from SIMORGH_DATA hydration JSON
simorgh_data = self._search_json(
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
'simorgh data', playlist_id, default={})
if simorgh_data:
done = False
for video_data in traverse_obj(simorgh_data, (
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
model = traverse_obj(video_data, (
'model', 'blocks', is_type('aresMedia'),
'model', 'blocks', is_type('aresMediaMetadata'),
'model', {dict}, any))
if video_data['type'] == 'video':
entry = parse_model(model)
else: # legacyMedia: no duration, subtitles
block_id, entry = traverse_obj(model, ('blockId', {str})), None
media_data = traverse_obj(simorgh_data, (
'pageData', 'promo', 'media',
{lambda x: x if x['id'] == block_id else None}))
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}),
'ext': ('format', {str}),
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
}))
if formats:
entry = {
'id': block_id,
'display_id': playlist_id,
'formats': formats,
'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
**traverse_obj(model, {
'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
}),
}
done = True
if entry:
entries.append(entry)
if done:
break
if entries:
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),
re.findall(pattern, webpage))))
return list(filter(None, (
self._parse_json(s, playlist_id, fatal=False)
for s in re.findall(pattern, webpage))))
# US accessed article with single embedded video (e.g.
# https://www.bbc.com/news/uk-68546268)
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
('props', 'pageProps', 'page'))
model = traverse_obj(next_data, (
..., 'contents', is_type('video'),
'model', 'blocks', is_type('media'),
'model', 'blocks', is_type('mediaMetadata'),
'model', {dict}, any))
if model and (entry := parse_model(model)):
if not entry.get('timestamp'):
entry['timestamp'] = traverse_obj(next_data, (
..., 'contents', is_type('timestamp'), 'model',
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
entries.append(entry)
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
# Multiple video article (e.g.
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?'
entries = []
for match in extract_all(r'new\s+SMP\(({.+?})\)'):
embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
if embed_url and re.match(EMBED_URL, embed_url):
entries.append(embed_url)
entries.extend(re.findall(
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
rf'setPlaylist\("({EMBED_URL})"\)', webpage))
if entries:
return self.playlist_result(
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
@@ -1314,11 +1492,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
video_id = media_meta.get('externalId')
if not video_id:
video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}'
title = media_meta.get('caption')
if not title:
title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}'
duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
@@ -1379,8 +1557,8 @@ class BBCCoUkArticleIE(InfoExtractor):
class BBCCoUkPlaylistBaseIE(InfoExtractor):
def _entries(self, webpage, url, playlist_id):
single_page = 'page' in compat_urlparse.parse_qs(
compat_urlparse.urlparse(url).query)
single_page = 'page' in urllib.parse.parse_qs(
urllib.parse.urlparse(url).query)
for page_num in itertools.count(2):
for video_id in re.findall(
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
@@ -1394,8 +1572,8 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
if not next_page:
break
webpage = self._download_webpage(
compat_urlparse.urljoin(url, next_page), playlist_id,
'Downloading page %d' % page_num, page_num)
urllib.parse.urljoin(url, next_page), playlist_id,
f'Downloading page {page_num}', page_num)
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -1410,7 +1588,7 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
_VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
_VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})'
@staticmethod
def _get_default(episode, key, default_key='default'):
@@ -1534,11 +1712,11 @@ class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
variables['sliceId'] = series_id
return self._download_json(
'https://graph.ibl.api.bbc.co.uk/', pid, headers={
'Content-Type': 'application/json'
'Content-Type': 'application/json',
}, data=json.dumps({
'id': '5692d93d5aac8d796a0305e895e61551',
'variables': variables,
}).encode('utf-8'))['data']['programme']
}).encode())['data']['programme']
@staticmethod
def _get_playlist_data(data):
@@ -1598,7 +1776,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
def _call_api(self, pid, per_page, page=1, series_id=None):
return self._download_json(
'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes',
pid, query={
'page': page,
'per_page': per_page,
@@ -1614,7 +1792,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
IE_NAME = 'bbc.co.uk:playlist'
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
_VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)'
_URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
_VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
_TESTS = [{

View File

@@ -1,7 +1,6 @@
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import int_or_none
@@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor):
'display_id': 'birds-original-mix',
'ext': 'mp4',
'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
}
},
}]
def _real_extract(self, url):
@@ -51,7 +50,7 @@ class BeatportIE(InfoExtractor):
track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
if track['mix']:
title += ' (' + track['mix'] + ')'
@@ -89,7 +88,7 @@ class BeatportIE(InfoExtractor):
images.append(image)
return {
'id': compat_str(track.get('id')) or track_id,
'id': str(track.get('id')) or track_id,
'display_id': track.get('slug') or display_id,
'title': title,
'formats': formats,

View File

@@ -1,5 +1,4 @@
from .common import InfoExtractor
from ..utils import (
int_or_none,
str_or_none,
@@ -24,7 +23,7 @@ class BeegIE(InfoExtractor):
'upload_date': '20220131',
'timestamp': 1643656455,
'display_id': '2540839',
}
},
}, {
'url': 'https://beeg.com/-0599050563103750?t=4-861',
'md5': 'bd8b5ea75134f7f07fad63008db2060e',
@@ -39,7 +38,7 @@ class BeegIE(InfoExtractor):
'timestamp': 1643623200,
'display_id': '2569965',
'upload_date': '20220131',
}
},
}, {
# api/v6 v2
'url': 'https://beeg.com/1941093077?t=911-1391',
@@ -56,8 +55,8 @@ class BeegIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video = self._download_json(
'https://store.externulls.com/facts/file/%s' % video_id,
video_id, 'Downloading JSON for %s' % video_id)
f'https://store.externulls.com/facts/file/{video_id}',
video_id, f'Downloading JSON for {video_id}')
fc_facts = video.get('fc_facts')
first_fact = {}

View File

@@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor):
'upload_date': '20141205',
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
'age_limit': 18,
}
},
}
def _real_extract(self, url):

View File

@@ -86,6 +86,6 @@ class BellMediaIE(InfoExtractor):
return {
'_type': 'url_transparent',
'id': video_id,
'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}',
'ie_key': 'NineCNineMedia',
}

View File

@@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor):
'tags': ['Studienfilm'],
'duration': 602.440,
'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
}
},
}]
def _real_extract(self, url):
@@ -54,7 +54,7 @@ class BerufeTVIE(InfoExtractor):
subtitles.setdefault(track['language'], []).append({
'url': track['source'],
'name': track.get('label'),
'ext': 'vtt'
'ext': 'vtt',
})
return {

View File

@@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor):
'thumbnail': r're:(?i)^https?://.*\.jpg$',
'subtitles': {
'en': 'mincount:2',
}
},
},
'params': {
# rtmp download
@@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor):
'thumbnail': r're:(?i)^https?://.*\.jpg$',
'subtitles': {
'en': 'mincount:2',
}
},
},
'params': {
# rtmp download
'skip_download': True,
},
}
},
]
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player'
def _get_feed_query(self, uri):
return {

View File

@@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE):
'timestamp': 1673341692,
'duration': 109.269,
'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'],
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg'
}
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg',
},
}]
def _real_extract(self, url):

View File

@@ -1,10 +1,8 @@
import base64
import re
import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_urllib_parse_unquote,
)
class BigflixIE(InfoExtractor):
@@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor):
},
'params': {
'skip_download': True,
}
},
}, {
# multiple formats
'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
@@ -38,7 +36,7 @@ class BigflixIE(InfoExtractor):
webpage, 'title')
def decode_url(quoted_b64_url):
return compat_b64decode(compat_urllib_parse_unquote(
return base64.b64decode(urllib.parse.unquote(
quoted_b64_url)).decode('utf-8')
formats = []
@@ -47,7 +45,7 @@ class BigflixIE(InfoExtractor):
video_url = decode_url(encoded_url)
f = {
'url': video_url,
'format_id': '%sp' % height,
'format_id': f'{height}p',
'height': int(height),
}
if video_url.startswith('rtmp'):
@@ -69,5 +67,5 @@ class BigflixIE(InfoExtractor):
'id': video_id,
'title': title,
'description': description,
'formats': formats
'formats': formats,
}

View File

@@ -36,7 +36,7 @@ class BigoIE(InfoExtractor):
raise ExtractorError('Received invalid JSON data')
if info_raw.get('code'):
raise ExtractorError(
'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True)
info = info_raw.get('data') or {}
if not info.get('alive'):

View File

@@ -20,7 +20,7 @@ class BildIE(InfoExtractor):
'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 196,
}
},
}, {
'note': 'static MP4 and HLS',
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
@@ -32,7 +32,7 @@ class BildIE(InfoExtractor):
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 69,
}
},
}]
def _real_extract(self, url):

View File

@@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor):
return formats
def _download_playinfo(self, video_id, cid):
def _download_playinfo(self, video_id, cid, headers=None):
return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
note=f'Downloading video formats for cid {cid}')['data']
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
def json2srt(self, json_data):
srt_data = ''
@@ -112,7 +112,7 @@ class BilibiliBaseIE(InfoExtractor):
'danmaku': [{
'ext': 'xml',
'url': f'https://comment.bilibili.com/{cid}.xml',
}]
}],
}
subtitle_info = traverse_obj(self._download_json(
@@ -126,7 +126,7 @@ class BilibiliBaseIE(InfoExtractor):
for s in subs_list:
subtitles.setdefault(s['lan'], []).append({
'ext': 'srt',
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
})
return subtitles
@@ -215,7 +215,7 @@ class BilibiliBaseIE(InfoExtractor):
yield {
**metainfo,
'id': f'{video_id}_{cid}',
'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
'formats': self.extract_formats(play_info),
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
'duration': float_or_none(play_info.get('timelength'), scale=1000),
@@ -269,7 +269,7 @@ class BiliBiliIE(BilibiliBaseIE):
'url': 'https://www.bilibili.com/video/BV1bK411W797',
'info_dict': {
'id': 'BV1bK411W797',
'title': '物语中的人物是如何吐槽自己的OP的'
'title': '物语中的人物是如何吐槽自己的OP的',
},
'playlist_count': 18,
'playlist': [{
@@ -288,8 +288,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
}
}]
},
}],
}, {
'note': 'Specific page of Anthology',
'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
@@ -308,7 +308,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
}
},
}, {
'note': 'video has subtitles',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
@@ -327,7 +327,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2'
'subtitles': 'count:2',
},
'params': {'listsubtitles': True},
}, {
@@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(url, video_id)
headers = self.geo_verification_headers()
webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url)
@@ -531,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE):
self._download_json(
'https://api.bilibili.com/x/player/pagelist', video_id,
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
note='Extracting videos in anthology'),
note='Extracting videos in anthology', headers=headers),
'data', expected_type=list) or []
is_anthology = len(page_list_json) > 1
@@ -552,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):
festival_info = {}
if is_festival:
play_info = self._download_playinfo(video_id, cid)
play_info = self._download_playinfo(video_id, cid, headers=headers)
festival_info = traverse_obj(initial_state, {
'uploader': ('videoInfo', 'upName'),
@@ -585,10 +586,9 @@ class BiliBiliIE(BilibiliBaseIE):
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
if is_interactive:
return self.playlist_result(
self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
'__post_extractor': self.extract_comments(aid),
})
self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
__post_extractor=self.extract_comments(aid))
else:
return {
**metainfo,
@@ -639,7 +639,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': 1425.256,
'timestamp': 1554566400,
'upload_date': '20190406',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
'skip': 'Geo-restricted',
}, {
@@ -660,20 +660,21 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': 1922.129,
'timestamp': 1602853860,
'upload_date': '20201016',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}]
def _real_extract(self, url):
episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id)
headers = self.geo_verification_headers()
webpage = self._download_webpage(url, episode_id, headers=headers)
if '您所在的地区无法观看本片' in webpage:
raise GeoRestrictedError('This video is restricted')
elif '正在观看预览,大会员免费看全片' in webpage:
self.raise_login_required('This video is for premium members only')
headers = {'Referer': url, **self.geo_verification_headers()}
headers['Referer'] = url
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
@@ -724,7 +725,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
'__post_extractor': self.extract_comments(aid),
'http_headers': headers,
'http_headers': {'Referer': url},
}
@@ -762,7 +763,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
'duration': 1525.777,
'timestamp': 1425074413,
'upload_date': '20150227',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}],
}]
@@ -792,7 +793,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
'title': '鬼灭之刃',
'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
},
'playlist_mincount': 26
'playlist_mincount': 26,
}, {
'url': 'https://www.bilibili.com/bangumi/play/ss2251',
'info_dict': {
@@ -817,7 +818,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
'duration': 1436.992,
'timestamp': 1343185080,
'upload_date': '20120725',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}],
}]
@@ -904,7 +905,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE):
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
}
},
}]
def _real_extract(self, url):
@@ -937,7 +938,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
}
},
}],
'params': {'playlist_items': '1'},
}, {
@@ -1010,7 +1011,7 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
for position in (
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
57, 62, 11, 36, 20, 34, 44, 52
57, 62, 11, 36, 20, 34, 44, 52,
):
char_at_position = try_call(lambda: session_key[position])
if char_at_position:
@@ -1043,15 +1044,17 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
try:
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
playlist_id, note=f'Downloading page {page_idx}', query=query)
playlist_id, note=f'Downloading page {page_idx}', query=query,
headers={'referer': url})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise
if response['code'] == -401:
if response['code'] in (-352, -401):
raise ExtractorError(
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
f'Request is blocked by server ({-response["code"]}), '
'please add cookies, wait and try later.', expected=True)
return response['data']
def get_metadata(page_data):
@@ -1159,7 +1162,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
'uploader_id': ('meta', 'mid', {str_or_none}),
'timestamp': ('meta', 'ptime', {int_or_none}),
'thumbnail': ('meta', 'cover', {url_or_none}),
})
}),
}
def get_entries(page_data):
@@ -1191,7 +1194,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
mid, sid = self._match_valid_url(url).group('mid', 'sid')
playlist_id = f'{mid}_{sid}'
playlist_meta = traverse_obj(self._download_json(
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
), {
'title': ('data', 'meta', 'name', {str}),
'description': ('data', 'meta', 'description', {str}),
@@ -1213,7 +1216,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
'page_count': math.ceil(entry_count / page_size),
'page_size': page_size,
'uploader': self._get_uploader(mid, playlist_id),
**playlist_meta
**playlist_meta,
}
def get_entries(page_data):
@@ -1237,7 +1240,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
'upload_date': '20201109',
'modified_timestamp': int,
'modified_date': str,
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
'view_count': int,
'like_count': int,
},
@@ -1341,7 +1344,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'uploader_id': '84912',
'timestamp': 1604905176,
'upload_date': '20201109',
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
},
'playlist_mincount': 22,
}, {
@@ -1367,7 +1370,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
for page_num in itertools.count(1):
page_data = self._download_json(
'https://api.bilibili.com/x/v2/medialist/resource/list',
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
)['data']
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
@@ -1403,7 +1406,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'tid': ('tid', {int_or_none}),
'sort_field': ('sortFiled', {int_or_none}),
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
})
}),
}
metadata = {
'id': f'{query["type"]}_{query["biz_id"]}',
@@ -1426,26 +1429,26 @@ class BilibiliCategoryIE(InfoExtractor):
'url': 'https://www.bilibili.com/v/kichiku/mad',
'info_dict': {
'id': 'kichiku: mad',
'title': 'kichiku: mad'
'title': 'kichiku: mad',
},
'playlist_mincount': 45,
'params': {
'playlistend': 45
}
'playlistend': 45,
},
}]
def _fetch_page(self, api_url, num_pages, query, page_num):
parsed_json = self._download_json(
api_url, query, query={'Search_key': query, 'pn': page_num},
note='Extracting results from page %s of %s' % (page_num, num_pages))
note=f'Extracting results from page {page_num} of {num_pages}')
video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
if not video_list:
raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
for video in video_list:
yield self.url_result(
'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
def _entries(self, category, subcategory, query):
# map of categories : subcategories : RIDs
@@ -1455,7 +1458,7 @@ class BilibiliCategoryIE(InfoExtractor):
'manual_vocaloid': 126,
'guide': 22,
'theatre': 216,
'course': 127
'course': 127,
},
}
@@ -1481,7 +1484,7 @@ class BilibiliCategoryIE(InfoExtractor):
def _real_extract(self, url):
category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
query = '%s: %s' % (category, subcategory)
query = f'{category}: {subcategory}'
return self.playlist_result(self._entries(category, subcategory, query), query, query)
@@ -1584,7 +1587,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
formats = [{
'url': play_data['cdns'][0],
'filesize': int_or_none(play_data.get('size')),
'vcodec': 'none'
'vcodec': 'none',
}]
for a_format in formats:
@@ -1602,7 +1605,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
subtitles = {
'origin': [{
'url': lyric,
}]
}],
}
return {
@@ -1670,7 +1673,7 @@ class BiliBiliPlayerIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
'http://www.bilibili.tv/video/av%s/' % video_id,
f'http://www.bilibili.tv/video/av{video_id}/',
ie=BiliBiliIE.ie_key(), video_id=video_id)
@@ -1698,11 +1701,10 @@ class BiliIntlBaseIE(InfoExtractor):
return json.get('data')
def json2srt(self, json):
data = '\n\n'.join(
return '\n\n'.join(
f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
for i, line in enumerate(traverse_obj(json, (
'body', lambda _, l: l['content'] and l['from'] and l['to']))))
return data
def _get_subtitles(self, *, ep_id=None, aid=None):
sub_json = self._call_api(
@@ -1804,14 +1806,14 @@ class BiliIntlBaseIE(InfoExtractor):
note='Downloading login key', errnote='Unable to download login key')['data']
public_key = Cryptodome.RSA.importKey(key_data['key'])
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
login_post = self._download_json(
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
'username': username,
'password': base64.b64encode(password_hash).decode('ascii'),
'keep_me': 'true',
's_locale': 'en_US',
'isTrusted': 'true'
'isTrusted': 'true',
}), note='Logging in', errnote='Unable to log in')
if login_post.get('code'):
if login_post.get('message'):
@@ -1838,17 +1840,17 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 76.242,
'title': '<Untitled Chapter 1>'
'title': '<Untitled Chapter 1>',
}, {
'start_time': 76.242,
'end_time': 161.161,
'title': 'Intro'
'title': 'Intro',
}, {
'start_time': 1325.742,
'end_time': 1403.903,
'title': 'Outro'
'title': 'Outro',
}],
}
},
}, {
# Non-Bstation page
'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
@@ -1865,17 +1867,17 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 88.0,
'title': '<Untitled Chapter 1>'
'title': '<Untitled Chapter 1>',
}, {
'start_time': 88.0,
'end_time': 156.0,
'title': 'Intro'
'title': 'Intro',
}, {
'start_time': 1173.0,
'end_time': 1259.535,
'title': 'Outro'
'title': 'Outro',
}],
}
},
}, {
# Subtitle with empty content
'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
@@ -1886,7 +1888,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
'episode_number': 140,
},
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
}, {
# episode comment extraction
'url': 'https://www.bilibili.tv/en/play/34580/340317',
@@ -1904,20 +1906,20 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 61.0,
'title': '<Untitled Chapter 1>'
'title': '<Untitled Chapter 1>',
}, {
'start_time': 61.0,
'end_time': 134.0,
'title': 'Intro'
'title': 'Intro',
}, {
'start_time': 1290.0,
'end_time': 1379.0,
'title': 'Outro'
'title': 'Outro',
}],
},
'params': {
'getcomments': True
}
'getcomments': True,
},
}, {
# user generated content comment extraction
'url': 'https://www.bilibili.tv/en/video/2045730385',
@@ -1932,8 +1934,8 @@ class BiliIntlIE(BiliIntlBaseIE):
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
},
'params': {
'getcomments': True
}
'getcomments': True,
},
}, {
# episode id without intro and outro
'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
@@ -1988,7 +1990,7 @@ class BiliIntlIE(BiliIntlBaseIE):
# Non-Bstation layout, read through episode list
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
video_data = traverse_obj(season_json, (
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
), expected_type=dict, get_all=False)
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
@@ -2020,7 +2022,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'id': replies.get('rpid'),
'like_count': int_or_none(replies.get('like_count')),
'parent': replies.get('parent'),
'timestamp': unified_timestamp(replies.get('ctime_text'))
'timestamp': unified_timestamp(replies.get('ctime_text')),
}
if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
@@ -2073,11 +2075,11 @@ class BiliIntlIE(BiliIntlBaseIE):
chapters = [{
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
'title': 'Intro'
'title': 'Intro',
}, {
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
'title': 'Outro'
'title': 'Outro',
}]
return {
@@ -2133,7 +2135,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
episode_id = str(episode['episode_id'])
yield self.url_result(smuggle_url(
BiliIntlIE._make_url(episode_id, series_id),
self._parse_video_metadata(episode)
self._parse_video_metadata(episode),
), BiliIntlIE, episode_id)
def _real_extract(self, url):
@@ -2152,19 +2154,19 @@ class BiliLiveIE(InfoExtractor):
'url': 'https://live.bilibili.com/196',
'info_dict': {
'id': '33989',
'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群全员禁言66690667090209258459971⑧481 (功能一样,别多加)",
'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群全员禁言66690667090209258459971⑧481 (功能一样,别多加)',
'ext': 'flv',
'title': "太空狼人杀联动,不被爆杀就算赢",
'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
'title': '太空狼人杀联动,不被爆杀就算赢',
'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
'timestamp': 1650802769,
},
'skip': 'not live'
'skip': 'not live',
}, {
'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
'only_matching': True
'only_matching': True,
}, {
'url': 'https://live.bilibili.com/blanc/196',
'only_matching': True
'only_matching': True,
}]
_FORMATS = {
@@ -2205,7 +2207,7 @@ class BiliLiveIE(InfoExtractor):
raise ExtractorError('Streamer is not live', expected=True)
formats = []
for qn in self._FORMATS.keys():
for qn in self._FORMATS:
stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
'room_id': room_id,
'qn': qn,

View File

@@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor):
'upload_date': '20170103',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
},
}, {
# test case: video with different channel and uploader
@@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor):
'upload_date': '20231106',
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
'channel': 'Full Measure with Sharyl Attkisson',
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
},
}, {
# video not downloadable in browser, but we can recover it
@@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor):
'upload_date': '20181113',
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
'channel': 'BitChute',
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
},
'params': {'check_formats': None},
}, {
@@ -115,7 +115,7 @@ class BitChuteIE(InfoExtractor):
continue
return {
'url': url,
'filesize': int_or_none(response.headers.get('Content-Length'))
'filesize': int_or_none(response.headers.get('Content-Length')),
}
def _raise_if_restricted(self, webpage):
@@ -196,7 +196,7 @@ class BitChuteChannelIE(InfoExtractor):
'duration': 16,
'view_count': int,
},
}
},
],
'params': {
'skip_download': True,
@@ -209,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor):
'id': 'wV9Imujxasw9',
'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e',
}
},
}]
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
@@ -224,7 +224,7 @@ class BitChuteChannelIE(InfoExtractor):
'container': 'playlist-video',
'title': 'title',
'description': 'description',
}
},
}

View File

@@ -47,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor):
region = mobj.group('region')
video_id = mobj.group('id')
info = self._download_json(
'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id)
f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
duration = info.get('duration')
title = info['name']
upload_date = info.get('created')

View File

@@ -1,5 +1,5 @@
from .common import InfoExtractor
from .amp import AMPIE
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
@@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor):
def _real_extract(self, url):
article_id = self._match_id(url)
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article']
thumbnails = []
primary_photo = article_data.get('primaryPhoto')
@@ -71,11 +71,11 @@ class BleacherReportIE(InfoExtractor):
if video:
video_type = video['type']
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id'])
elif video_type == 'youtube.com':
info['url'] = video['id']
elif video_type == 'vine.co':
info['url'] = 'https://vine.co/v/%s' % video['id']
info['url'] = 'https://vine.co/v/{}'.format(video['id'])
else:
info['url'] = video_type + video['id']
return info
@@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE):
},
'expected_warnings': [
'Unable to download f4m manifest'
]
'Unable to download f4m manifest',
],
}]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai')
info['id'] = video_id
return info

View File

@@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor):
'uploader_id': '5fb81e51aa66ae000c395478',
'ext': 'mp3',
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
}
},
}, {
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
'info_dict': {
@@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor):
'uploader': '179617322678353920',
'uploader_id': '5ba99cf71386730004552c42',
'ext': 'mp3',
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
}
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'],
},
}]
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
_GRAPHQL_OPERATIONNAME = 'webBitePageGetBite'
_GRAPHQL_QUERY = (
'''query webBitePageGetBite($_id: MongoID!) {
web {
@@ -141,27 +141,26 @@ class BlerpIE(InfoExtractor):
'operationName': self._GRAPHQL_OPERATIONNAME,
'query': self._GRAPHQL_QUERY,
'variables': {
'_id': audio_id
}
'_id': audio_id,
},
}
headers = {
'Content-Type': 'application/json'
'Content-Type': 'application/json',
}
json_result = self._download_json('https://api.blerp.com/graphql',
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
json_result = self._download_json(
'https://api.blerp.com/graphql', audio_id,
data=json.dumps(data).encode(), headers=headers)
bite_json = json_result['data']['web']['biteById']
info_dict = {
return {
'id': bite_json['_id'],
'url': bite_json['audio']['mp3']['url'],
'title': bite_json['title'],
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
'ext': 'mp3',
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None),
}
return info_dict

View File

@@ -1,3 +1,4 @@
from .common import InfoExtractor
from ..utils import (
mimetype2ext,
parse_duration,
@@ -5,7 +6,6 @@ from ..utils import (
str_or_none,
traverse_obj,
)
from .common import InfoExtractor
class BloggerIE(InfoExtractor):
@@ -21,14 +21,14 @@ class BloggerIE(InfoExtractor):
'ext': 'mp4',
'thumbnail': r're:^https?://.*',
'duration': 76.068,
}
},
}]
def _real_extract(self, url):
token_id = self._match_id(url)
webpage = self._download_webpage(url, token_id)
data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data')
data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id)
data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id)
streams = data['streams']
formats = [{
'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))),

View File

@@ -55,7 +55,7 @@ class BloombergIE(InfoExtractor):
title = re.sub(': Video$', '', self._og_search_title(webpage))
embed_info = self._download_json(
'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id)
formats = []
for stream in embed_info['streams']:
stream_url = stream.get('url')

Some files were not shown because too many files have changed in this diff Show More