mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-01-07 15:31:21 +00:00
Merged parent
This commit is contained in:
@@ -109,7 +109,6 @@ from .utils import (
|
||||
determine_protocol,
|
||||
encode_compat_str,
|
||||
encodeFilename,
|
||||
error_to_compat_str,
|
||||
escapeHTML,
|
||||
expand_path,
|
||||
extract_basic_auth,
|
||||
@@ -583,7 +582,7 @@ class YoutubeDL:
|
||||
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
|
||||
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
|
||||
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
|
||||
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
|
||||
}
|
||||
_deprecated_multivalue_fields = {
|
||||
'album_artist': 'album_artists',
|
||||
@@ -594,7 +593,7 @@ class YoutubeDL:
|
||||
}
|
||||
_format_selection_exts = {
|
||||
'audio': set(MEDIA_EXTENSIONS.common_audio),
|
||||
'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
|
||||
'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
|
||||
'storyboards': set(MEDIA_EXTENSIONS.storyboards),
|
||||
}
|
||||
|
||||
@@ -628,7 +627,7 @@ class YoutubeDL:
|
||||
error=sys.stderr,
|
||||
screen=sys.stderr if self.params.get('quiet') else stdout,
|
||||
console=None if compat_os_name == 'nt' else next(
|
||||
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
|
||||
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -679,9 +678,9 @@ class YoutubeDL:
|
||||
width_args = [] if width is None else ['-w', str(width)]
|
||||
sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
|
||||
try:
|
||||
self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
|
||||
self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
|
||||
except OSError:
|
||||
self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
|
||||
self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
|
||||
self._output_channel = os.fdopen(master, 'rb')
|
||||
except OSError as ose:
|
||||
if ose.errno == errno.ENOENT:
|
||||
@@ -822,8 +821,7 @@ class YoutubeDL:
|
||||
)
|
||||
self.report_warning(
|
||||
'Long argument string detected. '
|
||||
'Use -- to separate parameters and URLs, like this:\n%s' %
|
||||
shell_quote(correct_argv))
|
||||
f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')
|
||||
|
||||
def add_info_extractor(self, ie):
|
||||
"""Add an InfoExtractor object to the end of the list."""
|
||||
@@ -922,7 +920,7 @@ class YoutubeDL:
|
||||
if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
|
||||
return
|
||||
self._write_string(
|
||||
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
|
||||
'{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')),
|
||||
self._out_files.screen, only_once=only_once)
|
||||
|
||||
def to_stderr(self, message, only_once=False):
|
||||
@@ -1045,10 +1043,10 @@ class YoutubeDL:
|
||||
return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
|
||||
|
||||
def report_warning(self, message, only_once=False):
|
||||
'''
|
||||
"""
|
||||
Print the message to stderr, it will be prefixed with 'WARNING:'
|
||||
If stderr is a tty file the 'WARNING:' will be colored
|
||||
'''
|
||||
"""
|
||||
if self.params.get('logger') is not None:
|
||||
self.params['logger'].warning(message)
|
||||
else:
|
||||
@@ -1066,14 +1064,14 @@ class YoutubeDL:
|
||||
self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
|
||||
|
||||
def report_error(self, message, *args, **kwargs):
|
||||
'''
|
||||
"""
|
||||
Do the same as trouble, but prefixes the message with 'ERROR:', colored
|
||||
in red if stderr is a tty file.
|
||||
'''
|
||||
"""
|
||||
self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
|
||||
|
||||
def write_debug(self, message, only_once=False):
|
||||
'''Log debug message or Print message to stderr'''
|
||||
"""Log debug message or Print message to stderr"""
|
||||
if not self.params.get('verbose', False):
|
||||
return
|
||||
message = f'[debug] {message}'
|
||||
@@ -1085,14 +1083,14 @@ class YoutubeDL:
|
||||
def report_file_already_downloaded(self, file_name):
|
||||
"""Report file has already been fully downloaded."""
|
||||
try:
|
||||
self.to_screen('[download] %s has already been downloaded' % file_name)
|
||||
self.to_screen(f'[download] {file_name} has already been downloaded')
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen('[download] The file has already been downloaded')
|
||||
|
||||
def report_file_delete(self, file_name):
|
||||
"""Report that existing file will be deleted."""
|
||||
try:
|
||||
self.to_screen('Deleting existing file %s' % file_name)
|
||||
self.to_screen(f'Deleting existing file {file_name}')
|
||||
except UnicodeEncodeError:
|
||||
self.to_screen('Deleting existing file')
|
||||
|
||||
@@ -1147,7 +1145,7 @@ class YoutubeDL:
|
||||
|
||||
@staticmethod
|
||||
def escape_outtmpl(outtmpl):
|
||||
''' Escape any remaining strings like %s, %abc% etc. '''
|
||||
""" Escape any remaining strings like %s, %abc% etc. """
|
||||
return re.sub(
|
||||
STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
|
||||
lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
|
||||
@@ -1155,7 +1153,7 @@ class YoutubeDL:
|
||||
|
||||
@classmethod
|
||||
def validate_outtmpl(cls, outtmpl):
|
||||
''' @return None or Exception object '''
|
||||
""" @return None or Exception object """
|
||||
outtmpl = re.sub(
|
||||
STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
|
||||
lambda mobj: f'{mobj.group(0)[:-1]}s',
|
||||
@@ -1208,13 +1206,13 @@ class YoutubeDL:
|
||||
}
|
||||
# Field is of the form key1.key2...
|
||||
# where keys (except first) can be string, int, slice or "{field, ...}"
|
||||
FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
|
||||
FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
|
||||
FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} # noqa: UP031
|
||||
FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { # noqa: UP031
|
||||
'inner': FIELD_INNER_RE,
|
||||
'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
|
||||
'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
|
||||
}
|
||||
MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
|
||||
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
|
||||
MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
|
||||
INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
|
||||
(?P<negate>-)?
|
||||
(?P<fields>{FIELD_RE})
|
||||
@@ -1337,7 +1335,7 @@ class YoutubeDL:
|
||||
value, default = None, na
|
||||
|
||||
fmt = outer_mobj.group('format')
|
||||
if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
|
||||
if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
|
||||
fmt = f'0{field_size_compat_map[last_field]:d}d'
|
||||
|
||||
flags = outer_mobj.group('conversion') or ''
|
||||
@@ -1362,7 +1360,7 @@ class YoutubeDL:
|
||||
elif fmt[-1] == 'U': # unicode normalized
|
||||
value, fmt = unicodedata.normalize(
|
||||
# "+" = compatibility equivalence, "#" = NFD
|
||||
'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
|
||||
'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
|
||||
value), str_fmt
|
||||
elif fmt[-1] == 'D': # decimal suffix
|
||||
num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
|
||||
@@ -1390,7 +1388,7 @@ class YoutubeDL:
|
||||
if fmt[-1] in 'csra':
|
||||
value = sanitizer(last_field, value)
|
||||
|
||||
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
|
||||
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
|
||||
TMPL_DICT[key] = value
|
||||
return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
|
||||
|
||||
@@ -1479,9 +1477,9 @@ class YoutubeDL:
|
||||
|
||||
date = info_dict.get('upload_date')
|
||||
if date is not None:
|
||||
dateRange = self.params.get('daterange', DateRange())
|
||||
if date not in dateRange:
|
||||
return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
|
||||
date_range = self.params.get('daterange', DateRange())
|
||||
if date not in date_range:
|
||||
return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
|
||||
view_count = info_dict.get('view_count')
|
||||
if view_count is not None:
|
||||
min_views = self.params.get('min_views')
|
||||
@@ -1491,7 +1489,7 @@ class YoutubeDL:
|
||||
if max_views is not None and view_count > max_views:
|
||||
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
|
||||
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
|
||||
return 'Skipping "%s" because it is age restricted' % video_title
|
||||
return f'Skipping "{video_title}" because it is age restricted'
|
||||
|
||||
match_filter = self.params.get('match_filter')
|
||||
if match_filter is None:
|
||||
@@ -1544,7 +1542,7 @@ class YoutubeDL:
|
||||
|
||||
@staticmethod
|
||||
def add_extra_info(info_dict, extra_info):
|
||||
'''Set the keys from extra_info in info dict if they are missing'''
|
||||
"""Set the keys from extra_info in info dict if they are missing"""
|
||||
for key, value in extra_info.items():
|
||||
info_dict.setdefault(key, value)
|
||||
|
||||
@@ -1590,7 +1588,7 @@ class YoutubeDL:
|
||||
self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
|
||||
'has already been recorded in the archive')
|
||||
if self.params.get('break_on_existing', False):
|
||||
raise ExistingVideoReached()
|
||||
raise ExistingVideoReached
|
||||
break
|
||||
return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
|
||||
else:
|
||||
@@ -1616,8 +1614,8 @@ class YoutubeDL:
|
||||
except GeoRestrictedError as e:
|
||||
msg = e.msg
|
||||
if e.countries:
|
||||
msg += '\nThis video is available in %s.' % ', '.join(
|
||||
map(ISO3166Utils.short2full, e.countries))
|
||||
msg += '\nThis video is available in {}.'.format(', '.join(
|
||||
map(ISO3166Utils.short2full, e.countries)))
|
||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||
self.report_error(msg)
|
||||
except ExtractorError as e: # An error we somewhat expected
|
||||
@@ -1826,8 +1824,8 @@ class YoutubeDL:
|
||||
if isinstance(additional_urls, str):
|
||||
additional_urls = [additional_urls]
|
||||
self.to_screen(
|
||||
'[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
|
||||
self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
|
||||
'[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
|
||||
self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
|
||||
ie_result['additional_entries'] = [
|
||||
self.extract_info(
|
||||
url, download, extra_info=extra_info,
|
||||
@@ -1879,8 +1877,8 @@ class YoutubeDL:
|
||||
webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url
|
||||
if webpage_url and webpage_url in self._playlist_urls:
|
||||
self.to_screen(
|
||||
'[download] Skipping already downloaded playlist: %s'
|
||||
% ie_result.get('title') or ie_result.get('id'))
|
||||
'[download] Skipping already downloaded playlist: {}'.format(
|
||||
ie_result.get('title')) or ie_result.get('id'))
|
||||
return
|
||||
|
||||
self._playlist_level += 1
|
||||
@@ -1895,8 +1893,8 @@ class YoutubeDL:
|
||||
self._playlist_urls.clear()
|
||||
elif result_type == 'compat_list':
|
||||
self.report_warning(
|
||||
'Extractor %s returned a compat_list result. '
|
||||
'It needs to be updated.' % ie_result.get('extractor'))
|
||||
'Extractor {} returned a compat_list result. '
|
||||
'It needs to be updated.'.format(ie_result.get('extractor')))
|
||||
|
||||
def _fixup(r):
|
||||
self.add_extra_info(r, {
|
||||
@@ -1913,7 +1911,7 @@ class YoutubeDL:
|
||||
]
|
||||
return ie_result
|
||||
else:
|
||||
raise Exception('Invalid result type: %s' % result_type)
|
||||
raise Exception(f'Invalid result type: {result_type}')
|
||||
|
||||
def _ensure_dir_exists(self, path):
|
||||
return make_dir(path, self.report_error)
|
||||
@@ -2029,8 +2027,9 @@ class YoutubeDL:
|
||||
resolved_entries[i] = (playlist_index, NO_DEFAULT)
|
||||
continue
|
||||
|
||||
self.to_screen('[download] Downloading item %s of %s' % (
|
||||
self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
|
||||
self.to_screen(
|
||||
f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
|
||||
f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')
|
||||
|
||||
entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
|
||||
'playlist_index': playlist_index,
|
||||
@@ -2080,9 +2079,9 @@ class YoutubeDL:
|
||||
}
|
||||
operator_rex = re.compile(r'''(?x)\s*
|
||||
(?P<key>[\w.-]+)\s*
|
||||
(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<op>{})(?P<none_inclusive>\s*\?)?\s*
|
||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
|
||||
''' % '|'.join(map(re.escape, OPERATORS.keys())))
|
||||
'''.format('|'.join(map(re.escape, OPERATORS.keys()))))
|
||||
m = operator_rex.fullmatch(filter_spec)
|
||||
if m:
|
||||
try:
|
||||
@@ -2093,7 +2092,7 @@ class YoutubeDL:
|
||||
comparison_value = parse_filesize(m.group('value') + 'B')
|
||||
if comparison_value is None:
|
||||
raise ValueError(
|
||||
'Invalid value %r in format specification %r' % (
|
||||
'Invalid value {!r} in format specification {!r}'.format(
|
||||
m.group('value'), filter_spec))
|
||||
op = OPERATORS[m.group('op')]
|
||||
|
||||
@@ -2103,15 +2102,15 @@ class YoutubeDL:
|
||||
'^=': lambda attr, value: attr.startswith(value),
|
||||
'$=': lambda attr, value: attr.endswith(value),
|
||||
'*=': lambda attr, value: value in attr,
|
||||
'~=': lambda attr, value: value.search(attr) is not None
|
||||
'~=': lambda attr, value: value.search(attr) is not None,
|
||||
}
|
||||
str_operator_rex = re.compile(r'''(?x)\s*
|
||||
(?P<key>[a-zA-Z0-9._-]+)\s*
|
||||
(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
|
||||
(?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
|
||||
(?P<quote>["'])?
|
||||
(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
|
||||
(?(quote)(?P=quote))\s*
|
||||
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
|
||||
'''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))
|
||||
m = str_operator_rex.fullmatch(filter_spec)
|
||||
if m:
|
||||
if m.group('op') == '~=':
|
||||
@@ -2125,7 +2124,7 @@ class YoutubeDL:
|
||||
op = str_op
|
||||
|
||||
if not m:
|
||||
raise SyntaxError('Invalid filter specification %r' % filter_spec)
|
||||
raise SyntaxError(f'Invalid filter specification {filter_spec!r}')
|
||||
|
||||
def _filter(f):
|
||||
actual_value = f.get(m.group('key'))
|
||||
@@ -2136,7 +2135,12 @@ class YoutubeDL:
|
||||
|
||||
def _check_formats(self, formats):
|
||||
for f in formats:
|
||||
self.to_screen('[info] Testing format %s' % f['format_id'])
|
||||
working = f.get('__working')
|
||||
if working is not None:
|
||||
if working:
|
||||
yield f
|
||||
continue
|
||||
self.to_screen('[info] Testing format {}'.format(f['format_id']))
|
||||
path = self.get_output_path('temp')
|
||||
if not self._ensure_dir_exists(f'{path}/'):
|
||||
continue
|
||||
@@ -2144,41 +2148,52 @@ class YoutubeDL:
|
||||
temp_file.close()
|
||||
try:
|
||||
success, _ = self.dl(temp_file.name, f, test=True)
|
||||
except (DownloadError, OSError, ValueError) + network_exceptions:
|
||||
except (DownloadError, OSError, ValueError, *network_exceptions):
|
||||
success = False
|
||||
finally:
|
||||
if os.path.exists(temp_file.name):
|
||||
try:
|
||||
os.remove(temp_file.name)
|
||||
except OSError:
|
||||
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
|
||||
self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
|
||||
f['__working'] = success
|
||||
if success:
|
||||
yield f
|
||||
else:
|
||||
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
|
||||
self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
|
||||
|
||||
def _select_formats(self, formats, selector):
|
||||
return list(selector({
|
||||
'formats': formats,
|
||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||
}))
|
||||
|
||||
def _default_format_spec(self, info_dict, download=True):
|
||||
download = download and not self.params.get('simulate')
|
||||
prefer_best = download and (
|
||||
self.params['outtmpl']['default'] == '-'
|
||||
or info_dict.get('is_live') and not self.params.get('live_from_start'))
|
||||
|
||||
def can_merge():
|
||||
merger = FFmpegMergerPP(self)
|
||||
return merger.available and merger.can_merge()
|
||||
|
||||
prefer_best = (
|
||||
not self.params.get('simulate')
|
||||
and download
|
||||
and (
|
||||
not can_merge()
|
||||
or info_dict.get('is_live') and not self.params.get('live_from_start')
|
||||
or self.params['outtmpl']['default'] == '-'))
|
||||
compat = (
|
||||
prefer_best
|
||||
or self.params.get('allow_multiple_audio_streams', False)
|
||||
or 'format-spec' in self.params['compat_opts'])
|
||||
if not prefer_best and download and not can_merge():
|
||||
prefer_best = True
|
||||
formats = self._get_formats(info_dict)
|
||||
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
|
||||
if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
|
||||
self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
|
||||
'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
|
||||
|
||||
return (
|
||||
'best/bestvideo+bestaudio' if prefer_best
|
||||
else 'bestvideo*+bestaudio/best' if not compat
|
||||
else 'bestvideo+bestaudio/best')
|
||||
compat = (self.params.get('allow_multiple_audio_streams')
|
||||
or 'format-spec' in self.params['compat_opts'])
|
||||
|
||||
return ('best/bestvideo+bestaudio' if prefer_best
|
||||
else 'bestvideo+bestaudio/best' if compat
|
||||
else 'bestvideo*+bestaudio/best')
|
||||
|
||||
def build_format_selector(self, format_spec):
|
||||
def syntax_error(note, start):
|
||||
@@ -2198,8 +2213,8 @@ class YoutubeDL:
|
||||
|
||||
def _parse_filter(tokens):
|
||||
filter_parts = []
|
||||
for type, string_, start, _, _ in tokens:
|
||||
if type == tokenize.OP and string_ == ']':
|
||||
for type_, string_, _start, _, _ in tokens:
|
||||
if type_ == tokenize.OP and string_ == ']':
|
||||
return ''.join(filter_parts)
|
||||
else:
|
||||
filter_parts.append(string_)
|
||||
@@ -2209,23 +2224,23 @@ class YoutubeDL:
|
||||
# E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
|
||||
ALLOWED_OPS = ('/', '+', ',', '(', ')')
|
||||
last_string, last_start, last_end, last_line = None, None, None, None
|
||||
for type, string_, start, end, line in tokens:
|
||||
if type == tokenize.OP and string_ == '[':
|
||||
for type_, string_, start, end, line in tokens:
|
||||
if type_ == tokenize.OP and string_ == '[':
|
||||
if last_string:
|
||||
yield tokenize.NAME, last_string, last_start, last_end, last_line
|
||||
last_string = None
|
||||
yield type, string_, start, end, line
|
||||
yield type_, string_, start, end, line
|
||||
# everything inside brackets will be handled by _parse_filter
|
||||
for type, string_, start, end, line in tokens:
|
||||
yield type, string_, start, end, line
|
||||
if type == tokenize.OP and string_ == ']':
|
||||
for type_, string_, start, end, line in tokens:
|
||||
yield type_, string_, start, end, line
|
||||
if type_ == tokenize.OP and string_ == ']':
|
||||
break
|
||||
elif type == tokenize.OP and string_ in ALLOWED_OPS:
|
||||
elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
|
||||
if last_string:
|
||||
yield tokenize.NAME, last_string, last_start, last_end, last_line
|
||||
last_string = None
|
||||
yield type, string_, start, end, line
|
||||
elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
|
||||
yield type_, string_, start, end, line
|
||||
elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
|
||||
if not last_string:
|
||||
last_string = string_
|
||||
last_start = start
|
||||
@@ -2238,13 +2253,13 @@ class YoutubeDL:
|
||||
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
|
||||
selectors = []
|
||||
current_selector = None
|
||||
for type, string_, start, _, _ in tokens:
|
||||
for type_, string_, start, _, _ in tokens:
|
||||
# ENCODING is only defined in Python 3.x
|
||||
if type == getattr(tokenize, 'ENCODING', None):
|
||||
if type_ == getattr(tokenize, 'ENCODING', None):
|
||||
continue
|
||||
elif type in [tokenize.NAME, tokenize.NUMBER]:
|
||||
elif type_ in [tokenize.NAME, tokenize.NUMBER]:
|
||||
current_selector = FormatSelector(SINGLE, string_, [])
|
||||
elif type == tokenize.OP:
|
||||
elif type_ == tokenize.OP:
|
||||
if string_ == ')':
|
||||
if not inside_group:
|
||||
# ')' will be handled by the parentheses group
|
||||
@@ -2287,7 +2302,7 @@ class YoutubeDL:
|
||||
current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
|
||||
else:
|
||||
raise syntax_error(f'Operator not recognized: "{string_}"', start)
|
||||
elif type == tokenize.ENDMARKER:
|
||||
elif type_ == tokenize.ENDMARKER:
|
||||
break
|
||||
if current_selector:
|
||||
selectors.append(current_selector)
|
||||
@@ -2362,7 +2377,7 @@ class YoutubeDL:
|
||||
'acodec': the_only_audio.get('acodec'),
|
||||
'abr': the_only_audio.get('abr'),
|
||||
'asr': the_only_audio.get('asr'),
|
||||
'audio_channels': the_only_audio.get('audio_channels')
|
||||
'audio_channels': the_only_audio.get('audio_channels'),
|
||||
})
|
||||
|
||||
return new_dict
|
||||
@@ -2443,9 +2458,9 @@ class YoutubeDL:
|
||||
|
||||
format_fallback = not format_type and not format_modified # for b, w
|
||||
_filter_f = (
|
||||
(lambda f: f.get('%scodec' % format_type) != 'none')
|
||||
(lambda f: f.get(f'{format_type}codec') != 'none')
|
||||
if format_type and format_modified # bv*, ba*, wv*, wa*
|
||||
else (lambda f: f.get('%scodec' % not_format_type) == 'none')
|
||||
else (lambda f: f.get(f'{not_format_type}codec') == 'none')
|
||||
if format_type # bv, ba, wv, wa
|
||||
else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
|
||||
if not format_modified # b, w
|
||||
@@ -2513,7 +2528,7 @@ class YoutubeDL:
|
||||
|
||||
def __next__(self):
|
||||
if self.counter >= len(self.tokens):
|
||||
raise StopIteration()
|
||||
raise StopIteration
|
||||
value = self.tokens[self.counter]
|
||||
self.counter += 1
|
||||
return value
|
||||
@@ -2596,7 +2611,7 @@ class YoutubeDL:
|
||||
self._sort_thumbnails(thumbnails)
|
||||
for i, t in enumerate(thumbnails):
|
||||
if t.get('id') is None:
|
||||
t['id'] = '%d' % i
|
||||
t['id'] = str(i)
|
||||
if t.get('width') and t.get('height'):
|
||||
t['resolution'] = '%dx%d' % (t['width'], t['height'])
|
||||
t['url'] = sanitize_url(t['url'])
|
||||
@@ -2657,8 +2672,8 @@ class YoutubeDL:
|
||||
# Auto generate title fields corresponding to the *_number fields when missing
|
||||
# in order to always have clean titles. This is very common for TV series.
|
||||
for field in ('chapter', 'season', 'episode'):
|
||||
if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||
if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field):
|
||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number'])
|
||||
|
||||
for old_key, new_key in self._deprecated_multivalue_fields.items():
|
||||
if new_key in info_dict and old_key in info_dict:
|
||||
@@ -2690,8 +2705,8 @@ class YoutubeDL:
|
||||
|
||||
def report_force_conversion(field, field_not, conversion):
|
||||
self.report_warning(
|
||||
'"%s" field is not %s - forcing %s conversion, there is an error in extractor'
|
||||
% (field, field_not, conversion))
|
||||
f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
|
||||
'there is an error in extractor')
|
||||
|
||||
def sanitize_string_field(info, string_field):
|
||||
field = info.get(string_field)
|
||||
@@ -2808,28 +2823,28 @@ class YoutubeDL:
|
||||
if not formats:
|
||||
self.raise_no_formats(info_dict)
|
||||
|
||||
for format in formats:
|
||||
sanitize_string_field(format, 'format_id')
|
||||
sanitize_numeric_fields(format)
|
||||
format['url'] = sanitize_url(format['url'])
|
||||
if format.get('ext') is None:
|
||||
format['ext'] = determine_ext(format['url']).lower()
|
||||
if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
|
||||
if format.get('acodec') is None:
|
||||
format['acodec'] = format['ext']
|
||||
if format.get('protocol') is None:
|
||||
format['protocol'] = determine_protocol(format)
|
||||
if format.get('resolution') is None:
|
||||
format['resolution'] = self.format_resolution(format, default=None)
|
||||
if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
|
||||
format['dynamic_range'] = 'SDR'
|
||||
if format.get('aspect_ratio') is None:
|
||||
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
|
||||
for fmt in formats:
|
||||
sanitize_string_field(fmt, 'format_id')
|
||||
sanitize_numeric_fields(fmt)
|
||||
fmt['url'] = sanitize_url(fmt['url'])
|
||||
if fmt.get('ext') is None:
|
||||
fmt['ext'] = determine_ext(fmt['url']).lower()
|
||||
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
|
||||
if fmt.get('acodec') is None:
|
||||
fmt['acodec'] = fmt['ext']
|
||||
if fmt.get('protocol') is None:
|
||||
fmt['protocol'] = determine_protocol(fmt)
|
||||
if fmt.get('resolution') is None:
|
||||
fmt['resolution'] = self.format_resolution(fmt, default=None)
|
||||
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
|
||||
fmt['dynamic_range'] = 'SDR'
|
||||
if fmt.get('aspect_ratio') is None:
|
||||
fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
|
||||
# For fragmented formats, "tbr" is often max bitrate and not average
|
||||
if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
|
||||
and not format.get('filesize') and not format.get('filesize_approx')):
|
||||
format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration'))
|
||||
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
|
||||
if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
|
||||
and not fmt.get('filesize') and not fmt.get('filesize_approx')):
|
||||
fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
|
||||
fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)
|
||||
|
||||
# Safeguard against old/insecure infojson when using --load-info-json
|
||||
if info_dict.get('http_headers'):
|
||||
@@ -2842,36 +2857,36 @@ class YoutubeDL:
|
||||
|
||||
self.sort_formats({
|
||||
'formats': formats,
|
||||
'_format_sort_fields': info_dict.get('_format_sort_fields')
|
||||
'_format_sort_fields': info_dict.get('_format_sort_fields'),
|
||||
})
|
||||
|
||||
# Sanitize and group by format_id
|
||||
formats_dict = {}
|
||||
for i, format in enumerate(formats):
|
||||
if not format.get('format_id'):
|
||||
format['format_id'] = str(i)
|
||||
for i, fmt in enumerate(formats):
|
||||
if not fmt.get('format_id'):
|
||||
fmt['format_id'] = str(i)
|
||||
else:
|
||||
# Sanitize format_id from characters used in format selector expression
|
||||
format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
|
||||
formats_dict.setdefault(format['format_id'], []).append(format)
|
||||
fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
|
||||
formats_dict.setdefault(fmt['format_id'], []).append(fmt)
|
||||
|
||||
# Make sure all formats have unique format_id
|
||||
common_exts = set(itertools.chain(*self._format_selection_exts.values()))
|
||||
for format_id, ambiguous_formats in formats_dict.items():
|
||||
ambigious_id = len(ambiguous_formats) > 1
|
||||
for i, format in enumerate(ambiguous_formats):
|
||||
for i, fmt in enumerate(ambiguous_formats):
|
||||
if ambigious_id:
|
||||
format['format_id'] = '%s-%d' % (format_id, i)
|
||||
fmt['format_id'] = f'{format_id}-{i}'
|
||||
# Ensure there is no conflict between id and ext in format selection
|
||||
# See https://github.com/yt-dlp/yt-dlp/issues/1282
|
||||
if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
|
||||
format['format_id'] = 'f%s' % format['format_id']
|
||||
if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
|
||||
fmt['format_id'] = 'f{}'.format(fmt['format_id'])
|
||||
|
||||
if format.get('format') is None:
|
||||
format['format'] = '{id} - {res}{note}'.format(
|
||||
id=format['format_id'],
|
||||
res=self.format_resolution(format),
|
||||
note=format_field(format, 'format_note', ' (%s)'),
|
||||
if fmt.get('format') is None:
|
||||
fmt['format'] = '{id} - {res}{note}'.format(
|
||||
id=fmt['format_id'],
|
||||
res=self.format_resolution(fmt),
|
||||
note=format_field(fmt, 'format_note', ' (%s)'),
|
||||
)
|
||||
|
||||
if self.params.get('check_formats') is True:
|
||||
@@ -2928,12 +2943,7 @@ class YoutubeDL:
|
||||
self.write_debug(f'Default format spec: {req_format}')
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
|
||||
formats_to_download = list(format_selector({
|
||||
'formats': formats,
|
||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||
}))
|
||||
formats_to_download = self._select_formats(formats, format_selector)
|
||||
if interactive_format_selection and not formats_to_download:
|
||||
self.report_error('Requested format is not available', tb=False, is_error=False)
|
||||
continue
|
||||
@@ -2998,7 +3008,7 @@ class YoutubeDL:
|
||||
info_dict['requested_downloads'] = downloaded_formats
|
||||
info_dict = self.run_all_pps('after_video', info_dict)
|
||||
if max_downloads_reached:
|
||||
raise MaxDownloadsReached()
|
||||
raise MaxDownloadsReached
|
||||
|
||||
# We update the info dict with the selected best quality format (backwards compatibility)
|
||||
info_dict.update(best_format)
|
||||
@@ -3059,8 +3069,8 @@ class YoutubeDL:
|
||||
else:
|
||||
f = formats[-1]
|
||||
self.report_warning(
|
||||
'No subtitle format found matching "%s" for language %s, '
|
||||
'using %s' % (formats_query, lang, f['ext']))
|
||||
'No subtitle format found matching "{}" for language {}, '
|
||||
'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext']))
|
||||
subs[lang] = f
|
||||
return subs
|
||||
|
||||
@@ -3214,7 +3224,7 @@ class YoutubeDL:
|
||||
|
||||
def check_max_downloads():
|
||||
if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
|
||||
raise MaxDownloadsReached()
|
||||
raise MaxDownloadsReached
|
||||
|
||||
if self.params.get('simulate'):
|
||||
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
|
||||
@@ -3385,7 +3395,7 @@ class YoutubeDL:
|
||||
for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
|
||||
f['filepath'] = fname = prepend_extension(
|
||||
correct_ext(temp_filename, info_dict['ext']),
|
||||
'f%s' % f['format_id'], info_dict['ext'])
|
||||
'f{}'.format(f['format_id']), info_dict['ext'])
|
||||
downloaded.append(fname)
|
||||
info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
|
||||
success, real_download = self.dl(temp_filename, info_dict)
|
||||
@@ -3418,7 +3428,7 @@ class YoutubeDL:
|
||||
if temp_filename != '-':
|
||||
fname = prepend_extension(
|
||||
correct_ext(temp_filename, new_info['ext']),
|
||||
'f%s' % f['format_id'], new_info['ext'])
|
||||
'f{}'.format(f['format_id']), new_info['ext'])
|
||||
if not self._ensure_dir_exists(fname):
|
||||
return
|
||||
f['filepath'] = fname
|
||||
@@ -3446,11 +3456,11 @@ class YoutubeDL:
|
||||
dl_filename = dl_filename or temp_filename
|
||||
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
|
||||
except network_exceptions as err:
|
||||
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
|
||||
self.report_error(f'unable to download video data: {err}')
|
||||
return
|
||||
except OSError as err:
|
||||
raise UnavailableVideoError(err)
|
||||
except (ContentTooShortError, ) as err:
|
||||
except ContentTooShortError as err:
|
||||
self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
|
||||
return
|
||||
|
||||
@@ -3517,13 +3527,13 @@ class YoutubeDL:
|
||||
try:
|
||||
replace_info_dict(self.post_process(dl_filename, info_dict))
|
||||
except PostProcessingError as err:
|
||||
self.report_error('Postprocessing: %s' % str(err))
|
||||
self.report_error(f'Postprocessing: {err}')
|
||||
return
|
||||
try:
|
||||
for ph in self._post_hooks:
|
||||
ph(info_dict['filepath'])
|
||||
except Exception as err:
|
||||
self.report_error('post hooks: %s' % str(err))
|
||||
self.report_error(f'post hooks: {err}')
|
||||
return
|
||||
info_dict['__write_download_archive'] = True
|
||||
|
||||
@@ -3590,7 +3600,7 @@ class YoutubeDL:
|
||||
|
||||
@staticmethod
|
||||
def sanitize_info(info_dict, remove_private_keys=False):
|
||||
''' Sanitize the infodict for converting to json '''
|
||||
""" Sanitize the infodict for converting to json """
|
||||
if info_dict is None:
|
||||
return info_dict
|
||||
info_dict.setdefault('epoch', int(time.time()))
|
||||
@@ -3625,7 +3635,7 @@ class YoutubeDL:
|
||||
|
||||
@staticmethod
|
||||
def filter_requested_info(info_dict, actually_filter=True):
|
||||
''' Alias of sanitize_info for backward compatibility '''
|
||||
""" Alias of sanitize_info for backward compatibility """
|
||||
return YoutubeDL.sanitize_info(info_dict, actually_filter)
|
||||
|
||||
def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
|
||||
@@ -3645,7 +3655,7 @@ class YoutubeDL:
|
||||
actual_post_extract(video_dict or {})
|
||||
return
|
||||
|
||||
post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
|
||||
post_extractor = info_dict.pop('__post_extractor', None) or dict
|
||||
info_dict.update(post_extractor())
|
||||
|
||||
actual_post_extract(info_dict or {})
|
||||
@@ -3750,7 +3760,7 @@ class YoutubeDL:
|
||||
if format.get('width') and format.get('height'):
|
||||
return '%dx%d' % (format['width'], format['height'])
|
||||
elif format.get('height'):
|
||||
return '%sp' % format['height']
|
||||
return '{}p'.format(format['height'])
|
||||
elif format.get('width'):
|
||||
return '%dx?' % format['width']
|
||||
return default
|
||||
@@ -3767,7 +3777,7 @@ class YoutubeDL:
|
||||
if fdict.get('language'):
|
||||
if res:
|
||||
res += ' '
|
||||
res += '[%s]' % fdict['language']
|
||||
res += '[{}]'.format(fdict['language'])
|
||||
if fdict.get('format_note') is not None:
|
||||
if res:
|
||||
res += ' '
|
||||
@@ -3779,7 +3789,7 @@ class YoutubeDL:
|
||||
if fdict.get('container') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
res += '%s container' % fdict['container']
|
||||
res += '{} container'.format(fdict['container'])
|
||||
if (fdict.get('vcodec') is not None
|
||||
and fdict.get('vcodec') != 'none'):
|
||||
if res:
|
||||
@@ -3794,7 +3804,7 @@ class YoutubeDL:
|
||||
if fdict.get('fps') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
res += '%sfps' % fdict['fps']
|
||||
res += '{}fps'.format(fdict['fps'])
|
||||
if fdict.get('acodec') is not None:
|
||||
if res:
|
||||
res += ', '
|
||||
@@ -3837,7 +3847,7 @@ class YoutubeDL:
|
||||
format_field(f, 'format_id'),
|
||||
format_field(f, 'ext'),
|
||||
self.format_resolution(f),
|
||||
self._format_note(f)
|
||||
self._format_note(f),
|
||||
] for f in formats if (f.get('preference') or 0) >= -1000]
|
||||
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
|
||||
|
||||
@@ -3943,11 +3953,11 @@ class YoutubeDL:
|
||||
from .extractor.extractors import _LAZY_LOADER
|
||||
from .extractor.extractors import (
|
||||
_PLUGIN_CLASSES as plugin_ies,
|
||||
_PLUGIN_OVERRIDES as plugin_ie_overrides
|
||||
_PLUGIN_OVERRIDES as plugin_ie_overrides,
|
||||
)
|
||||
|
||||
def get_encoding(stream):
|
||||
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
|
||||
ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
|
||||
additional_info = []
|
||||
if os.environ.get('TERM', '').lower() == 'dumb':
|
||||
additional_info.append('dumb')
|
||||
@@ -3958,13 +3968,13 @@ class YoutubeDL:
|
||||
ret = f'{ret} ({",".join(additional_info)})'
|
||||
return ret
|
||||
|
||||
encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
|
||||
encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
|
||||
locale.getpreferredencoding(),
|
||||
sys.getfilesystemencoding(),
|
||||
self.get_encoding(),
|
||||
', '.join(
|
||||
f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
|
||||
if stream is not None and key != 'console')
|
||||
if stream is not None and key != 'console'),
|
||||
)
|
||||
|
||||
logger = self.params.get('logger')
|
||||
@@ -3996,7 +4006,7 @@ class YoutubeDL:
|
||||
else:
|
||||
write_debug('Lazy loading extractors is disabled')
|
||||
if self.params['compat_opts']:
|
||||
write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
|
||||
write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))
|
||||
|
||||
if current_git_head():
|
||||
write_debug(f'Git HEAD: {current_git_head()}')
|
||||
@@ -4005,14 +4015,14 @@ class YoutubeDL:
|
||||
exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
|
||||
ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
|
||||
if ffmpeg_features:
|
||||
exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
|
||||
exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features)))
|
||||
|
||||
exe_versions['rtmpdump'] = rtmpdump_version()
|
||||
exe_versions['phantomjs'] = PhantomJSwrapper._version()
|
||||
exe_str = ', '.join(
|
||||
f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
|
||||
) or 'none'
|
||||
write_debug('exe versions: %s' % exe_str)
|
||||
write_debug(f'exe versions: {exe_str}')
|
||||
|
||||
from .compat.compat_utils import get_package_info
|
||||
from .dependencies import available_dependencies
|
||||
@@ -4024,7 +4034,7 @@ class YoutubeDL:
|
||||
write_debug(f'Proxy map: {self.proxies}')
|
||||
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||
display_list = ['%s%s' % (
|
||||
display_list = ['{}{}'.format(
|
||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||
for name, klass in plugins.items()]
|
||||
if plugin_type == 'Extractor':
|
||||
@@ -4041,14 +4051,13 @@ class YoutubeDL:
|
||||
# Not implemented
|
||||
if False and self.params.get('call_home'):
|
||||
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
|
||||
write_debug('Public IP address: %s' % ipaddr)
|
||||
write_debug(f'Public IP address: {ipaddr}')
|
||||
latest_version = self.urlopen(
|
||||
'https://yt-dl.org/latest/version').read().decode()
|
||||
if version_tuple(latest_version) > version_tuple(__version__):
|
||||
self.report_warning(
|
||||
'You are using an outdated version (newest version: %s)! '
|
||||
'See https://yt-dl.org/update if you need help updating.' %
|
||||
latest_version)
|
||||
f'You are using an outdated version (newest version: {latest_version})! '
|
||||
'See https://yt-dl.org/update if you need help updating.')
|
||||
|
||||
@functools.cached_property
|
||||
def proxies(self):
|
||||
@@ -4082,7 +4091,7 @@ class YoutubeDL:
|
||||
return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
|
||||
|
||||
def _get_available_impersonate_targets(self):
|
||||
# todo(future): make available as public API
|
||||
# TODO(future): make available as public API
|
||||
return [
|
||||
(target, rh.RH_NAME)
|
||||
for rh in self._request_director.handlers.values()
|
||||
@@ -4091,7 +4100,7 @@ class YoutubeDL:
|
||||
]
|
||||
|
||||
def _impersonate_target_available(self, target):
|
||||
# todo(future): make available as public API
|
||||
# TODO(future): make available as public API
|
||||
return any(
|
||||
rh.is_supported_target(target)
|
||||
for rh in self._request_director.handlers.values()
|
||||
@@ -4217,7 +4226,7 @@ class YoutubeDL:
|
||||
return encoding
|
||||
|
||||
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
|
||||
''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
|
||||
""" Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """
|
||||
if overwrite is None:
|
||||
overwrite = self.params.get('overwrites', True)
|
||||
if not self.params.get('writeinfojson'):
|
||||
@@ -4240,7 +4249,7 @@ class YoutubeDL:
|
||||
return None
|
||||
|
||||
def _write_description(self, label, ie_result, descfn):
|
||||
''' Write description and returns True = written, False = skip, None = error '''
|
||||
""" Write description and returns True = written, False = skip, None = error """
|
||||
if not self.params.get('writedescription'):
|
||||
return False
|
||||
elif not descfn:
|
||||
@@ -4264,7 +4273,7 @@ class YoutubeDL:
|
||||
return True
|
||||
|
||||
def _write_subtitles(self, info_dict, filename):
|
||||
''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
|
||||
""" Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error"""
|
||||
ret = []
|
||||
subtitles = info_dict.get('requested_subtitles')
|
||||
if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
|
||||
@@ -4311,7 +4320,7 @@ class YoutubeDL:
|
||||
self.dl(sub_filename, sub_copy, subtitle=True)
|
||||
sub_info['filepath'] = sub_filename
|
||||
ret.append(sub_filename)
|
||||
except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
|
||||
except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
|
||||
msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
|
||||
if self.params.get('ignoreerrors') is not True: # False or 'only_download'
|
||||
if not self.params.get('ignoreerrors'):
|
||||
@@ -4321,7 +4330,7 @@ class YoutubeDL:
|
||||
return ret
|
||||
|
||||
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
|
||||
''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
|
||||
""" Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """
|
||||
write_all = self.params.get('write_all_thumbnails', False)
|
||||
thumbnails, ret = [], []
|
||||
if write_all or self.params.get('writethumbnail', False):
|
||||
@@ -4349,8 +4358,8 @@ class YoutubeDL:
|
||||
|
||||
existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
|
||||
if existing_thumb:
|
||||
self.to_screen('[info] %s is already present' % (
|
||||
thumb_display_id if multiple else f'{label} thumbnail').capitalize())
|
||||
self.to_screen('[info] {} is already present'.format((
|
||||
thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
|
||||
t['filepath'] = existing_thumb
|
||||
ret.append(existing_thumb)
|
||||
else:
|
||||
|
||||
@@ -14,7 +14,7 @@ import os
|
||||
import re
|
||||
import traceback
|
||||
|
||||
from .compat import compat_os_name, compat_shlex_quote
|
||||
from .compat import compat_os_name
|
||||
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
|
||||
from .downloader.external import get_external_downloader
|
||||
from .extractor import list_extractor_classes
|
||||
@@ -58,6 +58,7 @@ from .utils import (
|
||||
read_stdin,
|
||||
render_table,
|
||||
setproctitle,
|
||||
shell_quote,
|
||||
traverse_obj,
|
||||
variadic,
|
||||
write_string,
|
||||
@@ -115,9 +116,9 @@ def print_extractor_information(opts, urls):
|
||||
ie.description(markdown=False, search_examples=_SEARCHES)
|
||||
for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
|
||||
elif opts.ap_list_mso:
|
||||
out = 'Supported TV Providers:\n%s\n' % render_table(
|
||||
out = 'Supported TV Providers:\n{}\n'.format(render_table(
|
||||
['mso', 'mso name'],
|
||||
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])
|
||||
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
|
||||
else:
|
||||
return False
|
||||
write_string(out, out=sys.stdout)
|
||||
@@ -129,7 +130,7 @@ def set_compat_opts(opts):
|
||||
if name not in opts.compat_opts:
|
||||
return False
|
||||
opts.compat_opts.discard(name)
|
||||
opts.compat_opts.update(['*%s' % name])
|
||||
opts.compat_opts.update([f'*{name}'])
|
||||
return True
|
||||
|
||||
def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
|
||||
@@ -222,7 +223,7 @@ def validate_options(opts):
|
||||
validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
|
||||
|
||||
if opts.wait_for_video is not None:
|
||||
min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
|
||||
min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None])
|
||||
validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
|
||||
'time range to wait for video', opts.wait_for_video)
|
||||
validate_minmax(min_wait, max_wait, 'time range to wait for video')
|
||||
@@ -264,9 +265,9 @@ def validate_options(opts):
|
||||
# Retry sleep function
|
||||
def parse_sleep_func(expr):
|
||||
NUMBER_RE = r'\d+(?:\.\d+)?'
|
||||
op, start, limit, step, *_ = tuple(re.fullmatch(
|
||||
op, start, limit, step, *_ = (*tuple(re.fullmatch(
|
||||
rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
|
||||
expr.strip()).groups()) + (None, None)
|
||||
expr.strip()).groups()), None, None)
|
||||
|
||||
if op == 'exp':
|
||||
return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
|
||||
@@ -396,13 +397,13 @@ def validate_options(opts):
|
||||
# MetadataParser
|
||||
def metadataparser_actions(f):
|
||||
if isinstance(f, str):
|
||||
cmd = '--parse-metadata %s' % compat_shlex_quote(f)
|
||||
cmd = f'--parse-metadata {shell_quote(f)}'
|
||||
try:
|
||||
actions = [MetadataFromFieldPP.to_action(f)]
|
||||
except Exception as err:
|
||||
raise ValueError(f'{cmd} is invalid; {err}')
|
||||
else:
|
||||
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
|
||||
cmd = f'--replace-in-metadata {shell_quote(f)}'
|
||||
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
|
||||
|
||||
for action in actions:
|
||||
@@ -413,7 +414,7 @@ def validate_options(opts):
|
||||
yield action
|
||||
|
||||
if opts.metafromtitle is not None:
|
||||
opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle)
|
||||
opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}')
|
||||
opts.parse_metadata = {
|
||||
k: list(itertools.chain(*map(metadataparser_actions, v)))
|
||||
for k, v in opts.parse_metadata.items()
|
||||
@@ -602,7 +603,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'MetadataParser',
|
||||
'actions': actions,
|
||||
'when': when
|
||||
'when': when,
|
||||
}
|
||||
sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
|
||||
if sponsorblock_query:
|
||||
@@ -610,19 +611,19 @@ def get_postprocessors(opts):
|
||||
'key': 'SponsorBlock',
|
||||
'categories': sponsorblock_query,
|
||||
'api': opts.sponsorblock_api,
|
||||
'when': 'after_filter'
|
||||
'when': 'after_filter',
|
||||
}
|
||||
if opts.convertsubtitles:
|
||||
yield {
|
||||
'key': 'FFmpegSubtitlesConvertor',
|
||||
'format': opts.convertsubtitles,
|
||||
'when': 'before_dl'
|
||||
'when': 'before_dl',
|
||||
}
|
||||
if opts.convertthumbnails:
|
||||
yield {
|
||||
'key': 'FFmpegThumbnailsConvertor',
|
||||
'format': opts.convertthumbnails,
|
||||
'when': 'before_dl'
|
||||
'when': 'before_dl',
|
||||
}
|
||||
if opts.extractaudio:
|
||||
yield {
|
||||
@@ -647,7 +648,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'FFmpegEmbedSubtitle',
|
||||
# already_have_subtitle = True prevents the file from being deleted after embedding
|
||||
'already_have_subtitle': opts.writesubtitles and keep_subs
|
||||
'already_have_subtitle': opts.writesubtitles and keep_subs,
|
||||
}
|
||||
if not opts.writeautomaticsub and keep_subs:
|
||||
opts.writesubtitles = True
|
||||
@@ -660,7 +661,7 @@ def get_postprocessors(opts):
|
||||
'remove_sponsor_segments': opts.sponsorblock_remove,
|
||||
'remove_ranges': opts.remove_ranges,
|
||||
'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
|
||||
'force_keyframes': opts.force_keyframes_at_cuts
|
||||
'force_keyframes': opts.force_keyframes_at_cuts,
|
||||
}
|
||||
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
|
||||
# FFmpegExtractAudioPP as containers before conversion may not support
|
||||
@@ -694,7 +695,7 @@ def get_postprocessors(opts):
|
||||
yield {
|
||||
'key': 'EmbedThumbnail',
|
||||
# already_have_thumbnail = True prevents the file from being deleted after embedding
|
||||
'already_have_thumbnail': opts.writethumbnail
|
||||
'already_have_thumbnail': opts.writethumbnail,
|
||||
}
|
||||
if not opts.writethumbnail:
|
||||
opts.writethumbnail = True
|
||||
@@ -741,7 +742,7 @@ def parse_options(argv=None):
|
||||
print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:])
|
||||
any_getting = any(getattr(opts, k) for k in (
|
||||
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
|
||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
|
||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl',
|
||||
))
|
||||
if opts.quiet is None:
|
||||
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
|
||||
@@ -1002,7 +1003,7 @@ def _real_main(argv=None):
|
||||
def make_row(target, handler):
|
||||
return [
|
||||
join_nonempty(target.client.title(), target.version, delim='-') or '-',
|
||||
join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-',
|
||||
join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-',
|
||||
handler,
|
||||
]
|
||||
|
||||
|
||||
@@ -68,7 +68,7 @@ def pad_block(block, padding_mode):
|
||||
raise NotImplementedError(f'Padding mode {padding_mode} is not implemented')
|
||||
|
||||
if padding_mode == 'iso7816' and padding_size:
|
||||
block = block + [0x80] # NB: += mutates list
|
||||
block = [*block, 0x80] # NB: += mutates list
|
||||
padding_size -= 1
|
||||
|
||||
return block + [PADDING_BYTE[padding_mode]] * padding_size
|
||||
@@ -110,9 +110,7 @@ def aes_ecb_decrypt(data, key, iv=None):
|
||||
for i in range(block_count):
|
||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||
encrypted_data += aes_decrypt(block, expanded_key)
|
||||
encrypted_data = encrypted_data[:len(data)]
|
||||
|
||||
return encrypted_data
|
||||
return encrypted_data[:len(data)]
|
||||
|
||||
|
||||
def aes_ctr_decrypt(data, key, iv):
|
||||
@@ -148,9 +146,7 @@ def aes_ctr_encrypt(data, key, iv):
|
||||
|
||||
cipher_counter_block = aes_encrypt(counter_block, expanded_key)
|
||||
encrypted_data += xor(block, cipher_counter_block)
|
||||
encrypted_data = encrypted_data[:len(data)]
|
||||
|
||||
return encrypted_data
|
||||
return encrypted_data[:len(data)]
|
||||
|
||||
|
||||
def aes_cbc_decrypt(data, key, iv):
|
||||
@@ -174,9 +170,7 @@ def aes_cbc_decrypt(data, key, iv):
|
||||
decrypted_block = aes_decrypt(block, expanded_key)
|
||||
decrypted_data += xor(decrypted_block, previous_cipher_block)
|
||||
previous_cipher_block = block
|
||||
decrypted_data = decrypted_data[:len(data)]
|
||||
|
||||
return decrypted_data
|
||||
return decrypted_data[:len(data)]
|
||||
|
||||
|
||||
def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'):
|
||||
@@ -224,7 +218,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||
hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
|
||||
|
||||
if len(nonce) == 12:
|
||||
j0 = nonce + [0, 0, 0, 1]
|
||||
j0 = [*nonce, 0, 0, 0, 1]
|
||||
else:
|
||||
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
|
||||
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big'))
|
||||
@@ -242,11 +236,11 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
|
||||
data
|
||||
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad
|
||||
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data
|
||||
+ ((len(data) * 8).to_bytes(8, 'big'))) # length of data
|
||||
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data
|
||||
)
|
||||
|
||||
if tag != aes_ctr_encrypt(s_tag, key, j0):
|
||||
raise ValueError("Mismatching authentication tag")
|
||||
raise ValueError('Mismatching authentication tag')
|
||||
|
||||
return decrypted_data
|
||||
|
||||
@@ -288,9 +282,7 @@ def aes_decrypt(data, expanded_key):
|
||||
data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
|
||||
data = shift_rows_inv(data)
|
||||
data = sub_bytes_inv(data)
|
||||
data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
|
||||
return data
|
||||
return xor(data, expanded_key[:BLOCK_SIZE_BYTES])
|
||||
|
||||
|
||||
def aes_decrypt_text(data, password, key_size_bytes):
|
||||
@@ -318,9 +310,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
|
||||
cipher = data[NONCE_LENGTH_BYTES:]
|
||||
|
||||
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
|
||||
plaintext = intlist_to_bytes(decrypted_data)
|
||||
|
||||
return plaintext
|
||||
return intlist_to_bytes(decrypted_data)
|
||||
|
||||
|
||||
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
|
||||
@@ -428,9 +418,7 @@ def key_expansion(data):
|
||||
for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
|
||||
temp = data[-4:]
|
||||
data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
|
||||
data = data[:expanded_key_size_bytes]
|
||||
|
||||
return data
|
||||
return data[:expanded_key_size_bytes]
|
||||
|
||||
|
||||
def iter_vector(iv):
|
||||
@@ -511,7 +499,7 @@ def block_product(block_x, block_y):
|
||||
# NIST SP 800-38D, Algorithm 1
|
||||
|
||||
if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
|
||||
raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
|
||||
raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes')
|
||||
|
||||
block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
|
||||
block_v = block_y[:]
|
||||
@@ -534,7 +522,7 @@ def ghash(subkey, data):
|
||||
# NIST SP 800-38D, Algorithm 2
|
||||
|
||||
if len(data) % BLOCK_SIZE_BYTES:
|
||||
raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
|
||||
raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes')
|
||||
|
||||
last_y = [0] * BLOCK_SIZE_BYTES
|
||||
for i in range(0, len(data), BLOCK_SIZE_BYTES):
|
||||
|
||||
@@ -81,10 +81,10 @@ class Cache:
|
||||
|
||||
cachedir = self._get_root_dir()
|
||||
if not any((term in cachedir) for term in ('cache', 'tmp')):
|
||||
raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
|
||||
raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')
|
||||
|
||||
self._ydl.to_screen(
|
||||
'Removing cache dir %s .' % cachedir, skip_eol=True)
|
||||
f'Removing cache dir {cachedir} .', skip_eol=True)
|
||||
if os.path.exists(cachedir):
|
||||
self._ydl.to_screen('.', skip_eol=True)
|
||||
shutil.rmtree(cachedir)
|
||||
|
||||
@@ -35,7 +35,7 @@ from .compat_utils import passthrough_module
|
||||
from ..dependencies import brotli as compat_brotli # noqa: F401
|
||||
from ..dependencies import websockets as compat_websockets # noqa: F401
|
||||
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
|
||||
from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401
|
||||
from ..networking.exceptions import HTTPError as compat_HTTPError
|
||||
|
||||
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
|
||||
|
||||
|
||||
@@ -7,6 +7,6 @@ passthrough_module(__name__, 'functools')
|
||||
del passthrough_module
|
||||
|
||||
try:
|
||||
cache # >= 3.9
|
||||
_ = cache # >= 3.9
|
||||
except NameError:
|
||||
cache = lru_cache(maxsize=None)
|
||||
|
||||
@@ -46,7 +46,7 @@ from .utils import (
|
||||
from .utils._utils import _YDLLogger
|
||||
from .utils.networking import normalize_url
|
||||
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
|
||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||
|
||||
|
||||
@@ -146,7 +146,7 @@ def _extract_firefox_cookies(profile, container, logger):
|
||||
identities = json.load(containers).get('identities', [])
|
||||
container_id = next((context.get('userContextId') for context in identities if container in (
|
||||
context.get('name'),
|
||||
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group())
|
||||
try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()),
|
||||
)), None)
|
||||
if not isinstance(container_id, int):
|
||||
raise ValueError(f'could not find firefox container "{container}" in containers.json')
|
||||
@@ -219,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
||||
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
||||
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
||||
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
|
||||
}[browser_name]
|
||||
|
||||
elif sys.platform == 'darwin':
|
||||
@@ -230,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||
'whale': os.path.join(appdata, 'Naver/Whale'),
|
||||
}[browser_name]
|
||||
|
||||
else:
|
||||
@@ -241,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': os.path.join(config, 'microsoft-edge'),
|
||||
'opera': os.path.join(config, 'opera'),
|
||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||
'whale': os.path.join(config, 'naver-whale'),
|
||||
}[browser_name]
|
||||
|
||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||
@@ -252,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||
'whale': 'Whale',
|
||||
}[browser_name]
|
||||
|
||||
browsers_without_profiles = {'opera'}
|
||||
@@ -259,7 +263,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||
return {
|
||||
'browser_dir': browser_dir,
|
||||
'keyring_name': keyring_name,
|
||||
'supports_profiles': browser_name not in browsers_without_profiles
|
||||
'supports_profiles': browser_name not in browsers_without_profiles,
|
||||
}
|
||||
|
||||
|
||||
@@ -347,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
|
||||
if value is None:
|
||||
return is_encrypted, None
|
||||
|
||||
# In chrome, session cookies have expires_utc set to 0
|
||||
# In our cookie-store, cookies that do not expire should have expires set to None
|
||||
if not expires_utc:
|
||||
expires_utc = None
|
||||
|
||||
return is_encrypted, http.cookiejar.Cookie(
|
||||
version=0, name=name, value=value, port=None, port_specified=False,
|
||||
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
||||
@@ -817,7 +826,7 @@ def _choose_linux_keyring(logger):
|
||||
elif desktop_environment == _LinuxDesktopEnvironment.KDE6:
|
||||
linux_keyring = _LinuxKeyring.KWALLET6
|
||||
elif desktop_environment in (
|
||||
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER
|
||||
_LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER,
|
||||
):
|
||||
linux_keyring = _LinuxKeyring.BASICTEXT
|
||||
else:
|
||||
@@ -852,7 +861,7 @@ def _get_kwallet_network_wallet(keyring, logger):
|
||||
'dbus-send', '--session', '--print-reply=literal',
|
||||
f'--dest={service_name}',
|
||||
wallet_path,
|
||||
'org.kde.KWallet.networkWallet'
|
||||
'org.kde.KWallet.networkWallet',
|
||||
], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
|
||||
|
||||
if returncode:
|
||||
@@ -882,7 +891,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger):
|
||||
'kwallet-query',
|
||||
'--read-password', f'{browser_keyring_name} Safe Storage',
|
||||
'--folder', f'{browser_keyring_name} Keys',
|
||||
network_wallet
|
||||
network_wallet,
|
||||
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
|
||||
|
||||
if returncode:
|
||||
@@ -922,9 +931,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger):
|
||||
for item in col.get_all_items():
|
||||
if item.get_label() == f'{browser_keyring_name} Safe Storage':
|
||||
return item.get_secret()
|
||||
else:
|
||||
logger.error('failed to read from keyring')
|
||||
return b''
|
||||
logger.error('failed to read from keyring')
|
||||
return b''
|
||||
|
||||
|
||||
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
|
||||
@@ -1044,7 +1052,7 @@ def _decrypt_windows_dpapi(ciphertext, logger):
|
||||
None, # pvReserved: must be NULL
|
||||
None, # pPromptStruct: information about prompts to display
|
||||
0, # dwFlags
|
||||
ctypes.byref(blob_out) # pDataOut
|
||||
ctypes.byref(blob_out), # pDataOut
|
||||
)
|
||||
if not ret:
|
||||
logger.warning('failed to decrypt with DPAPI', only_once=True)
|
||||
@@ -1120,24 +1128,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
_LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')
|
||||
|
||||
_RESERVED = {
|
||||
"expires",
|
||||
"path",
|
||||
"comment",
|
||||
"domain",
|
||||
"max-age",
|
||||
"secure",
|
||||
"httponly",
|
||||
"version",
|
||||
"samesite",
|
||||
'expires',
|
||||
'path',
|
||||
'comment',
|
||||
'domain',
|
||||
'max-age',
|
||||
'secure',
|
||||
'httponly',
|
||||
'version',
|
||||
'samesite',
|
||||
}
|
||||
|
||||
_FLAGS = {"secure", "httponly"}
|
||||
_FLAGS = {'secure', 'httponly'}
|
||||
|
||||
# Added 'bad' group to catch the remaining value
|
||||
_COOKIE_PATTERN = re.compile(r"""
|
||||
_COOKIE_PATTERN = re.compile(r'''
|
||||
\s* # Optional whitespace at start of cookie
|
||||
(?P<key> # Start of group 'key'
|
||||
[""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
|
||||
[''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
|
||||
) # End of group 'key'
|
||||
( # Optional group: there may not be a value.
|
||||
\s*=\s* # Equal Sign
|
||||
@@ -1147,7 +1155,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
| # or
|
||||
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
|
||||
| # or
|
||||
[""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string
|
||||
[''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
|
||||
) # End of group 'val'
|
||||
| # or
|
||||
(?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
|
||||
@@ -1155,7 +1163,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
|
||||
)? # End of optional value group
|
||||
\s* # Any number of spaces.
|
||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
||||
""", re.ASCII | re.VERBOSE)
|
||||
''', re.ASCII | re.VERBOSE)
|
||||
|
||||
def load(self, data):
|
||||
# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
|
||||
@@ -1251,14 +1259,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
# with no name, whereas http.cookiejar regards it as a
|
||||
# cookie with no value.
|
||||
name, value = '', name
|
||||
f.write('%s\n' % '\t'.join((
|
||||
f.write('{}\n'.format('\t'.join((
|
||||
cookie.domain,
|
||||
self._true_or_false(cookie.domain.startswith('.')),
|
||||
cookie.path,
|
||||
self._true_or_false(cookie.secure),
|
||||
str_or_none(cookie.expires, default=''),
|
||||
name, value
|
||||
)))
|
||||
name, value,
|
||||
))))
|
||||
|
||||
def save(self, filename=None, ignore_discard=True, ignore_expires=True):
|
||||
"""
|
||||
@@ -1297,10 +1305,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
|
||||
return line
|
||||
cookie_list = line.split('\t')
|
||||
if len(cookie_list) != self._ENTRY_LEN:
|
||||
raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
|
||||
raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
|
||||
cookie = self._CookieFileEntry(*cookie_list)
|
||||
if cookie.expires_at and not cookie.expires_at.isdigit():
|
||||
raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
|
||||
raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
|
||||
return line
|
||||
|
||||
cf = io.StringIO()
|
||||
|
||||
@@ -404,7 +404,7 @@ class FileDownloader:
|
||||
|
||||
def report_resuming_byte(self, resume_len):
|
||||
"""Report attempt to resume at given byte."""
|
||||
self.to_screen('[download] Resuming download at byte %s' % resume_len)
|
||||
self.to_screen(f'[download] Resuming download at byte {resume_len}')
|
||||
|
||||
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
|
||||
"""Report retry"""
|
||||
|
||||
@@ -55,7 +55,7 @@ class ExternalFD(FragmentFD):
|
||||
# correct and expected termination thus all postprocessing
|
||||
# should take place
|
||||
retval = 0
|
||||
self.to_screen('[%s] Interrupted by user' % self.get_basename())
|
||||
self.to_screen(f'[{self.get_basename()}] Interrupted by user')
|
||||
finally:
|
||||
if self._cookies_tempfile:
|
||||
self.try_remove(self._cookies_tempfile)
|
||||
@@ -172,7 +172,7 @@ class ExternalFD(FragmentFD):
|
||||
decrypt_fragment = self.decrypter(info_dict)
|
||||
dest, _ = self.sanitize_open(tmpfilename, 'wb')
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
|
||||
fragment_filename = f'{tmpfilename}-Frag{frag_index}'
|
||||
try:
|
||||
src, _ = self.sanitize_open(fragment_filename, 'rb')
|
||||
except OSError as err:
|
||||
@@ -186,7 +186,7 @@ class ExternalFD(FragmentFD):
|
||||
if not self.params.get('keep_fragments', False):
|
||||
self.try_remove(encodeFilename(fragment_filename))
|
||||
dest.close()
|
||||
self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
|
||||
self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls'))
|
||||
return 0
|
||||
|
||||
def _call_process(self, cmd, info_dict):
|
||||
@@ -336,11 +336,11 @@ class Aria2cFD(ExternalFD):
|
||||
|
||||
if 'fragments' in info_dict:
|
||||
cmd += ['--uri-selector=inorder']
|
||||
url_list_file = '%s.frag.urls' % tmpfilename
|
||||
url_list_file = f'{tmpfilename}.frag.urls'
|
||||
url_list = []
|
||||
for frag_index, fragment in enumerate(info_dict['fragments']):
|
||||
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
|
||||
url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
|
||||
fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}'
|
||||
url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename)))
|
||||
stream, _ = self.sanitize_open(url_list_file, 'wb')
|
||||
stream.write('\n'.join(url_list).encode())
|
||||
stream.close()
|
||||
@@ -357,7 +357,7 @@ class Aria2cFD(ExternalFD):
|
||||
'id': sanitycheck,
|
||||
'method': method,
|
||||
'params': [f'token:{rpc_secret}', *params],
|
||||
}).encode('utf-8')
|
||||
}).encode()
|
||||
request = Request(
|
||||
f'http://localhost:{rpc_port}/jsonrpc',
|
||||
data=d, headers={
|
||||
@@ -416,7 +416,7 @@ class Aria2cFD(ExternalFD):
|
||||
'total_bytes_estimate': total,
|
||||
'eta': (total - downloaded) / (speed or 1),
|
||||
'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
|
||||
'elapsed': time.time() - started
|
||||
'elapsed': time.time() - started,
|
||||
})
|
||||
self._hook_progress(status, info_dict)
|
||||
|
||||
@@ -509,12 +509,12 @@ class FFmpegFD(ExternalFD):
|
||||
proxy = self.params.get('proxy')
|
||||
if proxy:
|
||||
if not re.match(r'^[\da-zA-Z]+://', proxy):
|
||||
proxy = 'http://%s' % proxy
|
||||
proxy = f'http://{proxy}'
|
||||
|
||||
if proxy.startswith('socks'):
|
||||
self.report_warning(
|
||||
'%s does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.' % self.get_basename())
|
||||
f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. '
|
||||
'Consider adding --hls-prefer-native to your command.')
|
||||
|
||||
# Since December 2015 ffmpeg supports -http_proxy option (see
|
||||
# http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
|
||||
@@ -575,7 +575,7 @@ class FFmpegFD(ExternalFD):
|
||||
if end_time:
|
||||
args += ['-t', str(end_time - start_time)]
|
||||
|
||||
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
|
||||
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
|
||||
|
||||
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
|
||||
args += ['-c', 'copy']
|
||||
|
||||
@@ -67,12 +67,12 @@ class FlvReader(io.BytesIO):
|
||||
self.read_bytes(3)
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualityEntryCount
|
||||
for i in range(quality_entry_count):
|
||||
for _ in range(quality_entry_count):
|
||||
self.read_string()
|
||||
|
||||
segment_run_count = self.read_unsigned_int()
|
||||
segments = []
|
||||
for i in range(segment_run_count):
|
||||
for _ in range(segment_run_count):
|
||||
first_segment = self.read_unsigned_int()
|
||||
fragments_per_segment = self.read_unsigned_int()
|
||||
segments.append((first_segment, fragments_per_segment))
|
||||
@@ -91,12 +91,12 @@ class FlvReader(io.BytesIO):
|
||||
|
||||
quality_entry_count = self.read_unsigned_char()
|
||||
# QualitySegmentUrlModifiers
|
||||
for i in range(quality_entry_count):
|
||||
for _ in range(quality_entry_count):
|
||||
self.read_string()
|
||||
|
||||
fragments_count = self.read_unsigned_int()
|
||||
fragments = []
|
||||
for i in range(fragments_count):
|
||||
for _ in range(fragments_count):
|
||||
first = self.read_unsigned_int()
|
||||
first_ts = self.read_unsigned_long_long()
|
||||
duration = self.read_unsigned_int()
|
||||
@@ -135,11 +135,11 @@ class FlvReader(io.BytesIO):
|
||||
self.read_string() # MovieIdentifier
|
||||
server_count = self.read_unsigned_char()
|
||||
# ServerEntryTable
|
||||
for i in range(server_count):
|
||||
for _ in range(server_count):
|
||||
self.read_string()
|
||||
quality_count = self.read_unsigned_char()
|
||||
# QualityEntryTable
|
||||
for i in range(quality_count):
|
||||
for _ in range(quality_count):
|
||||
self.read_string()
|
||||
# DrmData
|
||||
self.read_string()
|
||||
@@ -148,14 +148,14 @@ class FlvReader(io.BytesIO):
|
||||
|
||||
segments_count = self.read_unsigned_char()
|
||||
segments = []
|
||||
for i in range(segments_count):
|
||||
for _ in range(segments_count):
|
||||
box_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'asrt'
|
||||
segment = FlvReader(box_data).read_asrt()
|
||||
segments.append(segment)
|
||||
fragments_run_count = self.read_unsigned_char()
|
||||
fragments = []
|
||||
for i in range(fragments_run_count):
|
||||
for _ in range(fragments_run_count):
|
||||
box_size, box_type, box_data = self.read_box_info()
|
||||
assert box_type == b'afrt'
|
||||
fragments.append(FlvReader(box_data).read_afrt())
|
||||
@@ -309,7 +309,7 @@ class F4mFD(FragmentFD):
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
requested_bitrate = info_dict.get('tbr')
|
||||
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest')
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.url
|
||||
@@ -326,8 +326,8 @@ class F4mFD(FragmentFD):
|
||||
formats = sorted(formats, key=lambda f: f[0])
|
||||
rate, media = formats[-1]
|
||||
else:
|
||||
rate, media = list(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))[0]
|
||||
rate, media = next(filter(
|
||||
lambda f: int(f[0]) == requested_bitrate, formats))
|
||||
|
||||
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
|
||||
man_base_url = get_base_url(doc) or man_url
|
||||
|
||||
@@ -199,7 +199,7 @@ class FragmentFD(FileDownloader):
|
||||
'.ytdl file is corrupt' if is_corrupt else
|
||||
'Inconsistent state of incomplete fragment download')
|
||||
self.report_warning(
|
||||
'%s. Restarting from the beginning ...' % message)
|
||||
f'{message}. Restarting from the beginning ...')
|
||||
ctx['fragment_index'] = resume_len = 0
|
||||
if 'ytdl_corrupt' in ctx:
|
||||
del ctx['ytdl_corrupt']
|
||||
@@ -366,10 +366,10 @@ class FragmentFD(FileDownloader):
|
||||
return decrypt_fragment
|
||||
|
||||
def download_and_append_fragments_multiple(self, *args, **kwargs):
|
||||
'''
|
||||
"""
|
||||
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
|
||||
all args must be either tuple or list
|
||||
'''
|
||||
"""
|
||||
interrupt_trigger = [True]
|
||||
max_progress = len(args)
|
||||
if max_progress == 1:
|
||||
@@ -424,7 +424,7 @@ class FragmentFD(FileDownloader):
|
||||
finally:
|
||||
tpe.shutdown(wait=True)
|
||||
if not interrupt_trigger[0] and not is_live:
|
||||
raise KeyboardInterrupt()
|
||||
raise KeyboardInterrupt
|
||||
# we expect the user wants to stop and DO WANT the preceding postprocessors to run;
|
||||
# so returning a intermediate result here instead of KeyboardInterrupt on live
|
||||
return result
|
||||
|
||||
@@ -72,7 +72,7 @@ class HlsFD(FragmentFD):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
man_url = info_dict['url']
|
||||
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest')
|
||||
|
||||
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
|
||||
man_url = urlh.url
|
||||
@@ -228,7 +228,7 @@ class HlsFD(FragmentFD):
|
||||
'url': frag_url,
|
||||
'decrypt_info': decrypt_info,
|
||||
'byte_range': byte_range,
|
||||
'media_sequence': media_sequence
|
||||
'media_sequence': media_sequence,
|
||||
})
|
||||
media_sequence += 1
|
||||
|
||||
@@ -350,9 +350,8 @@ class HlsFD(FragmentFD):
|
||||
# XXX: this should probably be silent as well
|
||||
# or verify that all segments contain the same data
|
||||
self.report_warning(bug_reports_message(
|
||||
'Discarding a %s block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'
|
||||
% (type(block).__name__)))
|
||||
f'Discarding a {type(block).__name__} block found in the middle of the stream; '
|
||||
'if the subtitles display incorrectly,'))
|
||||
continue
|
||||
block.write_into(output)
|
||||
|
||||
|
||||
@@ -176,7 +176,7 @@ class HttpFD(FileDownloader):
|
||||
'downloaded_bytes': ctx.resume_len,
|
||||
'total_bytes': ctx.resume_len,
|
||||
}, info_dict)
|
||||
raise SucceedDownload()
|
||||
raise SucceedDownload
|
||||
else:
|
||||
# The length does not match, we start the download over
|
||||
self.report_unable_to_resume()
|
||||
@@ -194,7 +194,7 @@ class HttpFD(FileDownloader):
|
||||
|
||||
def close_stream():
|
||||
if ctx.stream is not None:
|
||||
if not ctx.tmpfilename == '-':
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
ctx.stream = None
|
||||
|
||||
@@ -268,20 +268,20 @@ class HttpFD(FileDownloader):
|
||||
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
|
||||
self.report_destination(ctx.filename)
|
||||
except OSError as err:
|
||||
self.report_error('unable to open for writing: %s' % str(err))
|
||||
self.report_error(f'unable to open for writing: {err}')
|
||||
return False
|
||||
|
||||
if self.params.get('xattr_set_filesize', False) and data_len is not None:
|
||||
try:
|
||||
write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
|
||||
except (XAttrUnavailableError, XAttrMetadataError) as err:
|
||||
self.report_error('unable to set filesize xattr: %s' % str(err))
|
||||
self.report_error(f'unable to set filesize xattr: {err}')
|
||||
|
||||
try:
|
||||
ctx.stream.write(data_block)
|
||||
except OSError as err:
|
||||
self.to_stderr('\n')
|
||||
self.report_error('unable to write data: %s' % str(err))
|
||||
self.report_error(f'unable to write data: {err}')
|
||||
return False
|
||||
|
||||
# Apply rate limit
|
||||
@@ -327,7 +327,7 @@ class HttpFD(FileDownloader):
|
||||
elif now - ctx.throttle_start > 3:
|
||||
if ctx.stream is not None and ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
raise ThrottledDownload()
|
||||
raise ThrottledDownload
|
||||
elif speed:
|
||||
ctx.throttle_start = None
|
||||
|
||||
@@ -338,7 +338,7 @@ class HttpFD(FileDownloader):
|
||||
|
||||
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
|
||||
ctx.resume_len = byte_counter
|
||||
raise NextFragment()
|
||||
raise NextFragment
|
||||
|
||||
if ctx.tmpfilename != '-':
|
||||
ctx.stream.close()
|
||||
|
||||
@@ -251,7 +251,7 @@ class IsmFD(FragmentFD):
|
||||
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
|
||||
|
||||
frag_index = 0
|
||||
for i, segment in enumerate(segments):
|
||||
for segment in segments:
|
||||
frag_index += 1
|
||||
if frag_index <= ctx['fragment_index']:
|
||||
continue
|
||||
|
||||
@@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION
|
||||
|
||||
|
||||
class MhtmlFD(FragmentFD):
|
||||
_STYLESHEET = """\
|
||||
_STYLESHEET = '''\
|
||||
html, body {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
@@ -45,7 +45,7 @@ body > figure > img {
|
||||
max-width: 100%;
|
||||
max-height: calc(100vh - 5em);
|
||||
}
|
||||
"""
|
||||
'''
|
||||
_STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
|
||||
_STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
|
||||
|
||||
@@ -57,24 +57,19 @@ body > figure > img {
|
||||
)).decode('us-ascii') + '?='
|
||||
|
||||
def _gen_cid(self, i, fragment, frag_boundary):
|
||||
return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary)
|
||||
return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid'
|
||||
|
||||
def _gen_stub(self, *, fragments, frag_boundary, title):
|
||||
output = io.StringIO()
|
||||
|
||||
output.write((
|
||||
output.write(
|
||||
'<!DOCTYPE html>'
|
||||
'<html>'
|
||||
'<head>'
|
||||
'' '<meta name="generator" content="yt-dlp {version}">'
|
||||
'' '<title>{title}</title>'
|
||||
'' '<style>{styles}</style>'
|
||||
'<body>'
|
||||
).format(
|
||||
version=escapeHTML(YT_DLP_VERSION),
|
||||
styles=self._STYLESHEET,
|
||||
title=escapeHTML(title)
|
||||
))
|
||||
f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">'
|
||||
f'<title>{escapeHTML(title)}</title>'
|
||||
f'<style>{self._STYLESHEET}</style>'
|
||||
'<body>')
|
||||
|
||||
t0 = 0
|
||||
for i, frag in enumerate(fragments):
|
||||
@@ -87,15 +82,12 @@ body > figure > img {
|
||||
num=i + 1,
|
||||
t0=srt_subtitles_timecode(t0),
|
||||
t1=srt_subtitles_timecode(t1),
|
||||
duration=formatSeconds(frag['duration'], msec=True)
|
||||
duration=formatSeconds(frag['duration'], msec=True),
|
||||
))
|
||||
except (KeyError, ValueError, TypeError):
|
||||
t1 = None
|
||||
output.write((
|
||||
'<figcaption>Slide #{num}</figcaption>'
|
||||
).format(num=i + 1))
|
||||
output.write('<img src="cid:{cid}">'.format(
|
||||
cid=self._gen_cid(i, frag, frag_boundary)))
|
||||
output.write(f'<figcaption>Slide #{i + 1}</figcaption>')
|
||||
output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">')
|
||||
output.write('</figure>')
|
||||
t0 = t1
|
||||
|
||||
@@ -126,31 +118,24 @@ body > figure > img {
|
||||
stub = self._gen_stub(
|
||||
fragments=fragments,
|
||||
frag_boundary=frag_boundary,
|
||||
title=title
|
||||
title=title,
|
||||
)
|
||||
|
||||
ctx['dest_stream'].write((
|
||||
'MIME-Version: 1.0\r\n'
|
||||
'From: <nowhere@yt-dlp.github.io.invalid>\r\n'
|
||||
'To: <nowhere@yt-dlp.github.io.invalid>\r\n'
|
||||
'Subject: {title}\r\n'
|
||||
f'Subject: {self._escape_mime(title)}\r\n'
|
||||
'Content-type: multipart/related; '
|
||||
'' 'boundary="{boundary}"; '
|
||||
'' 'type="text/html"\r\n'
|
||||
'X.yt-dlp.Origin: {origin}\r\n'
|
||||
f'boundary="{frag_boundary}"; '
|
||||
'type="text/html"\r\n'
|
||||
f'X.yt-dlp.Origin: {origin}\r\n'
|
||||
'\r\n'
|
||||
'--{boundary}\r\n'
|
||||
f'--{frag_boundary}\r\n'
|
||||
'Content-Type: text/html; charset=utf-8\r\n'
|
||||
'Content-Length: {length}\r\n'
|
||||
f'Content-Length: {len(stub)}\r\n'
|
||||
'\r\n'
|
||||
'{stub}\r\n'
|
||||
).format(
|
||||
origin=origin,
|
||||
boundary=frag_boundary,
|
||||
length=len(stub),
|
||||
title=self._escape_mime(title),
|
||||
stub=stub
|
||||
).encode())
|
||||
f'{stub}\r\n').encode())
|
||||
extra_state['header_written'] = True
|
||||
|
||||
for i, fragment in enumerate(fragments):
|
||||
|
||||
@@ -15,7 +15,7 @@ class NiconicoDmcFD(FileDownloader):
|
||||
def real_download(self, filename, info_dict):
|
||||
from ..extractor.niconico import NiconicoIE
|
||||
|
||||
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading from DMC')
|
||||
ie = NiconicoIE(self.ydl)
|
||||
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
||||
|
||||
@@ -34,7 +34,7 @@ class NiconicoDmcFD(FileDownloader):
|
||||
try:
|
||||
self.ydl.urlopen(request).read()
|
||||
except Exception:
|
||||
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
|
||||
self.to_screen(f'[{self.FD_NAME}] Heartbeat failed')
|
||||
|
||||
with heartbeat_lock:
|
||||
if not download_complete:
|
||||
@@ -85,14 +85,14 @@ class NiconicoLiveFD(FileDownloader):
|
||||
'quality': live_quality,
|
||||
'protocol': 'hls+fmp4',
|
||||
'latency': live_latency,
|
||||
'chasePlay': False
|
||||
'chasePlay': False,
|
||||
},
|
||||
'room': {
|
||||
'protocol': 'webSocket',
|
||||
'commentable': True
|
||||
'commentable': True,
|
||||
},
|
||||
'reconnect': True,
|
||||
}
|
||||
},
|
||||
}))
|
||||
else:
|
||||
ws = ws_extractor
|
||||
@@ -118,7 +118,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
elif self.ydl.params.get('verbose', False):
|
||||
if len(recv) > 100:
|
||||
recv = recv[:100] + '...'
|
||||
self.to_screen('[debug] Server said: %s' % recv)
|
||||
self.to_screen(f'[debug] Server said: {recv}')
|
||||
|
||||
def ws_main():
|
||||
reconnect = False
|
||||
@@ -128,7 +128,7 @@ class NiconicoLiveFD(FileDownloader):
|
||||
if ret is True:
|
||||
return
|
||||
except BaseException as e:
|
||||
self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e)))
|
||||
self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e)))
|
||||
time.sleep(10)
|
||||
continue
|
||||
finally:
|
||||
|
||||
@@ -180,9 +180,9 @@ class RtmpFD(FileDownloader):
|
||||
|
||||
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
|
||||
prevsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
|
||||
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
|
||||
time.sleep(5.0) # This seems to be needed
|
||||
args = basic_args + ['--resume']
|
||||
args = [*basic_args, '--resume']
|
||||
if retval == RD_FAILED:
|
||||
args += ['--skip', '1']
|
||||
args = [encodeArgument(a) for a in args]
|
||||
@@ -197,7 +197,7 @@ class RtmpFD(FileDownloader):
|
||||
break
|
||||
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
|
||||
fsize = os.path.getsize(encodeFilename(tmpfilename))
|
||||
self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
|
||||
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
|
||||
self.try_rename(tmpfilename, filename)
|
||||
self._hook_progress({
|
||||
'downloaded_bytes': fsize,
|
||||
|
||||
@@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD):
|
||||
|
||||
def real_download(self, filename, info_dict):
|
||||
video_id = info_dict['video_id']
|
||||
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
|
||||
self.to_screen(f'[{self.FD_NAME}] Downloading live chat')
|
||||
if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
|
||||
self.report_warning('Live chat download runs until the livestream ends. '
|
||||
'If you wish to download the video simultaneously, run a separate yt-dlp instance')
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -4,12 +4,11 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
ExtractorError,
|
||||
js_to_json,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
@@ -67,7 +66,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'WWI Centenary',
|
||||
'description': 'md5:c2379ec0ca84072e86b446e536954546',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
|
||||
'info_dict': {
|
||||
@@ -75,7 +74,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
|
||||
'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
|
||||
'info_dict': {
|
||||
@@ -86,7 +85,7 @@ class ABCIE(InfoExtractor):
|
||||
'upload_date': '20200813',
|
||||
'uploader': 'Behind the News',
|
||||
'uploader_id': 'behindthenews',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
|
||||
'info_dict': {
|
||||
@@ -95,7 +94,7 @@ class ABCIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
|
||||
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -126,7 +125,7 @@ class ABCIE(InfoExtractor):
|
||||
if mobj is None:
|
||||
expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
|
||||
if expired:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True)
|
||||
raise ExtractorError('Unable to extract video urls')
|
||||
|
||||
urls_info = self._parse_json(
|
||||
@@ -164,7 +163,7 @@ class ABCIE(InfoExtractor):
|
||||
'height': height,
|
||||
'tbr': bitrate,
|
||||
'filesize': int_or_none(url_info.get('filesize')),
|
||||
'format_id': format_id
|
||||
'format_id': format_id,
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -288,13 +287,12 @@ class ABCIViewIE(InfoExtractor):
|
||||
stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
|
||||
|
||||
house_number = video_params.get('episodeHouseNumber') or video_id
|
||||
path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
|
||||
int(time.time()), house_number)
|
||||
path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet'
|
||||
sig = hmac.new(
|
||||
b'android.content.res.Resources',
|
||||
path.encode('utf-8'), hashlib.sha256).hexdigest()
|
||||
path.encode(), hashlib.sha256).hexdigest()
|
||||
token = self._download_webpage(
|
||||
'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
|
||||
f'http://iview.abc.net.au{path}&sig={sig}', video_id)
|
||||
|
||||
def tokenize_url(url, token):
|
||||
return update_url_query(url, {
|
||||
@@ -303,7 +301,7 @@ class ABCIViewIE(InfoExtractor):
|
||||
|
||||
for sd in ('1080', '720', 'sd', 'sd-low'):
|
||||
sd_url = try_get(
|
||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||
stream, lambda x: x['streams']['hls'][sd], str)
|
||||
if not sd_url:
|
||||
continue
|
||||
formats = self._extract_m3u8_formats(
|
||||
@@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'description': 'md5:93119346c24a7c322d446d8eece430ff',
|
||||
'series': 'Upper Middle Bogan',
|
||||
'season': 'Series 1',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
@@ -386,7 +384,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.',
|
||||
'series': '7.30 Mark Humphries Satire',
|
||||
'season': 'Episodes',
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
|
||||
'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$',
|
||||
},
|
||||
'playlist_count': 15,
|
||||
}]
|
||||
@@ -398,7 +396,7 @@ class ABCIViewShowSeriesIE(InfoExtractor):
|
||||
r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
|
||||
webpage, 'initial state')
|
||||
video_data = self._parse_json(
|
||||
unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
|
||||
unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id)
|
||||
video_data = video_data['route']['pageData']['_embedded']
|
||||
|
||||
highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl'])
|
||||
|
||||
@@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
|
||||
display_id = mobj.group('display_id')
|
||||
video_id = mobj.group('id')
|
||||
info_dict = self._extract_feed_info(
|
||||
'http://abcnews.go.com/video/itemfeed?id=%s' % video_id)
|
||||
f'http://abcnews.go.com/video/itemfeed?id={video_id}')
|
||||
info_dict.update({
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
@@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor):
|
||||
data = self._download_json(
|
||||
'https://api.abcotvs.com/v2/content', display_id, query={
|
||||
'id': video_id,
|
||||
'key': 'otv.web.%s.story' % station,
|
||||
'key': f'otv.web.{station}.story',
|
||||
'station': station,
|
||||
})['data']
|
||||
video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
|
||||
video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
video_id = str(dict_get(video, ('id', 'publishedKey'), video_id))
|
||||
title = video.get('title') or video['linkText']
|
||||
|
||||
formats = []
|
||||
|
||||
@@ -12,20 +12,21 @@ import urllib.parse
|
||||
import urllib.request
|
||||
import urllib.response
|
||||
import uuid
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_ecb_decrypt
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
OnDemandPagedList,
|
||||
bytes_to_intlist,
|
||||
decode_base_n,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
OnDemandPagedList,
|
||||
time_seconds,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.networking import clean_proxies
|
||||
|
||||
|
||||
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
|
||||
@@ -65,8 +66,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
query={'t': media_token},
|
||||
data=json.dumps({
|
||||
'kv': 'a',
|
||||
'lt': ticket
|
||||
}).encode('utf-8'),
|
||||
'lt': ticket,
|
||||
}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
@@ -76,7 +77,7 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
|
||||
|
||||
h = hmac.new(
|
||||
binascii.unhexlify(self.HKEY),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'),
|
||||
(license_response['cid'] + self.ie._DEVICE_ID).encode(),
|
||||
digestmod=hashlib.sha256)
|
||||
enckey = bytes_to_intlist(h.digest())
|
||||
|
||||
@@ -102,11 +103,11 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
@classmethod
|
||||
def _generate_aks(cls, deviceid):
|
||||
deviceid = deviceid.encode('utf-8')
|
||||
deviceid = deviceid.encode()
|
||||
# add 1 hour and then drop minute and secs
|
||||
ts_1hour = int((time_seconds() // 3600 + 1) * 3600)
|
||||
time_struct = time.gmtime(ts_1hour)
|
||||
ts_1hour_str = str(ts_1hour).encode('utf-8')
|
||||
ts_1hour_str = str(ts_1hour).encode()
|
||||
|
||||
tmp = None
|
||||
|
||||
@@ -118,7 +119,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
|
||||
def mix_tmp(count):
|
||||
nonlocal tmp
|
||||
for i in range(count):
|
||||
for _ in range(count):
|
||||
mix_once(tmp)
|
||||
|
||||
def mix_twist(nonce):
|
||||
@@ -159,7 +160,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
data=json.dumps({
|
||||
'deviceId': self._DEVICE_ID,
|
||||
'applicationKeySecret': aks,
|
||||
}).encode('utf-8'),
|
||||
}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
@@ -179,7 +180,7 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
'osLang': 'ja_JP',
|
||||
'osTimezone': 'Asia/Tokyo',
|
||||
'appId': 'tv.abema',
|
||||
'appVersion': '3.27.1'
|
||||
'appVersion': '3.27.1',
|
||||
}, headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
})['token']
|
||||
@@ -201,8 +202,8 @@ class AbemaTVBaseIE(InfoExtractor):
|
||||
f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in',
|
||||
data=json.dumps({
|
||||
method: username,
|
||||
'password': password
|
||||
}).encode('utf-8'), headers={
|
||||
'password': password,
|
||||
}).encode(), headers={
|
||||
'Authorization': f'bearer {self._get_device_token()}',
|
||||
'Origin': 'https://abema.tv',
|
||||
'Referer': 'https://abema.tv/',
|
||||
@@ -343,7 +344,7 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||
|
||||
description = self._html_search_regex(
|
||||
(r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div',
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',),
|
||||
r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'),
|
||||
webpage, 'description', default=None, group=1)
|
||||
if not description:
|
||||
og_desc = self._html_search_meta(
|
||||
|
||||
@@ -67,7 +67,7 @@ class ACastIE(ACastBaseIE):
|
||||
'display_id': '2.raggarmordet-rosterurdetforflutna',
|
||||
'season_number': 4,
|
||||
'season': 'Season 4',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||
'only_matching': True,
|
||||
@@ -93,13 +93,13 @@ class ACastIE(ACastBaseIE):
|
||||
'series': 'Democracy Sausage with Mark Kenny',
|
||||
'timestamp': 1684826362,
|
||||
'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel, display_id = self._match_valid_url(url).groups()
|
||||
episode = self._call_api(
|
||||
'%s/episodes/%s' % (channel, display_id),
|
||||
f'{channel}/episodes/{display_id}',
|
||||
display_id, {'showInfo': 'true'})
|
||||
return self._extract_episode(
|
||||
episode, self._extract_show_info(episode.get('show') or {}))
|
||||
@@ -130,7 +130,7 @@ class ACastChannelIE(ACastBaseIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||
return False if ACastIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_slug = self._match_id(url)
|
||||
|
||||
@@ -3,10 +3,10 @@ from ..utils import (
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
parse_codecs,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ class AcFunVideoBaseIE(InfoExtractor):
|
||||
'width': int_or_none(video.get('width')),
|
||||
'height': int_or_none(video.get('height')),
|
||||
'tbr': float_or_none(video.get('avgBitrate')),
|
||||
**parse_codecs(video.get('codecs', ''))
|
||||
**parse_codecs(video.get('codecs', '')),
|
||||
})
|
||||
|
||||
return {
|
||||
@@ -77,7 +77,7 @@ class AcFunVideoIE(AcFunVideoBaseIE):
|
||||
'comment_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg)',
|
||||
'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -7,21 +7,20 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
|
||||
from ..compat import compat_b64decode
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ass_subtitles_timecode,
|
||||
bytes_to_intlist,
|
||||
bytes_to_long,
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
intlist_to_bytes,
|
||||
long_to_bytes,
|
||||
parse_iso8601,
|
||||
pkcs1pad,
|
||||
strip_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
@@ -111,9 +110,9 @@ class ADNIE(ADNBaseIE):
|
||||
|
||||
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
||||
compat_b64decode(enc_subtitles[24:]),
|
||||
base64.b64decode(enc_subtitles[24:]),
|
||||
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
|
||||
compat_b64decode(enc_subtitles[:24])))
|
||||
base64.b64decode(enc_subtitles[:24])))
|
||||
subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False)
|
||||
if not subtitles_json:
|
||||
return None
|
||||
@@ -136,7 +135,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
if start is None or end is None or text is None:
|
||||
continue
|
||||
alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
|
||||
ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format(
|
||||
ass_subtitles_timecode(start),
|
||||
ass_subtitles_timecode(end),
|
||||
'{\\a%d}' % alignment if alignment != 2 else '',
|
||||
@@ -178,7 +177,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
|
||||
def _real_extract(self, url):
|
||||
lang, video_id = self._match_valid_url(url).group('lang', 'id')
|
||||
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||
video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/'
|
||||
player = self._download_json(
|
||||
video_base_url + 'configuration', video_id,
|
||||
'Downloading player config JSON metadata',
|
||||
@@ -219,12 +218,12 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||
'X-Player-Token': authorization,
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'freeWithAds': 'true',
|
||||
'adaptive': 'false',
|
||||
'withMetadata': 'true',
|
||||
'source': 'Web'
|
||||
'source': 'Web',
|
||||
})
|
||||
break
|
||||
except ExtractorError as e:
|
||||
@@ -256,7 +255,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
for quality, load_balancer_url in qualities.items():
|
||||
load_balancer_data = self._download_json(
|
||||
load_balancer_url, video_id,
|
||||
'Downloading %s %s JSON metadata' % (format_id, quality),
|
||||
f'Downloading {format_id} {quality} JSON metadata',
|
||||
fatal=False) or {}
|
||||
m3u8_url = load_balancer_data.get('location')
|
||||
if not m3u8_url:
|
||||
@@ -276,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||
self.raise_login_required('This video requires a subscription', method='password')
|
||||
|
||||
video = (self._download_json(
|
||||
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||
self._API_BASE_URL + f'video/{video_id}', video_id,
|
||||
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||
show = video.get('show') or {}
|
||||
|
||||
@@ -320,7 +319,7 @@ class ADNSeasonIE(ADNBaseIE):
|
||||
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
|
||||
'Downloading episode list', headers={
|
||||
'X-Target-Distribution': lang,
|
||||
**self._HEADERS
|
||||
**self._HEADERS,
|
||||
}, query={
|
||||
'order': 'asc',
|
||||
'limit': '-1',
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
|
||||
class AdobeConnectIE(InfoExtractor):
|
||||
@@ -12,13 +10,13 @@ class AdobeConnectIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_extract_title(webpage)
|
||||
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
is_live = qs.get('isLive', ['false'])[0] == 'true'
|
||||
formats = []
|
||||
for con_string in qs['conStrings'][0].split(','):
|
||||
formats.append({
|
||||
'format_id': con_string.split('://')[0],
|
||||
'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'app': urllib.parse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
|
||||
'ext': 'flv',
|
||||
'play_path': 'mp4:' + qs['streamName'][0],
|
||||
'rtmp_conn': 'S:' + qs['ticket'][0],
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,13 +2,12 @@ import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ISO639Utils,
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
ISO639Utils,
|
||||
join_nonempty,
|
||||
OnDemandPagedList,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
@@ -36,7 +35,7 @@ class AdobeTVBaseIE(InfoExtractor):
|
||||
return subtitles
|
||||
|
||||
def _parse_video_data(self, video_data):
|
||||
video_id = compat_str(video_data['id'])
|
||||
video_id = str(video_data['id'])
|
||||
title = video_data['title']
|
||||
|
||||
s3_extracted = False
|
||||
@@ -151,7 +150,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
|
||||
page += 1
|
||||
query['page'] = page
|
||||
for element_data in self._call_api(
|
||||
self._RESOURCE, display_id, query, 'Download Page %d' % page):
|
||||
self._RESOURCE, display_id, query, f'Download Page {page}'):
|
||||
yield self._process_data(element_data)
|
||||
|
||||
def _extract_playlist_entries(self, display_id, query):
|
||||
|
||||
@@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
getShowBySlug(slug:"%s") {
|
||||
%%s
|
||||
}
|
||||
}''' % show_path
|
||||
}''' % show_path # noqa: UP031
|
||||
if episode_path:
|
||||
query = query % '''title
|
||||
getVideoBySlug(slug:"%s") {
|
||||
@@ -128,7 +128,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
episode_title = title = video_data['title']
|
||||
series = show_data.get('title')
|
||||
if series:
|
||||
title = '%s - %s' % (series, title)
|
||||
title = f'{series} - {title}'
|
||||
info = {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
@@ -191,7 +191,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
if not slug:
|
||||
continue
|
||||
entries.append(self.url_result(
|
||||
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
|
||||
f'http://adultswim.com/videos/{show_path}/{slug}',
|
||||
'AdultSwim', video.get('_id')))
|
||||
return self.playlist_result(
|
||||
entries, show_path, show_data.get('title'),
|
||||
|
||||
@@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||
filter_value, query={'filter[%s]' % filter_key: filter_value})
|
||||
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
|
||||
filter_value, query={f'filter[{filter_key}]': filter_value})
|
||||
result = traverse_obj(
|
||||
result, ('results',
|
||||
lambda k, v: k == 0 and v[filter_key] == filter_value),
|
||||
@@ -142,7 +142,7 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Geo-restricted - This content is not available in your location.'
|
||||
'skip': 'Geo-restricted - This content is not available in your location.',
|
||||
}, {
|
||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||
'info_dict': {
|
||||
@@ -171,28 +171,28 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.history.com/videos/history-of-valentines-day',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -209,14 +209,14 @@ class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
%s(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}''' % (resource, slug, fields),
|
||||
}''' % (resource, slug, fields), # noqa: UP031
|
||||
}))['data'][resource]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, slug = self._match_valid_url(url).groups()
|
||||
_, brand = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = 'http://watch.%s' % domain
|
||||
base_url = f'http://watch.{domain}'
|
||||
|
||||
entries = []
|
||||
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||
@@ -248,10 +248,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE):
|
||||
'playlist_mincount': 12,
|
||||
}, {
|
||||
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.historyvault.com/collections/mysteryquest',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
_RESOURCE = 'list'
|
||||
_ITEMS_KEY = 'items'
|
||||
@@ -309,7 +309,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||
'info_dict': {
|
||||
'id': '40700995724',
|
||||
'ext': 'mp4',
|
||||
'title': "History of Valentine’s Day",
|
||||
'title': 'History of Valentine’s Day',
|
||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||
'timestamp': 1375819729,
|
||||
'upload_date': '20130806',
|
||||
@@ -364,6 +364,6 @@ class BiographyIE(AENetworksBaseIE):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_url = self._search_regex(
|
||||
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
|
||||
rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
|
||||
webpage, 'player URL')
|
||||
return self.url_result(player_url, HistoryPlayerIE.ie_key())
|
||||
|
||||
@@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor):
|
||||
'uploader': 'Semiconductor',
|
||||
'uploader_id': 'semiconductor',
|
||||
'uploader_url': 'https://vimeo.com/semiconductor',
|
||||
'duration': 348
|
||||
}
|
||||
'duration': 348,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
||||
'md5': '03582d795382e49f2fd0b427b55de409',
|
||||
@@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor):
|
||||
'uploader': 'Aeon Video',
|
||||
'uploader_id': 'aeonvideo',
|
||||
'uploader_url': 'https://vimeo.com/aeonvideo',
|
||||
'duration': 1344
|
||||
}
|
||||
'duration': 1344,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
|
||||
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
|
||||
|
||||
@@ -55,7 +55,7 @@ class AfreecaTVBaseIE(InfoExtractor):
|
||||
if result != 1:
|
||||
error = _ERRORS.get(result, 'You have failed to log in.')
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s said: %s' % (self.IE_NAME, error),
|
||||
f'Unable to login: {self.IE_NAME} said: {error}',
|
||||
expected=True)
|
||||
|
||||
|
||||
@@ -227,7 +227,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
|
||||
**traverse_obj(file_element, {
|
||||
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||
'timestamp': ('file_start', {unified_timestamp}),
|
||||
})
|
||||
}),
|
||||
})
|
||||
|
||||
if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
|
||||
|
||||
@@ -168,7 +168,7 @@ class TokFMPodcastIE(InfoExtractor):
|
||||
for ext in ('aac', 'mp3'):
|
||||
url_data = self._download_json(
|
||||
f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}',
|
||||
media_id, 'Downloading podcast %s URL' % ext)
|
||||
media_id, f'Downloading podcast {ext} URL')
|
||||
# prevents inserting the mp3 (default) multiple times
|
||||
if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']:
|
||||
formats.append({
|
||||
@@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor):
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _create_url(id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{id}'
|
||||
def _create_url(video_id):
|
||||
return f'https://audycje.tokfm.pl/audycja/{video_id}'
|
||||
|
||||
def _real_extract(self, url):
|
||||
audition_id = self._match_id(url)
|
||||
|
||||
@@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
|
||||
'timestamp': 1664792603,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# with youtube_id
|
||||
'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
|
||||
@@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor):
|
||||
'channel': 'Newsflare',
|
||||
'duration': 37,
|
||||
'upload_date': '20180511',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitle(self, json_data, video_id):
|
||||
|
||||
@@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor):
|
||||
'timestamp': 1667370519,
|
||||
'title': 'Ангел хранитель 1 серия',
|
||||
'channel_follower_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
try_get,
|
||||
@@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor):
|
||||
'title': title,
|
||||
'thumbnail': data.get('coverUrl'),
|
||||
'uploader': try_get(
|
||||
data, lambda x: x['followBar']['name'], compat_str),
|
||||
data, lambda x: x['followBar']['name'], str),
|
||||
'timestamp': float_or_none(data.get('startTimeLong'), scale=1000),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'timestamp': 1636219149,
|
||||
'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.',
|
||||
'upload_date': '20211106',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu',
|
||||
'info_dict': {
|
||||
@@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)'
|
||||
|
||||
def _real_extract(self, url):
|
||||
base, post_type, id = self._match_valid_url(url).groups()
|
||||
base, post_type, display_id = self._match_valid_url(url).groups()
|
||||
wp = {
|
||||
'balkans.aljazeera.net': 'ajb',
|
||||
'chinese.aljazeera.net': 'chinese',
|
||||
@@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor):
|
||||
'news': 'news',
|
||||
}[post_type.split('/')[0]]
|
||||
video = self._download_json(
|
||||
f'https://{base}/graphql', id, query={
|
||||
f'https://{base}/graphql', display_id, query={
|
||||
'wp-site': wp,
|
||||
'operationName': 'ArchipelagoSingleArticleQuery',
|
||||
'variables': json.dumps({
|
||||
'name': id,
|
||||
'name': display_id,
|
||||
'postType': post_type,
|
||||
}),
|
||||
}, headers={
|
||||
@@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor):
|
||||
embed = 'default'
|
||||
|
||||
if video_id is None:
|
||||
webpage = self._download_webpage(url, id)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id',
|
||||
group=(1, 2, 3, 4), default=(None, None, None, None))
|
||||
@@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': url,
|
||||
'ie_key': 'Generic'
|
||||
'ie_key': 'Generic',
|
||||
}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}',
|
||||
'ie_key': 'BrightcoveNew'
|
||||
'ie_key': 'BrightcoveNew',
|
||||
}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
qualities,
|
||||
@@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor):
|
||||
duration = int_or_none(video.get('duration'))
|
||||
view_count = int_or_none(video.get('view_count'))
|
||||
timestamp = unified_timestamp(try_get(
|
||||
video, lambda x: x['added_at']['date'], compat_str))
|
||||
video, lambda x: x['added_at']['date'], str))
|
||||
else:
|
||||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id)
|
||||
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
|
||||
@@ -12,7 +12,6 @@ from ..utils import (
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
_FIELDS = '''
|
||||
_id
|
||||
clipImageSource
|
||||
@@ -34,27 +33,27 @@ _QUERIES = {
|
||||
video: getClip(clipIdentifier: $id) {
|
||||
%s %s
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS),
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
'montage': '''query ($id: String!) {
|
||||
video: getMontage(clipIdentifier: $id) {
|
||||
%s
|
||||
}
|
||||
}''' % _FIELDS,
|
||||
}''' % _FIELDS, # noqa: UP031
|
||||
'Clips': '''query ($page: Int!, $user: String!, $game: Int) {
|
||||
videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) {
|
||||
data { %s %s }
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS),
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
'Montages': '''query ($page: Int!, $user: String!) {
|
||||
videos: montages(search: createdDate, page: $page, user: $user) {
|
||||
data { %s }
|
||||
}
|
||||
}''' % _FIELDS,
|
||||
}''' % _FIELDS, # noqa: UP031
|
||||
'Mobile Clips': '''query ($page: Int!, $user: String!) {
|
||||
videos: clips(search: createdDate, page: $page, user: $user, mobile: true) {
|
||||
data { %s %s }
|
||||
}
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS),
|
||||
}''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031
|
||||
}
|
||||
|
||||
|
||||
@@ -122,7 +121,7 @@ class AllstarIE(AllstarBaseIE):
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230425',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/clip?clip=8LJLY4JKB',
|
||||
'info_dict': {
|
||||
@@ -140,7 +139,7 @@ class AllstarIE(AllstarBaseIE):
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230702',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c',
|
||||
'info_dict': {
|
||||
@@ -156,7 +155,7 @@ class AllstarIE(AllstarBaseIE):
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230418',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://allstar.gg/montage?montage=RILJMH6QOS',
|
||||
'info_dict': {
|
||||
@@ -172,7 +171,7 @@ class AllstarIE(AllstarBaseIE):
|
||||
'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d',
|
||||
'upload_date': '20230703',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -192,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE):
|
||||
'id': '62b8bdfc9021052f7905882d-clips',
|
||||
'title': 'cherokee - Clips',
|
||||
},
|
||||
'playlist_mincount': 15
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-clips-730',
|
||||
'title': 'cherokee - Clips - 730',
|
||||
},
|
||||
'playlist_mincount': 15
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-montages',
|
||||
'title': 'cherokee - Montages',
|
||||
},
|
||||
'playlist_mincount': 4
|
||||
'playlist_mincount': 4,
|
||||
}, {
|
||||
'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips',
|
||||
'info_dict': {
|
||||
'id': '62b8bdfc9021052f7905882d-mobile',
|
||||
'title': 'cherokee - Mobile Clips',
|
||||
},
|
||||
'playlist_mincount': 1
|
||||
'playlist_mincount': 1,
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 10
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
parse_iso8601,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_filesize,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor):
|
||||
'tbr': 1145,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils import (
|
||||
class Alsace20TVBaseIE(InfoExtractor):
|
||||
def _extract_video(self, video_id, url=None):
|
||||
info = self._download_json(
|
||||
'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ),
|
||||
f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html',
|
||||
video_id) or {}
|
||||
title = info.get('titre')
|
||||
|
||||
@@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor):
|
||||
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
|
||||
|
||||
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
|
||||
thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage))
|
||||
upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None)
|
||||
upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None
|
||||
upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
|
||||
@@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor):
|
||||
'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg',
|
||||
'view_count': int,
|
||||
'categories': ['News & Politics'],
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,17 +1,13 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
int_or_none,
|
||||
clean_html,
|
||||
ExtractorError
|
||||
)
|
||||
|
||||
|
||||
@@ -25,7 +21,7 @@ class AluraIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': '60095',
|
||||
'ext': 'mp4',
|
||||
'title': 'Referências, ref-set e alter'
|
||||
'title': 'Referências, ref-set e alter',
|
||||
},
|
||||
'skip': 'Requires alura account credentials'},
|
||||
{
|
||||
@@ -34,12 +30,12 @@ class AluraIE(InfoExtractor):
|
||||
'only_matching': True},
|
||||
{
|
||||
'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219',
|
||||
'only_matching': True}
|
||||
'only_matching': True},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
course, video_id = self._match_valid_url(url)
|
||||
course, video_id = self._match_valid_url(url).group('course_name', 'id')
|
||||
video_url = self._VIDEO_URL % (course, video_id)
|
||||
|
||||
video_dict = self._download_json(video_url, video_id, 'Searching for videos')
|
||||
@@ -52,7 +48,7 @@ class AluraIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for video_obj in video_dict:
|
||||
video_url_m3u8 = video_obj.get('link')
|
||||
video_url_m3u8 = video_obj.get('mp4')
|
||||
video_format = self._extract_m3u8_formats(
|
||||
video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
@@ -66,7 +62,7 @@ class AluraIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
"formats": formats
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
@@ -95,7 +91,7 @@ class AluraIE(InfoExtractor):
|
||||
'post url', default=self._LOGIN_URL, group='url')
|
||||
|
||||
if not post_url.startswith('http'):
|
||||
post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
|
||||
post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url)
|
||||
|
||||
response = self._download_webpage(
|
||||
post_url, None, 'Logging in',
|
||||
@@ -107,7 +103,7 @@ class AluraIE(InfoExtractor):
|
||||
r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>',
|
||||
response, 'error message', default=None)
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
raise ExtractorError(f'Unable to login: {error}', expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
|
||||
@@ -123,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url)
|
||||
return False if AluraIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -161,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE
|
||||
'url': video_url,
|
||||
'id_key': self.ie_key(),
|
||||
'chapter': chapter,
|
||||
'chapter_number': chapter_number
|
||||
'chapter_number': chapter_number,
|
||||
}
|
||||
entries.append(entry)
|
||||
return self.playlist_result(entries, course_path, course_title)
|
||||
|
||||
@@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor):
|
||||
'display_id': '65091a87ff85af59d9fc54c3',
|
||||
'view_count': int,
|
||||
'description': 'md5:a0357b9c215489e2067cbae0b777bb95',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
@@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor):
|
||||
'uploader': 'PBS NewsHour',
|
||||
'uploader_id': 'PBSNewsHour',
|
||||
'timestamp': 1549639570,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Vimeo
|
||||
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||
@@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor):
|
||||
'timestamp': 1294763658,
|
||||
'upload_date': '20110111',
|
||||
'uploader': 'Sam Morrill',
|
||||
'uploader_id': 'sammorrill'
|
||||
}
|
||||
'uploader_id': 'sammorrill',
|
||||
},
|
||||
}, {
|
||||
# Direct Link
|
||||
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||
@@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor):
|
||||
'subtitles': dict,
|
||||
'upload_date': '20091007',
|
||||
'timestamp': 1254942511,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
meta = self._download_json(
|
||||
'https://amara.org/api/videos/%s/' % video_id,
|
||||
f'https://amara.org/api/videos/{video_id}/',
|
||||
video_id, query={'format': 'json'})
|
||||
title = meta['title']
|
||||
video_url = meta['all_urls'][0]
|
||||
|
||||
@@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
for retry in self.RetryManager():
|
||||
webpage = self._download_webpage(url, id)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
try:
|
||||
data_json = self._search_json(
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id,
|
||||
r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id,
|
||||
transform_source=js_to_json)
|
||||
except ExtractorError as e:
|
||||
retry.error = e
|
||||
@@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor):
|
||||
'height': int_or_none(video.get('videoHeight')),
|
||||
'width': int_or_none(video.get('videoWidth')),
|
||||
} for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')]
|
||||
return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title'))
|
||||
return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title'))
|
||||
|
||||
|
||||
class AmazonReviewsIE(InfoExtractor):
|
||||
|
||||
@@ -25,7 +25,7 @@ class AmazonMiniTVBaseIE(InfoExtractor):
|
||||
asin, note=note, headers={
|
||||
'Content-Type': 'application/json',
|
||||
'currentpageurl': '/',
|
||||
'currentplatform': 'dWeb'
|
||||
'currentplatform': 'dWeb',
|
||||
}, data=json.dumps(data).encode() if data else None,
|
||||
query=None if data else {
|
||||
'deviceType': 'A1WMMUXPCUJL4N',
|
||||
|
||||
@@ -64,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
site, display_id = self._match_valid_url(url).groups()
|
||||
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||
page_data = self._download_json(
|
||||
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
|
||||
% (requestor_id.lower(), display_id), display_id)['data']
|
||||
f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}',
|
||||
display_id)['data']
|
||||
properties = page_data.get('properties') or {}
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
@@ -76,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
try:
|
||||
for v in page_data['children']:
|
||||
if v.get('type') == 'video-player':
|
||||
releasePid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + releasePid
|
||||
release_pid = v['properties']['currentVideo']['meta']['releasePid']
|
||||
tp_path = 'M_UwQC/' + release_pid
|
||||
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||
video_player_count += 1
|
||||
except KeyError:
|
||||
pass
|
||||
if video_player_count > 1:
|
||||
self.report_warning(
|
||||
'The JSON data has %d video players. Only one will be extracted' % video_player_count)
|
||||
f'The JSON data has {video_player_count} video players. Only one will be extracted')
|
||||
|
||||
# Fall back to videoPid if releasePid not found.
|
||||
# TODO: Fall back to videoPid if releasePid manifest uses DRM.
|
||||
@@ -131,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
})
|
||||
ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
|
||||
if ns_keys:
|
||||
ns = list(ns_keys)[0]
|
||||
ns = next(iter(ns_keys))
|
||||
episode = theplatform_metadata.get(ns + '$episodeTitle') or None
|
||||
episode_number = int_or_none(
|
||||
theplatform_metadata.get(ns + '$episode'))
|
||||
|
||||
@@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
resource_type = 'episodes'
|
||||
|
||||
resource = self._download_json(
|
||||
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
|
||||
f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id)
|
||||
video = resource['video'] if is_episode else resource
|
||||
episode = resource if is_episode else resource.get('episode') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||
'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']),
|
||||
'ie_key': 'Zype',
|
||||
'description': clean_html(video.get('description')),
|
||||
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||
@@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
]
|
||||
|
||||
if season_number:
|
||||
playlist_id = 'season_%d' % season_number
|
||||
playlist_title = 'Season %d' % season_number
|
||||
playlist_id = f'season_{season_number}'
|
||||
playlist_title = f'Season {season_number}'
|
||||
facet_filters.append('search_season_list:' + playlist_title)
|
||||
else:
|
||||
playlist_id = show
|
||||
playlist_title = title
|
||||
|
||||
season_search = self._download_json(
|
||||
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||
f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production',
|
||||
playlist_id, headers={
|
||||
'Origin': 'https://www.americastestkitchen.com',
|
||||
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||
}, query={
|
||||
'facetFilters': json.dumps(facet_filters),
|
||||
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug,
|
||||
'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season',
|
||||
'attributesToHighlight': '',
|
||||
'hitsPerPage': 1000,
|
||||
})
|
||||
@@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||
'description': episode.get('description'),
|
||||
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||
'season_number': season_number,
|
||||
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
|
||||
'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')),
|
||||
'ie_key': AmericasTestKitchenIE.ie_key(),
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
@@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
|
||||
item = feed.get('channel', {}).get('item')
|
||||
if not item:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))
|
||||
raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error']))
|
||||
|
||||
video_id = item['guid']
|
||||
|
||||
def get_media_node(name, default=None):
|
||||
media_name = 'media-%s' % name
|
||||
media_name = f'media-{name}'
|
||||
media_group = item.get('media-group') or item
|
||||
return media_group.get(media_name) or item.get(media_name) or item.get(name, default)
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'release_date': '20230121',
|
||||
'release_timestamp': 1674285179,
|
||||
'episode_id': 'e1tpt3d',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# embed url
|
||||
'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd',
|
||||
@@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'episode_id': 'e1shjqd',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
_WEBPAGE_TESTS = [{
|
||||
@@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor):
|
||||
'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg',
|
||||
'uploader': 'Podcast Tempo',
|
||||
'channel': 'apakatatempo',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import url_or_none, merge_dicts
|
||||
from ..utils import merge_dicts, url_or_none
|
||||
|
||||
|
||||
class AngelIE(InfoExtractor):
|
||||
@@ -15,8 +15,8 @@ class AngelIE(InfoExtractor):
|
||||
'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons',
|
||||
'description': 'md5:73b704897c20ab59c433a9c0a8202d5e',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 1359.0
|
||||
}
|
||||
'duration': 1359.0,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name',
|
||||
'md5': 'e4774bad0a5f0ad2e90d175cafdb797d',
|
||||
@@ -26,8 +26,8 @@ class AngelIE(InfoExtractor):
|
||||
'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name',
|
||||
'description': 'md5:aadfb4827a94415de5ff6426e6dee3be',
|
||||
'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$',
|
||||
'duration': 3276.0
|
||||
}
|
||||
'duration': 3276.0,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -44,7 +44,7 @@ class AngelIE(InfoExtractor):
|
||||
'title': self._og_search_title(webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
# Angel uses cloudinary in the background and supports image transformations.
|
||||
|
||||
@@ -105,7 +105,7 @@ class Ant1NewsGrArticleIE(AntennaBaseIE):
|
||||
info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle')
|
||||
embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage))
|
||||
if not embed_urls:
|
||||
raise ExtractorError('no videos found for %s' % video_id, expected=True)
|
||||
raise ExtractorError(f'no videos found for {video_id}', expected=True)
|
||||
return self.playlist_from_matches(
|
||||
embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
|
||||
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
|
||||
|
||||
@@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor):
|
||||
'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900',
|
||||
'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99',
|
||||
'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe',
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
|
||||
}
|
||||
|
||||
def _generate_nfl_token(self, anvack, mcp_id):
|
||||
@@ -255,7 +255,7 @@ class AnvatoIE(InfoExtractor):
|
||||
token
|
||||
}
|
||||
}
|
||||
}''' % (anvack, mcp_id),
|
||||
}''' % (anvack, mcp_id), # noqa: UP031
|
||||
}).encode(), headers={
|
||||
'Authorization': auth_token,
|
||||
'Content-Type': 'application/json',
|
||||
@@ -299,7 +299,7 @@ class AnvatoIE(InfoExtractor):
|
||||
|
||||
return self._download_json(
|
||||
video_data_url, video_id, transform_source=strip_jsonp, query=query,
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8'))
|
||||
data=json.dumps({'api': api}, separators=(',', ':')).encode())
|
||||
|
||||
def _get_anvato_videos(self, access_key, video_id, token):
|
||||
video_data = self._get_video_json(access_key, video_id, token)
|
||||
@@ -358,7 +358,7 @@ class AnvatoIE(InfoExtractor):
|
||||
for caption in video_data.get('captions', []):
|
||||
a_caption = {
|
||||
'url': caption['url'],
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
|
||||
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None,
|
||||
}
|
||||
subtitles.setdefault(caption['language'], []).append(a_caption)
|
||||
subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs)
|
||||
|
||||
@@ -30,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# video with vidible ID
|
||||
'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
|
||||
@@ -46,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
|
||||
'only_matching': True,
|
||||
@@ -83,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE
|
||||
return self._extract_yahoo_video(video_id, 'us')
|
||||
|
||||
response = self._download_json(
|
||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||
f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details',
|
||||
video_id)['response']
|
||||
if response['statusText'] != 'Ok':
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True)
|
||||
raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True)
|
||||
|
||||
video_data = response['data']
|
||||
formats = []
|
||||
|
||||
@@ -34,7 +34,7 @@ class APAIE(InfoExtractor):
|
||||
video_id, base_url = mobj.group('id', 'base_url')
|
||||
|
||||
webpage = self._download_webpage(
|
||||
'%s/player/%s' % (base_url, video_id), video_id)
|
||||
f'{base_url}/player/{video_id}', video_id)
|
||||
|
||||
jwplatform_id = self._search_regex(
|
||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||
@@ -47,7 +47,7 @@ class APAIE(InfoExtractor):
|
||||
|
||||
def extract(field, name=None):
|
||||
return self._search_regex(
|
||||
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
|
||||
rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, name or field, default=None, group='value')
|
||||
|
||||
title = extract('title') or video_id
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
str_to_int,
|
||||
ExtractorError
|
||||
)
|
||||
from ..utils import ExtractorError, str_to_int
|
||||
|
||||
|
||||
class AppleConnectIE(InfoExtractor):
|
||||
|
||||
@@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor):
|
||||
'duration': 6454,
|
||||
'series': 'The Tim Dillon Show',
|
||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import re
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
@@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
'uploader_id': 'wb',
|
||||
},
|
||||
},
|
||||
]
|
||||
],
|
||||
}, {
|
||||
'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
|
||||
'info_dict': {
|
||||
@@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, movie)
|
||||
film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
|
||||
film_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
|
||||
f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json',
|
||||
film_id, fatal=False)
|
||||
|
||||
if film_data:
|
||||
@@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
if not src:
|
||||
continue
|
||||
formats.append({
|
||||
'format_id': '%s-%s' % (version, size),
|
||||
'format_id': f'{version}-{size}',
|
||||
'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
|
||||
'width': int_or_none(size_data.get('width')),
|
||||
'height': int_or_none(size_data.get('height')),
|
||||
@@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor):
|
||||
page_data = film_data.get('page', {})
|
||||
return self.playlist_result(entries, film_id, page_data.get('movie_title'))
|
||||
|
||||
playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc')
|
||||
|
||||
def fix_html(s):
|
||||
s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
|
||||
@@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor):
|
||||
# like: http://trailers.apple.com/trailers/wb/gravity/
|
||||
|
||||
def _clean_json(m):
|
||||
return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''')
|
||||
return 'iTunes.playURL({});'.format(m.group(1).replace('\'', '''))
|
||||
s = re.sub(self._JSON_RE, _clean_json, s)
|
||||
s = '<html>%s</html>' % s
|
||||
return s
|
||||
return f'<html>{s}</html>'
|
||||
doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
|
||||
|
||||
playlist = []
|
||||
@@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor):
|
||||
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
|
||||
|
||||
trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
|
||||
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
|
||||
settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json')
|
||||
settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
|
||||
|
||||
formats = []
|
||||
for format in settings['metadata']['sizes']:
|
||||
for fmt in settings['metadata']['sizes']:
|
||||
# The src is a file pointing to the real video file
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
|
||||
format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src'])
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'format': format['type'],
|
||||
'width': int_or_none(format['width']),
|
||||
'height': int_or_none(format['height']),
|
||||
'format': fmt['type'],
|
||||
'width': int_or_none(fmt['width']),
|
||||
'height': int_or_none(fmt['height']),
|
||||
})
|
||||
|
||||
playlist.append({
|
||||
@@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
'title': 'Movie Studios',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
|
||||
_VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS))
|
||||
_TESTS = [{
|
||||
'url': 'http://trailers.apple.com/#section=justadded',
|
||||
'info_dict': {
|
||||
@@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
section = self._match_id(url)
|
||||
section_data = self._download_json(
|
||||
'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
|
||||
'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']),
|
||||
section)
|
||||
entries = [
|
||||
self.url_result('http://trailers.apple.com' + e['location'])
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking import HEADRequest
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
@@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'title': 'Bells Of Rostov',
|
||||
'ext': 'mp3',
|
||||
},
|
||||
'skip': 'restricted'
|
||||
'skip': 'restricted',
|
||||
}, {
|
||||
'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3',
|
||||
'md5': '1d0aabe03edca83ca58d9ed3b493a3c3',
|
||||
@@ -158,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'description': 'md5:012b2d668ae753be36896f343d12a236',
|
||||
'upload_date': '20190928',
|
||||
},
|
||||
'skip': 'restricted'
|
||||
'skip': 'restricted',
|
||||
}, {
|
||||
# Original formats are private
|
||||
'url': 'https://archive.org/details/irelandthemakingofarepublic',
|
||||
@@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg',
|
||||
'display_id': 'irelandthemakingofarepublicreel2.mov',
|
||||
},
|
||||
}
|
||||
]
|
||||
},
|
||||
],
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@@ -220,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = urllib.parse.unquote_plus(self._match_id(url))
|
||||
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
|
||||
identifier, _, entry_id = video_id.partition('/')
|
||||
|
||||
# Archive.org metadata API doesn't clearly demarcate playlist entries
|
||||
# or subtitle tracks, so we get them from the embeddable player.
|
||||
@@ -246,7 +247,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
if track['kind'] != 'subtitles':
|
||||
continue
|
||||
entries[p['orig']][track['label']] = {
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/')
|
||||
'url': 'https://archive.org/' + track['file'].lstrip('/'),
|
||||
}
|
||||
|
||||
metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier)
|
||||
@@ -293,7 +294,9 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'height': int_or_none(f.get('width')),
|
||||
'filesize': int_or_none(f.get('size'))})
|
||||
|
||||
extension = (f['name'].rsplit('.', 1) + [None])[1]
|
||||
_, has_ext, extension = f['name'].rpartition('.')
|
||||
if not has_ext:
|
||||
extension = None
|
||||
|
||||
# We don't want to skip private formats if the user has access to them,
|
||||
# however without access to an account with such privileges we can't implement/test this.
|
||||
@@ -308,7 +311,7 @@ class ArchiveOrgIE(InfoExtractor):
|
||||
'filesize': int_or_none(f.get('size')),
|
||||
'protocol': 'https',
|
||||
'source_preference': 0 if f.get('source') == 'original' else -1,
|
||||
'format_note': f.get('source')
|
||||
'format_note': f.get('source'),
|
||||
})
|
||||
|
||||
for entry in entries.values():
|
||||
@@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/Zeurel',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Internal link
|
||||
'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
|
||||
@@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/1veritasium',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description.
|
||||
# Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description
|
||||
@@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_id': 'machinima',
|
||||
'uploader_url': 'https://www.youtube.com/user/machinima',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'machinima'
|
||||
}
|
||||
'uploader': 'machinima',
|
||||
},
|
||||
}, {
|
||||
# FLV video. Video file URL does not provide itag information
|
||||
'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
|
||||
@@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'jawed',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
|
||||
'info_dict': {
|
||||
@@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_url': 'https://www.youtube.com/user/itsmadeon',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# First capture is of dead video, second is the oldest from CDX response.
|
||||
'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
|
||||
@@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'ETC News',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# First capture of dead video, capture date in link links to dead capture.
|
||||
'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
|
||||
@@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader': 'ETC News',
|
||||
},
|
||||
'expected_warnings': [
|
||||
r'unable to download capture webpage \(it may not be archived\)'
|
||||
]
|
||||
r'unable to download capture webpage \(it may not be archived\)',
|
||||
],
|
||||
}, { # Very old YouTube page, has - YouTube in title.
|
||||
'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
|
||||
'info_dict': {
|
||||
'id': '-06-KB9XTzg',
|
||||
'ext': 'flv',
|
||||
'title': 'New Coin Hack!! 100% Safe!!'
|
||||
}
|
||||
'title': 'New Coin Hack!! 100% Safe!!',
|
||||
},
|
||||
}, {
|
||||
'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
|
||||
'info_dict': {
|
||||
@@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader': 'DankPods',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
|
||||
'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
|
||||
@@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader_id': 'PewDiePie',
|
||||
'uploader_url': 'https://www.youtube.com/user/PewDiePie',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~June 2010 Capture. swfconfig
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y',
|
||||
@@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks',
|
||||
'upload_date': '20090520',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Jan 2011: watch-video-date/eow-date surrounded by whitespace
|
||||
'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc',
|
||||
@@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 132,
|
||||
'uploader_url': 'https://www.youtube.com/user/claybutlermusic',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~May 2009 swfArgs. ytcfg is spread out over various vars
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY',
|
||||
@@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'description': 'md5:4ca77d79538064e41e4cc464e93f44f0',
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'duration': 754,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~June 2012. Upload date is in another lang so cannot extract.
|
||||
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA',
|
||||
@@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'uploader': 'BlackNerdComedy',
|
||||
'duration': 182,
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# ~July 2013
|
||||
'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM',
|
||||
@@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ',
|
||||
'upload_date': '20060428',
|
||||
'uploader': 'punkybird',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# April 2020: Player response in player config
|
||||
'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en',
|
||||
@@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'description': 'md5:c625bb3c02c4f5fb4205971e468fa341',
|
||||
'uploader_url': 'https://www.youtube.com/user/GameGrumps',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# watch7-user-header with yt-user-info
|
||||
'url': 'ytarchive:kbh4T_b4Ixw:20160307085057',
|
||||
@@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'thumbnail': r're:https?://.*\.(jpg|webp)',
|
||||
'upload_date': '20150503',
|
||||
'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# April 2012
|
||||
'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU',
|
||||
@@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'duration': 200,
|
||||
'upload_date': '20120407',
|
||||
'uploader_id': 'thecomputernerd01',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Video not archived, only capture is unavailable video page
|
||||
'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, { # Encoded url
|
||||
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc:20050214000000',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'ytarchive:BaW_jenozKc',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
|
||||
@@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
|
||||
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
|
||||
_YT_ALL_THUMB_SERVERS = orderedSet(
|
||||
_YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
|
||||
[*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]])
|
||||
|
||||
_WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'
|
||||
_OLDEST_CAPTURE_DATE = 20050214000000
|
||||
_NEWEST_CAPTURE_DATE = 20500101000000
|
||||
|
||||
def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False):
|
||||
def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False):
|
||||
# CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
|
||||
query = {
|
||||
'url': url,
|
||||
@@ -688,14 +691,14 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
'limit': 500,
|
||||
'filter': ['statuscode:200'] + (filters or []),
|
||||
'collapse': collapse or [],
|
||||
**(query or {})
|
||||
**(query or {}),
|
||||
}
|
||||
res = self._download_json(
|
||||
'https://web.archive.org/cdx/search/cdx', item_id,
|
||||
note or 'Downloading CDX API JSON', query=query, fatal=fatal)
|
||||
if isinstance(res, list) and len(res) >= 2:
|
||||
# format response to make it easier to use
|
||||
return list(dict(zip(res[0], v)) for v in res[1:])
|
||||
return [dict(zip(res[0], v)) for v in res[1:]]
|
||||
elif not isinstance(res, list) or len(res) != 0:
|
||||
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
|
||||
|
||||
@@ -852,7 +855,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
{
|
||||
'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
|
||||
'filesize': int_or_none(thumbnail_dict.get('length')),
|
||||
'preference': int_or_none(thumbnail_dict.get('length'))
|
||||
'preference': int_or_none(thumbnail_dict.get('length')),
|
||||
} for thumbnail_dict in response)
|
||||
if not try_all:
|
||||
break
|
||||
@@ -893,7 +896,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
for retry in retry_manager:
|
||||
try:
|
||||
urlh = self._request_webpage(
|
||||
HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
|
||||
HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'),
|
||||
video_id, note='Fetching archived video file url', expected_status=True)
|
||||
except ExtractorError as e:
|
||||
# HTTP Error 404 is expected if the video is not saved.
|
||||
@@ -924,21 +927,21 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
||||
info['thumbnails'] = self._extract_thumbnails(video_id)
|
||||
|
||||
if urlh:
|
||||
url = compat_urllib_parse_unquote(urlh.url)
|
||||
url = urllib.parse.unquote(urlh.url)
|
||||
video_file_url_qs = parse_qs(url)
|
||||
# Attempt to recover any ext & format info from playback url & response headers
|
||||
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
|
||||
itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
|
||||
if itag and itag in YoutubeIE._formats:
|
||||
format.update(YoutubeIE._formats[itag])
|
||||
format.update({'format_id': itag})
|
||||
fmt.update(YoutubeIE._formats[itag])
|
||||
fmt.update({'format_id': itag})
|
||||
else:
|
||||
mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
|
||||
ext = (mimetype2ext(mime)
|
||||
or urlhandle_detect_ext(urlh)
|
||||
or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
|
||||
format.update({'ext': ext})
|
||||
info['formats'] = [format]
|
||||
fmt.update({'ext': ext})
|
||||
info['formats'] = [fmt]
|
||||
if not info.get('duration'):
|
||||
info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ from ..utils import (
|
||||
|
||||
class ArcPublishingIE(InfoExtractor):
|
||||
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
|
||||
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
|
||||
_VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})'
|
||||
_TESTS = [{
|
||||
# https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
|
||||
'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||
@@ -74,12 +74,12 @@ class ArcPublishingIE(InfoExtractor):
|
||||
def _extract_embed_urls(cls, url, webpage):
|
||||
entries = []
|
||||
# https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
|
||||
for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
|
||||
for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage):
|
||||
powa = extract_attributes(powa_el) or {}
|
||||
org = powa.get('data-org')
|
||||
uuid = powa.get('data-uuid')
|
||||
if org and uuid:
|
||||
entries.append('arcpublishing:%s:%s' % (org, uuid))
|
||||
entries.append(f'arcpublishing:{org}:{uuid}')
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -122,7 +122,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
elif stream_type in ('ts', 'hls'):
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
|
||||
if all([f.get('acodec') == 'none' for f in m3u8_formats]):
|
||||
if all(f.get('acodec') == 'none' for f in m3u8_formats):
|
||||
continue
|
||||
for f in m3u8_formats:
|
||||
height = f.get('height')
|
||||
@@ -136,7 +136,7 @@ class ArcPublishingIE(InfoExtractor):
|
||||
else:
|
||||
vbr = int_or_none(s.get('bitrate'))
|
||||
formats.append({
|
||||
'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
|
||||
'format_id': f'{stream_type}-{vbr}' if vbr else stream_type,
|
||||
'vbr': vbr,
|
||||
'width': int_or_none(s.get('width')),
|
||||
'height': int_or_none(s.get('height')),
|
||||
|
||||
@@ -85,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(stream_url, {
|
||||
'hdcore': '3.1.1',
|
||||
'plugin': 'aasp-3.1.1.69.124'
|
||||
'plugin': 'aasp-3.1.1.69.124',
|
||||
}), video_id, f4m_id='hds', fatal=False))
|
||||
elif ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
@@ -96,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor):
|
||||
f = {
|
||||
'url': server,
|
||||
'play_path': stream_url,
|
||||
'format_id': 'a%s-rtmp-%s' % (num, quality),
|
||||
'format_id': f'a{num}-rtmp-{quality}',
|
||||
}
|
||||
else:
|
||||
f = {
|
||||
'url': stream_url,
|
||||
'format_id': 'a%s-%s-%s' % (num, ext, quality)
|
||||
'format_id': f'a{num}-{ext}-{quality}',
|
||||
}
|
||||
m = re.search(
|
||||
r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
|
||||
|
||||
@@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor):
|
||||
raise ExtractorError('Invalid URL', expected=True)
|
||||
|
||||
media = self._download_json(
|
||||
'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id),
|
||||
f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}',
|
||||
video_id, query={
|
||||
# https://video.qbrick.com/docs/api/examples/library-api.html
|
||||
'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags',
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
float_or_none,
|
||||
format_field,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
remove_start,
|
||||
@@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor):
|
||||
'view_count': int,
|
||||
'tags': ['linearna_algebra'],
|
||||
'start_time': 10,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
|
||||
'only_matching': True,
|
||||
@@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor):
|
||||
'duration': float_or_none(video.get('duration'), 1000),
|
||||
'view_count': int_or_none(video.get('views')),
|
||||
'tags': video.get('hashtags'),
|
||||
'start_time': int_or_none(compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
|
||||
'start_time': int_or_none(urllib.parse.parse_qs(
|
||||
urllib.parse.urlparse(url).query).get('t', [None])[0]),
|
||||
}
|
||||
|
||||
@@ -153,7 +153,7 @@ class Art19IE(InfoExtractor):
|
||||
'series_id': ('series_id', {str}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'release_timestamp': ('released_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601})
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601}),
|
||||
})),
|
||||
**traverse_obj(rss_metadata, ('content', {
|
||||
'title': ('episode_title', {str}),
|
||||
|
||||
@@ -5,6 +5,7 @@ from ..utils import (
|
||||
ExtractorError,
|
||||
GeoRestrictedError,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
strip_or_none,
|
||||
@@ -19,32 +20,18 @@ class ArteTVBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ArteTVIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'''(?x)
|
||||
_VALID_URL = rf'''(?x)
|
||||
(?:https?://
|
||||
(?:
|
||||
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||
(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos|
|
||||
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES})
|
||||
)
|
||||
|arte://program)
|
||||
/(?P<id>\d{6}-\d{3}-[AF]|LIVE)
|
||||
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||
/(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||
'info_dict': {
|
||||
'id': '100103-000-A',
|
||||
'title': 'USA: Dyskryminacja na porodówce',
|
||||
'description': 'md5:242017b7cce59ffae340a54baefcafb1',
|
||||
'alt_title': 'ARTE Reportage',
|
||||
'upload_date': '20201103',
|
||||
'duration': 554,
|
||||
'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530',
|
||||
'timestamp': 1604417980,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'}
|
||||
}, {
|
||||
'note': 'No alt_title',
|
||||
'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/',
|
||||
@@ -58,6 +45,23 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/',
|
||||
'info_dict': {
|
||||
'id': '109067-000-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739',
|
||||
'timestamp': 1713927600,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530',
|
||||
'duration': 7599,
|
||||
'title': 'La loi de Téhéran',
|
||||
'upload_date': '20240424',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
'fr-forced': 'mincount:1',
|
||||
},
|
||||
},
|
||||
}, {
|
||||
'note': 'age-restricted',
|
||||
'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/',
|
||||
@@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
'upload_date': '20230930',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/',
|
||||
'info_dict': {
|
||||
'id': '085374-003-A',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:ab79ec7cc472a93164415b4e4916abf9',
|
||||
'timestamp': 1702872000,
|
||||
'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530',
|
||||
'duration': 2594,
|
||||
'title': 'Die kurze Zeit der Jugend',
|
||||
'alt_title': 'Im hohen Norden geboren',
|
||||
'upload_date': '20231218',
|
||||
'subtitles': {
|
||||
'fr': 'mincount:1',
|
||||
'fr-acc': 'mincount:1',
|
||||
},
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
_GEO_BYPASS = True
|
||||
@@ -143,19 +131,21 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
updated_subs = {}
|
||||
for lang, sub_formats in subs.items():
|
||||
for fmt in sub_formats:
|
||||
if fmt.get('url', '').endswith('-MAL.m3u8'):
|
||||
lang += '-acc'
|
||||
updated_subs.setdefault(lang, []).append(fmt)
|
||||
url = fmt.get('url') or ''
|
||||
suffix = ('acc' if url.endswith('-MAL.m3u8')
|
||||
else 'forced' if '_VO' not in url
|
||||
else None)
|
||||
updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt)
|
||||
return updated_subs
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||
langauge_code = self._LANG_MAP.get(lang)
|
||||
language_code = self._LANG_MAP.get(lang)
|
||||
|
||||
config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={
|
||||
'x-validated-age': '18'
|
||||
'x-validated-age': '18',
|
||||
})
|
||||
|
||||
geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {}
|
||||
@@ -180,10 +170,10 @@ class ArteTVIE(ArteTVBaseIE):
|
||||
m = self._VERSION_CODE_RE.match(stream_version_code)
|
||||
if m:
|
||||
lang_pref = int(''.join('01'[x] for x in (
|
||||
m.group('vlang') == langauge_code, # we prefer voice in the requested language
|
||||
m.group('vlang') == language_code, # we prefer voice in the requested language
|
||||
not m.group('audio_desc'), # and not the audio description version
|
||||
bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice
|
||||
m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language
|
||||
m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language
|
||||
not m.group('has_sub'), # but we prefer no subtitles otherwise
|
||||
not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles
|
||||
)))
|
||||
@@ -257,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor):
|
||||
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||
'upload_date': '20201116',
|
||||
},
|
||||
'skip': 'No video available'
|
||||
'skip': 'No video available',
|
||||
}, {
|
||||
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||
'only_matching': True,
|
||||
@@ -272,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor):
|
||||
|
||||
|
||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||
'only_matching': True,
|
||||
@@ -308,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||
|
||||
|
||||
class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||
_VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.arte.tv/en/videos/politics-and-society/',
|
||||
'info_dict': {
|
||||
@@ -322,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return (
|
||||
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
|
||||
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE))
|
||||
and super().suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -331,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE):
|
||||
|
||||
items = []
|
||||
for video in re.finditer(
|
||||
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
|
||||
rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)',
|
||||
webpage):
|
||||
video = video.group('url')
|
||||
if video == url:
|
||||
continue
|
||||
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
|
||||
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)):
|
||||
items.append(video)
|
||||
|
||||
title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None
|
||||
|
||||
@@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
|
||||
'duration': 3413,
|
||||
},
|
||||
'skip': 'This video is only available for registered users'
|
||||
'skip': 'This video is only available for registered users',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
|
||||
@@ -49,7 +49,7 @@ class AtresPlayerIE(InfoExtractor):
|
||||
target_url = self._download_json(
|
||||
'https://account.atresmedia.com/api/login', None,
|
||||
'Logging in', headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
}, data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': password,
|
||||
|
||||
@@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'data-scale-spring-2022',
|
||||
'title': 'Data @Scale Spring 2022',
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://atscaleconference.com/events/video-scale-2021/',
|
||||
@@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor):
|
||||
'info_dict': {
|
||||
'id': 'video-scale-2021',
|
||||
'title': 'Video @Scale 2021',
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
|
||||
'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
return self.playlist_from_matches(
|
||||
re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
|
||||
ie='Generic', playlist_id=id,
|
||||
ie='Generic', playlist_id=playlist_id,
|
||||
title=self._og_search_title(webpage), description=self._og_search_description(webpage))
|
||||
|
||||
@@ -2,10 +2,10 @@ import datetime as dt
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
float_or_none,
|
||||
jwt_encode_hs256,
|
||||
try_get,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor):
|
||||
'id': 'v-ce9cgn1e70n5-1',
|
||||
'ext': 'mp4',
|
||||
'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1',
|
||||
'only_matching': True,
|
||||
@@ -66,10 +66,10 @@ class ATVAtIE(InfoExtractor):
|
||||
video_id=video_id)
|
||||
|
||||
video_title = json_data['views']['default']['page']['title']
|
||||
contentResource = json_data['views']['default']['page']['contentResource']
|
||||
content_id = contentResource[0]['id']
|
||||
content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
|
||||
for id, content in enumerate(contentResource)]
|
||||
content_resource = json_data['views']['default']['page']['contentResource']
|
||||
content_id = content_resource[0]['id']
|
||||
content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']}
|
||||
for id_, content in enumerate(content_resource)]
|
||||
|
||||
time_of_request = dt.datetime.now()
|
||||
not_before = time_of_request - dt.timedelta(minutes=5)
|
||||
@@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor):
|
||||
videos = self._download_json(
|
||||
'https://vas-v4.p7s1video.net/4.0/getsources',
|
||||
content_id, 'Downloading videos JSON', query={
|
||||
'token': jwt_token.decode('utf-8')
|
||||
'token': jwt_token.decode('utf-8'),
|
||||
})
|
||||
|
||||
video_id, videos_data = list(videos['data'].items())[0]
|
||||
video_id, videos_data = next(iter(videos['data'].items()))
|
||||
error_msg = try_get(videos_data, lambda x: x['error']['title'])
|
||||
if error_msg == 'Geo check failed':
|
||||
self.raise_geo_restricted(error_msg)
|
||||
elif error_msg:
|
||||
raise ExtractorError(error_msg)
|
||||
entries = [
|
||||
self._extract_video_info(url, contentResource[video['id']], video)
|
||||
self._extract_video_info(url, content_resource[video['id']], video)
|
||||
for video in videos_data]
|
||||
|
||||
return {
|
||||
|
||||
@@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor):
|
||||
'timestamp': 1448354940,
|
||||
'duration': 74022,
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
|
||||
'only_matching': True,
|
||||
@@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor):
|
||||
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
|
||||
if bitrate:
|
||||
f.update({
|
||||
'format_id': 'http-%s' % bitrate,
|
||||
'format_id': f'http-{bitrate}',
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor):
|
||||
'duration': 4000.99,
|
||||
'uploader': 'Sue Perkins: An hour or so with...',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
|
||||
}
|
||||
},
|
||||
}, { # Direct mp3-file link
|
||||
'url': 'https://audioboom.com/posts/8128496.mp3',
|
||||
'md5': 'e329edf304d450def95c7f86a9165ee1',
|
||||
@@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor):
|
||||
'duration': 1689.7,
|
||||
'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race',
|
||||
'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
|
||||
'only_matching': True,
|
||||
|
||||
@@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor):
|
||||
headers={
|
||||
'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'X-Requested-With': 'XMLHttpRequest',
|
||||
}, data=f'id={player_entry_id}'.encode('utf-8'))
|
||||
}, data=f'id={player_entry_id}'.encode())
|
||||
|
||||
return {
|
||||
'id': str(data_json['entry_id']),
|
||||
@@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, id)
|
||||
player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
player_entry_id = self._search_regex(
|
||||
r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id')
|
||||
return self._audiodraft_extract_from_id(player_entry_id)
|
||||
|
||||
|
||||
@@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
id = self._match_id(url)
|
||||
return self._audiodraft_extract_from_id(f'player_entry_{id}')
|
||||
video_id = self._match_id(url)
|
||||
return self._audiodraft_extract_from_id(f'player_entry_{video_id}')
|
||||
|
||||
@@ -3,7 +3,6 @@ import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .soundcloud import SoundcloudIE
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
url_basename,
|
||||
@@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor):
|
||||
'id': '310086',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Roosh Williams',
|
||||
'title': 'Extraordinary'
|
||||
}
|
||||
'title': 'Extraordinary',
|
||||
},
|
||||
},
|
||||
# audiomack wrapper around soundcloud song
|
||||
# Needs new test URL.
|
||||
@@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor):
|
||||
|
||||
# API is inconsistent with errors
|
||||
if 'url' not in api_response or not api_response['url'] or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url %s' % url)
|
||||
raise ExtractorError(f'Invalid url {url}')
|
||||
|
||||
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
|
||||
# if so, pass the work off to the soundcloud extractor
|
||||
@@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor):
|
||||
return self.url_result(api_response['url'], SoundcloudIE.ie_key())
|
||||
|
||||
return {
|
||||
'id': compat_str(api_response.get('id', album_url_tag)),
|
||||
'id': str(api_response.get('id', album_url_tag)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title'),
|
||||
'url': api_response['url'],
|
||||
@@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
'info_dict':
|
||||
{
|
||||
'id': '812251',
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)'
|
||||
}
|
||||
'title': 'Tha Tour: Part 2 (Official Mixtape)',
|
||||
},
|
||||
},
|
||||
# Album playlist ripped from fakeshoredrive with no metadata
|
||||
{
|
||||
@@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
'id': '837576',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
|
||||
'id': '837580',
|
||||
'ext': 'mp3',
|
||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||
}
|
||||
},
|
||||
}],
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
api_response = self._download_json(
|
||||
'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d'
|
||||
% (album_url_tag, track_no, time.time()), album_url_tag,
|
||||
note='Querying song information (%d)' % (track_no + 1))
|
||||
note=f'Querying song information ({track_no + 1})')
|
||||
|
||||
# Total failure, only occurs when url is totally wrong
|
||||
# Won't happen in middle of valid playlist (next case)
|
||||
if 'url' not in api_response or 'error' in api_response:
|
||||
raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url))
|
||||
raise ExtractorError(f'Invalid url for track {track_no} of album url {url}')
|
||||
# URL is good but song id doesn't exist - usually means end of playlist
|
||||
elif not api_response['url']:
|
||||
break
|
||||
@@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor):
|
||||
# Pull out the album metadata and add to result (if it exists)
|
||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||
if apikey in api_response and resultkey not in result:
|
||||
result[resultkey] = compat_str(api_response[apikey])
|
||||
result[resultkey] = str(api_response[apikey])
|
||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||
result['entries'].append({
|
||||
'id': compat_str(api_response.get('id', song_id)),
|
||||
'id': str(api_response.get('id', song_id)),
|
||||
'uploader': api_response.get('artist'),
|
||||
'title': api_response.get('title', song_id),
|
||||
'url': api_response['url'],
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import random
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str, compat_urllib_parse_unquote
|
||||
from ..utils import ExtractorError, str_or_none, try_get
|
||||
|
||||
|
||||
@@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor):
|
||||
if response_data is not None:
|
||||
return response_data
|
||||
if len(response) == 1 and 'message' in response:
|
||||
raise ExtractorError('API error: %s' % response['message'],
|
||||
raise ExtractorError('API error: {}'.format(response['message']),
|
||||
expected=True)
|
||||
raise ExtractorError('Unexpected API response')
|
||||
|
||||
def _select_api_base(self):
|
||||
"""Selecting one of the currently available API hosts"""
|
||||
response = super(AudiusBaseIE, self)._download_json(
|
||||
response = super()._download_json(
|
||||
'https://api.audius.co/', None,
|
||||
note='Requesting available API hosts',
|
||||
errnote='Unable to request available API hosts')
|
||||
@@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor):
|
||||
anything from this link, since the Audius API won't be able to resolve
|
||||
this url
|
||||
"""
|
||||
url = compat_urllib_parse_unquote(url)
|
||||
title = compat_urllib_parse_unquote(title)
|
||||
url = urllib.parse.unquote(url)
|
||||
title = urllib.parse.unquote(title)
|
||||
if '/' in title or '%2F' in title:
|
||||
fixed_title = title.replace('/', '%5C').replace('%2F', '%5C')
|
||||
return url.replace(title, fixed_title)
|
||||
@@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor):
|
||||
if self._API_BASE is None:
|
||||
self._select_api_base()
|
||||
try:
|
||||
response = super(AudiusBaseIE, self)._download_json(
|
||||
'%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note,
|
||||
response = super()._download_json(
|
||||
f'{self._API_BASE}{self._API_V}{path}', item_id, note=note,
|
||||
errnote=errnote, expected_status=expected_status)
|
||||
except ExtractorError as exc:
|
||||
# some of Audius API hosts may not work as expected and return HTML
|
||||
if 'Failed to parse JSON' in compat_str(exc):
|
||||
if 'Failed to parse JSON' in str(exc):
|
||||
raise ExtractorError('An error occurred while receiving data. Try again',
|
||||
expected=True)
|
||||
raise exc
|
||||
return self._get_response_data(response)
|
||||
|
||||
def _resolve_url(self, url, item_id):
|
||||
return self._api_request('/resolve?url=%s' % url, item_id,
|
||||
return self._api_request(f'/resolve?url={url}', item_id,
|
||||
expected_status=404)
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
# Regular track
|
||||
@@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'repost_count': int,
|
||||
}
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
_ARTWORK_MAP = {
|
||||
"150x150": 150,
|
||||
"480x480": 480,
|
||||
"1000x1000": 1000
|
||||
'150x150': 150,
|
||||
'480x480': 480,
|
||||
'1000x1000': 1000,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
else: # API link
|
||||
title = None
|
||||
# uploader = None
|
||||
track_data = self._api_request('/tracks/%s' % track_id, track_id)
|
||||
track_data = self._api_request(f'/tracks/{track_id}', track_id)
|
||||
|
||||
if not isinstance(track_data, dict):
|
||||
raise ExtractorError('Unexpected API response')
|
||||
@@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE):
|
||||
if isinstance(artworks_data, dict):
|
||||
for quality_key, thumbnail_url in artworks_data.items():
|
||||
thumbnail = {
|
||||
"url": thumbnail_url
|
||||
'url': thumbnail_url,
|
||||
}
|
||||
quality_code = self._ARTWORK_MAP.get(quality_key)
|
||||
if quality_code is not None:
|
||||
@@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE):
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': track_data.get('title', title),
|
||||
'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id),
|
||||
'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream',
|
||||
'ext': 'mp3',
|
||||
'description': track_data.get('description'),
|
||||
'duration': track_data.get('duration'),
|
||||
'track': track_data.get('title'),
|
||||
'artist': try_get(track_data, lambda x: x['user']['name'], compat_str),
|
||||
'artist': try_get(track_data, lambda x: x['user']['name'], str),
|
||||
'genre': track_data.get('genre'),
|
||||
'thumbnails': thumbnails,
|
||||
'view_count': track_data.get('play_count'),
|
||||
@@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'audius:9RWlo',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
@@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
|
||||
if not track_id:
|
||||
raise ExtractorError('Unable to get track ID from playlist')
|
||||
entries.append(self.url_result(
|
||||
'audius:%s' % track_id,
|
||||
f'audius:{track_id}',
|
||||
ie=AudiusTrackIE.ie_key(), video_id=track_id))
|
||||
return entries
|
||||
|
||||
@@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
|
||||
raise ExtractorError('Unable to get playlist ID')
|
||||
|
||||
playlist_tracks = self._api_request(
|
||||
'/playlists/%s/tracks' % playlist_id,
|
||||
f'/playlists/{playlist_id}/tracks',
|
||||
title, note='Downloading playlist tracks metadata',
|
||||
errnote='Unable to download playlist tracks metadata')
|
||||
if not isinstance(playlist_tracks, list):
|
||||
@@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I
|
||||
profile_audius_id = _profile_data[0]['id']
|
||||
profile_bio = _profile_data[0].get('bio')
|
||||
|
||||
api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id)
|
||||
api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id)
|
||||
return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio)
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlencode,
|
||||
compat_str,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
int_or_none,
|
||||
@@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor):
|
||||
show_id, video_id, season_id = self._match_valid_url(url).groups()
|
||||
if video_id and int(video_id) > 0:
|
||||
return self.url_result(
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
|
||||
f'http://awaan.ae/media/{video_id}', 'AWAANVideo')
|
||||
elif season_id and int(season_id) > 0:
|
||||
return self.url_result(smuggle_url(
|
||||
'http://awaan.ae/program/season/%s' % season_id,
|
||||
f'http://awaan.ae/program/season/{season_id}',
|
||||
{'show_id': show_id}), 'AWAANSeason')
|
||||
else:
|
||||
return self.url_result(
|
||||
'http://awaan.ae/program/%s' % show_id, 'AWAANSeason')
|
||||
f'http://awaan.ae/program/{show_id}', 'AWAANSeason')
|
||||
|
||||
|
||||
class AWAANBaseIE(InfoExtractor):
|
||||
@@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}',
|
||||
video_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(video_data, video_id, False)
|
||||
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({
|
||||
'id': video_data['id'],
|
||||
'user_id': video_data['user_id'],
|
||||
'signature': video_data['signature'],
|
||||
@@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||
channel_id = self._match_id(url)
|
||||
|
||||
channel_data = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}',
|
||||
channel_id, headers={'Origin': 'http://awaan.ae'})
|
||||
info = self._parse_video_data(channel_data, channel_id, True)
|
||||
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({
|
||||
embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + urllib.parse.urlencode({
|
||||
'id': base64.b64encode(channel_data['user_id'].encode()).decode(),
|
||||
'channelid': base64.b64encode(channel_data['id'].encode()).decode(),
|
||||
'signature': channel_data['signature'],
|
||||
@@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
show_id = smuggled_data.get('show_id')
|
||||
if show_id is None:
|
||||
season = self._download_json(
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id,
|
||||
f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}',
|
||||
season_id, headers={'Origin': 'http://awaan.ae'})
|
||||
show_id = season['id']
|
||||
data['show_id'] = show_id
|
||||
@@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
'http://admin.mangomolo.com/analytics/index.php/plus/show',
|
||||
show_id, data=urlencode_postdata(data), headers={
|
||||
'Origin': 'http://awaan.ae',
|
||||
'Content-Type': 'application/x-www-form-urlencoded'
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
if not season_id:
|
||||
season_id = show['default_season']
|
||||
@@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor):
|
||||
|
||||
entries = []
|
||||
for video in show['videos']:
|
||||
video_id = compat_str(video['id'])
|
||||
video_id = str(video['id'])
|
||||
entries.append(self.url_result(
|
||||
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id))
|
||||
f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id))
|
||||
|
||||
return self.playlist_result(entries, season_id, title)
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import hmac
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_urlencode
|
||||
|
||||
|
||||
class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
@@ -18,20 +18,20 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
'Accept': 'application/json',
|
||||
'Host': self._AWS_PROXY_HOST,
|
||||
'X-Amz-Date': amz_date,
|
||||
'X-Api-Key': self._AWS_API_KEY
|
||||
'X-Api-Key': self._AWS_API_KEY,
|
||||
}
|
||||
session_token = aws_dict.get('session_token')
|
||||
if session_token:
|
||||
headers['X-Amz-Security-Token'] = session_token
|
||||
|
||||
def aws_hash(s):
|
||||
return hashlib.sha256(s.encode('utf-8')).hexdigest()
|
||||
return hashlib.sha256(s.encode()).hexdigest()
|
||||
|
||||
# Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
|
||||
canonical_querystring = compat_urllib_parse_urlencode(query)
|
||||
canonical_querystring = urllib.parse.urlencode(query)
|
||||
canonical_headers = ''
|
||||
for header_name, header_value in sorted(headers.items()):
|
||||
canonical_headers += '%s:%s\n' % (header_name.lower(), header_value)
|
||||
canonical_headers += f'{header_name.lower()}:{header_value}\n'
|
||||
signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())])
|
||||
canonical_request = '\n'.join([
|
||||
'GET',
|
||||
@@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
canonical_querystring,
|
||||
canonical_headers,
|
||||
signed_headers,
|
||||
aws_hash('')
|
||||
aws_hash(''),
|
||||
])
|
||||
|
||||
# Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
|
||||
@@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
|
||||
# Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
|
||||
def aws_hmac(key, msg):
|
||||
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
|
||||
return hmac.new(key, msg.encode(), hashlib.sha256)
|
||||
|
||||
def aws_hmac_digest(key, msg):
|
||||
return aws_hmac(key, msg).digest()
|
||||
@@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
def aws_hmac_hexdigest(key, msg):
|
||||
return aws_hmac(key, msg).hexdigest()
|
||||
|
||||
k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
|
||||
k_signing = ('AWS4' + aws_dict['secret_key']).encode()
|
||||
for value in credential_scope_list:
|
||||
k_signing = aws_hmac_digest(k_signing, value)
|
||||
|
||||
@@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with
|
||||
|
||||
# Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
|
||||
headers['Authorization'] = ', '.join([
|
||||
'%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
|
||||
'SignedHeaders=%s' % signed_headers,
|
||||
'Signature=%s' % signature,
|
||||
'{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
|
||||
f'SignedHeaders={signed_headers}',
|
||||
f'Signature={signature}',
|
||||
])
|
||||
|
||||
return self._download_json(
|
||||
'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
|
||||
'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
|
||||
video_id, headers=headers)
|
||||
|
||||
@@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor):
|
||||
'timestamp': 1538328802,
|
||||
'view_count': int,
|
||||
'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031',
|
||||
'duration': 1930
|
||||
'duration': 1930,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
|
||||
_PARTNER_ID = '1719221'
|
||||
@@ -62,5 +62,5 @@ class AZMedienIE(InfoExtractor):
|
||||
})['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
f'kaltura:{self._PARTNER_ID}:{entry_id}',
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||
|
||||
@@ -24,8 +24,9 @@ class BaiduVideoIE(InfoExtractor):
|
||||
}]
|
||||
|
||||
def _call_api(self, path, category, playlist_id, note):
|
||||
return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % (
|
||||
path, category, playlist_id), playlist_id, note)
|
||||
return self._download_json(
|
||||
f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}',
|
||||
playlist_id, note)
|
||||
|
||||
def _real_extract(self, url):
|
||||
category, playlist_id = self._match_valid_url(url).groups()
|
||||
@@ -44,7 +45,7 @@ class BaiduVideoIE(InfoExtractor):
|
||||
'xqsingle', category, playlist_id, 'Download episodes JSON metadata')
|
||||
|
||||
entries = [self.url_result(
|
||||
episode['url'], video_title=episode['title']
|
||||
episode['url'], video_title=episode['title'],
|
||||
) for episode in episodes_detail['videos']]
|
||||
|
||||
return self.playlist_result(
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
import math
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_urllib_parse_urlparse,
|
||||
compat_parse_qs,
|
||||
)
|
||||
from ..utils import (
|
||||
format_field,
|
||||
InAdvancePagedList,
|
||||
format_field,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
)
|
||||
@@ -20,8 +17,8 @@ class BanByeBaseIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _extract_playlist_id(url, param='playlist'):
|
||||
return compat_parse_qs(
|
||||
compat_urllib_parse_urlparse(url).query).get(param, [None])[0]
|
||||
return urllib.parse.parse_qs(
|
||||
urllib.parse.urlparse(url).query).get(param, [None])[0]
|
||||
|
||||
def _extract_playlist(self, playlist_id):
|
||||
data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
|
||||
|
||||
@@ -3,7 +3,6 @@ import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
ExtractorError,
|
||||
@@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor):
|
||||
'uploader_id': 'youtube-dl',
|
||||
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song',
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
@@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
|
||||
return self._parse_json(self._html_search_regex(
|
||||
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
|
||||
rf'data-{attr}=(["\'])({{.+?}})\1', webpage,
|
||||
attr + ' data', group=2), video_id, fatal=fatal)
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -167,7 +166,7 @@ class BandcampIE(InfoExtractor):
|
||||
|
||||
download_link = tralbum.get('freeDownloadPage')
|
||||
if download_link:
|
||||
track_id = compat_str(tralbum['id'])
|
||||
track_id = str(tralbum['id'])
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
@@ -192,7 +191,7 @@ class BandcampIE(InfoExtractor):
|
||||
if isinstance(download_formats_list, list):
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
if all(isinstance(x, str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
for format_id, f in downloads.items():
|
||||
@@ -207,7 +206,7 @@ class BandcampIE(InfoExtractor):
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, track_id, 'Downloading %s JSON' % format_id,
|
||||
stat_url, track_id, f'Downloading {format_id} JSON',
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
@@ -225,7 +224,7 @@ class BandcampIE(InfoExtractor):
|
||||
'acodec': format_id.split('-')[0],
|
||||
})
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
title = f'{artist} - {track}' if artist else track
|
||||
|
||||
if not duration:
|
||||
duration = float_or_none(self._html_search_meta(
|
||||
@@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311756226,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'md5': '1a2c32e2691474643e912cc6cd4bffaa',
|
||||
@@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1311757238,
|
||||
'upload_date': '20110727',
|
||||
'uploader': 'Blazo',
|
||||
}
|
||||
},
|
||||
},
|
||||
],
|
||||
'info_dict': {
|
||||
@@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'uploader_id': 'blazo',
|
||||
},
|
||||
'params': {
|
||||
'playlistend': 2
|
||||
'playlistend': 2,
|
||||
},
|
||||
'skip': 'Bandcamp imposes download limits.'
|
||||
'skip': 'Bandcamp imposes download limits.',
|
||||
}, {
|
||||
'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
|
||||
'info_dict': {
|
||||
@@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
def suitable(cls, url):
|
||||
return (False
|
||||
if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
|
||||
else super(BandcampAlbumIE, cls).suitable(url))
|
||||
else super().suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
uploader_id, album_id = self._match_valid_url(url).groups()
|
||||
@@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
},
|
||||
}, {
|
||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -407,7 +406,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
title = show.get('audio_title') or 'Bandcamp Weekly'
|
||||
subtitle = show.get('subtitle')
|
||||
if subtitle:
|
||||
title += ' - %s' % subtitle
|
||||
title += f' - {subtitle}'
|
||||
|
||||
return {
|
||||
'id': show_id,
|
||||
@@ -419,7 +418,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
|
||||
'series': 'Bandcamp Weekly',
|
||||
'episode': show.get('subtitle'),
|
||||
'episode_id': show_id,
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
@@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor):
|
||||
'url': 'http://dotscale.bandcamp.com',
|
||||
'info_dict': {
|
||||
'id': 'dotscale',
|
||||
'title': 'Discography of dotscale'
|
||||
'title': 'Discography of dotscale',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
}, {
|
||||
|
||||
@@ -2,11 +2,11 @@ import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
try_get,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor):
|
||||
'description': 'md5:560d96f02abbebe6c6b78b47465f6b28',
|
||||
'upload_date': '20200324',
|
||||
'timestamp': 1585087895,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
_GRAPHQL_GETMETADATA_QUERY = '''
|
||||
@@ -84,15 +84,15 @@ query GetCommentReplies($id: String!) {
|
||||
'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY,
|
||||
}
|
||||
|
||||
def _call_api(self, video_id, id, operation, note):
|
||||
def _call_api(self, video_id, id_var, operation, note):
|
||||
return self._download_json(
|
||||
'https://api.infowarsmedia.com/graphql', video_id, note=note,
|
||||
headers={
|
||||
'Content-Type': 'application/json; charset=utf-8'
|
||||
'Content-Type': 'application/json; charset=utf-8',
|
||||
}, data=json.dumps({
|
||||
'variables': {'id': id},
|
||||
'variables': {'id': id_var},
|
||||
'operationName': operation,
|
||||
'query': self._GRAPHQL_QUERIES[operation]
|
||||
'query': self._GRAPHQL_QUERIES[operation],
|
||||
}).encode('utf8')).get('data')
|
||||
|
||||
def _get_comments(self, video_id, comments, comment_data):
|
||||
@@ -151,5 +151,5 @@ query GetCommentReplies($id: String!) {
|
||||
'tags': [tag.get('name') for tag in video_info.get('tags')],
|
||||
'availability': self._availability(is_unlisted=video_info.get('unlisted')),
|
||||
'comments': comments,
|
||||
'__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments'))
|
||||
'__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')),
|
||||
}
|
||||
|
||||
@@ -2,10 +2,10 @@ import functools
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import urllib.parse
|
||||
import xml.etree.ElementTree
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str, compat_urlparse
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
IE_NAME = 'bbc.co.uk'
|
||||
IE_DESC = 'BBC iPlayer'
|
||||
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
|
||||
_VALID_URL = r'''(?x)
|
||||
_VALID_URL = rf'''(?x)
|
||||
https?://
|
||||
(?:www\.)?bbc\.co\.uk/
|
||||
(?:
|
||||
@@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||
radio/player/|
|
||||
events/[^/]+/play/[^/]+/
|
||||
)
|
||||
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
|
||||
''' % _ID_REGEX
|
||||
(?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips))
|
||||
'''
|
||||
_EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
|
||||
|
||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||
@@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
|
||||
@@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
|
||||
'note': 'Video',
|
||||
@@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
|
||||
'info_dict': {
|
||||
@@ -268,19 +268,19 @@ class BBCCoUkIE(InfoExtractor):
|
||||
error = clean_html(get_element_by_class('form-message', response))
|
||||
if error:
|
||||
raise ExtractorError(
|
||||
'Unable to login: %s' % error, expected=True)
|
||||
f'Unable to login: {error}', expected=True)
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
class MediaSelectionError(Exception):
|
||||
def __init__(self, id):
|
||||
self.id = id
|
||||
def __init__(self, error_id):
|
||||
self.id = error_id
|
||||
|
||||
def _extract_asx_playlist(self, connection, programme_id):
|
||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||
|
||||
def _extract_items(self, playlist):
|
||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||
return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item')
|
||||
|
||||
def _extract_medias(self, media_selection):
|
||||
error = media_selection.get('result')
|
||||
@@ -312,7 +312,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _raise_extractor_error(self, media_selection_error):
|
||||
raise ExtractorError(
|
||||
'%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
|
||||
f'{self.IE_NAME} returned error: {media_selection_error.id}',
|
||||
expected=True)
|
||||
|
||||
def _download_media_selector(self, programme_id):
|
||||
@@ -372,7 +372,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||
formats.append({
|
||||
'url': ref,
|
||||
'format_id': 'ref%s_%s' % (i, format_id),
|
||||
'format_id': f'ref{i}_{format_id}',
|
||||
})
|
||||
elif transfer_format == 'dash':
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
@@ -394,7 +394,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
href, programme_id, f4m_id=format_id, fatal=False))
|
||||
else:
|
||||
if not supplier and bitrate:
|
||||
format_id += '-%d' % bitrate
|
||||
format_id += f'-{bitrate}'
|
||||
fmt = {
|
||||
'format_id': format_id,
|
||||
'filesize': file_size,
|
||||
@@ -423,9 +423,9 @@ class BBCCoUkIE(InfoExtractor):
|
||||
identifier = connection.get('identifier')
|
||||
server = connection.get('server')
|
||||
fmt.update({
|
||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||
'url': f'{protocol}://{server}/{application}?{auth_string}',
|
||||
'play_path': identifier,
|
||||
'app': '%s?%s' % (application, auth_string),
|
||||
'app': f'{application}?{auth_string}',
|
||||
'page_url': 'http://www.bbc.co.uk',
|
||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||
'rtmp_live': False,
|
||||
@@ -441,7 +441,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
def _download_playlist(self, playlist_id):
|
||||
try:
|
||||
playlist = self._download_json(
|
||||
'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
|
||||
f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json',
|
||||
playlist_id, 'Downloading playlist JSON')
|
||||
formats = []
|
||||
subtitles = {}
|
||||
@@ -480,32 +480,32 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
def _process_legacy_playlist(self, playlist_id):
|
||||
return self._process_legacy_playlist_url(
|
||||
'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
|
||||
f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id)
|
||||
|
||||
def _download_legacy_playlist_url(self, url, playlist_id=None):
|
||||
return self._download_xml(
|
||||
url, playlist_id, 'Downloading legacy playlist XML')
|
||||
|
||||
def _extract_from_legacy_playlist(self, playlist, playlist_id):
|
||||
no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
|
||||
no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems')
|
||||
if no_items is not None:
|
||||
reason = no_items.get('reason')
|
||||
if reason == 'preAvailability':
|
||||
msg = 'Episode %s is not yet available' % playlist_id
|
||||
msg = f'Episode {playlist_id} is not yet available'
|
||||
elif reason == 'postAvailability':
|
||||
msg = 'Episode %s is no longer available' % playlist_id
|
||||
msg = f'Episode {playlist_id} is no longer available'
|
||||
elif reason == 'noMedia':
|
||||
msg = 'Episode %s is not currently available' % playlist_id
|
||||
msg = f'Episode {playlist_id} is not currently available'
|
||||
else:
|
||||
msg = 'Episode %s is not available: %s' % (playlist_id, reason)
|
||||
msg = f'Episode {playlist_id} is not available: {reason}'
|
||||
raise ExtractorError(msg, expected=True)
|
||||
|
||||
for item in self._extract_items(playlist):
|
||||
kind = item.get('kind')
|
||||
if kind not in ('programme', 'radioProgramme'):
|
||||
continue
|
||||
title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
|
||||
description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
|
||||
title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text
|
||||
description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary')
|
||||
description = description_el.text if description_el is not None else None
|
||||
|
||||
def get_programme_id(item):
|
||||
@@ -515,7 +515,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
if value and re.match(r'^[pb][\da-z]{7}$', value):
|
||||
return value
|
||||
get_from_attributes(item)
|
||||
mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
|
||||
mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator')
|
||||
if mediator is not None:
|
||||
return get_from_attributes(mediator)
|
||||
|
||||
@@ -555,7 +555,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||
|
||||
if not programme_id:
|
||||
programme_id = self._search_regex(
|
||||
r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
|
||||
rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None)
|
||||
|
||||
if programme_id:
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
@@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'url': 'http://www.bbc.com/news/world-europe-32668511',
|
||||
'info_dict': {
|
||||
'id': 'world-europe-32668511',
|
||||
'title': 'Russia stages massive WW2 parade',
|
||||
'title': 'Russia stages massive WW2 parade despite Western boycott',
|
||||
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
|
||||
},
|
||||
'playlist_count': 2,
|
||||
@@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BUGGER',
|
||||
'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
@@ -631,16 +632,16 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'description': 'md5:2868290467291b37feda7863f7a83f54',
|
||||
'title': 'Germanwings crash site aerial video',
|
||||
'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# article with single video embedded with data-playable containing XML playlist
|
||||
# with direct video links as progressiveDownloadUrl (for now these are extracted)
|
||||
@@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'now SIMORGH_DATA with no video',
|
||||
}, {
|
||||
# single video embedded with data-playable containing XML playlists (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'id': '39275083',
|
||||
'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'timestamp': 1434713142,
|
||||
'upload_date': '20150619',
|
||||
'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# single video from video playlist embedded with vxp-playlist-data JSON
|
||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||
@@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# single video story with digitalData
|
||||
# single video story with __PWA_PRELOADED_STATE__
|
||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||
'info_dict': {
|
||||
'id': 'p02q6gc4',
|
||||
'ext': 'flv',
|
||||
'title': 'Sri Lanka’s spicy secret',
|
||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
||||
'timestamp': 1437674293,
|
||||
'upload_date': '20150723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tasting the spice of life in Jaffna',
|
||||
'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
|
||||
'timestamp': 1646058397,
|
||||
'upload_date': '20220228',
|
||||
'duration': 255,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story without digitalData
|
||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||
@@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'timestamp': 1415867444,
|
||||
'upload_date': '20141113',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'skip': 'redirects to TopGear home page',
|
||||
}, {
|
||||
# single video embedded with Morph
|
||||
# TODO: replacement test page
|
||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||
'info_dict': {
|
||||
'id': 'p041vhd0',
|
||||
@@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'uploader': 'BBC Sport',
|
||||
'uploader_id': 'bbc_sport',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to UK',
|
||||
'skip': 'Video no longer in page',
|
||||
}, {
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
# single video in __INITIAL_DATA__
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
|
||||
'title': 'Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
|
||||
'timestamp': 1437750175,
|
||||
'upload_date': '20150724',
|
||||
'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with multiple videos embedded with playlist.sxml in playlist param
|
||||
# article with multiple videos embedded with Morph.setPayload
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
@@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# Testing noplaylist
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': 'p034ppnv',
|
||||
'ext': 'mp4',
|
||||
'title': 'All you need to know about Jurgen Klopp',
|
||||
'timestamp': 1444335081,
|
||||
'upload_date': '20151008',
|
||||
'duration': 122.0,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
}, {
|
||||
# school report article with single video
|
||||
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
||||
@@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'title': 'School which breaks down barriers in Jerusalem',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
@@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
'duration': 105,
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'id': 'p0b779gc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
'timestamp': 1638215626,
|
||||
'upload_date': '20211129',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
# video with script id __NEXT_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/uk-68546268',
|
||||
'info_dict': {
|
||||
'id': 'p0hj0lq7',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nasser Hospital doctor describes his treatment by IDF',
|
||||
'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1710188248,
|
||||
'upload_date': '20240311',
|
||||
'duration': 104,
|
||||
},
|
||||
}, {
|
||||
# single video article embedded with data-media-vpid
|
||||
@@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
@@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}, {
|
||||
@@ -832,27 +861,29 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'info_dict': {
|
||||
'id': 'p07c6sb9',
|
||||
'ext': 'mp4',
|
||||
'title': 'How positive thinking is harming your happiness',
|
||||
'alt_title': 'The downsides of positive thinking',
|
||||
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
||||
'title': 'The downsides of positive thinking',
|
||||
'description': 'The downsides of positive thinking',
|
||||
'duration': 235,
|
||||
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
|
||||
'upload_date': '20220223',
|
||||
'timestamp': 1645632746,
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'id': 'p0hrw4nr',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
'title': 'Are our coastlines being washed away?',
|
||||
'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
|
||||
'timestamp': 1713556800,
|
||||
'upload_date': '20240419',
|
||||
'duration': 1588,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
|
||||
'uploader': 'World Service',
|
||||
'uploader_id': 'bbc_world_service',
|
||||
'series': 'CrowdScience',
|
||||
'chapters': [],
|
||||
},
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
@@ -866,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
def suitable(cls, url):
|
||||
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
|
||||
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
|
||||
else super(BBCIE, cls).suitable(url))
|
||||
else super().suitable(url))
|
||||
|
||||
def _extract_from_media_meta(self, media_meta, video_id):
|
||||
# Direct links to media in media metadata (e.g.
|
||||
@@ -978,7 +1009,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if playlist:
|
||||
entry = None
|
||||
for key in ('streaming', 'progressiveDownload'):
|
||||
playlist_url = playlist.get('%sUrl' % key)
|
||||
playlist_url = playlist.get(f'{key}Url')
|
||||
if not playlist_url:
|
||||
continue
|
||||
try:
|
||||
@@ -1004,18 +1035,17 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
# http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
|
||||
group_id = self._search_regex(
|
||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||
rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})',
|
||||
webpage, 'group id', default=None)
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
|
||||
r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
|
||||
r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
|
||||
[rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"',
|
||||
rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"',
|
||||
rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'],
|
||||
webpage, 'vpid', default=None)
|
||||
|
||||
if programme_id:
|
||||
@@ -1069,83 +1099,133 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
morph_payload = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||
webpage, 'morph payload', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
# Several setPayload calls may be present but the video(s)
|
||||
# should be in one that mentions leadMedia or videoData
|
||||
morph_payload = self._search_json(
|
||||
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
|
||||
contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
|
||||
default={})
|
||||
if morph_payload:
|
||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||
for component in components:
|
||||
if not isinstance(component, dict):
|
||||
continue
|
||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||
if not lead_media:
|
||||
continue
|
||||
identifiers = lead_media.get('identifiers')
|
||||
if not identifiers or not isinstance(identifiers, dict):
|
||||
continue
|
||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||
for lead_media in traverse_obj(morph_payload, (
|
||||
'body', 'components', ..., 'props', 'leadMedia', {dict})):
|
||||
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
|
||||
if not programme_id:
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
duration = None
|
||||
duration_d = lead_media.get('duration')
|
||||
if isinstance(duration_d, dict):
|
||||
duration = parse_duration(dict_get(
|
||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': lead_media.get('title') or self._og_search_title(webpage),
|
||||
**traverse_obj(lead_media, {
|
||||
'description': ('summary', {str}),
|
||||
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
|
||||
'uploader': ('masterBrand', {str}),
|
||||
'uploader_id': ('mid', {str}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
body = self._parse_json(traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
|
||||
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
|
||||
if video_data.get('vpid'):
|
||||
video_id = video_data['vpid']
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
entry = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
else:
|
||||
video_id = video_data['pid']
|
||||
entry = self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
|
||||
video_id, url_transparent=True)
|
||||
entry.update({
|
||||
'timestamp': traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}),
|
||||
),
|
||||
**traverse_obj(video_data, {
|
||||
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||
'title': (('title', 'caption'), {str}, any),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
}),
|
||||
})
|
||||
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
|
||||
return entry
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
playlist_title = traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'headline', {str})) or playlist_title
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
||||
# various PRELOADED_STATE JSON
|
||||
preload_state = self._search_json(
|
||||
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
|
||||
'preload state', playlist_id, transform_source=js_to_json, default={})
|
||||
# PRELOADED_STATE with current programmme
|
||||
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
|
||||
programme_id = traverse_obj(current_programme, ('id', {str}))
|
||||
if programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
**traverse_obj(current_programme, {
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
|
||||
'duration': ('duration', 'value', {int_or_none}),
|
||||
'uploader': ('network', 'short_title', {str}),
|
||||
'uploader_id': ('network', 'id', {str}),
|
||||
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
|
||||
'series': ('titles', 'primary', {str}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
}),
|
||||
),
|
||||
}
|
||||
|
||||
# PWA_PRELOADED_STATE with article video asset
|
||||
asset_id = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
|
||||
'assetVideo', 0, {str}, any))
|
||||
if asset_id:
|
||||
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
|
||||
if video_id:
|
||||
article = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
|
||||
|
||||
def image_url(image_id):
|
||||
return traverse_obj(preload_state, (
|
||||
'entities', 'images', image_id, 'url',
|
||||
{lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
|
||||
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'id': video_id,
|
||||
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
|
||||
'title': ('title', {str}),
|
||||
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
|
||||
'thumbnail': (0, {image_url}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
})),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
|
||||
}
|
||||
else:
|
||||
return self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
|
||||
asset_id, playlist_title, display_id=playlist_id,
|
||||
description=playlist_description)
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
@@ -1191,6 +1271,28 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_model(model):
|
||||
"""Extract single video from model structure"""
|
||||
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
|
||||
if not item_id:
|
||||
return
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
return {
|
||||
'id': item_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||
}),
|
||||
}
|
||||
|
||||
def is_type(*types):
|
||||
return lambda _, v: v['type'] in types
|
||||
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
@@ -1202,6 +1304,19 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
for video_data in traverse_obj(initial_data, (
|
||||
'stores', 'article', 'articleBodyContent', is_type('video'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
entry = parse_model(model)
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
return
|
||||
@@ -1216,7 +1331,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
if blocks:
|
||||
summary = []
|
||||
for block in blocks:
|
||||
text = try_get(block, lambda x: x['model']['text'], compat_str)
|
||||
text = try_get(block, lambda x: x['model']['text'], str)
|
||||
if text:
|
||||
summary.append(text)
|
||||
if summary:
|
||||
@@ -1234,37 +1349,100 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
'subtitles': subtitles,
|
||||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
'duration': int_or_none(item.get('duration')),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
|
||||
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
|
||||
name = resp['name']
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') not in ['media', 'video']:
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
for block in traverse_obj(resp, (
|
||||
'data', (None, ('content', 'model')), 'blocks',
|
||||
is_type('media', 'video'), 'model', {dict})):
|
||||
parse_media(block)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# extract from SIMORGH_DATA hydration JSON
|
||||
simorgh_data = self._search_json(
|
||||
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
|
||||
'simorgh data', playlist_id, default={})
|
||||
if simorgh_data:
|
||||
done = False
|
||||
for video_data in traverse_obj(simorgh_data, (
|
||||
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if video_data['type'] == 'video':
|
||||
entry = parse_model(model)
|
||||
else: # legacyMedia: no duration, subtitles
|
||||
block_id, entry = traverse_obj(model, ('blockId', {str})), None
|
||||
media_data = traverse_obj(simorgh_data, (
|
||||
'pageData', 'promo', 'media',
|
||||
{lambda x: x if x['id'] == block_id else None}))
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
'id': block_id,
|
||||
'display_id': playlist_id,
|
||||
'formats': formats,
|
||||
'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if done:
|
||||
break
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
re.findall(pattern, webpage))))
|
||||
return list(filter(None, (
|
||||
self._parse_json(s, playlist_id, fatal=False)
|
||||
for s in re.findall(pattern, webpage))))
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
|
||||
('props', 'pageProps', 'page'))
|
||||
model = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('video'),
|
||||
'model', 'blocks', is_type('media'),
|
||||
'model', 'blocks', is_type('mediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if model and (entry := parse_model(model)):
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', is_type('timestamp'), 'model',
|
||||
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# Multiple video article (e.g.
|
||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
||||
EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?'
|
||||
entries = []
|
||||
for match in extract_all(r'new\s+SMP\(({.+?})\)'):
|
||||
embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
|
||||
if embed_url and re.match(EMBED_URL, embed_url):
|
||||
entries.append(embed_url)
|
||||
entries.extend(re.findall(
|
||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||
rf'setPlaylist\("({EMBED_URL})"\)', webpage))
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
|
||||
@@ -1314,11 +1492,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||
|
||||
video_id = media_meta.get('externalId')
|
||||
if not video_id:
|
||||
video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
|
||||
video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}'
|
||||
|
||||
title = media_meta.get('caption')
|
||||
if not title:
|
||||
title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
|
||||
title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}'
|
||||
|
||||
duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
|
||||
|
||||
@@ -1379,8 +1557,8 @@ class BBCCoUkArticleIE(InfoExtractor):
|
||||
|
||||
class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
def _entries(self, webpage, url, playlist_id):
|
||||
single_page = 'page' in compat_urlparse.parse_qs(
|
||||
compat_urlparse.urlparse(url).query)
|
||||
single_page = 'page' in urllib.parse.parse_qs(
|
||||
urllib.parse.urlparse(url).query)
|
||||
for page_num in itertools.count(2):
|
||||
for video_id in re.findall(
|
||||
self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
|
||||
@@ -1394,8 +1572,8 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
if not next_page:
|
||||
break
|
||||
webpage = self._download_webpage(
|
||||
compat_urlparse.urljoin(url, next_page), playlist_id,
|
||||
'Downloading page %d' % page_num, page_num)
|
||||
urllib.parse.urljoin(url, next_page), playlist_id,
|
||||
f'Downloading page {page_num}', page_num)
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
@@ -1410,7 +1588,7 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
|
||||
_VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||
_VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})'
|
||||
|
||||
@staticmethod
|
||||
def _get_default(episode, key, default_key='default'):
|
||||
@@ -1534,11 +1712,11 @@ class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
variables['sliceId'] = series_id
|
||||
return self._download_json(
|
||||
'https://graph.ibl.api.bbc.co.uk/', pid, headers={
|
||||
'Content-Type': 'application/json'
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({
|
||||
'id': '5692d93d5aac8d796a0305e895e61551',
|
||||
'variables': variables,
|
||||
}).encode('utf-8'))['data']['programme']
|
||||
}).encode())['data']['programme']
|
||||
|
||||
@staticmethod
|
||||
def _get_playlist_data(data):
|
||||
@@ -1598,7 +1776,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
|
||||
def _call_api(self, pid, per_page, page=1, series_id=None):
|
||||
return self._download_json(
|
||||
'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
|
||||
f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes',
|
||||
pid, query={
|
||||
'page': page,
|
||||
'per_page': per_page,
|
||||
@@ -1614,7 +1792,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||
|
||||
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||
IE_NAME = 'bbc.co.uk:playlist'
|
||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
|
||||
_VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)'
|
||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
|
||||
_VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
|
||||
_TESTS = [{
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
@@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor):
|
||||
'display_id': 'birds-original-mix',
|
||||
'ext': 'mp4',
|
||||
'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)",
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -51,7 +50,7 @@ class BeatportIE(InfoExtractor):
|
||||
|
||||
track = next(t for t in playables['tracks'] if t['id'] == int(track_id))
|
||||
|
||||
title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name']
|
||||
title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name']
|
||||
if track['mix']:
|
||||
title += ' (' + track['mix'] + ')'
|
||||
|
||||
@@ -89,7 +88,7 @@ class BeatportIE(InfoExtractor):
|
||||
images.append(image)
|
||||
|
||||
return {
|
||||
'id': compat_str(track.get('id')) or track_id,
|
||||
'id': str(track.get('id')) or track_id,
|
||||
'display_id': track.get('slug') or display_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
@@ -24,7 +23,7 @@ class BeegIE(InfoExtractor):
|
||||
'upload_date': '20220131',
|
||||
'timestamp': 1643656455,
|
||||
'display_id': '2540839',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://beeg.com/-0599050563103750?t=4-861',
|
||||
'md5': 'bd8b5ea75134f7f07fad63008db2060e',
|
||||
@@ -39,7 +38,7 @@ class BeegIE(InfoExtractor):
|
||||
'timestamp': 1643623200,
|
||||
'display_id': '2569965',
|
||||
'upload_date': '20220131',
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# api/v6 v2
|
||||
'url': 'https://beeg.com/1941093077?t=911-1391',
|
||||
@@ -56,8 +55,8 @@ class BeegIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video = self._download_json(
|
||||
'https://store.externulls.com/facts/file/%s' % video_id,
|
||||
video_id, 'Downloading JSON for %s' % video_id)
|
||||
f'https://store.externulls.com/facts/file/{video_id}',
|
||||
video_id, f'Downloading JSON for {video_id}')
|
||||
|
||||
fc_facts = video.get('fc_facts')
|
||||
first_fact = {}
|
||||
|
||||
@@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor):
|
||||
'upload_date': '20141205',
|
||||
'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg',
|
||||
'age_limit': 18,
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -86,6 +86,6 @@ class BellMediaIE(InfoExtractor):
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
|
||||
'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}',
|
||||
'ie_key': 'NineCNineMedia',
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor):
|
||||
'tags': ['Studienfilm'],
|
||||
'duration': 602.440,
|
||||
'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -54,7 +54,7 @@ class BerufeTVIE(InfoExtractor):
|
||||
subtitles.setdefault(track['language'], []).append({
|
||||
'url': track['source'],
|
||||
'name': track.get('label'),
|
||||
'ext': 'vtt'
|
||||
'ext': 'vtt',
|
||||
})
|
||||
|
||||
return {
|
||||
|
||||
@@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor):
|
||||
'thumbnail': r're:(?i)^https?://.*\.jpg$',
|
||||
'subtitles': {
|
||||
'en': 'mincount:2',
|
||||
}
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
@@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor):
|
||||
'thumbnail': r're:(?i)^https?://.*\.jpg$',
|
||||
'subtitles': {
|
||||
'en': 'mincount:2',
|
||||
}
|
||||
},
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
_FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
|
||||
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player'
|
||||
|
||||
def _get_feed_query(self, uri):
|
||||
return {
|
||||
|
||||
@@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE):
|
||||
'timestamp': 1673341692,
|
||||
'duration': 109.269,
|
||||
'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'],
|
||||
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg'
|
||||
}
|
||||
'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import base64
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_urllib_parse_unquote,
|
||||
)
|
||||
|
||||
|
||||
class BigflixIE(InfoExtractor):
|
||||
@@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor):
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967',
|
||||
@@ -38,7 +36,7 @@ class BigflixIE(InfoExtractor):
|
||||
webpage, 'title')
|
||||
|
||||
def decode_url(quoted_b64_url):
|
||||
return compat_b64decode(compat_urllib_parse_unquote(
|
||||
return base64.b64decode(urllib.parse.unquote(
|
||||
quoted_b64_url)).decode('utf-8')
|
||||
|
||||
formats = []
|
||||
@@ -47,7 +45,7 @@ class BigflixIE(InfoExtractor):
|
||||
video_url = decode_url(encoded_url)
|
||||
f = {
|
||||
'url': video_url,
|
||||
'format_id': '%sp' % height,
|
||||
'format_id': f'{height}p',
|
||||
'height': int(height),
|
||||
}
|
||||
if video_url.startswith('rtmp'):
|
||||
@@ -69,5 +67,5 @@ class BigflixIE(InfoExtractor):
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'formats': formats
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ class BigoIE(InfoExtractor):
|
||||
raise ExtractorError('Received invalid JSON data')
|
||||
if info_raw.get('code'):
|
||||
raise ExtractorError(
|
||||
'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True)
|
||||
'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True)
|
||||
info = info_raw.get('data') or {}
|
||||
|
||||
if not info.get('alive'):
|
||||
|
||||
@@ -20,7 +20,7 @@ class BildIE(InfoExtractor):
|
||||
'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 196,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'note': 'static MP4 and HLS',
|
||||
'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html',
|
||||
@@ -32,7 +32,7 @@ class BildIE(InfoExtractor):
|
||||
'description': 'md5:709b543c24dc31bbbffee73bccda34ad',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 69,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
||||
@@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
|
||||
return formats
|
||||
|
||||
def _download_playinfo(self, video_id, cid):
|
||||
def _download_playinfo(self, video_id, cid, headers=None):
|
||||
return self._download_json(
|
||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||
note=f'Downloading video formats for cid {cid}')['data']
|
||||
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
||||
|
||||
def json2srt(self, json_data):
|
||||
srt_data = ''
|
||||
@@ -112,7 +112,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
'danmaku': [{
|
||||
'ext': 'xml',
|
||||
'url': f'https://comment.bilibili.com/{cid}.xml',
|
||||
}]
|
||||
}],
|
||||
}
|
||||
|
||||
subtitle_info = traverse_obj(self._download_json(
|
||||
@@ -126,7 +126,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
for s in subs_list:
|
||||
subtitles.setdefault(s['lan'], []).append({
|
||||
'ext': 'srt',
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
|
||||
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
|
||||
})
|
||||
return subtitles
|
||||
|
||||
@@ -215,7 +215,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||
yield {
|
||||
**metainfo,
|
||||
'id': f'{video_id}_{cid}',
|
||||
'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
|
||||
'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
|
||||
'formats': self.extract_formats(play_info),
|
||||
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
@@ -269,7 +269,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'url': 'https://www.bilibili.com/video/BV1bK411W797',
|
||||
'info_dict': {
|
||||
'id': 'BV1bK411W797',
|
||||
'title': '物语中的人物是如何吐槽自己的OP的'
|
||||
'title': '物语中的人物是如何吐槽自己的OP的',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
'playlist': [{
|
||||
@@ -288,8 +288,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
}
|
||||
}]
|
||||
},
|
||||
}],
|
||||
}, {
|
||||
'note': 'Specific page of Anthology',
|
||||
'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
|
||||
@@ -308,7 +308,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
|
||||
'duration': 90.314,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'note': 'video has subtitles',
|
||||
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
|
||||
@@ -327,7 +327,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'subtitles': 'count:2'
|
||||
'subtitles': 'count:2',
|
||||
},
|
||||
'params': {'listsubtitles': True},
|
||||
}, {
|
||||
@@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
headers = self.geo_verification_headers()
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
|
||||
if not self._match_valid_url(urlh.url):
|
||||
return self.url_result(urlh.url)
|
||||
|
||||
@@ -531,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
self._download_json(
|
||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||
note='Extracting videos in anthology'),
|
||||
note='Extracting videos in anthology', headers=headers),
|
||||
'data', expected_type=list) or []
|
||||
is_anthology = len(page_list_json) > 1
|
||||
|
||||
@@ -552,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
|
||||
festival_info = {}
|
||||
if is_festival:
|
||||
play_info = self._download_playinfo(video_id, cid)
|
||||
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||
|
||||
festival_info = traverse_obj(initial_state, {
|
||||
'uploader': ('videoInfo', 'upName'),
|
||||
@@ -585,10 +586,9 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
|
||||
if is_interactive:
|
||||
return self.playlist_result(
|
||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
|
||||
'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
})
|
||||
self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
|
||||
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
|
||||
__post_extractor=self.extract_comments(aid))
|
||||
else:
|
||||
return {
|
||||
**metainfo,
|
||||
@@ -639,7 +639,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'duration': 1425.256,
|
||||
'timestamp': 1554566400,
|
||||
'upload_date': '20190406',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
'skip': 'Geo-restricted',
|
||||
}, {
|
||||
@@ -660,20 +660,21 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'duration': 1922.129,
|
||||
'timestamp': 1602853860,
|
||||
'upload_date': '20201016',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
episode_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, episode_id)
|
||||
headers = self.geo_verification_headers()
|
||||
webpage = self._download_webpage(url, episode_id, headers=headers)
|
||||
|
||||
if '您所在的地区无法观看本片' in webpage:
|
||||
raise GeoRestrictedError('This video is restricted')
|
||||
elif '正在观看预览,大会员免费看全片' in webpage:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
headers = {'Referer': url, **self.geo_verification_headers()}
|
||||
headers['Referer'] = url
|
||||
play_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||
@@ -724,7 +725,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': headers,
|
||||
'http_headers': {'Referer': url},
|
||||
}
|
||||
|
||||
|
||||
@@ -762,7 +763,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
|
||||
'duration': 1525.777,
|
||||
'timestamp': 1425074413,
|
||||
'upload_date': '20150227',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
@@ -792,7 +793,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
'title': '鬼灭之刃',
|
||||
'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
|
||||
},
|
||||
'playlist_mincount': 26
|
||||
'playlist_mincount': 26,
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ss2251',
|
||||
'info_dict': {
|
||||
@@ -817,7 +818,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
|
||||
'duration': 1436.992,
|
||||
'timestamp': 1343185080,
|
||||
'upload_date': '20120725',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}],
|
||||
}]
|
||||
@@ -904,7 +905,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE):
|
||||
'upload_date': '20230924',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -937,7 +938,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
|
||||
'upload_date': '20230924',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
'view_count': int,
|
||||
}
|
||||
},
|
||||
}],
|
||||
'params': {'playlist_items': '1'},
|
||||
}, {
|
||||
@@ -1010,7 +1011,7 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
for position in (
|
||||
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
|
||||
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
|
||||
57, 62, 11, 36, 20, 34, 44, 52
|
||||
57, 62, 11, 36, 20, 34, 44, 52,
|
||||
):
|
||||
char_at_position = try_call(lambda: session_key[position])
|
||||
if char_at_position:
|
||||
@@ -1043,15 +1044,17 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||
|
||||
try:
|
||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query)
|
||||
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
||||
headers={'referer': url})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||
raise
|
||||
if response['code'] == -401:
|
||||
if response['code'] in (-352, -401):
|
||||
raise ExtractorError(
|
||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
||||
f'Request is blocked by server ({-response["code"]}), '
|
||||
'please add cookies, wait and try later.', expected=True)
|
||||
return response['data']
|
||||
|
||||
def get_metadata(page_data):
|
||||
@@ -1159,7 +1162,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
|
||||
'uploader_id': ('meta', 'mid', {str_or_none}),
|
||||
'timestamp': ('meta', 'ptime', {int_or_none}),
|
||||
'thumbnail': ('meta', 'cover', {url_or_none}),
|
||||
})
|
||||
}),
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
@@ -1191,7 +1194,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
mid, sid = self._match_valid_url(url).group('mid', 'sid')
|
||||
playlist_id = f'{mid}_{sid}'
|
||||
playlist_meta = traverse_obj(self._download_json(
|
||||
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
|
||||
f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
|
||||
), {
|
||||
'title': ('data', 'meta', 'name', {str}),
|
||||
'description': ('data', 'meta', 'description', {str}),
|
||||
@@ -1213,7 +1216,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
|
||||
'page_count': math.ceil(entry_count / page_size),
|
||||
'page_size': page_size,
|
||||
'uploader': self._get_uploader(mid, playlist_id),
|
||||
**playlist_meta
|
||||
**playlist_meta,
|
||||
}
|
||||
|
||||
def get_entries(page_data):
|
||||
@@ -1237,7 +1240,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
|
||||
'upload_date': '20201109',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
},
|
||||
@@ -1341,7 +1344,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'uploader_id': '84912',
|
||||
'timestamp': 1604905176,
|
||||
'upload_date': '20201109',
|
||||
'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
|
||||
'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
|
||||
},
|
||||
'playlist_mincount': 22,
|
||||
}, {
|
||||
@@ -1367,7 +1370,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
for page_num in itertools.count(1):
|
||||
page_data = self._download_json(
|
||||
'https://api.bilibili.com/x/v2/medialist/resource/list',
|
||||
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
|
||||
list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
|
||||
)['data']
|
||||
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
|
||||
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
|
||||
@@ -1403,7 +1406,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
|
||||
'tid': ('tid', {int_or_none}),
|
||||
'sort_field': ('sortFiled', {int_or_none}),
|
||||
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
|
||||
})
|
||||
}),
|
||||
}
|
||||
metadata = {
|
||||
'id': f'{query["type"]}_{query["biz_id"]}',
|
||||
@@ -1426,26 +1429,26 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
||||
'info_dict': {
|
||||
'id': 'kichiku: mad',
|
||||
'title': 'kichiku: mad'
|
||||
'title': 'kichiku: mad',
|
||||
},
|
||||
'playlist_mincount': 45,
|
||||
'params': {
|
||||
'playlistend': 45
|
||||
}
|
||||
'playlistend': 45,
|
||||
},
|
||||
}]
|
||||
|
||||
def _fetch_page(self, api_url, num_pages, query, page_num):
|
||||
parsed_json = self._download_json(
|
||||
api_url, query, query={'Search_key': query, 'pn': page_num},
|
||||
note='Extracting results from page %s of %s' % (page_num, num_pages))
|
||||
note=f'Extracting results from page {page_num} of {num_pages}')
|
||||
|
||||
video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
|
||||
if not video_list:
|
||||
raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
|
||||
raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
|
||||
|
||||
for video in video_list:
|
||||
yield self.url_result(
|
||||
'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
|
||||
'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
|
||||
|
||||
def _entries(self, category, subcategory, query):
|
||||
# map of categories : subcategories : RIDs
|
||||
@@ -1455,7 +1458,7 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||
'manual_vocaloid': 126,
|
||||
'guide': 22,
|
||||
'theatre': 216,
|
||||
'course': 127
|
||||
'course': 127,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1481,7 +1484,7 @@ class BilibiliCategoryIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
|
||||
query = '%s: %s' % (category, subcategory)
|
||||
query = f'{category}: {subcategory}'
|
||||
|
||||
return self.playlist_result(self._entries(category, subcategory, query), query, query)
|
||||
|
||||
@@ -1584,7 +1587,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
formats = [{
|
||||
'url': play_data['cdns'][0],
|
||||
'filesize': int_or_none(play_data.get('size')),
|
||||
'vcodec': 'none'
|
||||
'vcodec': 'none',
|
||||
}]
|
||||
|
||||
for a_format in formats:
|
||||
@@ -1602,7 +1605,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
|
||||
subtitles = {
|
||||
'origin': [{
|
||||
'url': lyric,
|
||||
}]
|
||||
}],
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -1670,7 +1673,7 @@ class BiliBiliPlayerIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
return self.url_result(
|
||||
'http://www.bilibili.tv/video/av%s/' % video_id,
|
||||
f'http://www.bilibili.tv/video/av{video_id}/',
|
||||
ie=BiliBiliIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
@@ -1698,11 +1701,10 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
return json.get('data')
|
||||
|
||||
def json2srt(self, json):
|
||||
data = '\n\n'.join(
|
||||
return '\n\n'.join(
|
||||
f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
|
||||
for i, line in enumerate(traverse_obj(json, (
|
||||
'body', lambda _, l: l['content'] and l['from'] and l['to']))))
|
||||
return data
|
||||
|
||||
def _get_subtitles(self, *, ep_id=None, aid=None):
|
||||
sub_json = self._call_api(
|
||||
@@ -1804,14 +1806,14 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||
note='Downloading login key', errnote='Unable to download login key')['data']
|
||||
|
||||
public_key = Cryptodome.RSA.importKey(key_data['key'])
|
||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
|
||||
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
|
||||
login_post = self._download_json(
|
||||
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
|
||||
'username': username,
|
||||
'password': base64.b64encode(password_hash).decode('ascii'),
|
||||
'keep_me': 'true',
|
||||
's_locale': 'en_US',
|
||||
'isTrusted': 'true'
|
||||
'isTrusted': 'true',
|
||||
}), note='Logging in', errnote='Unable to log in')
|
||||
if login_post.get('code'):
|
||||
if login_post.get('message'):
|
||||
@@ -1838,17 +1840,17 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'chapters': [{
|
||||
'start_time': 0,
|
||||
'end_time': 76.242,
|
||||
'title': '<Untitled Chapter 1>'
|
||||
'title': '<Untitled Chapter 1>',
|
||||
}, {
|
||||
'start_time': 76.242,
|
||||
'end_time': 161.161,
|
||||
'title': 'Intro'
|
||||
'title': 'Intro',
|
||||
}, {
|
||||
'start_time': 1325.742,
|
||||
'end_time': 1403.903,
|
||||
'title': 'Outro'
|
||||
'title': 'Outro',
|
||||
}],
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Non-Bstation page
|
||||
'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
|
||||
@@ -1865,17 +1867,17 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'chapters': [{
|
||||
'start_time': 0,
|
||||
'end_time': 88.0,
|
||||
'title': '<Untitled Chapter 1>'
|
||||
'title': '<Untitled Chapter 1>',
|
||||
}, {
|
||||
'start_time': 88.0,
|
||||
'end_time': 156.0,
|
||||
'title': 'Intro'
|
||||
'title': 'Intro',
|
||||
}, {
|
||||
'start_time': 1173.0,
|
||||
'end_time': 1259.535,
|
||||
'title': 'Outro'
|
||||
'title': 'Outro',
|
||||
}],
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# Subtitle with empty content
|
||||
'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
|
||||
@@ -1886,7 +1888,7 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
|
||||
'episode_number': 140,
|
||||
},
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
|
||||
'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
|
||||
}, {
|
||||
# episode comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||
@@ -1904,20 +1906,20 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'chapters': [{
|
||||
'start_time': 0,
|
||||
'end_time': 61.0,
|
||||
'title': '<Untitled Chapter 1>'
|
||||
'title': '<Untitled Chapter 1>',
|
||||
}, {
|
||||
'start_time': 61.0,
|
||||
'end_time': 134.0,
|
||||
'title': 'Intro'
|
||||
'title': 'Intro',
|
||||
}, {
|
||||
'start_time': 1290.0,
|
||||
'end_time': 1379.0,
|
||||
'title': 'Outro'
|
||||
'title': 'Outro',
|
||||
}],
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
}
|
||||
'getcomments': True,
|
||||
},
|
||||
}, {
|
||||
# user generated content comment extraction
|
||||
'url': 'https://www.bilibili.tv/en/video/2045730385',
|
||||
@@ -1932,8 +1934,8 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
|
||||
},
|
||||
'params': {
|
||||
'getcomments': True
|
||||
}
|
||||
'getcomments': True,
|
||||
},
|
||||
}, {
|
||||
# episode id without intro and outro
|
||||
'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
|
||||
@@ -1988,7 +1990,7 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
# Non-Bstation layout, read through episode list
|
||||
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
|
||||
video_data = traverse_obj(season_json, (
|
||||
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
|
||||
'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
|
||||
), expected_type=dict, get_all=False)
|
||||
|
||||
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
|
||||
@@ -2020,7 +2022,7 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
'id': replies.get('rpid'),
|
||||
'like_count': int_or_none(replies.get('like_count')),
|
||||
'parent': replies.get('parent'),
|
||||
'timestamp': unified_timestamp(replies.get('ctime_text'))
|
||||
'timestamp': unified_timestamp(replies.get('ctime_text')),
|
||||
}
|
||||
|
||||
if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
|
||||
@@ -2073,11 +2075,11 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||
chapters = [{
|
||||
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
|
||||
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
|
||||
'title': 'Intro'
|
||||
'title': 'Intro',
|
||||
}, {
|
||||
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
|
||||
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
|
||||
'title': 'Outro'
|
||||
'title': 'Outro',
|
||||
}]
|
||||
|
||||
return {
|
||||
@@ -2133,7 +2135,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||
episode_id = str(episode['episode_id'])
|
||||
yield self.url_result(smuggle_url(
|
||||
BiliIntlIE._make_url(episode_id, series_id),
|
||||
self._parse_video_metadata(episode)
|
||||
self._parse_video_metadata(episode),
|
||||
), BiliIntlIE, episode_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
@@ -2152,19 +2154,19 @@ class BiliLiveIE(InfoExtractor):
|
||||
'url': 'https://live.bilibili.com/196',
|
||||
'info_dict': {
|
||||
'id': '33989',
|
||||
'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
|
||||
'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
|
||||
'ext': 'flv',
|
||||
'title': "太空狼人杀联动,不被爆杀就算赢",
|
||||
'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
|
||||
'title': '太空狼人杀联动,不被爆杀就算赢',
|
||||
'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
|
||||
'timestamp': 1650802769,
|
||||
},
|
||||
'skip': 'not live'
|
||||
'skip': 'not live',
|
||||
}, {
|
||||
'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://live.bilibili.com/blanc/196',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_FORMATS = {
|
||||
@@ -2205,7 +2207,7 @@ class BiliLiveIE(InfoExtractor):
|
||||
raise ExtractorError('Streamer is not live', expected=True)
|
||||
|
||||
formats = []
|
||||
for qn in self._FORMATS.keys():
|
||||
for qn in self._FORMATS:
|
||||
stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
|
||||
'room_id': room_id,
|
||||
'qn': qn,
|
||||
|
||||
@@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor):
|
||||
'upload_date': '20170103',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
},
|
||||
}, {
|
||||
# test case: video with different channel and uploader
|
||||
@@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor):
|
||||
'upload_date': '20231106',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
|
||||
'channel': 'Full Measure with Sharyl Attkisson',
|
||||
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/'
|
||||
'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
|
||||
},
|
||||
}, {
|
||||
# video not downloadable in browser, but we can recover it
|
||||
@@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor):
|
||||
'upload_date': '20181113',
|
||||
'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
|
||||
'channel': 'BitChute',
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/'
|
||||
'channel_url': 'https://www.bitchute.com/channel/bitchute/',
|
||||
},
|
||||
'params': {'check_formats': None},
|
||||
}, {
|
||||
@@ -115,7 +115,7 @@ class BitChuteIE(InfoExtractor):
|
||||
continue
|
||||
return {
|
||||
'url': url,
|
||||
'filesize': int_or_none(response.headers.get('Content-Length'))
|
||||
'filesize': int_or_none(response.headers.get('Content-Length')),
|
||||
}
|
||||
|
||||
def _raise_if_restricted(self, webpage):
|
||||
@@ -196,7 +196,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'duration': 16,
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
},
|
||||
],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
@@ -209,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'id': 'wV9Imujxasw9',
|
||||
'title': 'Bruce MacDonald and "The Light of Darkness"',
|
||||
'description': 'md5:747724ef404eebdfc04277714f81863e',
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
|
||||
@@ -224,7 +224,7 @@ class BitChuteChannelIE(InfoExtractor):
|
||||
'container': 'playlist-video',
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
}
|
||||
},
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor):
|
||||
region = mobj.group('region')
|
||||
video_id = mobj.group('id')
|
||||
info = self._download_json(
|
||||
'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id)
|
||||
f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id)
|
||||
duration = info.get('duration')
|
||||
title = info['name']
|
||||
upload_date = info.get('created')
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from .common import InfoExtractor
|
||||
from .amp import AMPIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
article_id = self._match_id(url)
|
||||
|
||||
article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article']
|
||||
article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article']
|
||||
|
||||
thumbnails = []
|
||||
primary_photo = article_data.get('primaryPhoto')
|
||||
@@ -71,11 +71,11 @@ class BleacherReportIE(InfoExtractor):
|
||||
if video:
|
||||
video_type = video['type']
|
||||
if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
|
||||
info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id'])
|
||||
elif video_type == 'youtube.com':
|
||||
info['url'] = video['id']
|
||||
elif video_type == 'vine.co':
|
||||
info['url'] = 'https://vine.co/v/%s' % video['id']
|
||||
info['url'] = 'https://vine.co/v/{}'.format(video['id'])
|
||||
else:
|
||||
info['url'] = video_type + video['id']
|
||||
return info
|
||||
@@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE):
|
||||
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Unable to download f4m manifest'
|
||||
]
|
||||
'Unable to download f4m manifest',
|
||||
],
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
|
||||
info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai')
|
||||
info['id'] = video_id
|
||||
return info
|
||||
|
||||
@@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor):
|
||||
'uploader_id': '5fb81e51aa66ae000c395478',
|
||||
'ext': 'mp3',
|
||||
'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'],
|
||||
}
|
||||
},
|
||||
}, {
|
||||
'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f',
|
||||
'info_dict': {
|
||||
@@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor):
|
||||
'uploader': '179617322678353920',
|
||||
'uploader_id': '5ba99cf71386730004552c42',
|
||||
'ext': 'mp3',
|
||||
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee']
|
||||
}
|
||||
'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'],
|
||||
},
|
||||
}]
|
||||
|
||||
_GRAPHQL_OPERATIONNAME = "webBitePageGetBite"
|
||||
_GRAPHQL_OPERATIONNAME = 'webBitePageGetBite'
|
||||
_GRAPHQL_QUERY = (
|
||||
'''query webBitePageGetBite($_id: MongoID!) {
|
||||
web {
|
||||
@@ -141,27 +141,26 @@ class BlerpIE(InfoExtractor):
|
||||
'operationName': self._GRAPHQL_OPERATIONNAME,
|
||||
'query': self._GRAPHQL_QUERY,
|
||||
'variables': {
|
||||
'_id': audio_id
|
||||
}
|
||||
'_id': audio_id,
|
||||
},
|
||||
}
|
||||
|
||||
headers = {
|
||||
'Content-Type': 'application/json'
|
||||
'Content-Type': 'application/json',
|
||||
}
|
||||
|
||||
json_result = self._download_json('https://api.blerp.com/graphql',
|
||||
audio_id, data=json.dumps(data).encode('utf-8'), headers=headers)
|
||||
json_result = self._download_json(
|
||||
'https://api.blerp.com/graphql', audio_id,
|
||||
data=json.dumps(data).encode(), headers=headers)
|
||||
|
||||
bite_json = json_result['data']['web']['biteById']
|
||||
|
||||
info_dict = {
|
||||
return {
|
||||
'id': bite_json['_id'],
|
||||
'url': bite_json['audio']['mp3']['url'],
|
||||
'title': bite_json['title'],
|
||||
'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none),
|
||||
'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none),
|
||||
'ext': 'mp3',
|
||||
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None)
|
||||
'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None),
|
||||
}
|
||||
|
||||
return info_dict
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
mimetype2ext,
|
||||
parse_duration,
|
||||
@@ -5,7 +6,6 @@ from ..utils import (
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
)
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class BloggerIE(InfoExtractor):
|
||||
@@ -21,14 +21,14 @@ class BloggerIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'thumbnail': r're:^https?://.*',
|
||||
'duration': 76.068,
|
||||
}
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
token_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, token_id)
|
||||
data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data')
|
||||
data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id)
|
||||
data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id)
|
||||
streams = data['streams']
|
||||
formats = [{
|
||||
'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))),
|
||||
|
||||
@@ -55,7 +55,7 @@ class BloombergIE(InfoExtractor):
|
||||
title = re.sub(': Video$', '', self._og_search_title(webpage))
|
||||
|
||||
embed_info = self._download_json(
|
||||
'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
|
||||
f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id)
|
||||
formats = []
|
||||
for stream in embed_info['streams']:
|
||||
stream_url = stream.get('url')
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user