1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-20 23:18:57 +00:00

Merge remote-tracking branch 'origin' into yt-live-from-start-range

This commit is contained in:
Elyse
2023-06-03 14:39:32 -06:00
98 changed files with 7110 additions and 3283 deletions

View File

@@ -13,6 +13,7 @@ import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
@@ -20,10 +21,9 @@ import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import Formatter, ascii_letters
from .cache import Cache
from .compat import urllib # isort: split
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .downloader import (
@@ -129,7 +129,6 @@ from .utils import (
parse_filesize,
preferredencoding,
prepend_extension,
register_socks_protocols,
remove_terminal_sequences,
render_table,
replace_extension,
@@ -195,6 +194,7 @@ class YoutubeDL:
ap_username: Multiple-system operator account username.
ap_password: Multiple-system operator account password.
usenetrc: Use netrc for authentication instead.
netrc_location: Location of the netrc file. Defaults to ~/.netrc.
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
@@ -285,7 +285,7 @@ class YoutubeDL:
subtitles. The language can be prefixed with a "-" to
exclude it from the requested languages, e.g. ['all', '-live_chat']
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
daterange: A utils.DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
cachedir: Location of the cache files in the filesystem.
False to disable filesystem cache.
@@ -334,13 +334,13 @@ class YoutubeDL:
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
extract_flat: Whether to resolve and process url_results further
* False: Always process (default)
* False: Always process. Default for API
* True: Never process
* 'in_playlist': Do not process inside playlist/multi_video
* 'discard': Always process, but don't return the result
from inside playlist/multi_video
* 'discard_in_playlist': Same as "discard", but only for
playlists (not multi_video)
playlists (not multi_video). Default for CLI
wait_for_video: If given, wait for scheduled streams to become available.
The value should be a tuple containing the range
(min_secs, max_secs) to wait between retries
@@ -420,7 +420,12 @@ class YoutubeDL:
- Raise utils.DownloadCancelled(msg) to abort remaining
downloads when a video is rejected.
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
color: A Dictionary with output stream names as keys
and their respective color policy as values.
Can also just be a single color policy,
in which case it applies to all outputs.
Valid stream names are 'stdout' and 'stderr'.
Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
HTTP header
geo_bypass_country:
@@ -477,7 +482,7 @@ class YoutubeDL:
can also be used
The following options are used by the extractors:
extractor_retries: Number of times to retry for known errors
extractor_retries: Number of times to retry for known errors (default: 3)
dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
hls_split_discontinuity: Split HLS playlists to different formats at
discontinuities such as ad breaks (default: False)
@@ -542,6 +547,7 @@ class YoutubeDL:
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about HLS. (only for youtube)
no_color: Same as `color='no_color'`
"""
_NUMERIC_FIELDS = {
@@ -608,9 +614,24 @@ class YoutubeDL:
except Exception as e:
self.write_debug(f'Failed to enable VT mode: {e}')
if self.params.get('no_color'):
if self.params.get('color') is not None:
self.report_warning('Overwriting params from "color" with "no_color"')
self.params['color'] = 'no_color'
term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
def process_color_policy(stream):
stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
if policy in ('auto', None):
return term_allow_color and supports_terminal_sequences(stream)
assert policy in ('always', 'never', 'no_color')
return {'always': True, 'never': False}.get(policy, policy)
self._allow_colors = Namespace(**{
type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
for type_, stream in self._out_files.items_ if type_ != 'console'
name: process_color_policy(stream)
for name, stream in self._out_files.items_ if name != 'console'
})
# The code is left like this to be reused for future deprecations
@@ -743,7 +764,6 @@ class YoutubeDL:
when=when)
self._setup_opener()
register_socks_protocols()
def preload_download_archive(fn):
"""Preload the archive, if any is specified"""
@@ -980,7 +1000,7 @@ class YoutubeDL:
text = text.encode(encoding, 'ignore').decode(encoding)
if fallback is not None and text != original_text:
text = fallback
return format_text(text, f) if allow_colors else text if fallback is None else fallback
return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
def _format_out(self, *args, **kwargs):
return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
@@ -1083,7 +1103,7 @@ class YoutubeDL:
# correspondingly that is not what we want since we need to keep
# '%%' intact for template dict substitution step. Working around
# with boundary-alike separator hack.
sep = ''.join(random.choices(ascii_letters, k=32))
sep = ''.join(random.choices(string.ascii_letters, k=32))
outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
# outtmpl should be expand_path'ed before template dict substitution
@@ -1242,7 +1262,7 @@ class YoutubeDL:
return list(obj)
return repr(obj)
class _ReplacementFormatter(Formatter):
class _ReplacementFormatter(string.Formatter):
def get_field(self, field_name, args, kwargs):
if field_name.isdigit():
return args[0], -1
@@ -2072,86 +2092,86 @@ class YoutubeDL:
def _parse_filter(tokens):
filter_parts = []
for type, string, start, _, _ in tokens:
if type == tokenize.OP and string == ']':
for type, string_, start, _, _ in tokens:
if type == tokenize.OP and string_ == ']':
return ''.join(filter_parts)
else:
filter_parts.append(string)
filter_parts.append(string_)
def _remove_unused_ops(tokens):
# Remove operators that we don't use and join them with the surrounding strings.
# E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
ALLOWED_OPS = ('/', '+', ',', '(', ')')
last_string, last_start, last_end, last_line = None, None, None, None
for type, string, start, end, line in tokens:
if type == tokenize.OP and string == '[':
for type, string_, start, end, line in tokens:
if type == tokenize.OP and string_ == '[':
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
last_string = None
yield type, string, start, end, line
yield type, string_, start, end, line
# everything inside brackets will be handled by _parse_filter
for type, string, start, end, line in tokens:
yield type, string, start, end, line
if type == tokenize.OP and string == ']':
for type, string_, start, end, line in tokens:
yield type, string_, start, end, line
if type == tokenize.OP and string_ == ']':
break
elif type == tokenize.OP and string in ALLOWED_OPS:
elif type == tokenize.OP and string_ in ALLOWED_OPS:
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
last_string = None
yield type, string, start, end, line
yield type, string_, start, end, line
elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
if not last_string:
last_string = string
last_string = string_
last_start = start
last_end = end
else:
last_string += string
last_string += string_
if last_string:
yield tokenize.NAME, last_string, last_start, last_end, last_line
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
selectors = []
current_selector = None
for type, string, start, _, _ in tokens:
for type, string_, start, _, _ in tokens:
# ENCODING is only defined in python 3.x
if type == getattr(tokenize, 'ENCODING', None):
continue
elif type in [tokenize.NAME, tokenize.NUMBER]:
current_selector = FormatSelector(SINGLE, string, [])
current_selector = FormatSelector(SINGLE, string_, [])
elif type == tokenize.OP:
if string == ')':
if string_ == ')':
if not inside_group:
# ')' will be handled by the parentheses group
tokens.restore_last_token()
break
elif inside_merge and string in ['/', ',']:
elif inside_merge and string_ in ['/', ',']:
tokens.restore_last_token()
break
elif inside_choice and string == ',':
elif inside_choice and string_ == ',':
tokens.restore_last_token()
break
elif string == ',':
elif string_ == ',':
if not current_selector:
raise syntax_error('"," must follow a format selector', start)
selectors.append(current_selector)
current_selector = None
elif string == '/':
elif string_ == '/':
if not current_selector:
raise syntax_error('"/" must follow a format selector', start)
first_choice = current_selector
second_choice = _parse_format_selection(tokens, inside_choice=True)
current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
elif string == '[':
elif string_ == '[':
if not current_selector:
current_selector = FormatSelector(SINGLE, 'best', [])
format_filter = _parse_filter(tokens)
current_selector.filters.append(format_filter)
elif string == '(':
elif string_ == '(':
if current_selector:
raise syntax_error('Unexpected "("', start)
group = _parse_format_selection(tokens, inside_group=True)
current_selector = FormatSelector(GROUP, group, [])
elif string == '+':
elif string_ == '+':
if not current_selector:
raise syntax_error('Unexpected "+"', start)
selector_1 = current_selector
@@ -2160,7 +2180,7 @@ class YoutubeDL:
raise syntax_error('Expected a selector', start)
current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
else:
raise syntax_error(f'Operator not recognized: "{string}"', start)
raise syntax_error(f'Operator not recognized: "{string_}"', start)
elif type == tokenize.ENDMARKER:
break
if current_selector:
@@ -2386,8 +2406,10 @@ class YoutubeDL:
def _calc_headers(self, info_dict):
res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
cookies = self._calc_cookies(info_dict['url'])
if 'Youtubedl-No-Compression' in res: # deprecated
res.pop('Youtubedl-No-Compression', None)
res['Accept-Encoding'] = 'identity'
cookies = self.cookiejar.get_cookie_header(info_dict['url'])
if cookies:
res['Cookie'] = cookies
@@ -2399,9 +2421,8 @@ class YoutubeDL:
return res
def _calc_cookies(self, url):
pr = sanitized_Request(url)
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
return self.cookiejar.get_cookie_header(url)
def _sort_thumbnails(self, thumbnails):
thumbnails.sort(key=lambda t: (
@@ -2728,21 +2749,22 @@ class YoutubeDL:
return info_dict
format_selector = self.format_selector
if format_selector is None:
req_format = self._default_format_spec(info_dict, download=download)
self.write_debug('Default format spec: %s' % req_format)
format_selector = self.build_format_selector(req_format)
while True:
if interactive_format_selection:
req_format = input(
self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
+ '(Press ENTER for default, or Ctrl+C to quit)'
+ self._format_screen(': ', self.Styles.EMPHASIS))
try:
format_selector = self.build_format_selector(req_format)
format_selector = self.build_format_selector(req_format) if req_format else None
except SyntaxError as err:
self.report_error(err, tb=False, is_error=False)
continue
if format_selector is None:
req_format = self._default_format_spec(info_dict, download=download)
self.write_debug(f'Default format spec: {req_format}')
format_selector = self.build_format_selector(req_format)
formats_to_download = list(format_selector({
'formats': formats,
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
@@ -2902,7 +2924,7 @@ class YoutubeDL:
fmt = '%({})s'
if tmpl.startswith('{'):
tmpl = f'.{tmpl}'
tmpl, fmt = f'.{tmpl}', '%({})j'
if tmpl.endswith('='):
tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
@@ -2941,7 +2963,8 @@ class YoutubeDL:
print_field('url', 'urls')
print_field('thumbnail', optional=True)
print_field('description', optional=True)
print_field('filename', optional=True)
if filename:
print_field('filename')
if self.params.get('forceduration') and info_copy.get('duration') is not None:
self.to_stdout(formatSeconds(info_copy['duration']))
print_field('format')
@@ -3422,8 +3445,8 @@ class YoutubeDL:
if remove_private_keys:
reject = lambda k, v: v is None or k.startswith('__') or k in {
'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
'_format_sort_fields',
'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
'playlist_autonumber', '_format_sort_fields',
}
else:
reject = lambda k, v: False
@@ -3492,7 +3515,7 @@ class YoutubeDL:
*files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
return infodict
def run_all_pps(self, key, info, *, additional_pps=None, fatal=True):
def run_all_pps(self, key, info, *, additional_pps=None):
if key != 'video':
self._forceprint(key, info)
for pp in (additional_pps or []) + self._pps[key]:
@@ -3771,9 +3794,14 @@ class YoutubeDL:
def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
additional_info = []
if os.environ.get('TERM', '').lower() == 'dumb':
additional_info.append('dumb')
if not supports_terminal_sequences(stream):
from .utils import WINDOWS_VT_MODE # Must be imported locally
ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
if additional_info:
ret = f'{ret} ({",".join(additional_info)})'
return ret
encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
@@ -3998,7 +4026,7 @@ class YoutubeDL:
# that way it will silently go on when used with unsupporting IE
return ret
elif not subtitles:
self.to_screen('[info] There\'s no subtitles for the requested languages')
self.to_screen('[info] There are no subtitles for the requested languages')
return ret
sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
if not sub_filename_base:
@@ -4052,7 +4080,7 @@ class YoutubeDL:
if write_all or self.params.get('writethumbnail', False):
thumbnails = info_dict.get('thumbnails') or []
if not thumbnails:
self.to_screen(f'[info] There\'s no {label} thumbnails to download')
self.to_screen(f'[info] There are no {label} thumbnails to download')
return ret
multiple = write_all and len(thumbnails) > 1