Merge remote-tracking branch 'origin' into yt-live-from-start-range

2025-12-20 23:18:57 +00:00 · 2023-06-03 14:39:32 -06:00
parent 8ee942a9c8 2fb35f6004
commit 1f7974690e
98 changed files with 7110 additions and 3283 deletions
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -13,6 +13,7 @@ import os
 import random
 import re
 import shutil
+import string
 import subprocess
 import sys
 import tempfile
@@ -20,10 +21,9 @@ import time
 import tokenize
 import traceback
 import unicodedata
-import urllib.request
-from string import Formatter, ascii_letters

 from .cache import Cache
+from .compat import urllib  # isort: split
 from .compat import compat_os_name, compat_shlex_quote
 from .cookies import load_cookies
 from .downloader import (
@@ -129,7 +129,6 @@ from .utils import (
    parse_filesize,
    preferredencoding,
    prepend_extension,
-    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
@@ -195,6 +194,7 @@ class YoutubeDL:
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
+    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
@@ -285,7 +285,7 @@ class YoutubeDL:
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
-    daterange:         A DateRange object, download only if the upload_date is in the range.
+    daterange:         A utils.DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
@@ -334,13 +334,13 @@ class YoutubeDL:
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
-                       * False:     Always process (default)
+                       * False:     Always process. Default for API
                       * True:      Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                                    from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
-                                    playlists (not multi_video)
+                                    playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
@@ -420,7 +420,12 @@ class YoutubeDL:
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example for this.
-    no_color:          Do not emit color codes in output.
+    color:             A Dictionary with output stream names as keys
+                       and their respective color policy as values.
+                       Can also just be a single color policy,
+                       in which case it applies to all outputs.
+                       Valid stream names are 'stdout' and 'stderr'.
+                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
@@ -477,7 +482,7 @@ class YoutubeDL:
                       can also be used

    The following options are used by the extractors:
-    extractor_retries: Number of times to retry for known errors
+    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
@@ -542,6 +547,7 @@ class YoutubeDL:
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
+    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
@@ -608,9 +614,24 @@ class YoutubeDL:
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

+        if self.params.get('no_color'):
+            if self.params.get('color') is not None:
+                self.report_warning('Overwriting params from "color" with "no_color"')
+            self.params['color'] = 'no_color'
+
+        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
+
+        def process_color_policy(stream):
+            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
+            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
+            if policy in ('auto', None):
+                return term_allow_color and supports_terminal_sequences(stream)
+            assert policy in ('always', 'never', 'no_color')
+            return {'always': True, 'never': False}.get(policy, policy)
+
        self._allow_colors = Namespace(**{
-            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
-            for type_, stream in self._out_files.items_ if type_ != 'console'
+            name: process_color_policy(stream)
+            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
@@ -743,7 +764,6 @@ class YoutubeDL:
                when=when)

        self._setup_opener()
-        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
@@ -980,7 +1000,7 @@ class YoutubeDL:
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
-        return format_text(text, f) if allow_colors else text if fallback is None else fallback
+        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
@@ -1083,7 +1103,7 @@ class YoutubeDL:
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
-        sep = ''.join(random.choices(ascii_letters, k=32))
+        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
@@ -1242,7 +1262,7 @@ class YoutubeDL:
                return list(obj)
            return repr(obj)

-        class _ReplacementFormatter(Formatter):
+        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
@@ -2072,86 +2092,86 @@ class YoutubeDL:

        def _parse_filter(tokens):
            filter_parts = []
-            for type, string, start, _, _ in tokens:
-                if type == tokenize.OP and string == ']':
+            for type, string_, start, _, _ in tokens:
+                if type == tokenize.OP and string_ == ']':
                    return ''.join(filter_parts)
                else:
-                    filter_parts.append(string)
+                    filter_parts.append(string_)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings.
            # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
-            for type, string, start, end, line in tokens:
-                if type == tokenize.OP and string == '[':
+            for type, string_, start, end, line in tokens:
+                if type == tokenize.OP and string_ == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
-                    yield type, string, start, end, line
+                    yield type, string_, start, end, line
                    # everything inside brackets will be handled by _parse_filter
-                    for type, string, start, end, line in tokens:
-                        yield type, string, start, end, line
-                        if type == tokenize.OP and string == ']':
+                    for type, string_, start, end, line in tokens:
+                        yield type, string_, start, end, line
+                        if type == tokenize.OP and string_ == ']':
                            break
-                elif type == tokenize.OP and string in ALLOWED_OPS:
+                elif type == tokenize.OP and string_ in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
-                    yield type, string, start, end, line
+                    yield type, string_, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
-                        last_string = string
+                        last_string = string_
                        last_start = start
                        last_end = end
                    else:
-                        last_string += string
+                        last_string += string_
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
-            for type, string, start, _, _ in tokens:
+            for type, string_, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
-                    current_selector = FormatSelector(SINGLE, string, [])
+                    current_selector = FormatSelector(SINGLE, string_, [])
                elif type == tokenize.OP:
-                    if string == ')':
+                    if string_ == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
-                    elif inside_merge and string in ['/', ',']:
+                    elif inside_merge and string_ in ['/', ',']:
                        tokens.restore_last_token()
                        break
-                    elif inside_choice and string == ',':
+                    elif inside_choice and string_ == ',':
                        tokens.restore_last_token()
                        break
-                    elif string == ',':
+                    elif string_ == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
-                    elif string == '/':
+                    elif string_ == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
-                    elif string == '[':
+                    elif string_ == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
-                    elif string == '(':
+                    elif string_ == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
-                    elif string == '+':
+                    elif string_ == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
@@ -2160,7 +2180,7 @@ class YoutubeDL:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
-                        raise syntax_error(f'Operator not recognized: "{string}"', start)
+                        raise syntax_error(f'Operator not recognized: "{string_}"', start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
@@ -2386,8 +2406,10 @@ class YoutubeDL:

    def _calc_headers(self, info_dict):
        res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
-
-        cookies = self._calc_cookies(info_dict['url'])
+        if 'Youtubedl-No-Compression' in res:  # deprecated
+            res.pop('Youtubedl-No-Compression', None)
+            res['Accept-Encoding'] = 'identity'
+        cookies = self.cookiejar.get_cookie_header(info_dict['url'])
        if cookies:
            res['Cookie'] = cookies

@@ -2399,9 +2421,8 @@ class YoutubeDL:
        return res

    def _calc_cookies(self, url):
-        pr = sanitized_Request(url)
-        self.cookiejar.add_cookie_header(pr)
-        return pr.get_header('Cookie')
+        self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
+        return self.cookiejar.get_cookie_header(url)

    def _sort_thumbnails(self, thumbnails):
        thumbnails.sort(key=lambda t: (
@@ -2728,21 +2749,22 @@ class YoutubeDL:
            return info_dict

        format_selector = self.format_selector
-        if format_selector is None:
-            req_format = self._default_format_spec(info_dict, download=download)
-            self.write_debug('Default format spec: %s' % req_format)
-            format_selector = self.build_format_selector(req_format)
-
        while True:
            if interactive_format_selection:
-                req_format = input(
-                    self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
+                req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
+                                   + '(Press ENTER for default, or Ctrl+C to quit)'
+                                   + self._format_screen(': ', self.Styles.EMPHASIS))
                try:
-                    format_selector = self.build_format_selector(req_format)
+                    format_selector = self.build_format_selector(req_format) if req_format else None
                except SyntaxError as err:
                    self.report_error(err, tb=False, is_error=False)
                    continue

+            if format_selector is None:
+                req_format = self._default_format_spec(info_dict, download=download)
+                self.write_debug(f'Default format spec: {req_format}')
+                format_selector = self.build_format_selector(req_format)
+
            formats_to_download = list(format_selector({
                'formats': formats,
                'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
@@ -2902,7 +2924,7 @@ class YoutubeDL:

            fmt = '%({})s'
            if tmpl.startswith('{'):
-                tmpl = f'.{tmpl}'
+                tmpl, fmt = f'.{tmpl}', '%({})j'
            if tmpl.endswith('='):
                tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
            return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
@@ -2941,7 +2963,8 @@ class YoutubeDL:
        print_field('url', 'urls')
        print_field('thumbnail', optional=True)
        print_field('description', optional=True)
-        print_field('filename', optional=True)
+        if filename:
+            print_field('filename')
        if self.params.get('forceduration') and info_copy.get('duration') is not None:
            self.to_stdout(formatSeconds(info_copy['duration']))
        print_field('format')
@@ -3422,8 +3445,8 @@ class YoutubeDL:
        if remove_private_keys:
            reject = lambda k, v: v is None or k.startswith('__') or k in {
                'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
-                'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
-                '_format_sort_fields',
+                'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
+                'playlist_autonumber', '_format_sort_fields',
            }
        else:
            reject = lambda k, v: False
@@ -3492,7 +3515,7 @@ class YoutubeDL:
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
        return infodict

-    def run_all_pps(self, key, info, *, additional_pps=None, fatal=True):
+    def run_all_pps(self, key, info, *, additional_pps=None):
        if key != 'video':
            self._forceprint(key, info)
        for pp in (additional_pps or []) + self._pps[key]:
@@ -3771,9 +3794,14 @@ class YoutubeDL:

        def get_encoding(stream):
            ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
+            additional_info = []
+            if os.environ.get('TERM', '').lower() == 'dumb':
+                additional_info.append('dumb')
            if not supports_terminal_sequences(stream):
                from .utils import WINDOWS_VT_MODE  # Must be imported locally
-                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
+                additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
+            if additional_info:
+                ret = f'{ret} ({",".join(additional_info)})'
            return ret

        encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
@@ -3998,7 +4026,7 @@ class YoutubeDL:
            # that way it will silently go on when used with unsupporting IE
            return ret
        elif not subtitles:
-            self.to_screen('[info] There\'s no subtitles for the requested languages')
+            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
@@ -4052,7 +4080,7 @@ class YoutubeDL:
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
-                self.to_screen(f'[info] There\'s no {label} thumbnails to download')
+                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1