1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-12-28 10:31:38 +00:00

Merge branch 'master' into yt-live-from-start-range

This commit is contained in:
bashonly
2024-01-19 17:05:19 -06:00
committed by GitHub
346 changed files with 10935 additions and 13520 deletions

View File

@@ -1,4 +1,6 @@
"""No longer used and new code should not use. Exists only for API compat."""
import asyncio
import atexit
import platform
import struct
import sys
@@ -32,6 +34,77 @@ has_certifi = bool(certifi)
has_websockets = bool(websockets)
class WebSocketsWrapper:
"""Wraps websockets module to use in non-async scopes"""
pool = None
def __init__(self, url, headers=None, connect=True, **ws_kwargs):
self.loop = asyncio.new_event_loop()
# XXX: "loop" is deprecated
self.conn = websockets.connect(
url, extra_headers=headers, ping_interval=None,
close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'), **ws_kwargs)
if connect:
self.__enter__()
atexit.register(self.__exit__, None, None, None)
def __enter__(self):
if not self.pool:
self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
return self
def send(self, *args):
self.run_with_loop(self.pool.send(*args), self.loop)
def recv(self, *args):
return self.run_with_loop(self.pool.recv(*args), self.loop)
def __exit__(self, type, value, traceback):
try:
return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
finally:
self.loop.close()
self._cancel_all_tasks(self.loop)
# taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
# for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
@staticmethod
def run_with_loop(main, loop):
if not asyncio.iscoroutine(main):
raise ValueError(f'a coroutine was expected, got {main!r}')
try:
return loop.run_until_complete(main)
finally:
loop.run_until_complete(loop.shutdown_asyncgens())
if hasattr(loop, 'shutdown_default_executor'):
loop.run_until_complete(loop.shutdown_default_executor())
@staticmethod
def _cancel_all_tasks(loop):
to_cancel = asyncio.all_tasks(loop)
if not to_cancel:
return
for task in to_cancel:
task.cancel()
# XXX: "loop" is removed in python 3.10+
loop.run_until_complete(
asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
for task in to_cancel:
if task.cancelled():
continue
if task.exception() is not None:
loop.call_exception_handler({
'message': 'unhandled exception during asyncio.run() shutdown',
'exception': task.exception(),
'task': task,
})
def load_plugins(name, suffix, namespace):
from ..plugins import load_plugins
ret = load_plugins(name, suffix)

View File

@@ -1,5 +1,3 @@
import asyncio
import atexit
import base64
import binascii
import calendar
@@ -54,7 +52,7 @@ from ..compat import (
compat_os_name,
compat_shlex_quote,
)
from ..dependencies import websockets, xattr
from ..dependencies import xattr
__name__ = __name__.rsplit('.', 1)[0] # Pretend to be the parent module
@@ -560,7 +558,7 @@ class LenientJSONDecoder(json.JSONDecoder):
s = self._close_object(e)
if s is not None:
continue
raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos)
assert False, 'Too many attempts to decode JSON'
@@ -638,7 +636,7 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
elif char in '\\/|*<>':
return '\0_'
if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
return '\0_'
return '' if unicodedata.category(char)[0] in 'CM' else '\0_'
return char
# Replace look-alike Unicode glyphs
@@ -1887,6 +1885,7 @@ def setproctitle(title):
buf = ctypes.create_string_buffer(len(title_bytes))
buf.value = title_bytes
try:
# PR_SET_NAME = 15 Ref: /usr/include/linux/prctl.h
libc.prctl(15, buf, 0, 0, 0)
except AttributeError:
return # Strange libc, just skip this
@@ -2267,6 +2266,9 @@ class PagedList:
raise self.IndexError()
return entries[0]
def __bool__(self):
return bool(self.getslice(0, 1))
class OnDemandPagedList(PagedList):
"""Download pages until a page with less than maximum results"""
@@ -4446,10 +4448,12 @@ def write_xattr(path, key, value):
raise XAttrMetadataError(e.errno, e.strerror)
return
# UNIX Method 1. Use xattrs/pyxattrs modules
# UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules
setxattr = None
if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
if callable(getattr(os, 'setxattr', None)):
setxattr = os.setxattr
elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
# Unicode arguments are not supported in pyxattr until version 0.5.0
# See https://github.com/ytdl-org/youtube-dl/issues/5498
if version_tuple(xattr.__version__) >= (0, 5, 0):
@@ -4794,8 +4798,9 @@ def parse_http_range(range):
def read_stdin(what):
eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
if what:
eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
return sys.stdin
@@ -4926,77 +4931,6 @@ class Config:
return self.parser.parse_args(self.all_args)
class WebSocketsWrapper:
"""Wraps websockets module to use in non-async scopes"""
pool = None
def __init__(self, url, headers=None, connect=True):
self.loop = asyncio.new_event_loop()
# XXX: "loop" is deprecated
self.conn = websockets.connect(
url, extra_headers=headers, ping_interval=None,
close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
if connect:
self.__enter__()
atexit.register(self.__exit__, None, None, None)
def __enter__(self):
if not self.pool:
self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
return self
def send(self, *args):
self.run_with_loop(self.pool.send(*args), self.loop)
def recv(self, *args):
return self.run_with_loop(self.pool.recv(*args), self.loop)
def __exit__(self, type, value, traceback):
try:
return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
finally:
self.loop.close()
self._cancel_all_tasks(self.loop)
# taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
# for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
@staticmethod
def run_with_loop(main, loop):
if not asyncio.iscoroutine(main):
raise ValueError(f'a coroutine was expected, got {main!r}')
try:
return loop.run_until_complete(main)
finally:
loop.run_until_complete(loop.shutdown_asyncgens())
if hasattr(loop, 'shutdown_default_executor'):
loop.run_until_complete(loop.shutdown_default_executor())
@staticmethod
def _cancel_all_tasks(loop):
to_cancel = asyncio.all_tasks(loop)
if not to_cancel:
return
for task in to_cancel:
task.cancel()
# XXX: "loop" is removed in python 3.10+
loop.run_until_complete(
asyncio.gather(*to_cancel, loop=loop, return_exceptions=True))
for task in to_cancel:
if task.cancelled():
continue
if task.exception() is not None:
loop.call_exception_handler({
'message': 'unhandled exception during asyncio.run() shutdown',
'exception': task.exception(),
'task': task,
})
def merge_headers(*dicts):
"""Merge dicts of http headers case insensitively, prioritizing the latter ones"""
return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
@@ -5145,7 +5079,7 @@ def truncate_string(s, left, right=0):
assert left > 3 and right >= 0
if s is None or len(s) <= left + right:
return s
return f'{s[:left-3]}...{s[-right:] if right else ""}'
return f'{s[:left - 3]}...{s[-right:] if right else ""}'
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):

View File

@@ -67,7 +67,7 @@ class HTTPHeaderDict(collections.UserDict, dict):
def __setitem__(self, key, value):
if isinstance(value, bytes):
value = value.decode('latin-1')
super().__setitem__(key.title(), str(value))
super().__setitem__(key.title(), str(value).strip())
def __getitem__(self, key):
return super().__getitem__(key.title())
@@ -123,6 +123,7 @@ def clean_headers(headers: HTTPHeaderDict):
if 'Youtubedl-No-Compression' in headers: # compat
del headers['Youtubedl-No-Compression']
headers['Accept-Encoding'] = 'identity'
headers.pop('Ytdl-socks-proxy', None)
def remove_dot_segments(path):

View File

@@ -3,12 +3,13 @@ import contextlib
import inspect
import itertools
import re
import xml.etree.ElementTree
from ._utils import (
IDENTITY,
NO_DEFAULT,
LazyList,
int_or_none,
deprecation_warning,
is_iterable_like,
try_call,
variadic,
@@ -17,13 +18,13 @@ from ._utils import (
def traverse_obj(
obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
casesense=True, is_user_input=False, traverse_string=False):
casesense=True, is_user_input=NO_DEFAULT, traverse_string=False):
"""
Safely traverse nested `dict`s and `Iterable`s
>>> obj = [{}, {"key": "value"}]
>>> traverse_obj(obj, (1, "key"))
"value"
'value'
Each of the provided `paths` is tested and the first producing a valid result will be returned.
The next path will also be tested if the path branched but no results could be found.
@@ -63,10 +64,8 @@ def traverse_obj(
@param get_all If `False`, return the first matching result, otherwise all matching ones.
@param casesense If `False`, consider string dictionary keys as case insensitive.
The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
`traverse_string` is only meant to be used by YoutubeDL.prepare_outtmpl and is not part of the API
@param is_user_input Whether the keys are generated from user input.
If `True` strings get converted to `int`/`slice` if needed.
@param traverse_string Whether to traverse into objects as strings.
If `True`, any non-compatible object will first be
converted into a string and then traversed into.
@@ -80,6 +79,9 @@ def traverse_obj(
If no `default` is given and the last path branches, a `list` of results
is always returned. If a path ends on a `dict` that result will always be a `dict`.
"""
if is_user_input is not NO_DEFAULT:
deprecation_warning('The is_user_input parameter is deprecated and no longer works')
casefold = lambda k: k.casefold() if isinstance(k, str) else k
if isinstance(expected_type, type):
@@ -117,7 +119,7 @@ def traverse_obj(
branching = True
if isinstance(obj, collections.abc.Mapping):
result = obj.values()
elif is_iterable_like(obj):
elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
result = obj
elif isinstance(obj, re.Match):
result = obj.groups()
@@ -131,7 +133,7 @@ def traverse_obj(
branching = True
if isinstance(obj, collections.abc.Mapping):
iter_obj = obj.items()
elif is_iterable_like(obj):
elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
iter_obj = enumerate(obj)
elif isinstance(obj, re.Match):
iter_obj = itertools.chain(
@@ -167,7 +169,7 @@ def traverse_obj(
result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
elif isinstance(key, (int, slice)):
if is_iterable_like(obj, collections.abc.Sequence):
if is_iterable_like(obj, (collections.abc.Sequence, xml.etree.ElementTree.Element)):
branching = isinstance(key, slice)
with contextlib.suppress(IndexError):
result = obj[key]
@@ -175,6 +177,34 @@ def traverse_obj(
with contextlib.suppress(IndexError):
result = str(obj)[key]
elif isinstance(obj, xml.etree.ElementTree.Element) and isinstance(key, str):
xpath, _, special = key.rpartition('/')
if not special.startswith('@') and special != 'text()':
xpath = key
special = None
# Allow abbreviations of relative paths, absolute paths error
if xpath.startswith('/'):
xpath = f'.{xpath}'
elif xpath and not xpath.startswith('./'):
xpath = f'./{xpath}'
def apply_specials(element):
if special is None:
return element
if special == '@':
return element.attrib
if special.startswith('@'):
return try_call(element.attrib.get, args=(special[1:],))
if special == 'text()':
return element.text
assert False, f'apply_specials is missing case for {special!r}'
if xpath:
result = list(map(apply_specials, obj.iterfind(xpath)))
else:
result = apply_specials(obj)
return branching, result if branching else (result,)
def lazy_last(iterable):
@@ -195,14 +225,6 @@ def traverse_obj(
key = None
for last, key in lazy_last(variadic(path, (str, bytes, dict, set))):
if is_user_input and isinstance(key, str):
if key == ':':
key = ...
elif ':' in key:
key = slice(*map(int_or_none, key.split(':')))
elif int_or_none(key) is not None:
key = int(key)
if not casesense and isinstance(key, str):
key = key.casefold()