1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2026-02-22 00:15:51 +00:00

Merge branch 'yt-dlp:master' into pr/live-sections

This commit is contained in:
bashonly
2024-11-07 11:48:52 -06:00
108 changed files with 1951 additions and 736 deletions

View File

@@ -212,6 +212,23 @@ def write_json_file(obj, fn):
raise
def partial_application(func):
    """Decorate *func* so that calling it without its required arguments returns a partial.

    The required arguments are the positional parameters of *func* (positional-only,
    positional-or-keyword and ``*args``) that have no default value.  When a call
    leaves any of them unsatisfied - neither covered by the positional arguments
    given nor passed by keyword - a ``functools.partial`` holding the arguments
    supplied so far is returned instead of calling *func*.
    """
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.VAR_POSITIONAL,
    )
    # Computed once at decoration time so each call only does a cheap slice/set check
    required_args = [
        name for name, param in inspect.signature(func).parameters.items()
        if param.kind in positional_kinds
        if param.default is inspect.Parameter.empty
    ]

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        # Required names not consumed positionally must be present as keywords
        still_missing = set(required_args[len(args):]).difference(kwargs)
        if not still_missing:
            return func(*args, **kwargs)
        return functools.partial(func, *args, **kwargs)

    return wrapped
def find_xpath_attr(node, xpath, key, val=None):
""" Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z_-]+$', key)
@@ -1192,6 +1209,7 @@ def extract_timezone(date_str, default=None):
return timezone, date_str
@partial_application
def parse_iso8601(date_str, delimiter='T', timezone=None):
""" Return a UNIX timestamp from the given date """
@@ -1269,6 +1287,7 @@ def unified_timestamp(date_str, day_first=True, with_milliseconds=False):
return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
@partial_application
def determine_ext(url, default_ext='unknown_video'):
if url is None or '.' not in url:
return default_ext
@@ -1944,7 +1963,7 @@ def remove_start(s, start):
def remove_end(s, end):
    """Return *s* with the trailing *end* removed.

    *s* is returned unchanged when it is None, when *end* is empty, or when
    *s* does not end with *end*.
    """
    # An empty suffix must be rejected explicitly: every string "ends with" '',
    # and s[:-0] would otherwise collapse the whole string to ''
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
def remove_quotes(s):
@@ -1973,6 +1992,7 @@ def base_url(url):
return re.match(r'https?://[^?#]+/', url).group()
@partial_application
def urljoin(base, path):
if isinstance(path, bytes):
path = path.decode()
@@ -1988,21 +2008,6 @@ def urljoin(base, path):
return urllib.parse.urljoin(base, path)
def partial_application(func):
    """Decorate *func* so that a call its signature rejects returns a partial instead.

    Every call is first checked with ``inspect.Signature.bind``.  If binding the
    given arguments raises ``TypeError``, a ``functools.partial`` holding those
    arguments is returned; otherwise *func* is called normally.
    """
    signature = inspect.signature(func)

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        try:
            signature.bind(*args, **kwargs)
        except TypeError:
            # The arguments do not (yet) satisfy the signature: defer the call
            return functools.partial(func, *args, **kwargs)
        return func(*args, **kwargs)

    return wrapped
@partial_application
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
if get_attr and v is not None:
@@ -2588,6 +2593,7 @@ def urlencode_postdata(*args, **kargs):
return urllib.parse.urlencode(*args, **kargs).encode('ascii')
@partial_application
def update_url(url, *, query_update=None, **kwargs):
"""Replace URL components specified by kwargs
@param url str or parse url tuple
@@ -2608,6 +2614,7 @@ def update_url(url, *, query_update=None, **kwargs):
return urllib.parse.urlunparse(url._replace(**kwargs))
@partial_application
def update_url_query(url, query):
    """Return *url* as produced by ``update_url`` with *query* passed as ``query_update``."""
    updated = update_url(url, query_update=query)
    return updated
@@ -2929,6 +2936,7 @@ def error_to_str(err):
return f'{type(err).__name__}: {err}'
@partial_application
def mimetype2ext(mt, default=NO_DEFAULT):
if not isinstance(mt, str):
if default is not NO_DEFAULT:
@@ -4669,6 +4677,7 @@ def to_high_limit_path(path):
return path
@partial_application
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
val = traversal.traverse_obj(obj, *variadic(field))
if not val if ignore is NO_DEFAULT else val in variadic(ignore):
@@ -4833,6 +4842,7 @@ def number_of_digits(number):
return len('%d' % number)
@partial_application
def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None:
values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
@@ -5137,6 +5147,7 @@ class _UnsafeExtensionError(Exception):
'rm',
'swf',
'ts',
'vid',
'vob',
'vp9',
@@ -5169,7 +5180,9 @@ class _UnsafeExtensionError(Exception):
'heic',
'ico',
'image',
'jfif',
'jng',
'jpe',
'jpeg',
'jxl',
'svg',
@@ -5282,11 +5295,13 @@ class RetryManager:
time.sleep(delay)
@partial_application
def make_archive_id(ie, video_id):
    """Build the archive id string ``'<lowercased ie key> <video_id>'``.

    *ie* is either the key itself (a str) or an object whose ``ie_key()``
    method provides it.
    """
    if isinstance(ie, str):
        ie_key = ie
    else:
        ie_key = ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
@partial_application
def truncate_string(s, left, right=0):
assert left > 3 and right >= 0
if s is None or len(s) <= left + right:
@@ -5329,8 +5344,11 @@ class FormatSorter:
regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
'res', 'fps', 'hdr:12', 'vcodec', 'channels', 'acodec',
'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
_prefer_vp9_sort = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')
ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
'height', 'width', 'proto', 'vext', 'abr', 'aext',
'fps', 'fs_approx', 'source', 'id')
@@ -5583,14 +5601,15 @@ class FormatSorter:
value = get_value(field)
return self._calculate_field_preference_from_value(format_, field, type_, value)
def calculate_preference(self, format):
@staticmethod
def _fill_sorting_fields(format):
# Determine missing protocol
if not format.get('protocol'):
format['protocol'] = determine_protocol(format)
# Determine missing ext
if not format.get('ext') and 'url' in format:
format['ext'] = determine_ext(format['url'])
format['ext'] = determine_ext(format['url']).lower()
if format.get('vcodec') == 'none':
format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
format['video_ext'] = 'none'
@@ -5618,6 +5637,8 @@ class FormatSorter:
if not format.get('tbr'):
format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
def calculate_preference(self, format):
    """Return a tuple with one preference value per field in ``self._order``.

    ``_fill_sorting_fields`` is applied to *format* first, so the per-field
    calculation sees the filled-in values.
    """
    self._fill_sorting_fields(format)
    return tuple(
        self._calculate_field_preference(format, field_name)
        for field_name in self._order)

View File

@@ -20,6 +20,7 @@ from ._utils import (
get_elements_html_by_class,
get_elements_html_by_attribute,
get_elements_by_attribute,
get_element_by_class,
get_element_html_by_attribute,
get_element_by_attribute,
get_element_html_by_id,
@@ -373,7 +374,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
@typing.overload
def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ...
def find_element(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
@typing.overload
@@ -381,34 +382,34 @@ def find_element(*, cls: str, html=False): ...
@typing.overload
def find_element(*, id: str, tag: str | None = None, html=False): ...
def find_element(*, id: str, tag: str | None = None, html=False, regex=False): ...
@typing.overload
def find_element(*, tag: str, html=False): ...
def find_element(*, tag: str, html=False, regex=False): ...
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False, regex=False):
    """Build a callable that looks up a single element in an HTML string.

    Exactly one kind of selector is used, chosen in this order:
    ``attr`` + ``value``, then ``cls``, then ``id``, then ``tag`` alone.

    @param tag      Tag name to match; for attr/id lookups any tag is accepted when omitted
    @param id       Value of the ``id`` attribute to match
    @param cls      Class name to match (cannot be combined with tag, id, attr or regex)
    @param attr     Attribute name to match; requires ``value``
    @param value    Attribute value to match; requires ``attr``
    @param html     Select the ``*_html_*`` lookup helpers instead of the plain ones
    @param regex    Pass ``escape_value=False`` to the attr/id helpers (``escape_value=not regex``)
    @returns        A callable taking the HTML string
    @raises AssertionError  If the selector combination is missing or contradictory
    """
    # deliberately using `id=` and `cls=` for ease of readability
    assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
    # Applied per branch rather than assigned to `tag` up front,
    # so that the `tag is None` check in the cls branch stays meaningful
    ANY_TAG = r'[\w:.-]+'

    if attr and value:
        assert not cls, 'Cannot match both attr and cls'
        assert not id, 'Cannot match both attr and id'
        func = get_element_html_by_attribute if html else get_element_by_attribute
        return functools.partial(func, attr, value, tag=tag or ANY_TAG, escape_value=not regex)

    elif cls:
        assert not id, 'Cannot match both cls and id'
        assert tag is None, 'Cannot match both cls and tag'
        assert not regex, 'Cannot use regex with cls'
        # Single-element helper: this function finds one element, not a list
        func = get_element_html_by_class if html else get_element_by_class
        return functools.partial(func, cls)

    elif id:
        func = get_element_html_by_id if html else get_element_by_id
        return functools.partial(func, id, tag=tag or ANY_TAG, escape_value=not regex)

    # Only `tag` was given; `regex` is not consulted on this path.
    # The helper result is indexed with 0 when `html` is false and 1 when it is true
    index = int(bool(html))
    return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
@@ -419,23 +420,46 @@ def find_elements(*, cls: str, html=False): ...
@typing.overload
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ...
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False, regex=False): ...
def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False, regex=False):
    """Build a callable that looks up all matching elements in an HTML string.

    Either ``cls`` or the pair ``attr`` + ``value`` selects the elements.

    @param tag      Tag name to match for attr lookups; any tag is accepted when omitted
    @param cls      Class name to match (cannot be combined with tag, attr or regex)
    @param attr     Attribute name to match; requires ``value``
    @param value    Attribute value to match; requires ``attr``
    @param html     Select the ``*_html_*`` lookup helpers instead of the plain ones
    @param regex    Pass ``escape_value=False`` to the attr helper (``escape_value=not regex``)
    @returns        A callable taking the HTML string
    @raises AssertionError  If the selector combination is missing or contradictory
    """
    # deliberately using `cls=` for ease of readability
    assert cls or (attr and value), 'One of cls or (attr AND value) is required'

    if attr and value:
        assert not cls, 'Cannot match both attr and cls'
        func = get_elements_html_by_attribute if html else get_elements_by_attribute
        return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+', escape_value=not regex)

    assert not tag, 'Cannot match both cls and tag'
    assert not regex, 'Cannot use regex with cls'
    func = get_elements_html_by_class if html else get_elements_by_class
    return functools.partial(func, cls)
def trim_str(*, start=None, end=None):
    """Build a callable that strips a fixed prefix and/or suffix from a string.

    The returned callable removes *start* from the beginning and *end* from the
    end of its argument, each only if actually present, and passes ``None``
    through unchanged.
    """
    def trim(s):
        if s is None:
            return None
        begin = len(start) if start and s.startswith(start) else 0
        if not (end and s.endswith(end)):
            return s[begin:]
        return s[begin:-len(end)]

    return trim
def unpack(func):
    """Wrap *func* so it takes its positional arguments as a single iterable.

    The wrapper receives one ``items`` argument and calls ``func(*items)``;
    keyword arguments are forwarded unchanged.
    """
    @functools.wraps(func)
    def spread(items, **kwargs):
        result = func(*items, **kwargs)
        return result

    return spread
def get_first(obj, *paths, **kwargs):
    """Call ``traverse_obj`` on *obj* with every path prefixed by ``...`` and ``get_all=False``.

    Each entry of *paths* is normalised with ``variadic`` before the ``...``
    prefix is added; remaining keyword arguments are forwarded as given.
    """
    branching_paths = [(..., *variadic(path)) for path in paths]
    return traverse_obj(obj, *branching_paths, **kwargs, get_all=False)