mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2026-01-05 22:41:18 +00:00
Merge branch 'yt-dlp:master' into pr/live-sections
This commit is contained in:
@@ -2195,9 +2195,8 @@ class YoutubeDL:
|
||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||
}))
|
||||
|
||||
def _default_format_spec(self, info_dict, download=True):
|
||||
download = download and not self.params.get('simulate')
|
||||
prefer_best = download and (
|
||||
def _default_format_spec(self, info_dict):
|
||||
prefer_best = (
|
||||
self.params['outtmpl']['default'] == '-'
|
||||
or info_dict.get('is_live') and not self.params.get('live_from_start'))
|
||||
|
||||
@@ -2205,7 +2204,7 @@ class YoutubeDL:
|
||||
merger = FFmpegMergerPP(self)
|
||||
return merger.available and merger.can_merge()
|
||||
|
||||
if not prefer_best and download and not can_merge():
|
||||
if not prefer_best and not can_merge():
|
||||
prefer_best = True
|
||||
formats = self._get_formats(info_dict)
|
||||
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
|
||||
@@ -2964,7 +2963,7 @@ class YoutubeDL:
|
||||
continue
|
||||
|
||||
if format_selector is None:
|
||||
req_format = self._default_format_spec(info_dict, download=download)
|
||||
req_format = self._default_format_spec(info_dict)
|
||||
self.write_debug(f'Default format spec: {req_format}')
|
||||
format_selector = self.build_format_selector(req_format)
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<service>app|ent)\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||
@@ -38,10 +38,22 @@ class BoxIE(InfoExtractor):
|
||||
'uploader_id': '239068974',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}, {
|
||||
'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065',
|
||||
'info_dict': {
|
||||
'id': '1536173056065',
|
||||
'ext': 'mp4',
|
||||
'uploader_id': '18523128264',
|
||||
'uploader': 'Lexi Hennigan',
|
||||
'title': 'iPSC Symposium recording part 1.mp4',
|
||||
'timestamp': 1716228343,
|
||||
'upload_date': '20240520',
|
||||
},
|
||||
'params': {'skip_download': 'dash fragment too small'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
shared_name, file_id = self._match_valid_url(url).groups()
|
||||
shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service')
|
||||
webpage = self._download_webpage(url, file_id or shared_name)
|
||||
|
||||
if not file_id:
|
||||
@@ -57,14 +69,14 @@ class BoxIE(InfoExtractor):
|
||||
request_token = self._search_json(
|
||||
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
|
||||
access_token = self._download_json(
|
||||
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||
'Downloading token JSON metadata',
|
||||
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'X-Request-Token': request_token,
|
||||
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||
})[file_id]['read']
|
||||
shared_link = 'https://app.box.com/s/' + shared_name
|
||||
shared_link = f'https://{service}.box.com/s/{shared_name}'
|
||||
f = self._download_json(
|
||||
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||
'Downloading file JSON metadata', headers={
|
||||
|
||||
@@ -314,23 +314,11 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||
self.write_debug(f'"{identifier}" is not a requested format, skipping')
|
||||
continue
|
||||
|
||||
stream = None
|
||||
for retry in self.RetryManager(fatal=False):
|
||||
try:
|
||||
stream = self._call_api(
|
||||
format_url, track_id, f'Downloading {identifier} format info JSON',
|
||||
query=query, headers=self._HEADERS)
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 429:
|
||||
self.report_warning(
|
||||
'You have reached the API rate limit, which is ~600 requests per '
|
||||
'10 minutes. Use the --extractor-retries and --retry-sleep options '
|
||||
'to configure an appropriate retry count and wait time', only_once=True)
|
||||
retry.error = e.cause
|
||||
else:
|
||||
self.report_warning(e.msg)
|
||||
# XXX: if not extract_flat, 429 error must be caught where _extract_info_dict is called
|
||||
stream_url = traverse_obj(self._call_api(
|
||||
format_url, track_id, f'Downloading {identifier} format info JSON',
|
||||
query=query, headers=self._HEADERS), ('url', {url_or_none}))
|
||||
|
||||
stream_url = traverse_obj(stream, ('url', {url_or_none}))
|
||||
if invalid_url(stream_url):
|
||||
continue
|
||||
format_urls.add(stream_url)
|
||||
@@ -647,7 +635,17 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||
info = self._call_api(
|
||||
info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
|
||||
|
||||
return self._extract_info_dict(info, full_title, token)
|
||||
for retry in self.RetryManager():
|
||||
try:
|
||||
return self._extract_info_dict(info, full_title, token)
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
|
||||
raise
|
||||
self.report_warning(
|
||||
'You have reached the API rate limit, which is ~600 requests per '
|
||||
'10 minutes. Use the --extractor-retries and --retry-sleep options '
|
||||
'to configure an appropriate retry count and wait time', only_once=True)
|
||||
retry.error = e.cause
|
||||
|
||||
|
||||
class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
|
||||
|
||||
@@ -1458,9 +1458,11 @@ class TikTokLiveIE(TikTokBaseIE):
|
||||
|
||||
if webpage:
|
||||
data = self._get_sigi_state(webpage, uploader or room_id)
|
||||
room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
|
||||
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
|
||||
or room_id)
|
||||
room_id = (
|
||||
traverse_obj(data, ((
|
||||
('LiveRoom', 'liveRoomUserInfo', 'user'),
|
||||
('UserModule', 'users', ...)), 'roomId', {str}, any))
|
||||
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id))
|
||||
uploader = uploader or traverse_obj(
|
||||
data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
|
||||
('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
|
||||
|
||||
@@ -96,7 +96,7 @@ class TV5MondePlusIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
||||
|
||||
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
|
||||
self.raise_geo_restricted(countries=['FR'])
|
||||
|
||||
@@ -3159,7 +3159,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_n_function_name(self, jscode):
|
||||
funcname, idx = self._search_regex(
|
||||
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
|
||||
r'''(?x)(?:\.get\("n"\)\)&&\(b=|b=String\.fromCharCode\(110\),c=a\.get\(b\)\)&&\(c=)
|
||||
(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
|
||||
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
||||
if not idx:
|
||||
return funcname
|
||||
@@ -3170,7 +3171,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_n_function_code(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
|
||||
func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
|
||||
jscode = func_code or self._load_player(video_id, player_url)
|
||||
jsi = JSInterpreter(jscode)
|
||||
|
||||
@@ -3179,17 +3180,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
func_name = self._extract_n_function_name(jscode)
|
||||
|
||||
# For redundancy
|
||||
func_code = self._search_regex(
|
||||
rf'''(?xs){func_name}\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
|
||||
# NB: The end of the regex is intentionally kept strict
|
||||
{{(?P<code>.+?}}\s*return\ [\w$]+.join\(""\))}};''',
|
||||
jscode, 'nsig function', group=('var', 'code'), default=None)
|
||||
if func_code:
|
||||
func_code = ([func_code[0]], func_code[1])
|
||||
else:
|
||||
self.write_debug('Extracting nsig function with jsinterp')
|
||||
func_code = jsi.extract_function_code(func_name)
|
||||
func_code = jsi.extract_function_code(func_name)
|
||||
|
||||
self.cache.store('youtube-nsig', player_id, func_code)
|
||||
return jsi, player_id, func_code
|
||||
|
||||
@@ -636,6 +636,8 @@ class JSInterpreter:
|
||||
raise self.Exception(f'{member} {msg}', expr)
|
||||
|
||||
def eval_method():
|
||||
nonlocal member
|
||||
|
||||
if (variable, member) == ('console', 'debug'):
|
||||
if Debugger.ENABLED:
|
||||
Debugger.write(self.interpret_expression(f'[{arg_str}]', local_vars, allow_recursion))
|
||||
@@ -644,6 +646,7 @@ class JSInterpreter:
|
||||
types = {
|
||||
'String': str,
|
||||
'Math': float,
|
||||
'Array': list,
|
||||
}
|
||||
obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
|
||||
if obj is NO_DEFAULT:
|
||||
@@ -667,6 +670,21 @@ class JSInterpreter:
|
||||
self.interpret_expression(v, local_vars, allow_recursion)
|
||||
for v in self._separate(arg_str)]
|
||||
|
||||
# Fixup prototype call
|
||||
if isinstance(obj, type) and member.startswith('prototype.'):
|
||||
new_member, _, func_prototype = member.partition('.')[2].partition('.')
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
assertion(isinstance(argvals[0], obj), f'needs binding to type {obj}')
|
||||
if func_prototype == 'call':
|
||||
obj, *argvals = argvals
|
||||
elif func_prototype == 'apply':
|
||||
assertion(len(argvals) == 2, 'takes two arguments')
|
||||
obj, argvals = argvals
|
||||
assertion(isinstance(argvals, list), 'second argument needs to be a list')
|
||||
else:
|
||||
raise self.Exception(f'Unsupported Function method {func_prototype}', expr)
|
||||
member = new_member
|
||||
|
||||
if obj is str:
|
||||
if member == 'fromCharCode':
|
||||
assertion(argvals, 'takes one or more arguments')
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import io
|
||||
import math
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from ._helper import InstanceStoreMixin, select_proxy
|
||||
@@ -27,11 +28,12 @@ from ..utils import int_or_none
|
||||
if curl_cffi is None:
|
||||
raise ImportError('curl_cffi is not installed')
|
||||
|
||||
curl_cffi_version = tuple(int_or_none(x, default=0) for x in curl_cffi.__version__.split('.'))
|
||||
|
||||
if curl_cffi_version != (0, 5, 10):
|
||||
curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3]))
|
||||
|
||||
if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)):
|
||||
curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)'
|
||||
raise ImportError('Only curl_cffi 0.5.10 is supported')
|
||||
raise ImportError('Only curl_cffi versions 0.5.10, 0.7.X are supported')
|
||||
|
||||
import curl_cffi.requests
|
||||
from curl_cffi.const import CurlECode, CurlOpt
|
||||
@@ -110,6 +112,13 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
|
||||
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
|
||||
_SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
|
||||
_SUPPORTED_IMPERSONATE_TARGET_MAP = {
|
||||
**({
|
||||
ImpersonateTarget('chrome', '124', 'macos', '14'): curl_cffi.requests.BrowserType.chrome124,
|
||||
ImpersonateTarget('chrome', '123', 'macos', '14'): curl_cffi.requests.BrowserType.chrome123,
|
||||
ImpersonateTarget('chrome', '120', 'macos', '14'): curl_cffi.requests.BrowserType.chrome120,
|
||||
ImpersonateTarget('chrome', '119', 'macos', '14'): curl_cffi.requests.BrowserType.chrome119,
|
||||
ImpersonateTarget('chrome', '116', 'windows', '10'): curl_cffi.requests.BrowserType.chrome116,
|
||||
} if curl_cffi_version >= (0, 7, 0) else {}),
|
||||
ImpersonateTarget('chrome', '110', 'windows', '10'): curl_cffi.requests.BrowserType.chrome110,
|
||||
ImpersonateTarget('chrome', '107', 'windows', '10'): curl_cffi.requests.BrowserType.chrome107,
|
||||
ImpersonateTarget('chrome', '104', 'windows', '10'): curl_cffi.requests.BrowserType.chrome104,
|
||||
@@ -118,9 +127,15 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
|
||||
ImpersonateTarget('chrome', '99', 'windows', '10'): curl_cffi.requests.BrowserType.chrome99,
|
||||
ImpersonateTarget('edge', '101', 'windows', '10'): curl_cffi.requests.BrowserType.edge101,
|
||||
ImpersonateTarget('edge', '99', 'windows', '10'): curl_cffi.requests.BrowserType.edge99,
|
||||
**({
|
||||
ImpersonateTarget('safari', '17.0', 'macos', '14'): curl_cffi.requests.BrowserType.safari17_0,
|
||||
} if curl_cffi_version >= (0, 7, 0) else {}),
|
||||
ImpersonateTarget('safari', '15.5', 'macos', '12'): curl_cffi.requests.BrowserType.safari15_5,
|
||||
ImpersonateTarget('safari', '15.3', 'macos', '11'): curl_cffi.requests.BrowserType.safari15_3,
|
||||
ImpersonateTarget('chrome', '99', 'android', '12'): curl_cffi.requests.BrowserType.chrome99_android,
|
||||
**({
|
||||
ImpersonateTarget('safari', '17.2', 'ios', '17.2'): curl_cffi.requests.BrowserType.safari17_2_ios,
|
||||
} if curl_cffi_version >= (0, 7, 0) else {}),
|
||||
}
|
||||
|
||||
def _create_instance(self, cookiejar=None):
|
||||
@@ -187,7 +202,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
|
||||
timeout = self._calculate_timeout(request)
|
||||
|
||||
# set CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME to act as a read timeout. [1]
|
||||
# curl_cffi does not currently do this. [2]
|
||||
# This is required only for 0.5.10 [2]
|
||||
# Note: CURLOPT_LOW_SPEED_TIME is in seconds, so we need to round up to the nearest second. [3]
|
||||
# [1] https://unix.stackexchange.com/a/305311
|
||||
# [2] https://github.com/yifeikong/curl_cffi/issues/156
|
||||
@@ -203,7 +218,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
|
||||
data=request.data,
|
||||
verify=self.verify,
|
||||
max_redirects=5,
|
||||
timeout=timeout,
|
||||
timeout=(timeout, timeout),
|
||||
impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get(
|
||||
self._get_request_target(request)),
|
||||
interface=self.source_address,
|
||||
@@ -222,7 +237,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
|
||||
|
||||
elif (
|
||||
e.code == CurlECode.PROXY
|
||||
or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e))
|
||||
or (e.code == CurlECode.RECV_ERROR and 'CONNECT' in str(e))
|
||||
):
|
||||
raise ProxyError(cause=e) from e
|
||||
else:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Autogenerated by devscripts/update-version.py
|
||||
|
||||
__version__ = '2024.07.07'
|
||||
__version__ = '2024.07.09'
|
||||
|
||||
RELEASE_GIT_HEAD = 'b337d2989ce0614651d363383f6f743d977248ef'
|
||||
RELEASE_GIT_HEAD = '7ead7332af69422cee931aec3faa277288e9e212'
|
||||
|
||||
VARIANT = None
|
||||
|
||||
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
|
||||
|
||||
ORIGIN = 'yt-dlp/yt-dlp'
|
||||
|
||||
_pkg_version = '2024.07.07'
|
||||
_pkg_version = '2024.07.09'
|
||||
|
||||
Reference in New Issue
Block a user