mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-27 08:58:30 +00:00
Merge e4962161f6
into 06c1a8cdff
This commit is contained in:
commit
16a02a3d5a
@ -30,7 +30,7 @@
|
|||||||
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
from ..downloader.f4m import get_base_url, remove_encrypted_media
|
||||||
from ..downloader.hls import HlsFD
|
from ..downloader.hls import HlsFD
|
||||||
from ..globals import plugin_ies_overrides
|
from ..globals import plugin_ies_overrides
|
||||||
from ..networking import HEADRequest, Request
|
from ..networking import HEADRequest, Request, Response
|
||||||
from ..networking.exceptions import (
|
from ..networking.exceptions import (
|
||||||
HTTPError,
|
HTTPError,
|
||||||
IncompleteRead,
|
IncompleteRead,
|
||||||
@ -102,6 +102,7 @@
|
|||||||
)
|
)
|
||||||
from ..utils._utils import _request_dump_filename
|
from ..utils._utils import _request_dump_filename
|
||||||
from ..utils.jslib import devalue
|
from ..utils.jslib import devalue
|
||||||
|
from ..utils.networking import HTTPHeaderDict
|
||||||
|
|
||||||
|
|
||||||
class InfoExtractor:
|
class InfoExtractor:
|
||||||
@ -571,6 +572,7 @@ class InfoExtractor:
|
|||||||
_ready = False
|
_ready = False
|
||||||
_downloader = None
|
_downloader = None
|
||||||
_x_forwarded_for_ip = None
|
_x_forwarded_for_ip = None
|
||||||
|
_latest_firefox_user_agent = None
|
||||||
_GEO_BYPASS = True
|
_GEO_BYPASS = True
|
||||||
_GEO_COUNTRIES = None
|
_GEO_COUNTRIES = None
|
||||||
_GEO_IP_BLOCKS = None
|
_GEO_IP_BLOCKS = None
|
||||||
@ -1202,6 +1204,52 @@ def _download_webpage(
|
|||||||
raise e
|
raise e
|
||||||
self._sleep(timeout, video_id)
|
self._sleep(timeout, video_id)
|
||||||
|
|
||||||
|
def _get_latest_firefox_user_agent(self, fatal=False):
    """Return a Firefox user-agent string built from the latest release version.

    The result of a successful lookup is stored on
    ``InfoExtractor._latest_firefox_user_agent`` and returned directly on
    later calls without another request.

    @param fatal    Passed through to the HEAD request and to the version
                    regex search. When the version cannot be determined
                    and nothing was raised, a user-agent built from a
                    hardcoded default version is returned instead; that
                    fallback value is NOT stored in the class-level cache.
    """
    cached_user_agent = InfoExtractor._latest_firefox_user_agent
    if cached_user_agent:
        return cached_user_agent

    ua_template = 'Mozilla/5.0 (Windows NT 10.0; rv:{0}.0) Gecko/20100101 Firefox/{0}.0'
    # If not fatal, default to latest major version as of 2025.06.24
    fallback_version = '140'

    # Ref: https://ftp.mozilla.org/pub/firefox/releases/latest/README.txt
    # The version number is read from the URL of the response to this HEAD request
    response = self._request_webpage(
        HEADRequest('https://download.mozilla.org/'), None,
        'Fetching latest Firefox version number', 'Unable to fetch latest Firefox version number',
        fatal=fatal, query={'product': 'firefox-latest', 'os': 'linux64', 'lang': 'en-US'})

    # Only a real Response object carries a URL worth parsing
    latest_version = self._search_regex(
        r'/releases/(\d{3})', response.url, 'latest Firefox version number',
        fatal=fatal) if isinstance(response, Response) else None

    if not latest_version:
        self.write_debug(f'Using default Firefox {fallback_version} user-agent instead of latest')
        return ua_template.format(fallback_version)

    InfoExtractor._latest_firefox_user_agent = ua_template.format(latest_version)
    return InfoExtractor._latest_firefox_user_agent
|
||||||
|
|
||||||
|
def _download_firefox_webpage(self, url, video_id, *args, **kwargs):
    """Download a webpage with a Firefox user-agent, retrying with impersonation on HTTP 403.

    The first attempt calls ``_download_webpage`` with the caller's headers
    plus a ``User-Agent`` from ``_get_latest_firefox_user_agent()``; the
    ``impersonate`` and ``require_impersonation`` keyword arguments are
    withheld from that attempt. If it raises an ``ExtractorError`` whose
    cause is an ``HTTPError`` with status 403, the ``User-Agent`` header is
    removed and the download is retried with impersonation. Any other
    ``ExtractorError`` is re-raised unchanged.

    @param impersonate              Keyword-only via **kwargs. Target used for
                                    the retry; 'firefox' is used when this is
                                    omitted or None.
    @param require_impersonation    Keyword-only via **kwargs (default False).
                                    Forwarded to the retry only.
    @param headers                  Keyword-only via **kwargs. Extra request
                                    headers; any User-Agent entry is replaced.

    All remaining positional and keyword arguments are forwarded to
    ``_download_webpage`` on both attempts.
    """
    impersonate_target = kwargs.pop('impersonate', None)
    must_impersonate = kwargs.pop('require_impersonation', False)

    request_headers = HTTPHeaderDict(kwargs.pop('headers', None))
    request_headers.update({'User-Agent': self._get_latest_firefox_user_agent()})
    kwargs['headers'] = request_headers

    try:
        return self._download_webpage(url, video_id, *args, **kwargs)
    except ExtractorError as error:
        got_forbidden = isinstance(error.cause, HTTPError) and error.cause.status == 403
        if not got_forbidden:
            raise
        self.write_debug(f'{video_id}: Got HTTP Error 403, retrying with impersonation')

    # Retry with impersonation if user-agent alone is insufficient.
    # kwargs['headers'] is this same object, so the retry is sent without the User-Agent.
    request_headers.pop('User-Agent')
    if impersonate_target is None:
        impersonate_target = 'firefox'
    kwargs['impersonate'] = impersonate_target
    kwargs['require_impersonation'] = must_impersonate
    return self._download_webpage(url, video_id, *args, **kwargs)
|
||||||
|
|
||||||
def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
|
def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
|
||||||
idstr = format_field(video_id, None, '%s: ')
|
idstr = format_field(video_id, None, '%s: ')
|
||||||
msg = f'[{self.IE_NAME}] {idstr}{msg}'
|
msg = f'[{self.IE_NAME}] {idstr}{msg}'
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking.exceptions import HTTPError
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
@ -58,16 +56,8 @@ class FrancaisFacileIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = urllib.parse.unquote(self._match_id(url))
|
display_id = urllib.parse.unquote(self._match_id(url))
|
||||||
|
# yt-dlp's default Chrome user-agents are too old and blocked by the website
|
||||||
try: # yt-dlp's default user-agents are too old and blocked by the site
|
webpage = self._download_firefox_webpage(url, display_id, impersonate=True)
|
||||||
webpage = self._download_webpage(url, display_id, headers={
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
|
||||||
})
|
|
||||||
except ExtractorError as e:
|
|
||||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
|
||||||
raise
|
|
||||||
# Retry with impersonation if hardcoded UA is insufficient
|
|
||||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
|
||||||
|
|
||||||
data = self._search_json(
|
data = self._search_json(
|
||||||
r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>',
|
r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>',
|
||||||
|
@ -79,7 +79,8 @@ class MiTeleIE(TelecincoBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_akamai_webpage(url, display_id)
|
# yt-dlp's default Chrome user-agents are too old and blocked by akamai
|
||||||
|
webpage = self._download_firefox_webpage(url, display_id, impersonate=True)
|
||||||
pre_player = self._search_json(
|
pre_player = self._search_json(
|
||||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
||||||
webpage, 'Pre Player', display_id)['prePlayer']
|
webpage, 'Pre Player', display_id)['prePlayer']
|
||||||
|
@ -98,11 +98,8 @@ def _extract_embed_urls(cls, url, webpage):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id, headers={
|
webpage = self._download_firefox_webpage(
|
||||||
**traverse_obj(smuggled_data, {'Referer': 'referer'}),
|
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
|
||||||
# yt-dlp's default Chrome user-agents are too old
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:140.0) Gecko/20100101 Firefox/140.0',
|
|
||||||
})
|
|
||||||
data = self._search_json(
|
data = self._search_json(
|
||||||
r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id,
|
r'var\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id,
|
||||||
contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
|
contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
|
||||||
|
@ -63,17 +63,6 @@ def _parse_content(self, content, url):
|
|||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _download_akamai_webpage(self, url, display_id):
    """Download a webpage with a hardcoded Firefox user-agent, retrying with impersonation on HTTP 403.

    An ``ExtractorError`` whose cause is an ``HTTPError`` with status 403
    triggers a single retry using ``impersonate=True`` and no custom
    headers. Any other ``ExtractorError`` is re-raised unchanged.
    """
    # yt-dlp's default user-agents are too old and blocked by akamai
    firefox_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
    }
    try:
        return self._download_webpage(url, display_id, headers=firefox_headers)
    except ExtractorError as error:
        if isinstance(error.cause, HTTPError) and error.cause.status == 403:
            # Retry with impersonation if hardcoded UA is insufficient to bypass akamai
            return self._download_webpage(url, display_id, impersonate=True)
        raise
|
|
||||||
|
|
||||||
|
|
||||||
class TelecincoIE(TelecincoBaseIE):
|
class TelecincoIE(TelecincoBaseIE):
|
||||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||||
@ -151,7 +140,8 @@ class TelecincoIE(TelecincoBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_akamai_webpage(url, display_id)
|
# yt-dlp's default Chrome user-agents are too old and blocked by akamai
|
||||||
|
webpage = self._download_firefox_webpage(url, display_id, impersonate=True)
|
||||||
article = self._search_json(
|
article = self._search_json(
|
||||||
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
|
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
|
||||||
webpage, 'article', display_id)['article']
|
webpage, 'article', display_id)['article']
|
||||||
|
Loading…
Reference in New Issue
Block a user