1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-15 00:48:28 +00:00

Merge branch 'yt-dlp:master' into niconico_error

This commit is contained in:
doe1080 2025-08-01 00:33:16 +09:00 committed by GitHub
commit 296c61c2a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
44 changed files with 757 additions and 627 deletions

View File

@ -242,7 +242,7 @@ jobs:
permissions: permissions:
contents: read contents: read
actions: write # For cleaning up cache actions: write # For cleaning up cache
runs-on: macos-13 runs-on: macos-14
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -261,6 +261,8 @@ jobs:
- name: Install Requirements - name: Install Requirements
run: | run: |
brew install coreutils brew install coreutils
# We need to use system Python in order to roll our own universal2 curl_cffi wheel
brew uninstall --ignore-dependencies python3
python3 -m venv ~/yt-dlp-build-venv python3 -m venv ~/yt-dlp-build-venv
source ~/yt-dlp-build-venv/bin/activate source ~/yt-dlp-build-venv/bin/activate
python3 devscripts/install_deps.py -o --include build python3 devscripts/install_deps.py -o --include build

View File

@ -37,7 +37,7 @@ jobs:
matrix: matrix:
os: [ubuntu-latest] os: [ubuntu-latest]
# CPython 3.9 is in quick-test # CPython 3.9 is in quick-test
python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11]
include: include:
# at least one of each CPython/PyPy tests must be in windows # at least one of each CPython/PyPy tests must be in windows
- os: windows-latest - os: windows-latest
@ -49,7 +49,7 @@ jobs:
- os: windows-latest - os: windows-latest
python-version: '3.13' python-version: '3.13'
- os: windows-latest - os: windows-latest
python-version: pypy-3.10 python-version: pypy-3.11
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}

View File

@ -28,13 +28,13 @@ jobs:
fail-fast: true fail-fast: true
matrix: matrix:
os: [ubuntu-latest] os: [ubuntu-latest]
python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11]
include: include:
# at least one of each CPython/PyPy tests must be in windows # at least one of each CPython/PyPy tests must be in windows
- os: windows-latest - os: windows-latest
python-version: '3.9' python-version: '3.9'
- os: windows-latest - os: windows-latest
python-version: pypy-3.10 python-version: pypy-3.11
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}

View File

@ -25,7 +25,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
os: [ubuntu-latest, windows-latest] os: [ubuntu-latest, windows-latest]
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11] python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.11]
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}

View File

@ -272,7 +272,7 @@ ## Adding support for a new site
You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`).
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. 1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.11. Backward compatibility is not required for even older versions of Python.
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
```shell ```shell

View File

@ -172,7 +172,7 @@ # To install nightly with pip:
``` ```
## DEPENDENCIES ## DEPENDENCIES
Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly.
<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created <!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created
<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x> <!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x>
@ -1902,7 +1902,7 @@ #### tver
* `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated) * `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated)
#### vimeo #### vimeo
* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens * `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens
* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability * `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability
**Note**: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility

View File

@ -62,16 +62,22 @@ def parse_options():
def exe(onedir): def exe(onedir):
"""@returns (name, path)""" """@returns (name, path)"""
platform_name, machine, extension = {
'win32': (None, MACHINE, '.exe'),
'darwin': ('macos', None, None),
}.get(OS_NAME, (OS_NAME, MACHINE, None))
name = '_'.join(filter(None, ( name = '_'.join(filter(None, (
'yt-dlp', 'yt-dlp',
{'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME), platform_name,
MACHINE, machine,
))) )))
return name, ''.join(filter(None, ( return name, ''.join(filter(None, (
'dist/', 'dist/',
onedir and f'{name}/', onedir and f'{name}/',
name, name,
OS_NAME == 'win32' and '.exe', extension,
))) )))

View File

@ -21,9 +21,6 @@ def test_compat_passthrough(self):
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):
_ = compat.compat_basestring _ = compat.compat_basestring
with self.assertWarns(DeprecationWarning):
_ = compat.WINDOWS_VT_MODE
self.assertEqual(urllib.request.getproxies, getproxies) self.assertEqual(urllib.request.getproxies, getproxies)
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):

View File

@ -1373,6 +1373,7 @@ def test_parse_resolution(self):
self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('ep1x2'), {})
self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920})
def test_parse_bitrate(self): def test_parse_bitrate(self):
self.assertEqual(parse_bitrate(None), None) self.assertEqual(parse_bitrate(None), None)

View File

@ -36,6 +36,7 @@
from .globals import ( from .globals import (
IN_CLI, IN_CLI,
LAZY_EXTRACTORS, LAZY_EXTRACTORS,
WINDOWS_VT_MODE,
plugin_ies, plugin_ies,
plugin_ies_overrides, plugin_ies_overrides,
plugin_pps, plugin_pps,
@ -4040,8 +4041,7 @@ def get_encoding(stream):
if os.environ.get('TERM', '').lower() == 'dumb': if os.environ.get('TERM', '').lower() == 'dumb':
additional_info.append('dumb') additional_info.append('dumb')
if not supports_terminal_sequences(stream): if not supports_terminal_sequences(stream):
from .utils import WINDOWS_VT_MODE # Must be imported locally additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI')
additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
if additional_info: if additional_info:
ret = f'{ret} ({",".join(additional_info)})' ret = f'{ret} ({",".join(additional_info)})'
return ret return ret

View File

@ -37,7 +37,7 @@
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError from ..networking.exceptions import HTTPError as compat_HTTPError
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',))
# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE

View File

@ -1335,7 +1335,7 @@ def prepare_line(line):
if len(cookie_list) != self._ENTRY_LEN: if len(cookie_list) != self._ENTRY_LEN:
raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
cookie = self._CookieFileEntry(*cookie_list) cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit(): if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at):
raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
return line return line

View File

@ -572,7 +572,21 @@ def _call_downloader(self, tmpfilename, info_dict):
if end_time: if end_time:
args += ['-t', str(end_time - start_time)] args += ['-t', str(end_time - start_time)]
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] url = fmt['url']
if self.params.get('enable_file_urls') and url.startswith('file:'):
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
# so only local segments can be read unless we also include 'http,https,tcp,tls'
args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls']
# ffmpeg incorrectly handles 'file:' URLs by only removing the
# 'file:' prefix and treating the rest as if it's a normal filepath.
# FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs:
# - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:'
# - On *nix, replace 'file://localhost/' with 'file:/'
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13781
# https://trac.ffmpeg.org/ticket/2702
url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy'] args += ['-c', 'copy']

View File

@ -205,7 +205,7 @@ def is_ad_fragment_end(s):
line = line.strip() line = line.strip()
if line: if line:
if not line.startswith('#'): if not line.startswith('#'):
if format_index and discontinuity_count != format_index: if format_index is not None and discontinuity_count != format_index:
continue continue
if ad_frag_next: if ad_frag_next:
continue continue
@ -231,7 +231,7 @@ def is_ad_fragment_end(s):
byte_range = {} byte_range = {}
elif line.startswith('#EXT-X-MAP'): elif line.startswith('#EXT-X-MAP'):
if format_index and discontinuity_count != format_index: if format_index is not None and discontinuity_count != format_index:
continue continue
if frag_index > 0: if frag_index > 0:
self.report_error( self.report_error(

View File

@ -571,10 +571,6 @@
DWIE, DWIE,
DWArticleIE, DWArticleIE,
) )
from .eagleplatform import (
ClipYouEmbedIE,
EaglePlatformIE,
)
from .ebaumsworld import EbaumsWorldIE from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE from .ebay import EbayIE
from .egghead import ( from .egghead import (
@ -640,6 +636,7 @@
FancodeVodIE, FancodeVodIE,
) )
from .fathom import FathomIE from .fathom import FathomIE
from .faulio import FaulioLiveIE
from .faz import FazIE from .faz import FazIE
from .fc2 import ( from .fc2 import (
FC2IE, FC2IE,
@ -1568,6 +1565,7 @@
) )
from .plutotv import PlutoTVIE from .plutotv import PlutoTVIE
from .plvideo import PlVideoIE from .plvideo import PlVideoIE
from .plyr import PlyrEmbedIE
from .podbayfm import ( from .podbayfm import (
PodbayFMChannelIE, PodbayFMChannelIE,
PodbayFMIE, PodbayFMIE,
@ -1783,6 +1781,7 @@
RTVEALaCartaIE, RTVEALaCartaIE,
RTVEAudioIE, RTVEAudioIE,
RTVELiveIE, RTVELiveIE,
RTVEProgramIE,
RTVETelevisionIE, RTVETelevisionIE,
) )
from .rtvs import RTVSIE from .rtvs import RTVSIE
@ -2166,7 +2165,6 @@
from .trueid import TrueIDIE from .trueid import TrueIDIE
from .trunews import TruNewsIE from .trunews import TruNewsIE
from .truth import TruthIE from .truth import TruthIE
from .trutv import TruTVIE
from .tube8 import Tube8IE from .tube8 import Tube8IE
from .tubetugraz import ( from .tubetugraz import (
TubeTuGrazIE, TubeTuGrazIE,
@ -2237,6 +2235,7 @@
from .tvplayer import TVPlayerIE from .tvplayer import TVPlayerIE
from .tvw import ( from .tvw import (
TvwIE, TvwIE,
TvwNewsIE,
TvwTvChannelsIE, TvwTvChannelsIE,
) )
from .tweakers import TweakersIE from .tweakers import TweakersIE

View File

@ -48,7 +48,6 @@
'username_field': 'user', 'username_field': 'user',
'password_field': 'passwd', 'password_field': 'passwd',
'login_hostname': 'login.xfinity.com', 'login_hostname': 'login.xfinity.com',
'needs_newer_ua': True,
}, },
'TWC': { 'TWC': {
'name': 'Time Warner Cable | Spectrum', 'name': 'Time Warner Cable | Spectrum',
@ -1379,11 +1378,8 @@ def _download_webpage_handle(self, *args, **kwargs):
@staticmethod @staticmethod
def _get_mso_headers(mso_info): def _get_mso_headers(mso_info):
# yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO # Not needed currently
# See: https://github.com/yt-dlp/yt-dlp/issues/10848 return {}
return {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0',
} if mso_info.get('needs_newer_ua') else {}
@staticmethod @staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating): def _get_mvpd_resource(provider_id, title, guid, rating):

View File

@ -33,7 +33,6 @@
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urlhandle_detect_ext, urlhandle_detect_ext,
variadic,
) )
@ -232,6 +231,23 @@ class ArchiveOrgIE(InfoExtractor):
'release_date': '19950402', 'release_date': '19950402',
'timestamp': 1084927901, 'timestamp': 1084927901,
}, },
}, {
# metadata['metadata']['description'] is a list of strings instead of str
'url': 'https://archive.org/details/pra-KZ1908.02',
'info_dict': {
'id': 'pra-KZ1908.02',
'ext': 'mp3',
'display_id': 'KZ1908.02_01.wav',
'title': 'Crips and Bloods speak about gang life',
'description': 'md5:2b56b35ff021311e3554b47a285e70b3',
'uploader': 'jake@archive.org',
'duration': 1733.74,
'track': 'KZ1908.02 01',
'track_number': 1,
'timestamp': 1336026026,
'upload_date': '20120503',
'release_year': 1992,
},
}] }]
@staticmethod @staticmethod
@ -274,34 +290,40 @@ def _real_extract(self, url):
m = metadata['metadata'] m = metadata['metadata']
identifier = m['identifier'] identifier = m['identifier']
info = { info = traverse_obj(m, {
'title': ('title', {str}),
'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any),
'uploader': (('uploader', 'adder'), {str}, any),
'creators': ('creator', (None, ...), {str}, filter, all, filter),
'license': ('licenseurl', {url_or_none}),
'release_date': ('date', {unified_strdate}),
'timestamp': (('publicdate', 'addeddate'), {unified_timestamp}, any),
'location': ('venue', {str}),
'release_year': ('year', {int_or_none}),
})
info.update({
'id': identifier, 'id': identifier,
'title': m['title'],
'description': clean_html(m.get('description')),
'uploader': dict_get(m, ['uploader', 'adder']),
'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'license': m.get('licenseurl'),
'release_date': unified_strdate(m.get('date')),
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
'webpage_url': f'https://archive.org/details/{identifier}', 'webpage_url': f'https://archive.org/details/{identifier}',
'location': m.get('venue'), })
'release_year': int_or_none(m.get('year'))}
for f in metadata['files']: for f in metadata['files']:
if f['name'] in entries: if f['name'] in entries:
entries[f['name']] = merge_dicts(entries[f['name']], { entries[f['name']] = merge_dicts(entries[f['name']], {
'id': identifier + '/' + f['name'], 'id': identifier + '/' + f['name'],
'title': f.get('title') or f['name'], **traverse_obj(f, {
'display_id': f['name'], 'title': (('title', 'name'), {str}, any),
'description': clean_html(f.get('description')), 'display_id': ('name', {str}),
'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})), 'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any),
'duration': parse_duration(f.get('length')), 'creators': ('creator', (None, ...), {str}, filter, all, filter),
'track_number': int_or_none(f.get('track')), 'duration': ('length', {parse_duration}),
'album': f.get('album'), 'track_number': ('track', {int_or_none}),
'discnumber': int_or_none(f.get('disc')), 'album': ('album', {str}),
'release_year': int_or_none(f.get('year'))}) 'discnumber': ('disc', {int_or_none}),
'release_year': ('year', {int_or_none}),
}),
})
entry = entries[f['name']] entry = entries[f['name']]
elif traverse_obj(f, 'original', expected_type=str) in entries: elif traverse_obj(f, ('original', {str})) in entries:
entry = entries[f['original']] entry = entries[f['original']]
else: else:
continue continue

View File

@ -175,13 +175,6 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None):
else: else:
note = f'Downloading video formats for cid {cid}' note = f'Downloading video formats for cid {cid}'
# TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735
# playurl requests carrying old UA will be rejected
headers = {
'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36',
**(headers or {}),
}
return self._download_json( return self._download_json(
'https://api.bilibili.com/x/player/wbi/playurl', bvid, 'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query=self._sign_wbi(params, bvid), headers=headers, note=note)['data'] query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']

View File

@ -1,215 +0,0 @@
import functools
import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
smuggle_url,
unsmuggle_url,
url_or_none,
)
class EaglePlatformIE(InfoExtractor):
    """Extractor for videos served by EaglePlatform players.

    Accepts player URLs on ``*.media.eagleplatform.com`` as well as the
    internal ``eagleplatform:<host>:<id>`` form that the embed detection
    in this class produces.
    """

    # Either the internal "eagleplatform:<host>:<id>" form, or a player URL
    # whose query string carries a numeric record_id
    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1']
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '358597369cf8ba56675c1df15e7af624',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
        'skip': 'Georestricted',
    }, {
        # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
        'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Find EaglePlatform embeds in *webpage*.

        Every returned URL is smuggled with the embedding page's URL as
        ``referrer``. Iframe matches from the inherited implementation take
        precedence; otherwise the two script-based embedding styles are
        tried in turn. Returns None when nothing matches.
        """
        with_referrer = functools.partial(smuggle_url, data={'referrer': url})

        iframe_urls = tuple(super()._extract_embed_urls(url, webpage))
        if iframe_urls:
            return map(with_referrer, iframe_urls)

        # Shared prefix: the <script> tag loading player.js, which also captures the host
        PLAYER_JS_RE = r'''
                        <script[^>]+
                            src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
                        .+?
                    '''

        # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
        basic_usage_re = rf'''(?xs)
                    {PLAYER_JS_RE}
                    <div[^>]+
                        class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
                        data-id=["\'](?P<id>\d+)
            '''

        # Generalization of "Javascript code usage", "Combined usage" and
        # "Usage without attaching to DOM" embeddings (see
        # http://dultonmedia.github.io/eplayer/)
        # %-formatting is used here because the pattern contains literal braces
        js_usage_re = r'''(?xs)
                    %s
                    <script>
                    .+?
                    new\s+EaglePlayer\(
                        (?:[^,]+\s*,\s*)?
                        {
                            .+?
                            \bid\s*:\s*["\']?(?P<id>\d+)
                            .+?
                        }
                    \s*\)
                    .+?
                    </script>
            ''' % PLAYER_JS_RE  # noqa: UP031

        for embed_re in (basic_usage_re, js_usage_re):
            mobj = re.search(embed_re, webpage)
            if mobj is not None:
                return [with_referrer(f'eagleplatform:{mobj.group("host")}:{mobj.group("id")}')]
        return None

    @staticmethod
    def _handle_error(response):
        """Raise an expected ExtractorError if *response* reports a non-200 status."""
        if int_or_none(response.get('status', 200)) == 200:
            return
        raise ExtractorError(' '.join(response['errors']), expected=True)

    def _download_json(self, url_or_request, video_id, *args, **kwargs):
        """Download JSON via the parent implementation.

        When the download fails with an HTTPError, the error response body
        is parsed as JSON and passed to ``_handle_error`` before the
        original exception is re-raised.
        """
        try:
            return super()._download_json(
                url_or_request, video_id, *args, **kwargs)
        except ExtractorError as ee:
            if isinstance(ee.cause, HTTPError):
                error_body = ee.cause.response.read().decode('utf-8')
                self._handle_error(self._parse_json(error_body, video_id))
            raise

    def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
        """Return the first entry of the ``data`` list in the downloaded JSON."""
        payload = self._download_json(url_or_request, video_id, note)
        return payload['data'][0]

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        host = mobj.group('custom_host') or mobj.group('host')

        # The referrer, when smuggled in, is sent both as a header and as a query parameter
        headers, query = {}, {'id': video_id}
        referrer = smuggled_data.get('referrer')
        if referrer:
            headers['Referer'] = referrer
            query['referrer'] = referrer

        player_data = self._download_json(
            f'http://{host}/api/player_data', video_id,
            headers=headers, query=query)
        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        age_restriction = media.get('age_restriction')
        if not age_restriction:
            age_limit = None
        elif age_restriction == 'allow_all':
            age_limit = 0
        else:
            age_limit = 18

        info = {
            'id': video_id,
            'title': media['title'],
            'description': media.get('description'),
            'thumbnail': self._proto_relative_url(media.get('snapshot'), 'http:'),
            'duration': int_or_none(media.get('duration')),
            'view_count': int_or_none(media.get('views')),
            'age_limit': age_limit,
        }

        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')

        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        formats = list(m3u8_formats)

        # HLS formats indexed by height, used as templates for the HTTP formats below
        hls_by_height = {
            hls_fmt['height']: hls_fmt
            for hls_fmt in m3u8_formats
            if hls_fmt.get('height') is not None
        }

        mp4_data = self._download_json(
            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
            # http://lentaru.media.eagleplatform.com/player/player.js
            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
            video_id, 'Downloading mp4 JSON', fatal=False)
        for format_id, format_url in (mp4_data or {}).get('data', {}).items():
            if not url_or_none(format_url):
                continue
            height = int_or_none(format_id)
            hls_twin = hls_by_height.get(height) if height is not None else None
            if hls_twin:
                # Reuse the matching HLS format's fields for the direct HTTP variant
                http_fmt = {
                    **hls_twin,
                    'format_id': hls_twin['format_id'].replace('hls', 'http'),
                    'protocol': 'http',
                }
            else:
                http_fmt = {
                    'format_id': f'http-{format_id}',
                    'height': height,
                }
            http_fmt['url'] = format_url
            formats.append(http_fmt)

        info['formats'] = formats
        return info
class ClipYouEmbedIE(InfoExtractor):
    """Embed-only detector for media.clipyou.ru player iframes."""

    # No URL is matched directly; this class only contributes embed detection
    _VALID_URL = False

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield an ``eagleplatform:<host>:<id>`` URL for the first ClipYou iframe found.

        The yielded URL is smuggled with the embedding page's URL as ``referrer``.
        """
        mobj = re.search(
            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
        if mobj is None:
            return
        eagle_url = f'eagleplatform:{mobj.group("host")}:{mobj.group("id")}'
        yield smuggle_url(eagle_url, {'referrer': url})

105
yt_dlp/extractor/faulio.py Normal file
View File

@ -0,0 +1,105 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import js_to_json, url_or_none
from ..utils.traversal import traverse_obj
class FaulioLiveIE(InfoExtractor):
    """Extractor for live channel pages on the sites listed in ``_DOMAINS``."""

    _DOMAINS = (
        'aloula.sba.sa',
        'bahry.com',
        'maraya.sba.net.ae',
        'sat7plus.org',
    )
    # Any listed domain, an optional en/ar/fa language prefix, then /live/<id>
    _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P<id>[a-zA-Z0-9-]+)'
    _TESTS = [{
        'url': 'https://aloula.sba.sa/live/saudiatv',
        'info_dict': {
            'id': 'aloula.faulio.com_saudiatv',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://bahry.com/live/1',
        'info_dict': {
            'id': 'bahry.faulio.com_1',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://maraya.sba.net.ae/live/1',
        'info_dict': {
            'id': 'maraya.faulio.com_1',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://sat7plus.org/live/pars',
        'info_dict': {
            'id': 'sat7.faulio.com_pars',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://sat7plus.org/fa/live/arabic',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        channel_slug = self._match_id(url)
        page = self._download_webpage(url, channel_slug)

        # The API base URL is read from the Nuxt config object embedded in the page
        nuxt_config = self._search_json(
            r'window\.__NUXT__\.config=', page, 'config', channel_slug, transform_source=js_to_json)
        api_base = nuxt_config['public']['TRANSLATIONS_API_URL']

        # Pick the first entry of the channel listing whose 'url' equals the ID from the page URL
        channel_list = self._download_json(f'{api_base}/channels', channel_slug)
        channel = traverse_obj(
            channel_list, (lambda _, entry: entry['url'] == channel_slug, any))

        formats, subtitles = [], {}

        hls_url = traverse_obj(channel, ('streams', 'hls', {url_or_none}))
        if hls_url:
            hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
                hls_url, channel_slug, 'mp4', m3u8_id='hls', live=True, fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subs, target=subtitles)

        mpd_url = traverse_obj(channel, ('streams', 'mpd', {url_or_none}))
        if mpd_url:
            dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
                mpd_url, channel_slug, mpd_id='dash', fatal=False)
            formats.extend(dash_formats)
            self._merge_subtitles(dash_subs, target=subtitles)

        # The ID is prefixed with the API hostname (e.g. 'aloula.faulio.com' in the tests)
        info = {'id': f'{urllib.parse.urlparse(api_base).hostname}_{channel_slug}'}
        info.update(traverse_obj(channel, {
            'title': ('title', {str}),
            'description': ('description', {str}),
        }))
        info.update({
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        })
        return info

View File

@ -1,9 +1,7 @@
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError,
float_or_none, float_or_none,
url_or_none, url_or_none,
) )
@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
display_id = urllib.parse.unquote(self._match_id(url)) display_id = urllib.parse.unquote(self._match_id(url))
webpage = self._download_webpage(url, display_id)
try: # yt-dlp's default user-agents are too old and blocked by the site
webpage = self._download_webpage(url, display_id, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
})
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
raise
# Retry with impersonation if hardcoded UA is insufficient
webpage = self._download_webpage(url, display_id, impersonate=True)
data = self._search_json( data = self._search_json(
r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>', r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>',

View File

@ -1010,38 +1010,6 @@ class GenericIE(InfoExtractor):
}, },
'add_ie': ['Kaltura'], 'add_ie': ['Kaltura'],
}, },
# referrer protected EaglePlatform embed
{
'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
'info_dict': {
'id': '582306',
'ext': 'mp4',
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3382,
'view_count': int,
},
'params': {
'skip_download': True,
},
},
# ClipYou (EaglePlatform) embed (custom URL)
{
'url': 'http://muz-tv.ru/play/7129/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': {
'id': '12820',
'ext': 'mp4',
'title': "'O Sole Mio",
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 216,
'view_count': int,
},
'params': {
'skip_download': True,
},
'skip': 'This video is unavailable.',
},
# Pladform embed # Pladform embed
{ {
'url': 'http://muz-tv.ru/kinozal/view/7400/', 'url': 'http://muz-tv.ru/kinozal/view/7400/',

View File

@ -3,6 +3,7 @@
class LiveJournalIE(InfoExtractor): class LiveJournalIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)' _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272', 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',

View File

@ -79,7 +79,7 @@ class MiTeleIE(TelecincoBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_akamai_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
pre_player = self._search_json( pre_player = self._search_json(
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
webpage, 'Pre Player', display_id)['prePlayer'] webpage, 'Pre Player', display_id)['prePlayer']

View File

@ -34,7 +34,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac 'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac
) )
_API_BASE = 'http://music.163.com/api/' _API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False
def _create_eapi_cipher(self, api_path, query_body, cookies): def _create_eapi_cipher(self, api_path, query_body, cookies):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
@ -64,6 +63,8 @@ def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
'MUSIC_U': ('MUSIC_U', {lambda i: i.value}), 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
}), }),
} }
if self._x_forwarded_for_ip:
headers.setdefault('X-Real-IP', self._x_forwarded_for_ip)
return self._download_json( return self._download_json(
urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id, urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id,
data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={ data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={

View File

@ -1,63 +1,63 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import parse_duration, parse_iso8601, url_or_none
int_or_none, from ..utils.traversal import traverse_obj
try_get,
unified_timestamp,
)
class ParlviewIE(InfoExtractor): class ParlviewIE(InfoExtractor):
_WORKING = False _VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P<id>[^/?#]+)'
_VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
_TESTS = [{ _TESTS = [{
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661', 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614',
'info_dict': { 'info_dict': {
'id': '542661', 'id': '3406614',
'ext': 'mp4', 'ext': 'mp4',
'title': "Australia's Family Law System [Part 2]", 'title': 'Senate Chamber',
'duration': 5799, 'description': 'Official Recording of Senate Proceedings from the Australian Parliament',
'description': 'md5:7099883b391619dbae435891ca871a62', 'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg',
'timestamp': 1621430700, 'upload_date': '20250325',
'upload_date': '20210519', 'duration': 17999,
'uploader': 'Joint Committee', 'timestamp': 1742939400,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936', 'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv',
'only_matching': True, 'info_dict': {
'id': 'SV1394.dv',
'ext': 'mp4',
'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]',
'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament',
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg',
'upload_date': '19960822',
'duration': 14765,
'timestamp': 840754200,
},
'params': {
'skip_download': True,
},
}] }]
_API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
_MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) video_details = self._download_json(
media = self._download_json(self._API_URL % video_id, video_id).get('media') f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails']
timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/'
stream = try_get(media, lambda x: x['renditions'][0], dict) formats, subtitles = self._extract_m3u8_formats_and_subtitles(
if not stream: video_details['files']['file']['url'], video_id, 'mp4')
self.raise_no_formats('No streams were detected')
elif stream.get('streamType') != 'VOD':
self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType'))))
formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
media_info = self._download_webpage( DURATION_RE = re.compile(r'(?P<duration>\d+:\d+:\d+):\d+')
self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
return { return {
'id': video_id, 'id': video_id,
'url': url,
'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
'formats': formats, 'formats': formats,
'duration': int_or_none(media.get('duration')), 'subtitles': subtitles,
'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')), **traverse_obj(video_details, {
'description': self._html_search_regex( 'title': (('parlViewTitle', 'title'), {str}, any),
r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)', 'description': ('parlViewDescription', {str}),
webpage, 'description', fatal=False), 'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}),
'uploader': self._html_search_regex( 'timestamp': ('recordingFrom', {parse_iso8601}),
r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False), 'thumbnail': ('thumbUrl', {url_or_none}),
'thumbnail': media.get('staticImage'), }),
} }

104
yt_dlp/extractor/plyr.py Normal file
View File

@ -0,0 +1,104 @@
import re
from .common import InfoExtractor
from .vimeo import VimeoIE
class PlyrEmbedIE(InfoExtractor):
    # This class has no URL pattern of its own (_VALID_URL is False); what it
    # contributes is _extract_embed_urls(), which scans a webpage for Plyr
    # <div> containers that reference a Vimeo or YouTube video.
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1"
        'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/',
        'info_dict': {
            'id': '522319456',
            'ext': 'mp4',
            'title': '200.000.000 Mouths (195051)',
            'uploader': 'Zeughauskino',
            'uploader_url': '',
            'comment_count': int,
            'like_count': int,
            'duration': 963,
            'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d',
            'timestamp': 1615467405,
            'upload_date': '20210311',
            'release_timestamp': 1615467405,
            'release_date': '20210311',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-provider="vimeo" data-plyr-embed-id="803435276"
        'url': 'https://www.inarcassa.it/',
        'info_dict': {
            'id': '803435276',
            'ext': 'mp4',
            'title': 'HOME_Moto_Perpetuo',
            'uploader': 'Inarcassa',
            'uploader_url': '',
            'duration': 38,
            'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI"
        'url': 'https://www.profile.nl',
        'info_dict': {
            'id': 'GF-BjYKoAqI',
            'ext': 'mp4',
            'title': 'PROFILE: Recruitment Profile',
            'description': '',
            'media_type': 'video',
            'uploader': 'Profile Nederland',
            'uploader_id': '@profilenederland',
            'uploader_url': 'https://www.youtube.com/@profilenederland',
            'channel': 'Profile Nederland',
            'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_follower_count': int,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 39,
            'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg',
            'categories': ['Autos & Vehicles'],
            'tags': [],
            'timestamp': 1675692990,
            'upload_date': '20230206',
            'playable_in_embed': True,
            'availability': 'public',
            'live_status': 'not_live',
        },
    }, {
        # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube"
        'url': 'https://www.vnis.edu.vn',
        'info_dict': {
            'id': 'vnis.edu',
            'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ',
            'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e',
            'age_limit': 0,
            'thumbnail': 'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png',
            'timestamp': 1753233356,
            'upload_date': '20250723',
        },
        'playlist_count': 3,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # A <div> only counts when it carries BOTH attributes; they may appear
        # in either order, hence the two alternatives with separate group names.
        embed_div_re = re.compile(r'''(?x)
            <div[^>]+(?:
                data-plyr-embed-id="(?P<id1>[^"]+)"[^>]+data-plyr-provider="(?P<provider1>[^"]+)"|
                data-plyr-provider="(?P<provider2>[^"]+)"[^>]+data-plyr-embed-id="(?P<id2>[^"]+)"
            )[^>]*>''')

        for div in embed_div_re.finditer(webpage):
            provider = div.group('provider1') or div.group('provider2')
            embed_id = div.group('id1') or div.group('id2')

            if provider == 'vimeo':
                # The embed id is either a full player URL or a bare video id
                vimeo_url = (
                    embed_id if re.match(r'https?://', embed_id)
                    else f'https://player.vimeo.com/video/{embed_id}')
                # Hand the embedding page's URL along with the Vimeo URL
                yield VimeoIE._smuggle_referrer(vimeo_url, url)
            elif provider == 'youtube':
                yield (
                    embed_id if re.match(r'https?://', embed_id)
                    else f'https://youtube.com/watch?v={embed_id}')
            # Any other provider value is ignored

View File

@ -6,9 +6,11 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
InAdvancePagedList,
clean_html, clean_html,
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none,
make_archive_id, make_archive_id,
parse_iso8601, parse_iso8601,
qualities, qualities,
@ -371,3 +373,62 @@ def _real_extract(self, url):
raise ExtractorError('The webpage doesn\'t contain any video', expected=True) raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key()) return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
class RTVEProgramIE(RTVEBaseIE):
    # Playlist extractor for a program page: resolves the program's numeric ID
    # from the page and lists its videos through the paginated programas API,
    # delegating each video to RTVEALaCartaIE.
    IE_NAME = 'rtve.es:program'
    IE_DESC = 'RTVE.es programs'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.rtve.es/play/videos/saber-vivir/',
        'info_dict': {
            'id': '111570',
            'title': 'Saber vivir - Programa de ciencia y futuro en RTVE Play',
        },
        'playlist_mincount': 400,
    }]
    # Single source of truth for the page size: it is sent to the API as `size`
    # and given to InAdvancePagedList as the number of entries per page.
    _PAGE_SIZE = 60

    def _fetch_page(self, program_id, page_num):
        """Download and return the JSON of one page of the program's video list.

        `page_num` is sent unchanged as the API's `page` query value;
        _real_extract() requests page 1 first.
        """
        return self._download_json(
            f'https://www.rtve.es/api/programas/{program_id}/videos',
            program_id, note=f'Downloading page {page_num}',
            query={
                'type': 39816,
                'page': page_num,
                # Must match the page size InAdvancePagedList is told about,
                # so use the class constant rather than a second literal
                'size': self._PAGE_SIZE,
            })

    def _entries(self, page_data):
        """Yield a url_transparent result for every item on a page that has a valid `htmlUrl`."""
        for video in traverse_obj(page_data, ('page', 'items', lambda _, v: url_or_none(v['htmlUrl']))):
            yield self.url_result(
                video['htmlUrl'], RTVEALaCartaIE, url_transparent=True,
                **traverse_obj(video, {
                    'id': ('id', {str}),
                    'title': ('longTitle', {str}),
                    'description': ('shortDescription', {str}),
                    # scale=1000: presumably the API reports milliseconds - TODO confirm
                    'duration': ('duration', {float_or_none(scale=1000)}),
                    # First string found under either the 'programInfo' or the 'title' key
                    'series': (('programInfo', 'title'), {str}, any),
                    'season_number': ('temporadaOrden', {int_or_none}),
                    'season_id': ('temporadaId', {str}),
                    'season': ('temporada', {str}),
                    'episode_number': ('episode', {int_or_none}),
                    'episode': ('title', {str}),
                    'thumbnail': ('thumbnail', {url_or_none}),
                }),
            )

    def _real_extract(self, url):
        program_slug = self._match_id(url)
        program_page = self._download_webpage(url, program_slug)

        # The API is keyed by the program ID from the page's DC.identifier meta tag, not by the URL slug
        program_id = self._html_search_meta('DC.identifier', program_page, 'Program ID', fatal=True)

        # Page 1 is fetched eagerly to learn the total page count and is then
        # reused for index 0 instead of being downloaded a second time
        first_page = self._fetch_page(program_id, 1)
        page_count = traverse_obj(first_page, ('page', 'totalPages', {int})) or 1

        entries = InAdvancePagedList(
            # idx is 0-based here, while the API page numbers start at 1
            lambda idx: self._entries(self._fetch_page(program_id, idx + 1) if idx else first_page),
            page_count, self._PAGE_SIZE)

        return self.playlist_result(entries, program_id, self._html_extract_title(program_page))

View File

@ -8,84 +8,9 @@
class SportDeutschlandIE(InfoExtractor): class SportDeutschlandIE(InfoExtractor):
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)' _VALID_URL = r'https?://(?:player\.)?sportdeutschland\.tv/(?P<id>(?:[^/?#]+/)?[^?#/&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga', # Single-part video, direct link
'info_dict': {
'id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54',
'ext': 'mp4',
'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga',
'display_id': 'blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
'description': 'md5:a288c794a5ee69e200d8f12982f81a87',
'live_status': 'was_live',
'channel': 'Blau-Weiss Buchholz Tanzsport',
'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport',
'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3',
'duration': 32447,
'upload_date': '20230114',
'timestamp': 1673733618,
},
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
'info_dict': {
'id': '95c80c52-6b9a-4ae9-9197-984145adfced',
'ext': 'mp4',
'title': 'BWF Tour: 1. Runde Feld 1 - YONEX GAINWARD German Open 2022',
'display_id': 'deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e',
'live_status': 'was_live',
'channel': 'Deutscher Badminton Verband',
'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband',
'channel_id': '93ca5866-2551-49fc-8424-6db35af58920',
'duration': 41097,
'upload_date': '20220309',
'timestamp': 1646860727.0,
},
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023',
'info_dict': {
'id': '9889785e-55b0-4d97-a72a-ce9a9f157cce',
'title': 'Formationswochenende Latein 2023 - Samstag',
'display_id': 'ggcbremen/formationswochenende-latein-2023',
'description': 'md5:6e4060d40ff6a8f8eeb471b51a8f08b2',
'live_status': 'was_live',
'channel': 'Grün-Gold-Club Bremen e.V.',
'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb',
'channel_url': 'https://sportdeutschland.tv/ggcbremen',
},
'playlist_count': 3,
'playlist': [{
'info_dict': {
'id': '988e1fea-9d44-4fab-8c72-3085fb667547',
'ext': 'mp4',
'channel_url': 'https://sportdeutschland.tv/ggcbremen',
'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb',
'channel': 'Grün-Gold-Club Bremen e.V.',
'duration': 86,
'title': 'Formationswochenende Latein 2023 - Samstag Part 1',
'upload_date': '20230225',
'timestamp': 1677349909,
'live_status': 'was_live',
},
}],
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
'info_dict': {
'id': '95d71b8a-370a-4b87-ad16-94680da18528',
'ext': 'mp4',
'title': r're:Gymnastik International - Tag 1 .+',
'display_id': 'dtb/gymnastik-international-tag-1',
'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
'channel': 'Deutscher Turner-Bund',
'channel_url': 'https://sportdeutschland.tv/dtb',
'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
'live_status': 'is_live',
},
'skip': 'live',
}, {
'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates', 'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
'md5': '35c11a19395c938cdd076b93bda54cde', 'md5': '35c11a19395c938cdd076b93bda54cde',
'info_dict': { 'info_dict': {
@ -100,7 +25,82 @@ class SportDeutschlandIE(InfoExtractor):
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b', 'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
'timestamp': 1749913117, 'timestamp': 1749913117,
'upload_date': '20250614', 'upload_date': '20250614',
'duration': 12287.0,
}, },
}, {
# Single-part video, embedded player link
'url': 'https://player.sportdeutschland.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc',
'info_dict': {
'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
'ext': 'mp4',
'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
'display_id': '9e9619c4-7d77-43c4-926d-49fb57dc06dc',
'channel': 'Rostock Griffins',
'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
'live_status': 'was_live',
'description': 'md5:60cb00067e55dafa27b0933a43d72862',
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
'timestamp': 1749913117,
'upload_date': '20250614',
'duration': 12287.0,
},
'params': {'skip_download': True},
}, {
# Multi-part video
'url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
'info_dict': {
'id': '9f63d737-2444-4e3a-a1ea-840df73fd481',
'display_id': 'rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2',
'description': 'md5:0a17da15e48a687e6019639c3452572b',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'live_status': 'was_live',
},
'playlist_count': 2,
'playlist': [{
'info_dict': {
'id': '9f725a94-d43e-40ff-859d-13da3081bb04',
'ext': 'mp4',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 1',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'duration': 14773.0,
'timestamp': 1753085197,
'upload_date': '20250721',
'live_status': 'was_live',
},
}, {
'info_dict': {
'id': '9f725a94-370e-4477-89ac-1751098e3217',
'ext': 'mp4',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 2',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'duration': 14773.0,
'timestamp': 1753128421,
'upload_date': '20250721',
'live_status': 'was_live',
},
}],
}, {
# Livestream
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
'info_dict': {
'id': '95d71b8a-370a-4b87-ad16-94680da18528',
'ext': 'mp4',
'title': r're:Gymnastik International - Tag 1 .+',
'display_id': 'dtb/gymnastik-international-tag-1',
'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
'channel': 'Deutscher Turner-Bund',
'channel_url': 'https://sportdeutschland.tv/dtb',
'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
'live_status': 'is_live',
},
'skip': 'live',
}] }]
def _process_video(self, asset_id, video): def _process_video(self, asset_id, video):

View File

@ -99,10 +99,10 @@ def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True) url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
data = self._search_json( data = self._search_json(
r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id, r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage, 'player info',
contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];', video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
transform_source=lambda x: base64.b64decode(x).decode()) transform_source=lambda x: base64.b64decode(x).decode())
# SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e] # SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e]

View File

@ -33,16 +33,20 @@ def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=No
**(headers or {}), **(headers or {}),
}) })
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status in {403, 404}: if isinstance(e.cause, HTTPError) and e.cause.status in (403, 404):
error = self._parse_json(e.cause.response.read().decode(), media_id, fatal=False) error = self._parse_json(e.cause.response.read().decode(), media_id, fatal=False)
message = traverse_obj(error, ('message', {str})) message = traverse_obj(error, ('message', {str}))
code = traverse_obj(error, ('code', {str})) code = traverse_obj(error, ('code', {str}))
error_id = traverse_obj(error, ('id', {int}))
if code == 'REQUEST_FAILED': if code == 'REQUEST_FAILED':
self.raise_geo_restricted(message, countries=self._GEO_COUNTRIES) if error_id == 124:
elif code == 'MEDIA_NOT_FOUND': self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
raise ExtractorError(message, expected=True) elif error_id == 126:
elif code or message: raise ExtractorError('Access is denied (possibly due to invalid/missing API key)')
raise ExtractorError(join_nonempty(code, message, delim=': ')) if code == 'MEDIA_NOT_FOUND':
raise ExtractorError(join_nonempty(code, message, delim=': '), expected=True)
if code or message:
raise ExtractorError(join_nonempty(code, error_id, message, delim=': '))
raise raise
streaks_id = response['id'] streaks_id = response['id']

View File

@ -5,45 +5,110 @@
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
make_archive_id,
strip_or_none, strip_or_none,
) )
from ..utils.traversal import traverse_obj
class TBSIE(TurnerBaseIE): class TBSIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))' _SITE_INFO = {
'tbs': ('TBS', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg'),
'tntdrama': ('TNT', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA'),
'trutv': ('truTV', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'),
}
_VALID_URL = fr'''(?x)
https?://(?:www\.)?(?P<site>{"|".join(map(re.escape, _SITE_INFO))})\.com
(?P<path>/(?:
(?P<watch>watch(?:tnt|tbs|trutv))|
movies|shows/[^/?#]+/(?:clips|season-\d+/episode-\d+)
)/(?P<id>[^/?#]+))
'''
_TESTS = [{ _TESTS = [{
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', 'url': 'https://www.tbs.com/shows/american-dad/season-6/episode-12/you-debt-your-life',
'info_dict': { 'info_dict': {
'id': '8d384cde33b89f3a43ce5329de42903ed5099887', 'id': '984bdcd8db0cc00dc699927f2a411c8c6e0e48f3',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Monster', 'title': 'You Debt Your Life',
'description': 'Get a first look at the theatrical trailer for TNTs highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.', 'description': 'md5:f211cfeb9187fd3cdb53eb0e8930d499',
'timestamp': 1508175329, 'duration': 1231.0,
'upload_date': '20171016', 'thumbnail': r're:https://images\.tbs\.com/tbs/.+\.(?:jpe?g|png)',
'chapters': 'count:4',
'season': 'Season 6',
'season_number': 6,
'episode': 'Episode 12',
'episode_number': 12,
'timestamp': 1478276239,
'upload_date': '20161104',
}, },
'params': { 'params': {'skip_download': 'm3u8'},
# m3u8 download }, {
'skip_download': True, 'url': 'https://www.tntdrama.com/shows/the-librarians-the-next-chapter/season-1/episode-10/and-going-medieval',
'info_dict': {
'id': 'e487b31b663a8001864f62fd20907782f7b8ccb8',
'ext': 'mp4',
'title': 'And Going Medieval',
'description': 'md5:5aed0ae23a6cf148a02fe3c1be8359fa',
'duration': 2528.0,
'thumbnail': r're:https://images\.tntdrama\.com/tnt/.+\.(?:jpe?g|png)',
'chapters': 'count:7',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 10',
'episode_number': 10,
'timestamp': 1743107520,
'upload_date': '20250327',
}, },
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.trutv.com/shows/the-carbonaro-effect/season-1/episode-1/got-the-bug-out',
'info_dict': {
'id': 'b457dd7458fd9e64b596355950b13a1ca799dc39',
'ext': 'mp4',
'title': 'Got the Bug Out',
'description': 'md5:9eeddf6248f73517b0e5969b8a43c025',
'duration': 1283.0,
'thumbnail': r're:https://images\.trutv\.com/tru/.+\.(?:jpe?g|png)',
'chapters': 'count:4',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
'timestamp': 1570040829,
'upload_date': '20191002',
'_old_archive_ids': ['trutv b457dd7458fd9e64b596355950b13a1ca799dc39'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
'only_matching': True,
}, { }, {
'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew', 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope', 'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.trutv.com/shows/impractical-jokers/season-9/episode-1/you-dirty-dog',
'only_matching': True,
}, {
'url': 'https://www.trutv.com/watchtrutv/east',
'only_matching': True,
}, {
'url': 'https://www.tbs.com/watchtbs/east',
'only_matching': True,
}, {
'url': 'https://www.tntdrama.com/watchtnt/east',
'only_matching': True,
}] }]
_SOFTWARE_STATEMENT_MAP = {
'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg',
'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA',
}
def _real_extract(self, url): def _real_extract(self, url):
site, path, display_id = self._match_valid_url(url).groups() site, path, display_id, watch = self._match_valid_url(url).group('site', 'path', 'id', 'watch')
is_live = bool(watch)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
drupal_settings = self._parse_json(self._search_regex( drupal_settings = self._search_json(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>', r'<script\b[^>]+\bdata-drupal-selector="drupal-settings-json"[^>]*>',
webpage, 'drupal setting'), display_id) webpage, 'drupal settings', display_id)
is_live = 'watchtnt' in path or 'watchtbs' in path
video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path) video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path)
media_id = video_data['mediaID'] media_id = video_data['mediaID']
@ -51,10 +116,14 @@ def _real_extract(self, url):
tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse( tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse(
drupal_settings['ngtv_token_url']).query) drupal_settings['ngtv_token_url']).query)
auth_info = traverse_obj(drupal_settings, ('top2', {dict})) or {}
site_name = auth_info.get('siteName') or self._SITE_INFO[site][0]
software_statement = auth_info.get('softwareStatement') or self._SITE_INFO[site][1]
info = self._extract_ngtv_info( info = self._extract_ngtv_info(
media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], { media_id, tokenizer_query, software_statement, {
'url': url, 'url': url,
'site_name': site[:3].upper(), 'site_name': site_name,
'auth_required': video_data.get('authRequired') == '1' or is_live, 'auth_required': video_data.get('authRequired') == '1' or is_live,
'is_live': is_live, 'is_live': is_live,
}) })
@ -87,4 +156,6 @@ def _real_extract(self, url):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'is_live': is_live, 'is_live': is_live,
}) })
if site == 'trutv':
info['_old_archive_ids'] = [make_archive_id(site, media_id)]
return info return info

View File

@ -63,17 +63,6 @@ def _parse_content(self, content, url):
'http_headers': headers, 'http_headers': headers,
} }
def _download_akamai_webpage(self, url, display_id):
try: # yt-dlp's default user-agents are too old and blocked by akamai
return self._download_webpage(url, display_id, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
})
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
raise
# Retry with impersonation if hardcoded UA is insufficient to bypass akamai
return self._download_webpage(url, display_id, impersonate=True)
class TelecincoIE(TelecincoBaseIE): class TelecincoIE(TelecincoBaseIE):
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
@ -151,7 +140,7 @@ class TelecincoIE(TelecincoBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_akamai_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
article = self._search_json( article = self._search_json(
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
webpage, 'article', display_id)['article'] webpage, 'article', display_id)['article']

View File

@ -1,71 +0,0 @@
from .turner import TurnerBaseIE
from ..utils import (
int_or_none,
parse_iso8601,
)
class TruTVIE(TurnerBaseIE):
    # Handles trutv.com show/full-episode URLs: either a numeric episode id or
    # a "videos/<clip-slug>" clip, both resolved through api.trutv.com.
    _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
    _TEST = {
        'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
        'info_dict': {
            'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
            'ext': 'mp4',
            'title': 'Sunlight-Activated Flower',
            'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'

    def _real_extract(self, url):
        series_slug, clip_slug, video_id = self._match_valid_url(url).groups()

        # Episodes and clips use different API paths and keep their metadata
        # under different top-level keys of the response
        if video_id:
            path, display_id, payload_key = 'episode', video_id, 'episode'
        else:
            path, display_id, payload_key = 'series/clip', clip_slug, 'info'

        video_data = self._download_json(
            f'https://api.trutv.com/v2/web/{path}/{series_slug}/{display_id}',
            display_id)[payload_key]

        media_id = video_data['mediaId']
        title = video_data['title'].strip()

        info = self._extract_ngtv_info(
            media_id, {}, self._SOFTWARE_STATEMENT, {
                'url': url,
                'site_name': 'truTV',
                'auth_required': video_data.get('isAuthRequired'),
            })

        # Images without a source URL are dropped
        thumbnails = [{
            'url': image['srcUrl'],
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in video_data.get('images', []) if image.get('srcUrl')]

        # Layer the page-level metadata over what _extract_ngtv_info returned
        info.update({
            'id': media_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video_data.get('publicationDate')),
            'series': video_data.get('showTitle'),
            'season_number': int_or_none(video_data.get('seasonNum')),
            'episode_number': int_or_none(video_data.get('episodeNum')),
        })
        return info

View File

@ -251,6 +251,11 @@ def _extract_ngtv_info(self, media_id, tokenizer_query, software_statement, ap_d
'end_time': start_time + chapter_duration, 'end_time': start_time + chapter_duration,
}) })
if is_live:
for f in formats:
# Prevent ffmpeg from adding its own http headers or else we get HTTP Error 403
f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-icy', '0']}
return { return {
'formats': formats, 'formats': formats,
'chapters': chapters, 'chapters': chapters,

View File

@ -1,12 +1,16 @@
import datetime as dt
from .streaks import StreaksBaseIE from .streaks import StreaksBaseIE
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
GeoRestrictedError,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
make_archive_id, make_archive_id,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
strip_or_none, strip_or_none,
time_seconds,
update_url_query, update_url_query,
) )
from ..utils.traversal import require, traverse_obj from ..utils.traversal import require, traverse_obj
@ -96,6 +100,7 @@ class TVerIE(StreaksBaseIE):
'Referer': 'https://tver.jp/', 'Referer': 'https://tver.jp/',
} }
_PLATFORM_QUERY = {} _PLATFORM_QUERY = {}
_STREAKS_API_INFO = {}
def _real_initialize(self): def _real_initialize(self):
session_info = self._download_json( session_info = self._download_json(
@ -105,6 +110,9 @@ def _real_initialize(self):
'platform_uid': 'platform_uid', 'platform_uid': 'platform_uid',
'platform_token': 'platform_token', 'platform_token': 'platform_token',
})) }))
self._STREAKS_API_INFO = self._download_json(
'https://player.tver.jp/player/streaks_info_v2.json', None,
'Downloading STREAKS API info', 'Unable to download STREAKS API info')
def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None): def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
return self._download_json( return self._download_json(
@ -219,15 +227,26 @@ def _real_extract(self, url):
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url( 'url': smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, brightcove_id), self.BRIGHTCOVE_URL_TEMPLATE % (account_id, brightcove_id),
{'geo_countries': ['JP']}), {'geo_countries': self._GEO_COUNTRIES}),
'ie_key': 'BrightcoveNew', 'ie_key': 'BrightcoveNew',
} }
return { project_id = video_info['streaks']['projectID']
**self._extract_from_streaks_api(video_info['streaks']['projectID'], streaks_id, { key_idx = dt.datetime.fromtimestamp(time_seconds(hours=9), dt.timezone.utc).month % 6 or 6
try:
streaks_info = self._extract_from_streaks_api(project_id, streaks_id, {
'Origin': 'https://tver.jp', 'Origin': 'https://tver.jp',
'Referer': 'https://tver.jp/', 'Referer': 'https://tver.jp/',
}), 'X-Streaks-Api-Key': self._STREAKS_API_INFO[project_id]['api_key'][f'key0{key_idx}'],
})
except GeoRestrictedError as e:
# Catch and re-raise with metadata_available to support --ignore-no-formats-error
self.raise_geo_restricted(e.orig_msg, countries=self._GEO_COUNTRIES, metadata_available=True)
streaks_info = {}
return {
**streaks_info,
**metadata, **metadata,
'id': video_id, 'id': video_id,
'_old_archive_ids': [make_archive_id('BrightcoveNew', brightcove_id)] if brightcove_id else None, '_old_archive_ids': [make_archive_id('BrightcoveNew', brightcove_id)] if brightcove_id else None,

View File

@ -10,12 +10,15 @@
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
) )
from ..utils.traversal import find_element, traverse_obj from ..utils.traversal import find_element, find_elements, traverse_obj
class TvwIE(InfoExtractor): class TvwIE(InfoExtractor):
IE_NAME = 'tvw' IE_NAME = 'tvw'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)' _VALID_URL = [
r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)',
r'https?://(?:www\.)?tvw\.org/watch/?\?(?:[^#]+&)?eventID=(?P<id>\d+)',
]
_TESTS = [{ _TESTS = [{
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/', 'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
'md5': '9ceb94fe2bb7fd726f74f16356825703', 'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -75,6 +78,20 @@ class TvwIE(InfoExtractor):
'display_id': 'washington-to-washington-a-new-space-race-2022041111', 'display_id': 'washington-to-washington-a-new-space-race-2022041111',
'categories': ['Washington to Washington', 'General Interest'], 'categories': ['Washington to Washington', 'General Interest'],
}, },
}, {
'url': 'https://tvw.org/watch?eventID=2025041235',
'md5': '7d697c02f110b37d6a47622ea608ca90',
'info_dict': {
'id': '2025041235',
'ext': 'mp4',
'title': 'Legislative Review - Medicaid Postpartum Bill Sparks Debate & Senate Approves Automatic Voter Registration',
'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
'description': 'md5:37d0f3a9187ae520aac261b3959eaee6',
'timestamp': 1745006400,
'upload_date': '20250418',
'location': 'Hayner Media Center',
'categories': ['Legislative Review'],
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -125,6 +142,41 @@ def _real_extract(self, url):
} }
class TvwNewsIE(InfoExtractor):
    # Playlist extractor for dated tvw.org article pages (`/YYYY/MM/<slug>`);
    # each embedded `invintus-player` element becomes one TvwIE entry
    IE_NAME = 'tvw:news'
    _VALID_URL = r'https?://(?:www\.)?tvw\.org/\d{4}/\d{2}/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/',
        'info_dict': {
            'id': 'the-impact-issues-to-watch-in-the-2024-legislative-session',
            'title': 'The Impact - Issues to Watch in the 2024 Legislative Session',
            'description': 'md5:65f0b33ec8f18ff1cd401c5547aa5441',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://tvw.org/2024/06/the-impact-water-rights-and-the-skookumchuck-dam-debate/',
        'info_dict': {
            'id': 'the-impact-water-rights-and-the-skookumchuck-dam-debate',
            'title': 'The Impact - Water Rights and the Skookumchuck Dam Debate',
            'description': 'md5:185f3a2350ef81e3fa159ac3e040a94b',
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Collect the `data-eventid` attribute of every player element on the page
        event_ids = traverse_obj(webpage, (
            {find_elements(cls='invintus-player', html=True)}, ..., {extract_attributes}, 'data-eventid'))
        # The og:title carries a ' - TVW' suffix that is stripped for the playlist title
        title = remove_end(self._og_search_title(webpage, default=None), ' - TVW')
        description = self._og_search_description(webpage, default=None)

        return self.playlist_from_matches(
            event_ids, playlist_id,
            playlist_title=title,
            playlist_description=description,
            getter=lambda event_id: f'https://tvw.org/watch?eventID={event_id}', ie=TvwIE)
class TvwTvChannelsIE(InfoExtractor): class TvwTvChannelsIE(InfoExtractor):
IE_NAME = 'tvw:tvchannels' IE_NAME = 'tvw:tvchannels'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'

View File

@ -49,7 +49,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'Cannot download embed-only video without embedding URL. Please call yt-dlp ' 'Cannot download embed-only video without embedding URL. Please call yt-dlp '
'with the URL of the page that embeds this video.') 'with the URL of the page that embeds this video.')
_DEFAULT_CLIENT = 'android' _DEFAULT_CLIENT = 'web'
_DEFAULT_AUTHED_CLIENT = 'web' _DEFAULT_AUTHED_CLIENT = 'web'
_CLIENT_HEADERS = { _CLIENT_HEADERS = {
'Accept': 'application/vnd.vimeo.*+json; version=3.4.10', 'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
@ -58,7 +58,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_CLIENT_CONFIGS = { _CLIENT_CONFIGS = {
'android': { 'android': {
'CACHE_KEY': 'oauth-token-android', 'CACHE_KEY': 'oauth-token-android',
'CACHE_ONLY': False, 'CACHE_ONLY': True,
'VIEWER_JWT': False, 'VIEWER_JWT': False,
'REQUIRES_AUTH': False, 'REQUIRES_AUTH': False,
'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==', 'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==',
@ -88,6 +88,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
), ),
}, },
'web': { 'web': {
'CACHE_ONLY': False,
'VIEWER_JWT': True, 'VIEWER_JWT': True,
'REQUIRES_AUTH': True, 'REQUIRES_AUTH': True,
'USER_AGENT': None, 'USER_AGENT': None,
@ -142,7 +143,6 @@ def _perform_login(self, username, password):
'service': 'vimeo', 'service': 'vimeo',
'token': viewer['xsrft'], 'token': viewer['xsrft'],
} }
self._set_vimeo_cookie('vuid', viewer['vuid'])
try: try:
self._download_webpage( self._download_webpage(
self._LOGIN_URL, None, 'Logging in', self._LOGIN_URL, None, 'Logging in',
@ -151,16 +151,40 @@ def _perform_login(self, username, password):
'Referer': self._LOGIN_URL, 'Referer': self._LOGIN_URL,
}) })
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 418: if isinstance(e.cause, HTTPError) and e.cause.status in (405, 418):
raise ExtractorError( raise ExtractorError(
'Unable to log in: bad username or password', 'Unable to log in: bad username or password',
expected=True) expected=True)
raise ExtractorError('Unable to log in') raise ExtractorError('Unable to log in')
# Clear unauthenticated viewer info
self._viewer_info = None
def _real_initialize(self): def _real_initialize(self):
if self._LOGIN_REQUIRED and not self._is_logged_in: if self._is_logged_in:
return
if self._LOGIN_REQUIRED:
self.raise_login_required() self.raise_login_required()
if self._DEFAULT_CLIENT != 'web':
return
for client_name, client_config in self._CLIENT_CONFIGS.items():
if not client_config['CACHE_ONLY']:
continue
cache_key = client_config['CACHE_KEY']
if cache_key not in self._oauth_tokens:
if token := self.cache.load(self._NETRC_MACHINE, cache_key):
self._oauth_tokens[cache_key] = token
if self._oauth_tokens.get(cache_key):
self._DEFAULT_CLIENT = client_name
self.write_debug(
f'Found cached {client_name} token; using {client_name} as default API client')
return
def _get_video_password(self): def _get_video_password(self):
password = self.get_param('videopassword') password = self.get_param('videopassword')
if password is None: if password is None:
@ -200,9 +224,6 @@ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
if vimeo_config: if vimeo_config:
return self._parse_json(vimeo_config, video_id) return self._parse_json(vimeo_config, video_id)
def _set_vimeo_cookie(self, name, value):
self._set_cookie('vimeo.com', name, value)
def _parse_config(self, config, video_id): def _parse_config(self, config, video_id):
video_data = config['video'] video_data = config['video']
video_title = video_data.get('title') video_title = video_data.get('title')
@ -363,22 +384,26 @@ def _fetch_oauth_token(self, client):
return f'Bearer {self._oauth_tokens[cache_key]}' return f'Bearer {self._oauth_tokens[cache_key]}'
def _get_requested_client(self): def _get_requested_client(self):
default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT if client := self._configuration_arg('client', [None], ie_key=VimeoIE)[0]:
if client not in self._CLIENT_CONFIGS:
raise ExtractorError(
f'Unsupported API client "{client}" requested. '
f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True)
self.write_debug(
f'Using {client} API client as specified by extractor argument', only_once=True)
return client
client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0] if self._is_logged_in:
if client not in self._CLIENT_CONFIGS: return self._DEFAULT_AUTHED_CLIENT
raise ExtractorError(
f'Unsupported API client "{client}" requested. '
f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True)
return client return self._DEFAULT_CLIENT
def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs): def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs):
client = force_client or self._get_requested_client() client = force_client or self._get_requested_client()
client_config = self._CLIENT_CONFIGS[client] client_config = self._CLIENT_CONFIGS[client]
if client_config['REQUIRES_AUTH'] and not self._is_logged_in: if client_config['REQUIRES_AUTH'] and not self._is_logged_in:
self.raise_login_required(f'The {client} client requires authentication') self.raise_login_required(f'The {client} client only works when logged-in')
return self._download_json( return self._download_json(
join_nonempty( join_nonempty(
@ -1192,7 +1217,6 @@ def _try_album_password(self, url):
raise ExtractorError( raise ExtractorError(
'This album is protected by a password, use the --video-password option', 'This album is protected by a password, use the --video-password option',
expected=True) expected=True)
self._set_vimeo_cookie('vuid', viewer['vuid'])
try: try:
self._download_json( self._download_json(
f'https://vimeo.com/showcase/{album_id}/auth', f'https://vimeo.com/showcase/{album_id}/auth',
@ -1589,7 +1613,6 @@ def _real_extract(self, url):
raise ExtractorError( raise ExtractorError(
'This album is protected by a password, use the --video-password option', 'This album is protected by a password, use the --video-password option',
expected=True) expected=True)
self._set_vimeo_cookie('vuid', viewer['vuid'])
try: try:
hashed_pass = self._download_json( hashed_pass = self._download_json(
f'https://vimeo.com/showcase/{album_id}/auth', f'https://vimeo.com/showcase/{album_id}/auth',

View File

@ -1,3 +1,4 @@
import os
from collections import defaultdict from collections import defaultdict
# Please Note: Due to necessary changes and the complex nature involved in the plugin/globals system, # Please Note: Due to necessary changes and the complex nature involved in the plugin/globals system,
@ -28,3 +29,4 @@ def __repr__(self, /):
# Misc # Misc
IN_CLI = Indirect(False) IN_CLI = Indirect(False)
LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled
WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None)

View File

@ -90,7 +90,7 @@ def run(self, info):
if info['ext'] == 'mp3': if info['ext'] == 'mp3':
options = [ options = [
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] '-metadata:s:v', 'title=Album cover', '-metadata:s:v', 'comment=Cover (front)']
self._report_run('ffmpeg', filename) self._report_run('ffmpeg', filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)

View File

@ -33,8 +33,17 @@ class XAttrMetadataPP(PostProcessor):
# (e.g., 4kB on ext4), and we don't want to have the other ones fail # (e.g., 4kB on ext4), and we don't want to have the other ones fail
'user.dublincore.description': 'description', 'user.dublincore.description': 'description',
# 'user.xdg.comment': 'description', # 'user.xdg.comment': 'description',
'com.apple.metadata:kMDItemWhereFroms': 'webpage_url',
} }
APPLE_PLIST_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<array>
\t<string>%s</string>
</array>
</plist>'''
def run(self, info): def run(self, info):
mtime = os.stat(info['filepath']).st_mtime mtime = os.stat(info['filepath']).st_mtime
self.to_screen('Writing metadata to file\'s xattrs') self.to_screen('Writing metadata to file\'s xattrs')
@ -44,6 +53,8 @@ def run(self, info):
if value: if value:
if infoname == 'upload_date': if infoname == 'upload_date':
value = hyphenate_date(value) value = hyphenate_date(value)
elif xattrname == 'com.apple.metadata:kMDItemWhereFroms':
value = self.APPLE_PLIST_TEMPLATE % value
write_xattr(info['filepath'], xattrname, value.encode()) write_xattr(info['filepath'], xattrname, value.encode())
except XAttrUnavailableError as e: except XAttrUnavailableError as e:

View File

@ -139,7 +139,18 @@ def _get_binary_name():
def _get_system_deprecation(): def _get_system_deprecation():
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 9) MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 10)
EXE_MSG_TMPL = ('Support for {} has been deprecated. '
'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}')
STOP_MSG = 'You may stop receiving updates on this version at any time!'
variant = detect_variant()
# Temporary until macos_legacy executable builds are discontinued
if variant == 'darwin_legacy_exe':
return EXE_MSG_TMPL.format(
f'{variant} (the PyInstaller-bundled executable for macOS versions older than 10.15)',
'issues/13856', STOP_MSG)
if sys.version_info > MIN_RECOMMENDED: if sys.version_info > MIN_RECOMMENDED:
return None return None
@ -150,6 +161,13 @@ def _get_system_deprecation():
if sys.version_info < MIN_SUPPORTED: if sys.version_info < MIN_SUPPORTED:
return f'Python version {major}.{minor} is no longer supported! {PYTHON_MSG}' return f'Python version {major}.{minor} is no longer supported! {PYTHON_MSG}'
# Temporary until aarch64/armv7l build flow is bumped to Ubuntu 22.04 and Python 3.10
if variant in ('linux_aarch64_exe', 'linux_armv7l_exe'):
libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2])
if libc_ver < (2, 35):
return EXE_MSG_TMPL.format('system glibc version < 2.35', 'issues/13858', STOP_MSG)
return None
return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}' return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}'

View File

@ -52,7 +52,7 @@
compat_HTMLParseError, compat_HTMLParseError,
) )
from ..dependencies import xattr from ..dependencies import xattr
from ..globals import IN_CLI from ..globals import IN_CLI, WINDOWS_VT_MODE
__name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module __name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module
@ -1875,6 +1875,11 @@ def parse_resolution(s, *, lenient=False):
if mobj: if mobj:
return {'height': int(mobj.group(1)) * 540} return {'height': int(mobj.group(1)) * 540}
if lenient:
mobj = re.search(r'(?<!\d)(\d{2,5})w(?![a-zA-Z0-9])', s)
if mobj:
return {'width': int(mobj.group(1))}
return {} return {}
@ -4759,13 +4764,10 @@ def jwt_decode_hs256(jwt):
return json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) return json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
WINDOWS_VT_MODE = False if os.name == 'nt' else None
@functools.cache @functools.cache
def supports_terminal_sequences(stream): def supports_terminal_sequences(stream):
if os.name == 'nt': if os.name == 'nt':
if not WINDOWS_VT_MODE: if not WINDOWS_VT_MODE.value:
return False return False
elif not os.getenv('TERM'): elif not os.getenv('TERM'):
return False return False
@ -4802,8 +4804,7 @@ def windows_enable_vt_mode():
finally: finally:
os.close(handle) os.close(handle)
global WINDOWS_VT_MODE WINDOWS_VT_MODE.value = True
WINDOWS_VT_MODE = True
supports_terminal_sequences.cache_clear() supports_terminal_sequences.cache_clear()

View File

@ -15,48 +15,10 @@
def random_user_agent(): def random_user_agent():
_USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36'
_CHROME_VERSIONS = ( # Target versions released within the last ~6 months
'90.0.4430.212', CHROME_MAJOR_VERSION_RANGE = (132, 138)
'90.0.4430.24', return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0')
'90.0.4430.70',
'90.0.4430.72',
'90.0.4430.85',
'90.0.4430.93',
'91.0.4472.101',
'91.0.4472.106',
'91.0.4472.114',
'91.0.4472.124',
'91.0.4472.164',
'91.0.4472.19',
'91.0.4472.77',
'92.0.4515.107',
'92.0.4515.115',
'92.0.4515.131',
'92.0.4515.159',
'92.0.4515.43',
'93.0.4556.0',
'93.0.4577.15',
'93.0.4577.63',
'93.0.4577.82',
'94.0.4606.41',
'94.0.4606.54',
'94.0.4606.61',
'94.0.4606.71',
'94.0.4606.81',
'94.0.4606.85',
'95.0.4638.17',
'95.0.4638.50',
'95.0.4638.54',
'95.0.4638.69',
'95.0.4638.74',
'96.0.4664.18',
'96.0.4664.45',
'96.0.4664.55',
'96.0.4664.93',
'97.0.4692.20',
)
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
class HTTPHeaderDict(dict): class HTTPHeaderDict(dict):