1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-08-15 00:48:28 +00:00

Merge branch 'yt-dlp:master' into niconico_error

This commit is contained in:
doe1080 2025-08-01 00:33:16 +09:00 committed by GitHub
commit 296c61c2a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
44 changed files with 757 additions and 627 deletions

View File

@ -242,7 +242,7 @@ jobs:
permissions:
contents: read
actions: write # For cleaning up cache
runs-on: macos-13
runs-on: macos-14
steps:
- uses: actions/checkout@v4
@ -261,6 +261,8 @@ jobs:
- name: Install Requirements
run: |
brew install coreutils
# We need to use system Python in order to roll our own universal2 curl_cffi wheel
brew uninstall --ignore-dependencies python3
python3 -m venv ~/yt-dlp-build-venv
source ~/yt-dlp-build-venv/bin/activate
python3 devscripts/install_deps.py -o --include build

View File

@ -37,7 +37,7 @@ jobs:
matrix:
os: [ubuntu-latest]
# CPython 3.9 is in quick-test
python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10]
python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11]
include:
# at least one of each CPython/PyPy tests must be in windows
- os: windows-latest
@ -49,7 +49,7 @@ jobs:
- os: windows-latest
python-version: '3.13'
- os: windows-latest
python-version: pypy-3.10
python-version: pypy-3.11
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}

View File

@ -28,13 +28,13 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest]
python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10]
python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.11]
include:
# at least one of each CPython/PyPy tests must be in windows
- os: windows-latest
python-version: '3.9'
- os: windows-latest
python-version: pypy-3.10
python-version: pypy-3.11
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}

View File

@ -25,7 +25,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest]
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.10, pypy-3.11]
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', pypy-3.11]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}

View File

@ -272,7 +272,7 @@ ## Adding support for a new site
You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`).
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python.
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.11. Backward compatibility is not required for even older versions of Python.
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
```shell

View File

@ -172,7 +172,7 @@ # To install nightly with pip:
```
## DEPENDENCIES
Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly.
Python versions 3.9+ (CPython) and 3.11+ (PyPy) are supported. Other versions and implementations may or may not work correctly.
<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created
<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x>
@ -1902,7 +1902,7 @@ #### tver
* `backend`: Backend API to use for extraction - one of `streaks` (default) or `brightcove` (deprecated)
#### vimeo
* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `android` client is used by default. If account cookies or credentials are used for authentication, then the `web` client is used by default. The `web` client only works with authentication. The `ios` client only works with previously cached OAuth tokens
* `client`: Client to extract video data from. The currently available clients are `android`, `ios`, and `web`. Only one client can be used. The `web` client is used by default. The `web` client only works with account cookies or login credentials. The `android` and `ios` clients only work with previously cached OAuth tokens
* `original_format_policy`: Policy for when to try extracting original formats. One of `always`, `never`, or `auto`. The default `auto` policy tries to avoid exceeding the web client's API rate-limit by only making an extra request when Vimeo publicizes the video's downloadability
**Note**: These options may be changed/removed in the future without concern for backward compatibility

View File

@ -62,16 +62,22 @@ def parse_options():
def exe(onedir):
"""@returns (name, path)"""
platform_name, machine, extension = {
'win32': (None, MACHINE, '.exe'),
'darwin': ('macos', None, None),
}.get(OS_NAME, (OS_NAME, MACHINE, None))
name = '_'.join(filter(None, (
'yt-dlp',
{'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME),
MACHINE,
platform_name,
machine,
)))
return name, ''.join(filter(None, (
'dist/',
onedir and f'{name}/',
name,
OS_NAME == 'win32' and '.exe',
extension,
)))

View File

@ -21,9 +21,6 @@ def test_compat_passthrough(self):
with self.assertWarns(DeprecationWarning):
_ = compat.compat_basestring
with self.assertWarns(DeprecationWarning):
_ = compat.WINDOWS_VT_MODE
self.assertEqual(urllib.request.getproxies, getproxies)
with self.assertWarns(DeprecationWarning):

View File

@ -1373,6 +1373,7 @@ def test_parse_resolution(self):
self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('ep1x2'), {})
self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('1920w', lenient=True), {'width': 1920})
def test_parse_bitrate(self):
self.assertEqual(parse_bitrate(None), None)

View File

@ -36,6 +36,7 @@
from .globals import (
IN_CLI,
LAZY_EXTRACTORS,
WINDOWS_VT_MODE,
plugin_ies,
plugin_ies_overrides,
plugin_pps,
@ -4040,8 +4041,7 @@ def get_encoding(stream):
if os.environ.get('TERM', '').lower() == 'dumb':
additional_info.append('dumb')
if not supports_terminal_sequences(stream):
from .utils import WINDOWS_VT_MODE # Must be imported locally
additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
additional_info.append('No VT' if WINDOWS_VT_MODE.value is False else 'No ANSI')
if additional_info:
ret = f'{ret} ({",".join(additional_info)})'
return ret

View File

@ -37,7 +37,7 @@
from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401
from ..networking.exceptions import HTTPError as compat_HTTPError
passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
passthrough_module(__name__, '...utils', ('windows_enable_vt_mode',))
# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE

View File

@ -1335,7 +1335,7 @@ def prepare_line(line):
if len(cookie_list) != self._ENTRY_LEN:
raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
if cookie.expires_at and not re.fullmatch(r'[0-9]+(?:\.[0-9]+)?', cookie.expires_at):
raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
return line

View File

@ -572,7 +572,21 @@ def _call_downloader(self, tmpfilename, info_dict):
if end_time:
args += ['-t', str(end_time - start_time)]
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
url = fmt['url']
if self.params.get('enable_file_urls') and url.startswith('file:'):
# The default protocol_whitelist is 'file,crypto,data' when reading local m3u8 URLs,
# so only local segments can be read unless we also include 'http,https,tcp,tls'
args += ['-protocol_whitelist', 'file,crypto,data,http,https,tcp,tls']
# ffmpeg incorrectly handles 'file:' URLs by only removing the
# 'file:' prefix and treating the rest as if it's a normal filepath.
# FFmpegPostProcessor also depends on this behavior, so we need to fixup the URLs:
# - On Windows/Cygwin, replace 'file:///' and 'file://localhost/' with 'file:'
# - On *nix, replace 'file://localhost/' with 'file:/'
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13781
# https://trac.ffmpeg.org/ticket/2702
url = re.sub(r'^file://(?:localhost)?/', 'file:' if os.name == 'nt' else 'file:/', url)
args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', url]
if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
args += ['-c', 'copy']

View File

@ -205,7 +205,7 @@ def is_ad_fragment_end(s):
line = line.strip()
if line:
if not line.startswith('#'):
if format_index and discontinuity_count != format_index:
if format_index is not None and discontinuity_count != format_index:
continue
if ad_frag_next:
continue
@ -231,7 +231,7 @@ def is_ad_fragment_end(s):
byte_range = {}
elif line.startswith('#EXT-X-MAP'):
if format_index and discontinuity_count != format_index:
if format_index is not None and discontinuity_count != format_index:
continue
if frag_index > 0:
self.report_error(

View File

@ -571,10 +571,6 @@
DWIE,
DWArticleIE,
)
from .eagleplatform import (
ClipYouEmbedIE,
EaglePlatformIE,
)
from .ebaumsworld import EbaumsWorldIE
from .ebay import EbayIE
from .egghead import (
@ -640,6 +636,7 @@
FancodeVodIE,
)
from .fathom import FathomIE
from .faulio import FaulioLiveIE
from .faz import FazIE
from .fc2 import (
FC2IE,
@ -1568,6 +1565,7 @@
)
from .plutotv import PlutoTVIE
from .plvideo import PlVideoIE
from .plyr import PlyrEmbedIE
from .podbayfm import (
PodbayFMChannelIE,
PodbayFMIE,
@ -1783,6 +1781,7 @@
RTVEALaCartaIE,
RTVEAudioIE,
RTVELiveIE,
RTVEProgramIE,
RTVETelevisionIE,
)
from .rtvs import RTVSIE
@ -2166,7 +2165,6 @@
from .trueid import TrueIDIE
from .trunews import TruNewsIE
from .truth import TruthIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
from .tubetugraz import (
TubeTuGrazIE,
@ -2237,6 +2235,7 @@
from .tvplayer import TVPlayerIE
from .tvw import (
TvwIE,
TvwNewsIE,
TvwTvChannelsIE,
)
from .tweakers import TweakersIE

View File

@ -48,7 +48,6 @@
'username_field': 'user',
'password_field': 'passwd',
'login_hostname': 'login.xfinity.com',
'needs_newer_ua': True,
},
'TWC': {
'name': 'Time Warner Cable | Spectrum',
@ -1379,11 +1378,8 @@ def _download_webpage_handle(self, *args, **kwargs):
@staticmethod
def _get_mso_headers(mso_info):
# yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
return {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0',
} if mso_info.get('needs_newer_ua') else {}
# Not needed currently
return {}
@staticmethod
def _get_mvpd_resource(provider_id, title, guid, rating):

View File

@ -33,7 +33,6 @@
unified_timestamp,
url_or_none,
urlhandle_detect_ext,
variadic,
)
@ -232,6 +231,23 @@ class ArchiveOrgIE(InfoExtractor):
'release_date': '19950402',
'timestamp': 1084927901,
},
}, {
# metadata['metadata']['description'] is a list of strings instead of str
'url': 'https://archive.org/details/pra-KZ1908.02',
'info_dict': {
'id': 'pra-KZ1908.02',
'ext': 'mp3',
'display_id': 'KZ1908.02_01.wav',
'title': 'Crips and Bloods speak about gang life',
'description': 'md5:2b56b35ff021311e3554b47a285e70b3',
'uploader': 'jake@archive.org',
'duration': 1733.74,
'track': 'KZ1908.02 01',
'track_number': 1,
'timestamp': 1336026026,
'upload_date': '20120503',
'release_year': 1992,
},
}]
@staticmethod
@ -274,34 +290,40 @@ def _real_extract(self, url):
m = metadata['metadata']
identifier = m['identifier']
info = {
info = traverse_obj(m, {
'title': ('title', {str}),
'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any),
'uploader': (('uploader', 'adder'), {str}, any),
'creators': ('creator', (None, ...), {str}, filter, all, filter),
'license': ('licenseurl', {url_or_none}),
'release_date': ('date', {unified_strdate}),
'timestamp': (('publicdate', 'addeddate'), {unified_timestamp}, any),
'location': ('venue', {str}),
'release_year': ('year', {int_or_none}),
})
info.update({
'id': identifier,
'title': m['title'],
'description': clean_html(m.get('description')),
'uploader': dict_get(m, ['uploader', 'adder']),
'creators': traverse_obj(m, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'license': m.get('licenseurl'),
'release_date': unified_strdate(m.get('date')),
'timestamp': unified_timestamp(dict_get(m, ['publicdate', 'addeddate'])),
'webpage_url': f'https://archive.org/details/{identifier}',
'location': m.get('venue'),
'release_year': int_or_none(m.get('year'))}
})
for f in metadata['files']:
if f['name'] in entries:
entries[f['name']] = merge_dicts(entries[f['name']], {
'id': identifier + '/' + f['name'],
'title': f.get('title') or f['name'],
'display_id': f['name'],
'description': clean_html(f.get('description')),
'creators': traverse_obj(f, ('creator', {variadic}, {lambda x: x[0] and list(x)})),
'duration': parse_duration(f.get('length')),
'track_number': int_or_none(f.get('track')),
'album': f.get('album'),
'discnumber': int_or_none(f.get('disc')),
'release_year': int_or_none(f.get('year'))})
**traverse_obj(f, {
'title': (('title', 'name'), {str}, any),
'display_id': ('name', {str}),
'description': ('description', ({str}, (..., all, {' '.join})), {clean_html}, filter, any),
'creators': ('creator', (None, ...), {str}, filter, all, filter),
'duration': ('length', {parse_duration}),
'track_number': ('track', {int_or_none}),
'album': ('album', {str}),
'discnumber': ('disc', {int_or_none}),
'release_year': ('year', {int_or_none}),
}),
})
entry = entries[f['name']]
elif traverse_obj(f, 'original', expected_type=str) in entries:
elif traverse_obj(f, ('original', {str})) in entries:
entry = entries[f['original']]
else:
continue

View File

@ -175,13 +175,6 @@ def _download_playinfo(self, bvid, cid, headers=None, query=None):
else:
note = f'Downloading video formats for cid {cid}'
# TODO: remove this patch once utils.networking.random_user_agent() is updated, see #13735
# playurl requests carrying old UA will be rejected
headers = {
'User-Agent': f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(118,138)}.0.0.0 Safari/537.36',
**(headers or {}),
}
return self._download_json(
'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']

View File

@ -1,215 +0,0 @@
import functools
import re
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
smuggle_url,
unsmuggle_url,
url_or_none,
)
class EaglePlatformIE(InfoExtractor):
    """Extractor for videos served through EaglePlatform players.

    Matches direct ``*.media.eagleplatform.com`` player URLs and the internal
    ``eagleplatform:<host>:<id>`` form emitted by the embed detection below.
    """

    _VALID_URL = r'''(?x)
                    (?:
                        eagleplatform:(?P<custom_host>[^/]+):|
                        https?://(?P<host>.+?\.media\.eagleplatform\.com)/index/player\?.*\brecord_id=
                    )
                    (?P<id>\d+)
                '''
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1']
    _TESTS = [{
        # http://lenta.ru/news/2015/03/06/navalny/
        'url': 'http://lentaru.media.eagleplatform.com/index/player?player=new&record_id=227304&player_template_id=5201',
        # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
        'info_dict': {
            'id': '227304',
            'ext': 'mp4',
            'title': 'Навальный вышел на свободу',
            'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 87,
            'view_count': int,
            'age_limit': 0,
        },
    }, {
        # http://muz-tv.ru/play/7129/
        # http://media.clipyou.ru/index/player?record_id=12820&width=730&height=415&autoplay=true
        'url': 'eagleplatform:media.clipyou.ru:12820',
        'md5': '358597369cf8ba56675c1df15e7af624',
        'info_dict': {
            'id': '12820',
            'ext': 'mp4',
            'title': "'O Sole Mio",
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 216,
            'view_count': int,
        },
        'skip': 'Georestricted',
    }, {
        # referrer protected video (https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/)
        'url': 'eagleplatform:tvrainru.media.eagleplatform.com:582306',
        'only_matching': True,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Return EaglePlatform embed URLs found in *webpage*.

        Every returned URL has the embedding page smuggled in as ``referrer``.
        Falls through (returning None) when no embed is recognised.
        """
        with_referrer = functools.partial(smuggle_url, data={'referrer': url})

        # Plain <iframe> embeds are handled by the generic _EMBED_REGEX machinery
        iframe_urls = tuple(super()._extract_embed_urls(url, webpage))
        if iframe_urls:
            return map(with_referrer, iframe_urls)

        PLAYER_JS_RE = r'''
                        <script[^>]+
                            src=(?P<qjs>["\'])(?:https?:)?//(?P<host>(?:(?!(?P=qjs)).)+\.media\.eagleplatform\.com)/player/player\.js(?P=qjs)
                        .+?
                    '''
        script_embed_patterns = (
            # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/)
            rf'''(?xs)
                    {PLAYER_JS_RE}
                    <div[^>]+
                        class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+
                        data-id=["\'](?P<id>\d+)
            ''',
            # Generalization of "Javascript code usage", "Combined usage" and
            # "Usage without attaching to DOM" embeddings (see
            # http://dultonmedia.github.io/eplayer/)
            r'''(?xs)
                    %s
                    <script>
                    .+?
                    new\s+EaglePlayer\(
                        (?:[^,]+\s*,\s*)?
                        {
                            .+?
                            \bid\s*:\s*["\']?(?P<id>\d+)
                            .+?
                        }
                    \s*\)
                    .+?
                    </script>
            ''' % PLAYER_JS_RE,  # noqa: UP031
        )
        # Patterns are tried in order; the first hit wins
        for pattern in script_embed_patterns:
            found = re.search(pattern, webpage)
            if found is not None:
                return [with_referrer('eagleplatform:{host}:{id}'.format(**found.groupdict()))]

    @staticmethod
    def _handle_error(response):
        """Raise an expected ExtractorError when *response* carries a non-200 ``status``."""
        if int_or_none(response.get('status', 200)) == 200:
            return
        raise ExtractorError(' '.join(response['errors']), expected=True)

    def _download_json(self, url_or_request, video_id, *args, **kwargs):
        """Download JSON, passing the body of a failed HTTP response to ``_handle_error``.

        The original ExtractorError is re-raised whenever ``_handle_error``
        does not raise itself.
        """
        try:
            return super()._download_json(
                url_or_request, video_id, *args, **kwargs)
        except ExtractorError as ee:
            if isinstance(ee.cause, HTTPError):
                error_body = ee.cause.response.read().decode('utf-8')
                self._handle_error(self._parse_json(error_body, video_id))
            raise

    def _get_video_url(self, url_or_request, video_id, note='Downloading JSON metadata'):
        """Return the first entry of the ``data`` list of the downloaded JSON."""
        payload = self._download_json(url_or_request, video_id, note)
        return payload['data'][0]

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        host = mobj.group('custom_host') or mobj.group('host')

        headers = {}
        query = {'id': video_id}

        # The referrer is sent both as a header and as a query parameter
        referrer = smuggled_data.get('referrer')
        if referrer:
            headers['Referer'] = referrer
            query['referrer'] = referrer

        player_data = self._download_json(
            f'http://{host}/api/player_data', video_id,
            headers=headers, query=query)

        media = player_data['data']['playlist']['viewports'][0]['medialist'][0]

        title = media['title']

        age_restriction = media.get('age_restriction')
        if not age_restriction:
            age_limit = None
        elif age_restriction == 'allow_all':
            age_limit = 0
        else:
            age_limit = 18

        secure_m3u8 = self._proto_relative_url(media['sources']['secure_m3u8']['auto'], 'http:')

        m3u8_url = self._get_video_url(secure_m3u8, video_id, 'Downloading m3u8 JSON')
        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        formats = list(m3u8_formats)

        # HLS formats indexed by height, used as templates for the HTTP variants
        hls_by_height = {
            hls_format['height']: hls_format
            for hls_format in m3u8_formats
            if hls_format.get('height') is not None
        }

        mp4_data = self._download_json(
            # Secure mp4 URL is constructed according to Player.prototype.mp4 from
            # http://lentaru.media.eagleplatform.com/player/player.js
            re.sub(r'm3u8|hlsvod|hls|f4m', 'mp4s', secure_m3u8),
            video_id, 'Downloading mp4 JSON', fatal=False)
        if mp4_data:
            for format_id, format_url in mp4_data.get('data', {}).items():
                if not url_or_none(format_url):
                    continue
                height = int_or_none(format_id)
                hls_twin = hls_by_height.get(height) if height is not None else None
                if hls_twin:
                    http_format = {
                        **hls_twin,
                        'format_id': hls_twin['format_id'].replace('hls', 'http'),
                        'protocol': 'http',
                    }
                else:
                    http_format = {
                        'format_id': f'http-{format_id}',
                        'height': height,
                    }
                http_format['url'] = format_url
                formats.append(http_format)

        return {
            'id': video_id,
            'title': title,
            'description': media.get('description'),
            'thumbnail': self._proto_relative_url(media.get('snapshot'), 'http:'),
            'duration': int_or_none(media.get('duration')),
            'view_count': int_or_none(media.get('views')),
            'age_limit': age_limit,
            'formats': formats,
        }
class ClipYouEmbedIE(InfoExtractor):
    """Embed-only extractor: hands ClipYou iframes over to the ``eagleplatform:`` scheme."""

    _VALID_URL = False

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield at most one smuggled ``eagleplatform:<host>:<id>`` URL found in *webpage*."""
        iframe = re.search(
            r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
        if iframe is None:
            return
        embed_url = 'eagleplatform:{host}:{id}'.format(**iframe.groupdict())
        # The embedding page travels along as the referrer
        yield smuggle_url(embed_url, {'referrer': url})

105
yt_dlp/extractor/faulio.py Normal file
View File

@ -0,0 +1,105 @@
import re
import urllib.parse
from .common import InfoExtractor
from ..utils import js_to_json, url_or_none
from ..utils.traversal import traverse_obj
class FaulioLiveIE(InfoExtractor):
    """Extractor for live channel pages on the sites listed in ``_DOMAINS``.

    The page's Nuxt config supplies an API base URL; the channel whose ``url``
    equals the ID from the page URL is looked up in that API's ``/channels``
    response, and its HLS and DASH streams are extracted.
    """

    _DOMAINS = (
        'aloula.sba.sa',
        'bahry.com',
        'maraya.sba.net.ae',
        'sat7plus.org',
    )
    _VALID_URL = fr'https?://(?:{"|".join(map(re.escape, _DOMAINS))})/(?:(?:en|ar|fa)/)?live/(?P<id>[a-zA-Z0-9-]+)'
    _TESTS = [{
        'url': 'https://aloula.sba.sa/live/saudiatv',
        'info_dict': {
            'id': 'aloula.faulio.com_saudiatv',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://bahry.com/live/1',
        'info_dict': {
            'id': 'bahry.faulio.com_1',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://maraya.sba.net.ae/live/1',
        'info_dict': {
            'id': 'maraya.faulio.com_1',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://sat7plus.org/live/pars',
        'info_dict': {
            'id': 'sat7.faulio.com_pars',
            'title': str,
            'description': str,
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://sat7plus.org/fa/live/arabic',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The API base URL is read from the Nuxt config embedded in the page
        config_data = self._search_json(
            r'window\.__NUXT__\.config=', webpage, 'config', video_id, transform_source=js_to_json)
        api_base = config_data['public']['TRANSLATIONS_API_URL']

        # Pick the first channel entry whose 'url' equals the ID from the page URL
        channels = self._download_json(f'{api_base}/channels', video_id)
        channel = traverse_obj(
            channels, (lambda k, v: v['url'] == video_id, any))

        formats = []
        subtitles = {}

        hls_url = traverse_obj(channel, ('streams', 'hls', {url_or_none}))
        if hls_url:
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                hls_url, video_id, 'mp4', m3u8_id='hls', live=True, fatal=False)
            formats += hls_formats
            self._merge_subtitles(hls_subtitles, target=subtitles)

        mpd_url = traverse_obj(channel, ('streams', 'mpd', {url_or_none}))
        if mpd_url:
            dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
                mpd_url, video_id, mpd_id='dash', fatal=False)
            formats += dash_formats
            self._merge_subtitles(dash_subtitles, target=subtitles)

        # The ID is prefixed with the API hostname
        api_hostname = urllib.parse.urlparse(api_base).hostname
        channel_metadata = traverse_obj(channel, {
            'title': ('title', {str}),
            'description': ('description', {str}),
        })

        return {
            'id': f'{api_hostname}_{video_id}',
            **channel_metadata,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }

View File

@ -1,9 +1,7 @@
import urllib.parse
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
float_or_none,
url_or_none,
)
@ -58,16 +56,7 @@ class FrancaisFacileIE(InfoExtractor):
def _real_extract(self, url):
display_id = urllib.parse.unquote(self._match_id(url))
try: # yt-dlp's default user-agents are too old and blocked by the site
webpage = self._download_webpage(url, display_id, headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
})
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
raise
# Retry with impersonation if hardcoded UA is insufficient
webpage = self._download_webpage(url, display_id, impersonate=True)
webpage = self._download_webpage(url, display_id)
data = self._search_json(
r'<script[^>]+\bdata-media-id=[^>]+\btype="application/json"[^>]*>',

View File

@ -1010,38 +1010,6 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
# referrer protected EaglePlatform embed
{
'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
'info_dict': {
'id': '582306',
'ext': 'mp4',
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 3382,
'view_count': int,
},
'params': {
'skip_download': True,
},
},
# ClipYou (EaglePlatform) embed (custom URL)
{
'url': 'http://muz-tv.ru/play/7129/',
# Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': {
'id': '12820',
'ext': 'mp4',
'title': "'O Sole Mio",
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 216,
'view_count': int,
},
'params': {
'skip_download': True,
},
'skip': 'This video is unavailable.',
},
# Pladform embed
{
'url': 'http://muz-tv.ru/kinozal/view/7400/',

View File

@ -3,6 +3,7 @@
class LiveJournalIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
_TEST = {
'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',

View File

@ -79,7 +79,7 @@ class MiTeleIE(TelecincoBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_akamai_webpage(url, display_id)
webpage = self._download_webpage(url, display_id)
pre_player = self._search_json(
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
webpage, 'Pre Player', display_id)['prePlayer']

View File

@ -34,7 +34,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac
)
_API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False
def _create_eapi_cipher(self, api_path, query_body, cookies):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
@ -64,6 +63,8 @@ def _download_eapi_json(self, path, video_id, query_body, headers={}, **kwargs):
'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
}),
}
if self._x_forwarded_for_ip:
headers.setdefault('X-Real-IP', self._x_forwarded_for_ip)
return self._download_json(
urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id,
data=self._create_eapi_cipher(f'/api{path}', query_body, cookies), headers={

View File

@ -1,63 +1,63 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
try_get,
unified_timestamp,
)
from ..utils import parse_duration, parse_iso8601, url_or_none
from ..utils.traversal import traverse_obj
class ParlviewIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?parlview\.aph\.gov\.au/(?:[^/]+)?\bvideoID=(?P<id>\d{6})'
_VALID_URL = r'https?://(?:www\.)?aph\.gov\.au/News_and_Events/Watch_Read_Listen/ParlView/video/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=542661',
'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/3406614',
'info_dict': {
'id': '542661',
'id': '3406614',
'ext': 'mp4',
'title': "Australia's Family Law System [Part 2]",
'duration': 5799,
'description': 'md5:7099883b391619dbae435891ca871a62',
'timestamp': 1621430700,
'upload_date': '20210519',
'uploader': 'Joint Committee',
'title': 'Senate Chamber',
'description': 'Official Recording of Senate Proceedings from the Australian Parliament',
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/SenateParlview06.jpg',
'upload_date': '20250325',
'duration': 17999,
'timestamp': 1742939400,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936',
'only_matching': True,
'url': 'https://www.aph.gov.au/News_and_Events/Watch_Read_Listen/ParlView/video/SV1394.dv',
'info_dict': {
'id': 'SV1394.dv',
'ext': 'mp4',
'title': 'Senate Select Committee on Uranium Mining and Milling [Part 1]',
'description': 'Official Recording of Senate Committee Proceedings from the Australian Parliament',
'thumbnail': 'https://aphbroadcasting-prod.z01.azurefd.net/vod-storage/vod-logos/CommitteeThumbnail06.jpg',
'upload_date': '19960822',
'duration': 14765,
'timestamp': 840754200,
},
'params': {
'skip_download': True,
},
}]
_API_URL = 'https://parlview.aph.gov.au/api_v3/1/playback/getUniversalPlayerConfig?videoID=%s&format=json'
_MEDIA_INFO_URL = 'https://parlview.aph.gov.au/ajaxPlayer.php?videoID=%s&tabNum=4&action=loadTab'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
media = self._download_json(self._API_URL % video_id, video_id).get('media')
timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/'
video_details = self._download_json(
f'https://vodapi.aph.gov.au/api/search/parlview/{video_id}', video_id)['videoDetails']
stream = try_get(media, lambda x: x['renditions'][0], dict)
if not stream:
self.raise_no_formats('No streams were detected')
elif stream.get('streamType') != 'VOD':
self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType'))))
formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
video_details['files']['file']['url'], video_id, 'mp4')
media_info = self._download_webpage(
self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
DURATION_RE = re.compile(r'(?P<duration>\d+:\d+:\d+):\d+')
return {
'id': video_id,
'url': url,
'title': self._html_search_regex(r'<h2>([^<]+)<', webpage, 'title', fatal=False),
'formats': formats,
'duration': int_or_none(media.get('duration')),
'timestamp': unified_timestamp(timestamp.split('/', 1)[1].replace('_', ' ')),
'description': self._html_search_regex(
r'<div[^>]+class="descripti?on"[^>]*>[^>]+<strong>[^>]+>[^>]+>([^<]+)',
webpage, 'description', fatal=False),
'uploader': self._html_search_regex(
r'<td>[^>]+>Channel:[^>]+>([^<]+)', media_info, 'channel', fatal=False),
'thumbnail': media.get('staticImage'),
'subtitles': subtitles,
**traverse_obj(video_details, {
'title': (('parlViewTitle', 'title'), {str}, any),
'description': ('parlViewDescription', {str}),
'duration': ('files', 'file', 'duration', {DURATION_RE.fullmatch}, 'duration', {parse_duration}),
'timestamp': ('recordingFrom', {parse_iso8601}),
'thumbnail': ('thumbUrl', {url_or_none}),
}),
}

104
yt_dlp/extractor/plyr.py Normal file
View File

@ -0,0 +1,104 @@
import re
from .common import InfoExtractor
from .vimeo import VimeoIE
class PlyrEmbedIE(InfoExtractor):
    # Finds Plyr player <div> tags in a webpage and hands the wrapped
    # Vimeo/YouTube video over as an embed URL.
    # NOTE(review): `_VALID_URL = False` presumably marks this extractor as
    # embed-only (never matched against input URLs) - confirm against InfoExtractor
    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        # data-plyr-embed-id="https://player.vimeo.com/video/522319456/90e5c96063?dnt=1"
        'url': 'https://www.dhm.de/zeughauskino/filmreihen/online-filmreihen/filme-des-marshall-plans/200000000-mouths/',
        'info_dict': {
            'id': '522319456',
            'ext': 'mp4',
            'title': '200.000.000 Mouths (195051)',
            'uploader': 'Zeughauskino',
            'uploader_url': '',
            'comment_count': int,
            'like_count': int,
            'duration': 963,
            'thumbnail': 'https://i.vimeocdn.com/video/1081797161-9f09ddb4b7faa86e834e006b8e4b9c2cbaa0baa7da493211bf0796ae133a5ab8-d',
            'timestamp': 1615467405,
            'upload_date': '20210311',
            'release_timestamp': 1615467405,
            'release_date': '20210311',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-provider="vimeo" data-plyr-embed-id="803435276"
        'url': 'https://www.inarcassa.it/',
        'info_dict': {
            'id': '803435276',
            'ext': 'mp4',
            'title': 'HOME_Moto_Perpetuo',
            'uploader': 'Inarcassa',
            'uploader_url': '',
            'duration': 38,
            'thumbnail': 'https://i.vimeocdn.com/video/1663734769-945ad7ffabb16dbca009c023fd1d7b36bdb426a3dbae8345ed758136fe28f89a-d',
        },
        'params': {'skip_download': 'm3u8'},
        'expected_warnings': ['Failed to parse XML: not well-formed'],
    }, {
        # data-plyr-embed-id="https://youtu.be/GF-BjYKoAqI"
        'url': 'https://www.profile.nl',
        'info_dict': {
            'id': 'GF-BjYKoAqI',
            'ext': 'mp4',
            'title': 'PROFILE: Recruitment Profile',
            'description': '',
            'media_type': 'video',
            'uploader': 'Profile Nederland',
            'uploader_id': '@profilenederland',
            'uploader_url': 'https://www.youtube.com/@profilenederland',
            'channel': 'Profile Nederland',
            'channel_id': 'UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_url': 'https://www.youtube.com/channel/UC9AUkB0Tv39-TBYjs05n3vg',
            'channel_follower_count': int,
            'view_count': int,
            'like_count': int,
            'age_limit': 0,
            'duration': 39,
            'thumbnail': 'https://i.ytimg.com/vi/GF-BjYKoAqI/maxresdefault.jpg',
            'categories': ['Autos & Vehicles'],
            'tags': [],
            'timestamp': 1675692990,
            'upload_date': '20230206',
            'playable_in_embed': True,
            'availability': 'public',
            'live_status': 'not_live',
        },
    }, {
        # data-plyr-embed-id="B1TZV8rNZoc" data-plyr-provider="youtube"
        'url': 'https://www.vnis.edu.vn',
        'info_dict': {
            'id': 'vnis.edu',
            'title': 'VNIS Education - Master Agent các Trường hàng đầu Bắc Mỹ',
            'description': 'md5:4dafcf7335bb018780e4426da8ab8e4e',
            'age_limit': 0,
            'thumbnail': 'https://vnis.edu.vn/wp-content/uploads/2021/05/ve-welcome-en.png',
            'timestamp': 1753233356,
            'upload_date': '20250723',
        },
        'playlist_count': 3,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield one embed URL per Vimeo/YouTube Plyr player found in `webpage`.

        A player is a <div> tag carrying both a data-plyr-embed-id and a
        data-plyr-provider attribute, in either order. An embed id that does
        not start with http(s):// is expanded into a player/watch URL for its
        provider. Vimeo URLs are passed through VimeoIE._smuggle_referrer
        together with `url`; players of any other provider yield nothing.
        """
        # Verbose-mode pattern: the line breaks inside the literal are ignored by `re`
        for player in re.finditer(r'''(?x)
<div[^>]+(?:
data-plyr-embed-id="(?P<id1>[^"]+)"[^>]+data-plyr-provider="(?P<provider1>[^"]+)"|
data-plyr-provider="(?P<provider2>[^"]+)"[^>]+data-plyr-embed-id="(?P<id2>[^"]+)"
)[^>]*>''', webpage):
            attrs = player.groupdict()
            # Exactly one alternative matched, so one id/provider pair is set
            embed_id = attrs['id1'] or attrs['id2']
            provider = attrs['provider1'] or attrs['provider2']
            is_full_url = re.match(r'https?://', embed_id) is not None

            if provider == 'vimeo':
                vimeo_url = embed_id if is_full_url else f'https://player.vimeo.com/video/{embed_id}'
                yield VimeoIE._smuggle_referrer(vimeo_url, url)
            elif provider == 'youtube':
                yield embed_id if is_full_url else f'https://youtube.com/watch?v={embed_id}'

View File

@ -6,9 +6,11 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
InAdvancePagedList,
clean_html,
determine_ext,
float_or_none,
int_or_none,
make_archive_id,
parse_iso8601,
qualities,
@ -371,3 +373,62 @@ def _real_extract(self, url):
raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())
class RTVEProgramIE(RTVEBaseIE):
    # Playlist extractor for an RTVE Play program page: resolves the numeric
    # program ID from the page and pages through the program's video API.
    IE_NAME = 'rtve.es:program'
    IE_DESC = 'RTVE.es programs'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/play/videos/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.rtve.es/play/videos/saber-vivir/',
        'info_dict': {
            'id': '111570',
            'title': 'Saber vivir - Programa de ciencia y futuro en RTVE Play',
        },
        'playlist_mincount': 400,
    }]
    # Items requested per API page; also the page size given to InAdvancePagedList,
    # so the request and the paged list always agree
    _PAGE_SIZE = 60

    def _fetch_page(self, program_id, page_num):
        """Download and return the JSON for page `page_num` (1-based) of the program's videos."""
        return self._download_json(
            f'https://www.rtve.es/api/programas/{program_id}/videos',
            program_id, note=f'Downloading page {page_num}',
            query={
                'type': 39816,
                'page': page_num,
                # Use the class constant rather than a second literal 60
                'size': self._PAGE_SIZE,
            })

    def _entries(self, page_data):
        """Yield a url_transparent result for every item of `page_data` that has a valid htmlUrl."""
        for video in traverse_obj(page_data, ('page', 'items', lambda _, v: url_or_none(v['htmlUrl']))):
            yield self.url_result(
                video['htmlUrl'], RTVEALaCartaIE, url_transparent=True,
                **traverse_obj(video, {
                    'id': ('id', {str}),
                    'title': ('longTitle', {str}),
                    'description': ('shortDescription', {str}),
                    'duration': ('duration', {float_or_none(scale=1000)}),
                    # NOTE(review): the inner tuple looks like a branch over the two keys
                    # 'programInfo' and 'title' (first str wins), not the nested path
                    # programInfo -> title - confirm which one is intended
                    'series': (('programInfo', 'title'), {str}, any),
                    'season_number': ('temporadaOrden', {int_or_none}),
                    'season_id': ('temporadaId', {str}),
                    'season': ('temporada', {str}),
                    'episode_number': ('episode', {int_or_none}),
                    'episode': ('title', {str}),
                    'thumbnail': ('thumbnail', {url_or_none}),
                }),
            )

    def _real_extract(self, url):
        """Return a playlist of all videos of the program at `url`."""
        program_slug = self._match_id(url)
        program_page = self._download_webpage(url, program_slug)

        program_id = self._html_search_meta('DC.identifier', program_page, 'Program ID', fatal=True)

        # The first page is fetched eagerly because it carries the total page count
        first_page = self._fetch_page(program_id, 1)
        page_count = traverse_obj(first_page, ('page', 'totalPages', {int})) or 1

        entries = InAdvancePagedList(
            # idx is 0-based; reuse the already downloaded first page for idx 0
            lambda idx: self._entries(self._fetch_page(program_id, idx + 1) if idx else first_page),
            page_count, self._PAGE_SIZE)

        return self.playlist_result(entries, program_id, self._html_extract_title(program_page))

View File

@ -8,84 +8,9 @@
class SportDeutschlandIE(InfoExtractor):
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
_VALID_URL = r'https?://(?:player\.)?sportdeutschland\.tv/(?P<id>(?:[^/?#]+/)?[^?#/&]+)'
_TESTS = [{
'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
'info_dict': {
'id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54',
'ext': 'mp4',
'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga',
'display_id': 'blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
'description': 'md5:a288c794a5ee69e200d8f12982f81a87',
'live_status': 'was_live',
'channel': 'Blau-Weiss Buchholz Tanzsport',
'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport',
'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3',
'duration': 32447,
'upload_date': '20230114',
'timestamp': 1673733618,
},
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
'info_dict': {
'id': '95c80c52-6b9a-4ae9-9197-984145adfced',
'ext': 'mp4',
'title': 'BWF Tour: 1. Runde Feld 1 - YONEX GAINWARD German Open 2022',
'display_id': 'deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e',
'live_status': 'was_live',
'channel': 'Deutscher Badminton Verband',
'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband',
'channel_id': '93ca5866-2551-49fc-8424-6db35af58920',
'duration': 41097,
'upload_date': '20220309',
'timestamp': 1646860727.0,
},
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023',
'info_dict': {
'id': '9889785e-55b0-4d97-a72a-ce9a9f157cce',
'title': 'Formationswochenende Latein 2023 - Samstag',
'display_id': 'ggcbremen/formationswochenende-latein-2023',
'description': 'md5:6e4060d40ff6a8f8eeb471b51a8f08b2',
'live_status': 'was_live',
'channel': 'Grün-Gold-Club Bremen e.V.',
'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb',
'channel_url': 'https://sportdeutschland.tv/ggcbremen',
},
'playlist_count': 3,
'playlist': [{
'info_dict': {
'id': '988e1fea-9d44-4fab-8c72-3085fb667547',
'ext': 'mp4',
'channel_url': 'https://sportdeutschland.tv/ggcbremen',
'channel_id': '9888f04e-bb46-4c7f-be47-df960a4167bb',
'channel': 'Grün-Gold-Club Bremen e.V.',
'duration': 86,
'title': 'Formationswochenende Latein 2023 - Samstag Part 1',
'upload_date': '20230225',
'timestamp': 1677349909,
'live_status': 'was_live',
},
}],
'skip': 'not found',
}, {
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
'info_dict': {
'id': '95d71b8a-370a-4b87-ad16-94680da18528',
'ext': 'mp4',
'title': r're:Gymnastik International - Tag 1 .+',
'display_id': 'dtb/gymnastik-international-tag-1',
'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
'channel': 'Deutscher Turner-Bund',
'channel_url': 'https://sportdeutschland.tv/dtb',
'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
'live_status': 'is_live',
},
'skip': 'live',
}, {
# Single-part video, direct link
'url': 'https://sportdeutschland.tv/rostock-griffins/gfl2-rostock-griffins-vs-elmshorn-fighting-pirates',
'md5': '35c11a19395c938cdd076b93bda54cde',
'info_dict': {
@ -100,7 +25,82 @@ class SportDeutschlandIE(InfoExtractor):
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
'timestamp': 1749913117,
'upload_date': '20250614',
'duration': 12287.0,
},
}, {
# Single-part video, embedded player link
'url': 'https://player.sportdeutschland.tv/9e9619c4-7d77-43c4-926d-49fb57dc06dc',
'info_dict': {
'id': '9f27a97d-1544-4d0b-aa03-48d92d17a03a',
'ext': 'mp4',
'title': 'GFL2: Rostock Griffins vs. Elmshorn Fighting Pirates',
'display_id': '9e9619c4-7d77-43c4-926d-49fb57dc06dc',
'channel': 'Rostock Griffins',
'channel_url': 'https://sportdeutschland.tv/rostock-griffins',
'live_status': 'was_live',
'description': 'md5:60cb00067e55dafa27b0933a43d72862',
'channel_id': '9635f21c-3f67-4584-9ce4-796e9a47276b',
'timestamp': 1749913117,
'upload_date': '20250614',
'duration': 12287.0,
},
'params': {'skip_download': True},
}, {
# Multi-part video
'url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
'info_dict': {
'id': '9f63d737-2444-4e3a-a1ea-840df73fd481',
'display_id': 'rhine-ruhr-2025-fisu-world-university-games/volleyball-w-japan-vs-brasilien-halbfinale-2',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2',
'description': 'md5:0a17da15e48a687e6019639c3452572b',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'live_status': 'was_live',
},
'playlist_count': 2,
'playlist': [{
'info_dict': {
'id': '9f725a94-d43e-40ff-859d-13da3081bb04',
'ext': 'mp4',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 1',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'duration': 14773.0,
'timestamp': 1753085197,
'upload_date': '20250721',
'live_status': 'was_live',
},
}, {
'info_dict': {
'id': '9f725a94-370e-4477-89ac-1751098e3217',
'ext': 'mp4',
'title': 'Volleyball w: Japan vs. Braslien - Halbfinale 2 Part 2',
'channel': 'Rhine-Ruhr 2025 FISU World University Games',
'channel_id': '9f5216be-a49d-470b-9a30-4fe9df993334',
'channel_url': 'https://sportdeutschland.tv/rhine-ruhr-2025-fisu-world-university-games',
'duration': 14773.0,
'timestamp': 1753128421,
'upload_date': '20250721',
'live_status': 'was_live',
},
}],
}, {
# Livestream
'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1',
'info_dict': {
'id': '95d71b8a-370a-4b87-ad16-94680da18528',
'ext': 'mp4',
'title': r're:Gymnastik International - Tag 1 .+',
'display_id': 'dtb/gymnastik-international-tag-1',
'channel_id': '936ecef1-2f4a-4e08-be2f-68073cb7ecab',
'channel': 'Deutscher Turner-Bund',
'channel_url': 'https://sportdeutschland.tv/dtb',
'description': 'md5:07a885dde5838a6f0796ee21dc3b0c52',
'live_status': 'is_live',
},
'skip': 'live',
}]
def _process_video(self, asset_id, video):

View File

@ -99,10 +99,10 @@ def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}), impersonate=True)
url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
data = self._search_json(
r'(?:var|const|let)\s+(?:dat|playerInfo)\s*=\s*["\']', webpage, 'player info', video_id,
contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
r'(?:var|const|let)\s+(?:dat|(?:player|video)Info|)\s*=\s*["\']', webpage, 'player info',
video_id, contains_pattern=r'[A-Za-z0-9+/=]+', end_pattern=r'["\'];',
transform_source=lambda x: base64.b64decode(x).decode())
# SproutVideo may send player info for 'SMPTE Color Monitor Test' [a791d7b71b12ecc52e]

View File

@ -33,16 +33,20 @@ def _extract_from_streaks_api(self, project_id, media_id, headers=None, query=No
**(headers or {}),
})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status in {403, 404}:
if isinstance(e.cause, HTTPError) and e.cause.status in (403, 404):
error = self._parse_json(e.cause.response.read().decode(), media_id, fatal=False)
message = traverse_obj(error, ('message', {str}))
code = traverse_obj(error, ('code', {str}))
error_id = traverse_obj(error, ('id', {int}))
if code == 'REQUEST_FAILED':
self.raise_geo_restricted(message, countries=self._GEO_COUNTRIES)
elif code == 'MEDIA_NOT_FOUND':
raise ExtractorError(message, expected=True)
elif code or message:
raise ExtractorError(join_nonempty(code, message, delim=': '))
if error_id == 124:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif error_id == 126:
raise ExtractorError('Access is denied (possibly due to invalid/missing API key)')
if code == 'MEDIA_NOT_FOUND':
raise ExtractorError(join_nonempty(code, message, delim=': '), expected=True)
if code or message:
raise ExtractorError(join_nonempty(code, error_id, message, delim=': '))
raise
streaks_id = response['id']

View File

@ -5,45 +5,110 @@
from ..utils import (
float_or_none,
int_or_none,
make_archive_id,
strip_or_none,
)
from ..utils.traversal import traverse_obj
class TBSIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
_SITE_INFO = {
'tbs': ('TBS', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg'),
'tntdrama': ('TNT', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA'),
'trutv': ('truTV', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'),
}
_VALID_URL = fr'''(?x)
https?://(?:www\.)?(?P<site>{"|".join(map(re.escape, _SITE_INFO))})\.com
(?P<path>/(?:
(?P<watch>watch(?:tnt|tbs|trutv))|
movies|shows/[^/?#]+/(?:clips|season-\d+/episode-\d+)
)/(?P<id>[^/?#]+))
'''
_TESTS = [{
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
'url': 'https://www.tbs.com/shows/american-dad/season-6/episode-12/you-debt-your-life',
'info_dict': {
'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
'id': '984bdcd8db0cc00dc699927f2a411c8c6e0e48f3',
'ext': 'mp4',
'title': 'Monster',
'description': 'Get a first look at the theatrical trailer for TNTs highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
'timestamp': 1508175329,
'upload_date': '20171016',
'title': 'You Debt Your Life',
'description': 'md5:f211cfeb9187fd3cdb53eb0e8930d499',
'duration': 1231.0,
'thumbnail': r're:https://images\.tbs\.com/tbs/.+\.(?:jpe?g|png)',
'chapters': 'count:4',
'season': 'Season 6',
'season_number': 6,
'episode': 'Episode 12',
'episode_number': 12,
'timestamp': 1478276239,
'upload_date': '20161104',
},
'params': {
# m3u8 download
'skip_download': True,
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.tntdrama.com/shows/the-librarians-the-next-chapter/season-1/episode-10/and-going-medieval',
'info_dict': {
'id': 'e487b31b663a8001864f62fd20907782f7b8ccb8',
'ext': 'mp4',
'title': 'And Going Medieval',
'description': 'md5:5aed0ae23a6cf148a02fe3c1be8359fa',
'duration': 2528.0,
'thumbnail': r're:https://images\.tntdrama\.com/tnt/.+\.(?:jpe?g|png)',
'chapters': 'count:7',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 10',
'episode_number': 10,
'timestamp': 1743107520,
'upload_date': '20250327',
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.trutv.com/shows/the-carbonaro-effect/season-1/episode-1/got-the-bug-out',
'info_dict': {
'id': 'b457dd7458fd9e64b596355950b13a1ca799dc39',
'ext': 'mp4',
'title': 'Got the Bug Out',
'description': 'md5:9eeddf6248f73517b0e5969b8a43c025',
'duration': 1283.0,
'thumbnail': r're:https://images\.trutv\.com/tru/.+\.(?:jpe?g|png)',
'chapters': 'count:4',
'season': 'Season 1',
'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
'timestamp': 1570040829,
'upload_date': '20191002',
'_old_archive_ids': ['trutv b457dd7458fd9e64b596355950b13a1ca799dc39'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
'only_matching': True,
}, {
'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
'only_matching': True,
}, {
'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
'only_matching': True,
}, {
'url': 'https://www.trutv.com/shows/impractical-jokers/season-9/episode-1/you-dirty-dog',
'only_matching': True,
}, {
'url': 'https://www.trutv.com/watchtrutv/east',
'only_matching': True,
}, {
'url': 'https://www.tbs.com/watchtbs/east',
'only_matching': True,
}, {
'url': 'https://www.tntdrama.com/watchtnt/east',
'only_matching': True,
}]
_SOFTWARE_STATEMENT_MAP = {
'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg',
'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA',
}
def _real_extract(self, url):
site, path, display_id = self._match_valid_url(url).groups()
site, path, display_id, watch = self._match_valid_url(url).group('site', 'path', 'id', 'watch')
is_live = bool(watch)
webpage = self._download_webpage(url, display_id)
drupal_settings = self._parse_json(self._search_regex(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
webpage, 'drupal setting'), display_id)
is_live = 'watchtnt' in path or 'watchtbs' in path
drupal_settings = self._search_json(
r'<script\b[^>]+\bdata-drupal-selector="drupal-settings-json"[^>]*>',
webpage, 'drupal settings', display_id)
video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path)
media_id = video_data['mediaID']
@ -51,10 +116,14 @@ def _real_extract(self, url):
tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse(
drupal_settings['ngtv_token_url']).query)
auth_info = traverse_obj(drupal_settings, ('top2', {dict})) or {}
site_name = auth_info.get('siteName') or self._SITE_INFO[site][0]
software_statement = auth_info.get('softwareStatement') or self._SITE_INFO[site][1]
info = self._extract_ngtv_info(
media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], {
media_id, tokenizer_query, software_statement, {
'url': url,
'site_name': site[:3].upper(),
'site_name': site_name,
'auth_required': video_data.get('authRequired') == '1' or is_live,
'is_live': is_live,
})
@ -87,4 +156,6 @@ def _real_extract(self, url):
'thumbnails': thumbnails,
'is_live': is_live,
})
if site == 'trutv':
info['_old_archive_ids'] = [make_archive_id(site, media_id)]
return info

View File

@ -63,17 +63,6 @@ def _parse_content(self, content, url):
'http_headers': headers,
}
def _download_akamai_webpage(self, url, display_id):
    """Download `url` with a hardcoded Firefox user-agent, retrying with impersonation on HTTP 403.

    Any error other than an HTTPError with status 403 is re-raised unchanged.
    """
    # yt-dlp's default user-agents are too old and blocked by akamai
    firefox_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
    }
    try:
        return self._download_webpage(url, display_id, headers=firefox_headers)
    except ExtractorError as e:
        got_forbidden = isinstance(e.cause, HTTPError) and e.cause.status == 403
        if got_forbidden:
            # Retry with impersonation if hardcoded UA is insufficient to bypass akamai
            return self._download_webpage(url, display_id, impersonate=True)
        raise
class TelecincoIE(TelecincoBaseIE):
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
@ -151,7 +140,7 @@ class TelecincoIE(TelecincoBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_akamai_webpage(url, display_id)
webpage = self._download_webpage(url, display_id)
article = self._search_json(
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
webpage, 'article', display_id)['article']

View File

@ -1,71 +0,0 @@
from .turner import TurnerBaseIE
from ..utils import (
int_or_none,
parse_iso8601,
)
class TruTVIE(TurnerBaseIE):
    # Extractor for trutv.com episode and clip pages, backed by the api.trutv.com v2 web API
    _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
    _TEST = {
        'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
        'info_dict': {
            'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
            'ext': 'mp4',
            'title': 'Sunlight-Activated Flower',
            'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    # Passed to _extract_ngtv_info as its software statement argument
    _SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'

    def _real_extract(self, url):
        """Fetch episode or clip metadata from the trutv API and merge it into the ngtv info dict."""
        series_slug, clip_slug, video_id = self._match_valid_url(url).groups()

        # A numeric id in the URL selects the episode endpoint; otherwise the clip slug is used
        path, display_id = ('episode', video_id) if video_id else ('series/clip', clip_slug)

        api_response = self._download_json(
            f'https://api.trutv.com/v2/web/{path}/{series_slug}/{display_id}',
            display_id)
        video_data = api_response['episode' if video_id else 'info']

        media_id = video_data['mediaId']
        title = video_data['title'].strip()

        info = self._extract_ngtv_info(
            media_id, {}, self._SOFTWARE_STATEMENT, {
                'url': url,
                'site_name': 'truTV',
                'auth_required': video_data.get('isAuthRequired'),
            })

        # Images without a source URL are dropped
        thumbnails = [{
            'url': image['srcUrl'],
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in video_data.get('images', []) if image.get('srcUrl')]

        info.update({
            'id': media_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video_data.get('publicationDate')),
            'series': video_data.get('showTitle'),
            'season_number': int_or_none(video_data.get('seasonNum')),
            'episode_number': int_or_none(video_data.get('episodeNum')),
        })
        return info

View File

@ -251,6 +251,11 @@ def _extract_ngtv_info(self, media_id, tokenizer_query, software_statement, ap_d
'end_time': start_time + chapter_duration,
})
if is_live:
for f in formats:
# Prevent ffmpeg from adding its own http headers or else we get HTTP Error 403
f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-icy', '0']}
return {
'formats': formats,
'chapters': chapters,

View File

@ -1,12 +1,16 @@
import datetime as dt
from .streaks import StreaksBaseIE
from ..utils import (
ExtractorError,
GeoRestrictedError,
int_or_none,
join_nonempty,
make_archive_id,
smuggle_url,
str_or_none,
strip_or_none,
time_seconds,
update_url_query,
)
from ..utils.traversal import require, traverse_obj
@ -96,6 +100,7 @@ class TVerIE(StreaksBaseIE):
'Referer': 'https://tver.jp/',
}
_PLATFORM_QUERY = {}
_STREAKS_API_INFO = {}
def _real_initialize(self):
session_info = self._download_json(
@ -105,6 +110,9 @@ def _real_initialize(self):
'platform_uid': 'platform_uid',
'platform_token': 'platform_token',
}))
self._STREAKS_API_INFO = self._download_json(
'https://player.tver.jp/player/streaks_info_v2.json', None,
'Downloading STREAKS API info', 'Unable to download STREAKS API info')
def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
return self._download_json(
@ -219,15 +227,26 @@ def _real_extract(self, url):
'_type': 'url_transparent',
'url': smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, brightcove_id),
{'geo_countries': ['JP']}),
{'geo_countries': self._GEO_COUNTRIES}),
'ie_key': 'BrightcoveNew',
}
return {
**self._extract_from_streaks_api(video_info['streaks']['projectID'], streaks_id, {
project_id = video_info['streaks']['projectID']
key_idx = dt.datetime.fromtimestamp(time_seconds(hours=9), dt.timezone.utc).month % 6 or 6
try:
streaks_info = self._extract_from_streaks_api(project_id, streaks_id, {
'Origin': 'https://tver.jp',
'Referer': 'https://tver.jp/',
}),
'X-Streaks-Api-Key': self._STREAKS_API_INFO[project_id]['api_key'][f'key0{key_idx}'],
})
except GeoRestrictedError as e:
# Catch and re-raise with metadata_available to support --ignore-no-formats-error
self.raise_geo_restricted(e.orig_msg, countries=self._GEO_COUNTRIES, metadata_available=True)
streaks_info = {}
return {
**streaks_info,
**metadata,
'id': video_id,
'_old_archive_ids': [make_archive_id('BrightcoveNew', brightcove_id)] if brightcove_id else None,

View File

@ -10,12 +10,15 @@
unified_timestamp,
url_or_none,
)
from ..utils.traversal import find_element, traverse_obj
from ..utils.traversal import find_element, find_elements, traverse_obj
class TvwIE(InfoExtractor):
IE_NAME = 'tvw'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
_VALID_URL = [
r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)',
r'https?://(?:www\.)?tvw\.org/watch/?\?(?:[^#]+&)?eventID=(?P<id>\d+)',
]
_TESTS = [{
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -75,6 +78,20 @@ class TvwIE(InfoExtractor):
'display_id': 'washington-to-washington-a-new-space-race-2022041111',
'categories': ['Washington to Washington', 'General Interest'],
},
}, {
'url': 'https://tvw.org/watch?eventID=2025041235',
'md5': '7d697c02f110b37d6a47622ea608ca90',
'info_dict': {
'id': '2025041235',
'ext': 'mp4',
'title': 'Legislative Review - Medicaid Postpartum Bill Sparks Debate & Senate Approves Automatic Voter Registration',
'thumbnail': r're:^https?://.*\.(?:jpe?g|png)$',
'description': 'md5:37d0f3a9187ae520aac261b3959eaee6',
'timestamp': 1745006400,
'upload_date': '20250418',
'location': 'Hayner Media Center',
'categories': ['Legislative Review'],
},
}]
def _real_extract(self, url):
@ -125,6 +142,41 @@ def _real_extract(self, url):
}
class TvwNewsIE(InfoExtractor):
    # Playlist extractor for dated tvw.org news posts (/YYYY/MM/<slug>):
    # every embedded player on the post becomes a TvwIE entry.
    IE_NAME = 'tvw:news'
    _VALID_URL = r'https?://(?:www\.)?tvw\.org/\d{4}/\d{2}/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://tvw.org/2024/01/the-impact-issues-to-watch-in-the-2024-legislative-session/',
        'info_dict': {
            'id': 'the-impact-issues-to-watch-in-the-2024-legislative-session',
            'title': 'The Impact - Issues to Watch in the 2024 Legislative Session',
            'description': 'md5:65f0b33ec8f18ff1cd401c5547aa5441',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://tvw.org/2024/06/the-impact-water-rights-and-the-skookumchuck-dam-debate/',
        'info_dict': {
            'id': 'the-impact-water-rights-and-the-skookumchuck-dam-debate',
            'title': 'The Impact - Water Rights and the Skookumchuck Dam Debate',
            'description': 'md5:185f3a2350ef81e3fa159ac3e040a94b',
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        """Return a playlist with one TvwIE entry per 'invintus-player' element on the post."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        # Read the data-eventid attribute of each element with class 'invintus-player'
        event_id_path = (
            {find_elements(cls='invintus-player', html=True)}, ..., {extract_attributes}, 'data-eventid')
        event_ids = traverse_obj(page, event_id_path)

        # The og:title carries a ' - TVW' suffix that is stripped from the playlist title
        title = remove_end(self._og_search_title(page, default=None), ' - TVW')
        description = self._og_search_description(page, default=None)

        return self.playlist_from_matches(
            event_ids, slug,
            playlist_title=title,
            playlist_description=description,
            getter=lambda x: f'https://tvw.org/watch?eventID={x}', ie=TvwIE)
class TvwTvChannelsIE(InfoExtractor):
IE_NAME = 'tvw:tvchannels'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'

View File

@ -49,7 +49,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'Cannot download embed-only video without embedding URL. Please call yt-dlp '
'with the URL of the page that embeds this video.')
_DEFAULT_CLIENT = 'android'
_DEFAULT_CLIENT = 'web'
_DEFAULT_AUTHED_CLIENT = 'web'
_CLIENT_HEADERS = {
'Accept': 'application/vnd.vimeo.*+json; version=3.4.10',
@ -58,7 +58,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_CLIENT_CONFIGS = {
'android': {
'CACHE_KEY': 'oauth-token-android',
'CACHE_ONLY': False,
'CACHE_ONLY': True,
'VIEWER_JWT': False,
'REQUIRES_AUTH': False,
'AUTH': 'NzRmYTg5YjgxMWExY2JiNzUwZDg1MjhkMTYzZjQ4YWYyOGEyZGJlMTp4OGx2NFd3QnNvY1lkamI2UVZsdjdDYlNwSDUrdm50YzdNNThvWDcwN1JrenJGZC9tR1lReUNlRjRSVklZeWhYZVpRS0tBcU9YYzRoTGY2Z1dlVkJFYkdJc0dMRHpoZWFZbU0reDRqZ1dkZ1diZmdIdGUrNUM5RVBySlM0VG1qcw==',
@ -88,6 +88,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
),
},
'web': {
'CACHE_ONLY': False,
'VIEWER_JWT': True,
'REQUIRES_AUTH': True,
'USER_AGENT': None,
@ -142,7 +143,6 @@ def _perform_login(self, username, password):
'service': 'vimeo',
'token': viewer['xsrft'],
}
self._set_vimeo_cookie('vuid', viewer['vuid'])
try:
self._download_webpage(
self._LOGIN_URL, None, 'Logging in',
@ -151,16 +151,40 @@ def _perform_login(self, username, password):
'Referer': self._LOGIN_URL,
})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 418:
if isinstance(e.cause, HTTPError) and e.cause.status in (405, 418):
raise ExtractorError(
'Unable to log in: bad username or password',
expected=True)
raise ExtractorError('Unable to log in')
# Clear unauthenticated viewer info
self._viewer_info = None
def _real_initialize(self):
if self._LOGIN_REQUIRED and not self._is_logged_in:
if self._is_logged_in:
return
if self._LOGIN_REQUIRED:
self.raise_login_required()
if self._DEFAULT_CLIENT != 'web':
return
for client_name, client_config in self._CLIENT_CONFIGS.items():
if not client_config['CACHE_ONLY']:
continue
cache_key = client_config['CACHE_KEY']
if cache_key not in self._oauth_tokens:
if token := self.cache.load(self._NETRC_MACHINE, cache_key):
self._oauth_tokens[cache_key] = token
if self._oauth_tokens.get(cache_key):
self._DEFAULT_CLIENT = client_name
self.write_debug(
f'Found cached {client_name} token; using {client_name} as default API client')
return
def _get_video_password(self):
password = self.get_param('videopassword')
if password is None:
@ -200,9 +224,6 @@ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
if vimeo_config:
return self._parse_json(vimeo_config, video_id)
def _set_vimeo_cookie(self, name, value):
self._set_cookie('vimeo.com', name, value)
def _parse_config(self, config, video_id):
video_data = config['video']
video_title = video_data.get('title')
@ -363,22 +384,26 @@ def _fetch_oauth_token(self, client):
return f'Bearer {self._oauth_tokens[cache_key]}'
def _get_requested_client(self):
default_client = self._DEFAULT_AUTHED_CLIENT if self._is_logged_in else self._DEFAULT_CLIENT
if client := self._configuration_arg('client', [None], ie_key=VimeoIE)[0]:
if client not in self._CLIENT_CONFIGS:
raise ExtractorError(
f'Unsupported API client "{client}" requested. '
f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True)
self.write_debug(
f'Using {client} API client as specified by extractor argument', only_once=True)
return client
client = self._configuration_arg('client', [default_client], ie_key=VimeoIE)[0]
if client not in self._CLIENT_CONFIGS:
raise ExtractorError(
f'Unsupported API client "{client}" requested. '
f'Supported clients are: {", ".join(self._CLIENT_CONFIGS)}', expected=True)
if self._is_logged_in:
return self._DEFAULT_AUTHED_CLIENT
return client
return self._DEFAULT_CLIENT
def _call_videos_api(self, video_id, unlisted_hash=None, path=None, *, force_client=None, query=None, **kwargs):
client = force_client or self._get_requested_client()
client_config = self._CLIENT_CONFIGS[client]
if client_config['REQUIRES_AUTH'] and not self._is_logged_in:
self.raise_login_required(f'The {client} client requires authentication')
self.raise_login_required(f'The {client} client only works when logged-in')
return self._download_json(
join_nonempty(
@ -1192,7 +1217,6 @@ def _try_album_password(self, url):
raise ExtractorError(
'This album is protected by a password, use the --video-password option',
expected=True)
self._set_vimeo_cookie('vuid', viewer['vuid'])
try:
self._download_json(
f'https://vimeo.com/showcase/{album_id}/auth',
@ -1589,7 +1613,6 @@ def _real_extract(self, url):
raise ExtractorError(
'This album is protected by a password, use the --video-password option',
expected=True)
self._set_vimeo_cookie('vuid', viewer['vuid'])
try:
hashed_pass = self._download_json(
f'https://vimeo.com/showcase/{album_id}/auth',

View File

@ -1,3 +1,4 @@
import os
from collections import defaultdict
# Please Note: Due to necessary changes and the complex nature involved in the plugin/globals system,
@ -28,3 +29,4 @@ def __repr__(self, /):
# Misc
IN_CLI = Indirect(False)
LAZY_EXTRACTORS = Indirect(None) # `False`=force, `None`=disabled, `True`=enabled
WINDOWS_VT_MODE = Indirect(False if os.name == 'nt' else None)

View File

@ -90,7 +90,7 @@ def run(self, info):
if info['ext'] == 'mp3':
options = [
'-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
'-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']
'-metadata:s:v', 'title=Album cover', '-metadata:s:v', 'comment=Cover (front)']
self._report_run('ffmpeg', filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)

View File

@ -33,8 +33,17 @@ class XAttrMetadataPP(PostProcessor):
# (e.g., 4kB on ext4), and we don't want to have the other ones fail
'user.dublincore.description': 'description',
# 'user.xdg.comment': 'description',
'com.apple.metadata:kMDItemWhereFroms': 'webpage_url',
}
APPLE_PLIST_TEMPLATE = '''<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<array>
\t<string>%s</string>
</array>
</plist>'''
def run(self, info):
mtime = os.stat(info['filepath']).st_mtime
self.to_screen('Writing metadata to file\'s xattrs')
@ -44,6 +53,8 @@ def run(self, info):
if value:
if infoname == 'upload_date':
value = hyphenate_date(value)
elif xattrname == 'com.apple.metadata:kMDItemWhereFroms':
value = self.APPLE_PLIST_TEMPLATE % value
write_xattr(info['filepath'], xattrname, value.encode())
except XAttrUnavailableError as e:

View File

@ -139,7 +139,18 @@ def _get_binary_name():
def _get_system_deprecation():
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 9)
MIN_SUPPORTED, MIN_RECOMMENDED = (3, 9), (3, 10)
EXE_MSG_TMPL = ('Support for {} has been deprecated. '
'See https://github.com/yt-dlp/yt-dlp/{} for details.\n{}')
STOP_MSG = 'You may stop receiving updates on this version at any time!'
variant = detect_variant()
# Temporary until macos_legacy executable builds are discontinued
if variant == 'darwin_legacy_exe':
return EXE_MSG_TMPL.format(
f'{variant} (the PyInstaller-bundled executable for macOS versions older than 10.15)',
'issues/13856', STOP_MSG)
if sys.version_info > MIN_RECOMMENDED:
return None
@ -150,6 +161,13 @@ def _get_system_deprecation():
if sys.version_info < MIN_SUPPORTED:
return f'Python version {major}.{minor} is no longer supported! {PYTHON_MSG}'
# Temporary until aarch64/armv7l build flow is bumped to Ubuntu 22.04 and Python 3.10
if variant in ('linux_aarch64_exe', 'linux_armv7l_exe'):
libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2])
if libc_ver < (2, 35):
return EXE_MSG_TMPL.format('system glibc version < 2.35', 'issues/13858', STOP_MSG)
return None
return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}'

View File

@ -52,7 +52,7 @@
compat_HTMLParseError,
)
from ..dependencies import xattr
from ..globals import IN_CLI
from ..globals import IN_CLI, WINDOWS_VT_MODE
__name__ = __name__.rsplit('.', 1)[0] # noqa: A001 # Pretend to be the parent module
@ -1875,6 +1875,11 @@ def parse_resolution(s, *, lenient=False):
if mobj:
return {'height': int(mobj.group(1)) * 540}
if lenient:
mobj = re.search(r'(?<!\d)(\d{2,5})w(?![a-zA-Z0-9])', s)
if mobj:
return {'width': int(mobj.group(1))}
return {}
@ -4759,13 +4764,10 @@ def jwt_decode_hs256(jwt):
return json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
WINDOWS_VT_MODE = False if os.name == 'nt' else None
@functools.cache
def supports_terminal_sequences(stream):
if os.name == 'nt':
if not WINDOWS_VT_MODE:
if not WINDOWS_VT_MODE.value:
return False
elif not os.getenv('TERM'):
return False
@ -4802,8 +4804,7 @@ def windows_enable_vt_mode():
finally:
os.close(handle)
global WINDOWS_VT_MODE
WINDOWS_VT_MODE = True
WINDOWS_VT_MODE.value = True
supports_terminal_sequences.cache_clear()

View File

@ -15,48 +15,10 @@
def random_user_agent():
_USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
_CHROME_VERSIONS = (
'90.0.4430.212',
'90.0.4430.24',
'90.0.4430.70',
'90.0.4430.72',
'90.0.4430.85',
'90.0.4430.93',
'91.0.4472.101',
'91.0.4472.106',
'91.0.4472.114',
'91.0.4472.124',
'91.0.4472.164',
'91.0.4472.19',
'91.0.4472.77',
'92.0.4515.107',
'92.0.4515.115',
'92.0.4515.131',
'92.0.4515.159',
'92.0.4515.43',
'93.0.4556.0',
'93.0.4577.15',
'93.0.4577.63',
'93.0.4577.82',
'94.0.4606.41',
'94.0.4606.54',
'94.0.4606.61',
'94.0.4606.71',
'94.0.4606.81',
'94.0.4606.85',
'95.0.4638.17',
'95.0.4638.50',
'95.0.4638.54',
'95.0.4638.69',
'95.0.4638.74',
'96.0.4664.18',
'96.0.4664.45',
'96.0.4664.55',
'96.0.4664.93',
'97.0.4692.20',
)
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
USER_AGENT_TMPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36'
# Target versions released within the last ~6 months
CHROME_MAJOR_VERSION_RANGE = (132, 138)
return USER_AGENT_TMPL.format(f'{random.randint(*CHROME_MAJOR_VERSION_RANGE)}.0.0.0')
class HTTPHeaderDict(dict):