1
0
mirror of https://github.com/yt-dlp/yt-dlp.git synced 2025-06-28 09:28:33 +00:00

Merge remote-tracking branch 'origin/master' into tvw-news

This commit is contained in:
Fries 2025-04-18 20:39:48 -07:00
commit 3d06a713d4
18 changed files with 664 additions and 475 deletions

View File

@ -1770,7 +1770,7 @@ #### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios` * `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`

View File

@ -136,7 +136,7 @@ def _iter_differences(got, expected, field):
return return
if op == 'startswith': if op == 'startswith':
if not val.startswith(got): if not got.startswith(val):
yield field, f'should start with {val!r}, got {got!r}' yield field, f'should start with {val!r}, got {got!r}'
return return

View File

@ -39,6 +39,7 @@
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
from yt_dlp.networking import ( from yt_dlp.networking import (
HEADRequest, HEADRequest,
PATCHRequest,
PUTRequest, PUTRequest,
Request, Request,
RequestDirector, RequestDirector,
@ -1856,6 +1857,7 @@ def test_method(self):
def test_request_helpers(self): def test_request_helpers(self):
assert HEADRequest('http://example.com').method == 'HEAD' assert HEADRequest('http://example.com').method == 'HEAD'
assert PATCHRequest('http://example.com').method == 'PATCH'
assert PUTRequest('http://example.com').method == 'PUT' assert PUTRequest('http://example.com').method == 'PUT'
def test_headers(self): def test_headers(self):

View File

@ -1783,7 +1783,6 @@
from .rtve import ( from .rtve import (
RTVEALaCartaIE, RTVEALaCartaIE,
RTVEAudioIE, RTVEAudioIE,
RTVEInfantilIE,
RTVELiveIE, RTVELiveIE,
RTVETelevisionIE, RTVETelevisionIE,
) )
@ -2237,7 +2236,11 @@
TVPlayIE, TVPlayIE,
) )
from .tvplayer import TVPlayerIE from .tvplayer import TVPlayerIE
from .tvw import TvwIE, TvwNewsIE from .tvw import (
TvwIE,
TvwNewsIE,
TvwTvChannelsIE,
)
from .tweakers import TweakersIE from .tweakers import TweakersIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE from .twentythreevideo import TwentyThreeVideoIE

View File

@ -21,6 +21,7 @@
int_or_none, int_or_none,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url,
update_url_query, update_url_query,
) )
@ -417,6 +418,10 @@ def _real_extract(self, url):
'is_live': is_live, 'is_live': is_live,
'availability': availability, 'availability': availability,
}) })
if thumbnail := update_url(self._og_search_thumbnail(webpage, default=''), query=None):
info['thumbnails'] = [{'url': thumbnail}]
return info return info

View File

@ -1,64 +1,105 @@
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_age_limit,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class AtresPlayerIE(InfoExtractor): class AtresPlayerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})' _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/(?:[^/?#]+/){4}(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
_NETRC_MACHINE = 'atresplayer' _NETRC_MACHINE = 'atresplayer'
_TESTS = [ _TESTS = [{
{ 'url': 'https://www.atresplayer.com/lasexta/programas/el-objetivo/clips/mbappe-describe-como-entrenador-a-carlo-ancelotti-sabe-cuando-tiene-que-ser-padre-jefe-amigo-entrenador_67f2dfb2fb6ab0e4c7203849/',
'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/', 'info_dict': {
'info_dict': { 'ext': 'mp4',
'id': '5d4aa2c57ed1a88fc715a615', 'id': '67f2dfb2fb6ab0e4c7203849',
'ext': 'mp4', 'display_id': 'md5:c203f8d4e425ed115ba56a1c6e4b3e6c',
'title': 'Capítulo 7: Asuntos pendientes', 'title': 'Mbappé describe como entrenador a Carlo Ancelotti: "Sabe cuándo tiene que ser padre, jefe, amigo, entrenador..."',
'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', 'channel': 'laSexta',
'duration': 3413, 'duration': 31,
}, 'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/06/B02DBE1E-D59B-4683-8404-1A9595D15269/1920x1080.jpg',
'skip': 'This video is only available for registered users', 'tags': ['Entrevista informativa', 'Actualidad', 'Debate informativo', 'Política', 'Economía', 'Sociedad', 'Cara a cara', 'Análisis', 'Más periodismo'],
'series': 'El Objetivo',
'season': 'Temporada 12',
'timestamp': 1743970079,
'upload_date': '20250406',
}, },
{ }, {
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', 'url': 'https://www.atresplayer.com/antena3/programas/el-hormiguero/clips/revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero_67f836baa4a5b0e4147ca59a/',
'only_matching': True, 'info_dict': {
'ext': 'mp4',
'id': '67f836baa4a5b0e4147ca59a',
'display_id': 'revive-la-entrevista-completa-a-miguel-bose-en-el-hormiguero',
'title': 'Revive la entrevista completa a Miguel Bosé en El Hormiguero',
'description': 'md5:c6d2b591408d45a7bc2986dfb938eb72',
'channel': 'Antena 3',
'duration': 2556,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages02/2025/04/10/9076395F-F1FD-48BE-9F18-540DBA10EBAD/1920x1080.jpg',
'tags': ['Entrevista', 'Variedades', 'Humor', 'Entretenimiento', 'Te sigo', 'Buen rollo', 'Cara a cara'],
'series': 'El Hormiguero ',
'season': 'Temporada 14',
'timestamp': 1744320111,
'upload_date': '20250410',
}, },
{ }, {
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/', 'url': 'https://www.atresplayer.com/flooxer/series/biara-proyecto-lazarus/temporada-1/capitulo-3-supervivientes_67a6038b64ceca00070f4f69/',
'only_matching': True, 'info_dict': {
'ext': 'mp4',
'id': '67a6038b64ceca00070f4f69',
'display_id': 'capitulo-3-supervivientes',
'title': 'Capítulo 3: Supervivientes',
'description': 'md5:65b231f20302f776c2b0dd24594599a1',
'channel': 'Flooxer',
'duration': 1196,
'thumbnail': 'https://imagenes.atresplayer.com/atp/clipping/cmsimages01/2025/02/14/17CF90D3-FE67-40C5-A941-7825B3E13992/1920x1080.jpg',
'tags': ['Juvenil', 'Terror', 'Piel de gallina', 'Te sigo', 'Un break', 'Del tirón'],
'series': 'BIARA: Proyecto Lázarus',
'season': 'Temporada 1',
'season_number': 1,
'episode': 'Episode 3',
'episode_number': 3,
'timestamp': 1743095191,
'upload_date': '20250327',
}, },
] }, {
'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
'only_matching': True,
}, {
'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
'only_matching': True,
}]
_API_BASE = 'https://api.atresplayer.com/' _API_BASE = 'https://api.atresplayer.com/'
def _perform_login(self, username, password): def _perform_login(self, username, password):
self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page')
try: try:
target_url = self._download_json( self._download_webpage(
'https://account.atresmedia.com/api/login', None, 'https://account.atresplayer.com/auth/v1/login', None,
'Logging in', headers={ 'Logging in', 'Failed to log in', data=urlencode_postdata({
'Content-Type': 'application/x-www-form-urlencoded',
}, data=urlencode_postdata({
'username': username, 'username': username,
'password': password, 'password': password,
}))['targetUrl'] }))
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 400: if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Invalid username and/or password', expected=True) raise ExtractorError('Invalid username and/or password', expected=True)
raise raise
self._request_webpage(target_url, None, 'Following Target URL')
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = self._match_valid_url(url).groups() display_id, video_id = self._match_valid_url(url).groups()
metadata_url = self._download_json(
self._API_BASE + 'client/v1/url', video_id, 'Downloading API endpoint data',
query={'href': urllib.parse.urlparse(url).path})['href']
metadata = self._download_json(metadata_url, video_id)
try: try:
episode = self._download_json( video_data = self._download_json(metadata['urlVideo'], video_id, 'Downloading video data')
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read(), None) error = self._parse_json(e.cause.response.read(), None)
@ -67,37 +108,45 @@ def _real_extract(self, url):
raise ExtractorError(error['error_description'], expected=True) raise ExtractorError(error['error_description'], expected=True)
raise raise
title = episode['titulo']
formats = [] formats = []
subtitles = {} subtitles = {}
for source in episode.get('sources', []): for source in traverse_obj(video_data, ('sources', lambda _, v: url_or_none(v['src']))):
src = source.get('src') src_url = source['src']
if not src:
continue
src_type = source.get('type') src_type = source.get('type')
if src_type == 'application/vnd.apple.mpegurl': if src_type in ('application/vnd.apple.mpegurl', 'application/hls+legacy', 'application/hls+hevc'):
formats, subtitles = self._extract_m3u8_formats( fmts, subs = self._extract_m3u8_formats_and_subtitles(
src, video_id, 'mp4', 'm3u8_native', src_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
m3u8_id='hls', fatal=False) elif src_type in ('application/dash+xml', 'application/dash+hevc'):
elif src_type == 'application/dash+xml': fmts, subs = self._extract_mpd_formats_and_subtitles(
formats, subtitles = self._extract_mpd_formats( src_url, video_id, mpd_id='dash', fatal=False)
src, video_id, mpd_id='dash', fatal=False) else:
continue
heartbeat = episode.get('heartbeat') or {} formats.extend(fmts)
omniture = episode.get('omniture') or {} self._merge_subtitles(subs, target=subtitles)
get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
return { return {
'display_id': display_id, 'display_id': display_id,
'id': video_id, 'id': video_id,
'title': title,
'description': episode.get('descripcion'),
'thumbnail': episode.get('imgPoster'),
'duration': int_or_none(episode.get('duration')),
'formats': formats, 'formats': formats,
'channel': get_meta('channel'),
'season': get_meta('season'),
'episode_number': int_or_none(get_meta('episodeNumber')),
'subtitles': subtitles, 'subtitles': subtitles,
**traverse_obj(video_data, {
'title': ('titulo', {str}),
'description': ('descripcion', {str}),
'duration': ('duration', {int_or_none}),
'thumbnail': ('imgPoster', {url_or_none}, {lambda v: f'{v}1920x1080.jpg'}),
'age_limit': ('ageRating', {parse_age_limit}),
}),
**traverse_obj(metadata, {
'title': ('title', {str}),
'description': ('description', {str}),
'duration': ('duration', {int_or_none}),
'tags': ('tags', ..., 'title', {str}),
'age_limit': ('ageRating', {parse_age_limit}),
'series': ('format', 'title', {str}),
'season': ('currentSeason', 'title', {str}),
'season_number': ('currentSeason', 'seasonNumber', {int_or_none}),
'episode_number': ('numberOfEpisode', {int_or_none}),
'timestamp': ('publicationDate', {int_or_none(scale=1000)}),
'channel': ('channel', 'title', {str}),
}),
} }

View File

@ -353,7 +353,7 @@ def extract_format(page, version):
class CDAFolderIE(InfoExtractor): class CDAFolderIE(InfoExtractor):
_MAX_PAGE_SIZE = 36 _MAX_PAGE_SIZE = 36
_VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>[\w-]+)/folder/(?P<id>\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'https://www.cda.pl/domino264/folder/31188385', 'url': 'https://www.cda.pl/domino264/folder/31188385',
@ -378,6 +378,9 @@ class CDAFolderIE(InfoExtractor):
'title': 'TESTY KOSMETYKÓW', 'title': 'TESTY KOSMETYKÓW',
}, },
'playlist_mincount': 139, 'playlist_mincount': 139,
}, {
'url': 'https://www.cda.pl/FILMY-SERIALE-ANIME-KRESKOWKI-BAJKI/folder/18493422',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -82,7 +82,10 @@ def _get_video_id(self, video_data, course_slug, video_slug):
class LinkedInIE(LinkedInBaseIE): class LinkedInIE(LinkedInBaseIE):
_VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)' _VALID_URL = [
r'https?://(?:www\.)?linkedin\.com/posts/[^/?#]+-(?P<id>\d+)-\w{4}/?(?:[?#]|$)',
r'https?://(?:www\.)?linkedin\.com/feed/update/urn:li:activity:(?P<id>\d+)',
]
_TESTS = [{ _TESTS = [{
'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20', 'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20',
'info_dict': { 'info_dict': {
@ -106,6 +109,9 @@ class LinkedInIE(LinkedInBaseIE):
'like_count': int, 'like_count': int,
'subtitles': 'mincount:1', 'subtitles': 'mincount:1',
}, },
}, {
'url': 'https://www.linkedin.com/feed/update/urn:li:activity:7016901149999955968/?utm_source=share&utm_medium=member_desktop',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,5 +1,9 @@
import json
import random
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, url_or_none from ..utils import int_or_none, jwt_decode_hs256, try_call, url_or_none
from ..utils.traversal import require, traverse_obj from ..utils.traversal import require, traverse_obj
@ -55,13 +59,81 @@ class LocoIE(InfoExtractor):
'upload_date': '20250226', 'upload_date': '20250226',
'modified_date': '20250226', 'modified_date': '20250226',
}, },
}, {
# Requires video authorization
'url': 'https://loco.com/stream/ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'md5': '0513edf85c1e65c9521f555f665387d5',
'info_dict': {
'id': 'ac854641-ae0f-497c-a8ea-4195f6d8cc53',
'ext': 'mp4',
'title': 'DUAS CONTAS DESAFIANTE, RUSH TOP 1 NO BRASIL!',
'description': 'md5:aa77818edd6fe00dd4b6be75cba5f826',
'uploader_id': '7Y9JNAZC3Q',
'channel': 'ayellol',
'channel_follower_count': int,
'comment_count': int,
'view_count': int,
'concurrent_view_count': int,
'like_count': int,
'duration': 1229,
'thumbnail': 'https://static.ivory.getloconow.com/default_thumb/f5aa678b-6d04-45d9-a89a-859af0a8028f.jpg',
'tags': ['Gameplay', 'Carry'],
'series': 'League of Legends',
'timestamp': 1741182253,
'upload_date': '20250305',
'modified_timestamp': 1741182419,
'modified_date': '20250305',
},
}] }]
# From _app.js
_CLIENT_ID = 'TlwKp1zmF6eKFpcisn3FyR18WkhcPkZtzwPVEEC3'
_CLIENT_SECRET = 'Kp7tYlUN7LXvtcSpwYvIitgYcLparbtsQSe5AdyyCdiEJBP53Vt9J8eB4AsLdChIpcO2BM19RA3HsGtqDJFjWmwoonvMSG3ZQmnS8x1YIM8yl82xMXZGbE3NKiqmgBVU'
def _is_jwt_expired(self, token):
return jwt_decode_hs256(token)['exp'] - time.time() < 300
def _get_access_token(self, video_id):
access_token = try_call(lambda: self._get_cookies('https://loco.com')['access_token'].value)
if access_token and not self._is_jwt_expired(access_token):
return access_token
access_token = traverse_obj(self._download_json(
'https://api.getloconow.com/v3/user/device_profile/', video_id,
'Downloading access token', fatal=False, data=json.dumps({
'platform': 7,
'client_id': self._CLIENT_ID,
'client_secret': self._CLIENT_SECRET,
'model': 'Mozilla',
'os_name': 'Win32',
'os_ver': '5.0 (Windows)',
'app_ver': '5.0 (Windows)',
}).encode(), headers={
'Content-Type': 'application/json;charset=utf-8',
'DEVICE-ID': ''.join(random.choices('0123456789abcdef', k=32)) + 'live',
'X-APP-LANG': 'en',
'X-APP-LOCALE': 'en-US',
'X-CLIENT-ID': self._CLIENT_ID,
'X-CLIENT-SECRET': self._CLIENT_SECRET,
'X-PLATFORM': '7',
}), 'access_token')
if access_token and not self._is_jwt_expired(access_token):
self._set_cookie('.loco.com', 'access_token', access_token)
return access_token
def _real_extract(self, url): def _real_extract(self, url):
video_type, video_id = self._match_valid_url(url).group('type', 'id') video_type, video_id = self._match_valid_url(url).group('type', 'id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
stream = traverse_obj(self._search_nextjs_data(webpage, video_id), ( stream = traverse_obj(self._search_nextjs_data(webpage, video_id), (
'props', 'pageProps', ('liveStreamData', 'stream'), {dict}, any, {require('stream info')})) 'props', 'pageProps', ('liveStreamData', 'stream', 'liveStream'), {dict}, any, {require('stream info')}))
if access_token := self._get_access_token(video_id):
self._request_webpage(
'https://drm.loco.com/v1/streams/playback/', video_id,
'Downloading video authorization', fatal=False, headers={
'authorization': access_token,
}, query={
'stream_uid': stream['uid'],
})
return { return {
'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id), 'formats': self._extract_m3u8_formats(stream['conf']['hls'], video_id),

View File

@ -1,31 +1,38 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html,
determine_ext, determine_ext,
extract_attributes,
int_or_none, int_or_none,
str_to_int, join_nonempty,
parse_count,
parse_duration,
parse_iso8601,
url_or_none, url_or_none,
urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class ManyVidsIE(InfoExtractor): class ManyVidsIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)' _VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# preview video # preview video
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/', 'url': 'https://www.manyvids.com/Video/530341/mv-tips-tricks',
'md5': '03f11bb21c52dd12a05be21a5c7dcc97', 'md5': '738dc723f7735ee9602f7ea352a6d058',
'info_dict': { 'info_dict': {
'id': '133957', 'id': '530341-preview',
'ext': 'mp4', 'ext': 'mp4',
'title': 'everthing about me (Preview)', 'title': 'MV Tips & Tricks (Preview)',
'uploader': 'ellyxxix', 'description': r're:I will take you on a tour around .{1313}$',
'thumbnail': r're:https://cdn5\.manyvids\.com/php_uploads/video_images/DestinyDiaz/.+\.jpg',
'uploader': 'DestinyDiaz',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'release_timestamp': 1508419904,
'tags': ['AdultSchool', 'BBW', 'SFW', 'TeacherFetish'],
'release_date': '20171019',
'duration': 3167.0,
}, },
'expected_warnings': ['Only extracting preview'],
}, { }, {
# full video # full video
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/', 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
@ -34,129 +41,68 @@ class ManyVidsIE(InfoExtractor):
'id': '935718', 'id': '935718',
'ext': 'mp4', 'ext': 'mp4',
'title': 'MY FACE REVEAL', 'title': 'MY FACE REVEAL',
'description': 'md5:ec5901d41808b3746fed90face161612', 'description': r're:Today is the day!! I am finally taking off my mask .{445}$',
'thumbnail': r're:https://ods\.manyvids\.com/1001061960/3aa5397f2a723ec4597e344df66ab845/screenshots/.+\.jpg',
'uploader': 'Sarah Calanthe', 'uploader': 'Sarah Calanthe',
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'release_date': '20181110',
'tags': ['EyeContact', 'Interviews', 'MaskFetish', 'MouthFetish', 'Redhead'],
'release_timestamp': 1541851200,
'duration': 224.0,
}, },
}] }]
_API_BASE = 'https://www.manyvids.com/bff/store/video'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json(f'{self._API_BASE}/{video_id}/private', video_id)['data']
formats, preview_only = [], True
real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js' for format_id, path in [
try: ('preview', ['teaser', 'filepath']),
webpage = self._download_webpage(real_url, video_id) ('transcoded', ['transcodedFilepath']),
except Exception: ('filepath', ['filepath']),
# probably useless fallback ]:
webpage = self._download_webpage(url, video_id) format_url = traverse_obj(video_data, (*path, {url_or_none}))
if not format_url:
info = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
webpage, 'meta details', default='')
info = extract_attributes(info)
player = self._search_regex(
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
webpage, 'player details', default='')
player = extract_attributes(player)
video_urls_and_ids = (
(info.get('data-meta-video'), 'video'),
(player.get('data-video-transcoded'), 'transcoded'),
(player.get('data-video-filepath'), 'filepath'),
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
)
def txt_or_none(s, default=None):
return (s.strip() or default) if isinstance(s, str) else default
uploader = txt_or_none(info.get('data-meta-author'))
def mung_title(s):
if uploader:
s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s)
return txt_or_none(s)
title = (
mung_title(info.get('data-meta-title'))
or self._html_search_regex(
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
webpage, 'title', default=None)
or self._html_search_meta(
'twitter:title', webpage, 'title', fatal=True))
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
title += ' (Preview)'
mv_token = self._search_regex(
r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'mv token', default=None, group='value')
if mv_token:
# Sets some cookies
self._download_webpage(
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
video_id, note='Setting format cookies', fatal=False,
data=urlencode_postdata({
'mvtoken': mv_token,
'vid': video_id,
}), headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
})
formats = []
for v_url, fmt in video_urls_and_ids:
v_url = url_or_none(v_url)
if not v_url:
continue continue
if determine_ext(v_url) == 'm3u8': if determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(format_url, video_id, 'mp4', m3u8_id=format_id))
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls'))
else: else:
formats.append({ formats.append({
'url': v_url, 'url': format_url,
'format_id': fmt, 'format_id': format_id,
'preference': -10 if format_id == 'preview' else None,
'quality': 10 if format_id == 'filepath' else None,
'height': int_or_none(
self._search_regex(r'_(\d{2,3}[02468])_', format_url, 'height', default=None)),
}) })
if format_id != 'preview':
preview_only = False
self._remove_duplicate_formats(formats) metadata = traverse_obj(
self._download_json(f'{self._API_BASE}/{video_id}', video_id, fatal=False), 'data')
title = traverse_obj(metadata, ('title', {clean_html}))
for f in formats: if preview_only:
if f.get('height') is None: title = join_nonempty(title, '(Preview)', delim=' ')
f['height'] = int_or_none( video_id += '-preview'
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None)) self.report_warning(
if '/preview/' in f['url']: f'Only extracting preview. Video may be paid or subscription only. {self._login_hint()}')
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
f['preference'] = -10
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1
def get_likes():
likes = self._search_regex(
rf'''(<a\b[^>]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''',
webpage, 'likes', default='')
likes = extract_attributes(likes)
return int_or_none(likes.get('data-likes'))
def get_views():
return str_to_int(self._html_search_regex(
r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>''',
webpage, 'view count', default=None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'description': txt_or_none(info.get('data-meta-description')), **traverse_obj(metadata, {
'uploader': txt_or_none(info.get('data-meta-author')), 'description': ('description', {clean_html}),
'thumbnail': ( 'uploader': ('model', 'displayName', {clean_html}),
url_or_none(info.get('data-meta-image')) 'thumbnail': (('screenshot', 'thumbnail'), {url_or_none}, any),
or url_or_none(player.get('data-video-screenshot'))), 'view_count': ('views', {parse_count}),
'view_count': get_views(), 'like_count': ('likes', {parse_count}),
'like_count': get_likes(), 'release_timestamp': ('launchDate', {parse_iso8601}),
'duration': ('videoDuration', {parse_duration}),
'tags': ('tagList', ..., 'label', {str}, filter, all, filter),
}),
} }

View File

@ -14,8 +14,9 @@
int_or_none, int_or_none,
parse_qs, parse_qs,
srt_subtitles_timecode, srt_subtitles_timecode,
traverse_obj, url_or_none,
) )
from ..utils.traversal import traverse_obj
class PanoptoBaseIE(InfoExtractor): class PanoptoBaseIE(InfoExtractor):
@ -345,21 +346,16 @@ def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs
subtitles = {} subtitles = {}
for stream in streams or []: for stream in streams or []:
stream_formats = [] stream_formats = []
http_stream_url = stream.get('StreamHttpUrl') for stream_url in set(traverse_obj(stream, (('StreamHttpUrl', 'StreamUrl'), {url_or_none}))):
stream_url = stream.get('StreamUrl')
if http_stream_url:
stream_formats.append({'url': http_stream_url})
if stream_url:
media_type = stream.get('ViewerMediaFileTypeName') media_type = stream.get('ViewerMediaFileTypeName')
if media_type in ('hls', ): if media_type in ('hls', ):
m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id) fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, m3u8_id='hls', fatal=False)
stream_formats.extend(m3u8_formats) stream_formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, stream_subtitles) self._merge_subtitles(subs, target=subtitles)
else: else:
stream_formats.append({ stream_formats.append({
'url': stream_url, 'url': stream_url,
'ext': media_type,
}) })
for fmt in stream_formats: for fmt in stream_formats:
fmt.update({ fmt.update({

View File

@ -1,35 +1,142 @@
import base64 import base64
import io import io
import struct import struct
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html,
determine_ext, determine_ext,
float_or_none, float_or_none,
make_archive_id,
parse_iso8601,
qualities, qualities,
remove_end, url_or_none,
remove_start,
try_get,
) )
from ..utils.traversal import subs_list_to_dict, traverse_obj
class RTVEALaCartaIE(InfoExtractor): class RTVEBaseIE(InfoExtractor):
# Reimplementation of https://js2.rtve.es/pages/app-player/3.5.1/js/pf_video.js
@staticmethod
def _decrypt_url(png):
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length_data = encrypted_data.read(4)
length = struct.unpack('!I', length_data)[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
data = bytes(filter(None, data))
alphabet_data, _, url_data = data.partition(b'#')
quality_str, _, url_data = url_data.rpartition(b'%%')
quality_str = quality_str.decode() or ''
alphabet = RTVEBaseIE._get_alphabet(alphabet_data)
url = RTVEBaseIE._get_url(alphabet, url_data)
yield quality_str, url
encrypted_data.read(4) # CRC
@staticmethod
def _get_url(alphabet, url_data):
url = ''
f = 0
e = 3
b = 1
for char in url_data.decode('iso-8859-1'):
if f == 0:
l = int(char) * 10
f = 1
else:
if e == 0:
l += int(char)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
return url
@staticmethod
def _get_alphabet(alphabet_data):
alphabet = []
e = 0
d = 0
for char in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(char)
d = e = (e + 1) % 4
else:
d -= 1
return alphabet
def _extract_png_formats_and_subtitles(self, video_id, media_type='videos'):
formats, subtitles = [], {}
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
for manager in ('rtveplayw', 'default'):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{manager}/{media_type}/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'}, fatal=False)
if not png:
continue
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
video_url, video_id, 'dash', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats, subtitles
def _parse_metadata(self, metadata):
return traverse_obj(metadata, {
'title': ('title', {str.strip}),
'alt_title': ('alt', {str.strip}),
'description': ('description', {clean_html}),
'timestamp': ('dateOfEmission', {parse_iso8601(delimiter=' ')}),
'release_timestamp': ('publicationDate', {parse_iso8601(delimiter=' ')}),
'modified_timestamp': ('modificationDate', {parse_iso8601(delimiter=' ')}),
'thumbnail': (('thumbnail', 'image', 'imageSEO'), {url_or_none}, any),
'duration': ('duration', {float_or_none(scale=1000)}),
'is_live': ('live', {bool}),
'series': (('programTitle', ('programInfo', 'title')), {clean_html}, any),
})
class RTVEALaCartaIE(RTVEBaseIE):
IE_NAME = 'rtve.es:alacarta' IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta' IE_DESC = 'RTVE a la carta and Play'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _VALID_URL = [
r'https?://(?:www\.)?rtve\.es/(?:m/)?(?:(?:alacarta|play)/videos|filmoteca)/(?!directo)(?:[^/?#]+/){2}(?P<id>\d+)',
r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/?#]+/video/[^/?#]+/(?P<id>\d+)',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 'url': 'http://www.rtve.es/alacarta/videos/la-aventura-del-saber/aventuraentornosilla/3088905/',
'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', 'md5': 'a964547824359a5753aef09d79fe984b',
'info_dict': { 'info_dict': {
'id': '2491869', 'id': '3088905',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', 'title': 'En torno a la silla',
'duration': 5024.566, 'duration': 1216.981,
'series': 'Balonmano', 'series': 'La aventura del Saber',
'thumbnail': 'https://img2.rtve.es/v/aventuraentornosilla_3088905.png',
}, },
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, { }, {
'note': 'Live stream', 'note': 'Live stream',
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', 'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
@ -38,140 +145,88 @@ class RTVEALaCartaIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True, 'is_live': True,
'live_status': 'is_live',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
}, },
'params': { 'params': {
'skip_download': 'live stream', 'skip_download': 'live stream',
}, },
}, { }, {
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
'md5': 'd850f3c8731ea53952ebab489cf81cbf', 'md5': 'f3cf0d1902d008c48c793e736706c174',
'info_dict': { 'info_dict': {
'id': '4236788', 'id': '4236788',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Servir y proteger - Capítulo 104', 'title': 'Episodio 104',
'duration': 3222.0, 'duration': 3222.8,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'Servir y proteger',
}, },
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}, { }, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.rtve.es/play/videos/saber-vivir/07-07-24/16177116/',
'md5': 'a5b24fcdfa3ff5cb7908aba53d22d4b6',
'info_dict': {
'id': '16177116',
'ext': 'mp4',
'title': 'Saber vivir - 07/07/24',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 2162.68,
'series': 'Saber vivir',
},
}, {
'url': 'https://www.rtve.es/infantil/serie/agus-lui-churros-crafts/video/gusano/7048976/',
'info_dict': {
'id': '7048976',
'ext': 'mp4',
'title': 'Gusano',
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'duration': 292.86,
'series': 'Agus & Lui: Churros y Crafts',
'_old_archive_ids': ['rtveinfantil 7048976'],
},
}] }]
def _real_initialize(self): def _get_subtitles(self, video_id):
user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8') subtitle_data = self._download_json(
self._manager = self._download_json( f'https://api2.rtve.es/api/videos/{video_id}/subtitulos.json', video_id,
'http://www.rtve.es/odin/loki/' + user_agent_b64, 'Downloading subtitles info')
None, 'Fetching manager info')['manager'] return traverse_obj(subtitle_data, ('page', 'items', ..., {
'id': ('lang', {str}),
@staticmethod 'url': ('src', {url_or_none}),
def _decrypt_url(png): }, all, {subs_list_to_dict(lang='es')}))
encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
while True:
length = struct.unpack('!I', encrypted_data.read(4))[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
data = encrypted_data.read(length)
if chunk_type == b'tEXt':
alphabet_data, text = data.split(b'\0')
quality, url_data = text.split(b'%%')
alphabet = []
e = 0
d = 0
for l in alphabet_data.decode('iso-8859-1'):
if d == 0:
alphabet.append(l)
d = e = (e + 1) % 4
else:
d -= 1
url = ''
f = 0
e = 3
b = 1
for letter in url_data.decode('iso-8859-1'):
if f == 0:
l = int(letter) * 10
f = 1
else:
if e == 0:
l += int(letter)
url += alphabet[l]
e = (b + 3) % 4
f = 0
b += 1
else:
e -= 1
yield quality.decode(), url
encrypted_data.read(4) # CRC
def _extract_png_formats(self, video_id):
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
video_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, video_url in self._decrypt_url(png):
ext = determine_ext(video_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': video_url,
})
return formats
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
info = self._download_json( metadata = self._download_json(
f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json', f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
video_id)['page']['items'][0] video_id)['page']['items'][0]
if info['state'] == 'DESPU': if metadata['state'] == 'DESPU':
raise ExtractorError('The video is no longer available', expected=True) raise ExtractorError('The video is no longer available', expected=True)
title = info['title'].strip() formats, subtitles = self._extract_png_formats_and_subtitles(video_id)
formats = self._extract_png_formats(video_id)
subtitles = None self._merge_subtitles(self.extract_subtitles(video_id), target=subtitles)
sbt_file = info.get('sbtFile')
if sbt_file:
subtitles = self.extract_subtitles(video_id, sbt_file)
is_live = info.get('live') is True is_infantil = urllib.parse.urlparse(url).path.startswith('/infantil/')
return { return {
'id': video_id, 'id': video_id,
'title': title,
'formats': formats, 'formats': formats,
'thumbnail': info.get('image'),
'subtitles': subtitles, 'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), 1000), **self._parse_metadata(metadata),
'is_live': is_live, '_old_archive_ids': [make_archive_id('rtveinfantil', video_id)] if is_infantil else None,
'series': info.get('programTitle'),
} }
def _get_subtitles(self, video_id, sub_file):
subs = self._download_json(
sub_file + '.json', video_id,
'Downloading subtitles info')['page']['items']
return dict(
(s['lang'], [{'ext': 'vtt', 'url': s['src']}])
for s in subs)
class RTVEAudioIE(RTVEBaseIE):
class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:audio' IE_NAME = 'rtve.es:audio'
IE_DESC = 'RTVE audio' IE_DESC = 'RTVE audio'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/(?:[^/?#]+/){2}(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/', 'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
@ -180,9 +235,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '5889192', 'id': '5889192',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Códigos informáticos', 'title': 'Códigos informáticos',
'thumbnail': r're:https?://.+/1598856591583.jpg', 'alt_title': 'Códigos informáticos - Escuchar ahora',
'duration': 349.440, 'duration': 349.440,
'series': 'A hombros de gigantes', 'series': 'A hombros de gigantes',
'description': 'md5:72b0d7c1ca20fd327bdfff7ac0171afb',
'thumbnail': 'https://img2.rtve.es/a/palabra-ingeniero-codigos-informaticos-270421_5889192.png',
}, },
}, { }, {
'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/', 'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
@ -191,9 +248,11 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '5791165', 'id': '5791165',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Ignatius Farray', 'title': 'Ignatius Farray',
'alt_title': 'En Radio 3 - Ignatius Farray - 13/02/21 - escuchar ahora',
'thumbnail': r're:https?://.+/1613243011863.jpg', 'thumbnail': r're:https?://.+/1613243011863.jpg',
'duration': 3559.559, 'duration': 3559.559,
'series': 'En Radio 3', 'series': 'En Radio 3',
'description': 'md5:124aa60b461e0b1724a380bad3bc4040',
}, },
}, { }, {
'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/', 'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
@ -202,126 +261,101 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
'id': '6082623', 'id': '6082623',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Capítulo 26 y último: La muerte de Victor', 'title': 'Capítulo 26 y último: La muerte de Victor',
'alt_title': 'Frankenstein o el moderno Prometeo - Capítulo 26 y último: La muerte de Victor',
'thumbnail': r're:https?://.+/1632147445707.jpg', 'thumbnail': r're:https?://.+/1632147445707.jpg',
'duration': 3174.086, 'duration': 3174.086,
'series': 'Frankenstein o el moderno Prometeo', 'series': 'Frankenstein o el moderno Prometeo',
'description': 'md5:4ee6fcb82ebe2e46d267e1d1c1a8f7b5',
}, },
}] }]
def _extract_png_formats(self, audio_id):
"""
This function retrieves media related png thumbnail which obfuscate
valuable information about the media. This information is decrypted
via base class _decrypt_url function providing media quality and
media url
"""
png = self._download_webpage(
f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
audio_id, 'Downloading url information', query={'q': 'v2'})
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
formats = []
for quality, audio_url in self._decrypt_url(png):
ext = determine_ext(audio_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
audio_url, audio_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
audio_url, audio_id, 'dash', fatal=False))
else:
formats.append({
'format_id': quality,
'quality': q(quality),
'url': audio_url,
})
return formats
def _real_extract(self, url): def _real_extract(self, url):
audio_id = self._match_id(url) audio_id = self._match_id(url)
info = self._download_json( metadata = self._download_json(
f'https://www.rtve.es/api/audios/{audio_id}.json', f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0]
audio_id)['page']['items'][0]
formats, subtitles = self._extract_png_formats_and_subtitles(audio_id, media_type='audios')
return { return {
'id': audio_id, 'id': audio_id,
'title': info['title'].strip(), 'formats': formats,
'thumbnail': info.get('thumbnail'), 'subtitles': subtitles,
'duration': float_or_none(info.get('duration'), 1000), **self._parse_metadata(metadata),
'series': try_get(info, lambda x: x['programInfo']['title']),
'formats': self._extract_png_formats(audio_id),
} }
class RTVEInfantilIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE class RTVELiveIE(RTVEBaseIE):
IE_NAME = 'rtve.es:infantil'
IE_DESC = 'RTVE infantil'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
_TESTS = [{
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
'info_dict': {
'id': '3040283',
'ext': 'mp4',
'title': 'Maneras de vivir',
'thumbnail': r're:https?://.+/1426182947956\.JPG',
'duration': 357.958,
},
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
}]
class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE
IE_NAME = 'rtve.es:live' IE_NAME = 'rtve.es:live'
IE_DESC = 'RTVE.es live streams' IE_DESC = 'RTVE.es live streams'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' _VALID_URL = [
r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)',
r'https?://(?:www\.)?rtve\.es/play/videos/directo/[^/?#]+/(?P<id>[a-zA-Z0-9-]+)',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtve.es/directo/la-1/', 'url': 'http://www.rtve.es/directo/la-1/',
'info_dict': { 'info_dict': {
'id': 'la-1', 'id': 'la-1',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
}, },
'params': { 'params': {'skip_download': 'live stream'},
'skip_download': 'live stream', }, {
'url': 'https://www.rtve.es/play/videos/directo/deportes/tdp/',
'info_dict': {
'id': 'tdp',
'ext': 'mp4',
'live_status': 'is_live',
'title': str,
'description': str,
'thumbnail': r're:https://img2\d\.rtve\.es/resources/thumbslive/\d+\.jpg',
'timestamp': int,
'upload_date': str,
}, },
'params': {'skip_download': 'live stream'},
}, {
'url': 'http://www.rtve.es/play/videos/directo/canales-lineales/la-1/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
title = remove_start(title, 'Estoy viendo ')
vidplayer_id = self._search_regex( data_setup = self._search_json(
(r'playerId=player([0-9]+)', r'<div[^>]+class="[^"]*videoPlayer[^"]*"[^>]*data-setup=\'',
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', webpage, 'data_setup', video_id)
r'data-id=["\'](\d+)'),
webpage, 'internal video ID') formats, subtitles = self._extract_png_formats_and_subtitles(data_setup['idAsset'])
return { return {
'id': video_id, 'id': video_id,
'title': title, **self._search_json_ld(webpage, video_id, fatal=False),
'formats': self._extract_png_formats(vidplayer_id), 'title': self._html_extract_title(webpage),
'formats': formats,
'subtitles': subtitles,
'is_live': True, 'is_live': True,
} }
class RTVETelevisionIE(InfoExtractor): class RTVETelevisionIE(InfoExtractor):
IE_NAME = 'rtve.es:television' IE_NAME = 'rtve.es:television'
_VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/?#]+/[^/?#]+/(?P<id>\d+).shtml'
_TEST = { _TEST = {
'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', 'url': 'https://www.rtve.es/television/20091103/video-inedito-del-8o-programa/299020.shtml',
'info_dict': { 'info_dict': {
'id': '3069778', 'id': '572515',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Documentos TV - La revolución del móvil', 'title': 'Clase inédita',
'duration': 3496.948, 'duration': 335.817,
'thumbnail': r're:https://img2\.rtve\.es/v/.*\.png',
'series': 'El coro de la cárcel',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -332,11 +366,8 @@ def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
alacarta_url = self._search_regex( play_url = self._html_search_meta('contentUrl', webpage)
r'data-location="alacarta_videos"[^<]+url&quot;:&quot;(http://www\.rtve\.es/alacarta.+?)&', if play_url is None:
webpage, 'alacarta url', default=None) raise ExtractorError('The webpage doesn\'t contain any video', expected=True)
if alacarta_url is None:
raise ExtractorError(
'The webpage doesn\'t contain any video', expected=True)
return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key()) return self.url_result(play_url, ie=RTVEALaCartaIE.ie_key())

View File

@ -513,7 +513,7 @@ def _parse_video(self, video, with_url=True):
class TVPVODVideoIE(TVPVODBaseIE): class TVPVODVideoIE(TVPVODBaseIE):
IE_NAME = 'tvp:vod' IE_NAME = 'tvp:vod'
_VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek-\d+,S\d+E\d+)?,(?P<id>\d+)/?(?:[?#]|$)' _VALID_URL = r'https?://vod\.tvp\.pl/(?P<category>[a-z\d-]+,\d+)/[a-z\d-]+(?<!-odcinki)(?:-odcinki,\d+/odcinek--?\d+,S-?\d+E-?\d+)?,(?P<id>\d+)/?(?:[?#]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357', 'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
@ -568,6 +568,9 @@ class TVPVODVideoIE(TVPVODBaseIE):
'live_status': 'is_live', 'live_status': 'is_live',
'thumbnail': 're:https?://.+', 'thumbnail': 're:https?://.+',
}, },
}, {
'url': 'https://vod.tvp.pl/informacje-i-publicystyka,205/konskie-2025-debata-przedwyborcza-odcinki,2028435/odcinek--1,S01E-1,2028419',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -1,21 +1,13 @@
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import clean_html, extract_attributes, remove_end, unified_timestamp, url_or_none from ..utils import clean_html, extract_attributes, parse_qs, remove_end, require, unified_timestamp, url_or_none
from ..utils.traversal import find_elements, traverse_obj from ..utils.traversal import find_element, find_elements, traverse_obj
class TvwBaseIE(InfoExtractor): class TvwIE(InfoExtractor):
def _get_title(self, webpage): IE_NAME = 'tvw'
return remove_end(self._og_search_title(webpage, default=None), ' - TVW')
def _get_description(self, webpage):
return self._og_search_description(webpage, default=None)
class TvwIE(TvwBaseIE):
_VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?tvw\.org/video/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/', 'url': 'https://tvw.org/video/billy-frank-jr-statue-maquette-unveiling-ceremony-2024011211/',
'md5': '9ceb94fe2bb7fd726f74f16356825703', 'md5': '9ceb94fe2bb7fd726f74f16356825703',
@ -111,8 +103,8 @@ def _real_extract(self, url):
'display_id': display_id, 'display_id': display_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'title': self._get_title(webpage), 'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
'description': self._get_description(webpage), 'description': self._og_search_description(webpage, default=None),
**traverse_obj(video_data, { **traverse_obj(video_data, {
'title': ('title', {str}), 'title': ('title', {str}),
'description': ('description', {clean_html}), 'description': ('description', {clean_html}),
@ -125,7 +117,7 @@ def _real_extract(self, url):
} }
class TvwNewsIE(TvwBaseIE): class TvwNewsIE(InfoExtractor):
IE_NAME = 'Tvw:News' IE_NAME = 'Tvw:News'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/(\d{4})/(0[1-9]|1[0-2])/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?tvw\.org/(\d{4})/(0[1-9]|1[0-2])/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
@ -163,4 +155,44 @@ def _real_extract(self, url):
return self.playlist_result( return self.playlist_result(
(self.url_result(f'https://tvw.org/watch?eventID={video_id}') for video_id in video_ids), playlist_id, (self.url_result(f'https://tvw.org/watch?eventID={video_id}') for video_id in video_ids), playlist_id,
playlist_title=self._get_title(webpage), playlist_description=self._get_description(webpage)) playlist_title=remove_end(self._og_search_title(webpage, default=None), ' - TVW'), playlist_description=self._og_search_description(webpage, default=None))
class TvwTvChannelsIE(InfoExtractor):
IE_NAME = 'tvw:tvchannels'
_VALID_URL = r'https?://(?:www\.)?tvw\.org/tvchannels/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://tvw.org/tvchannels/air/',
'info_dict': {
'id': 'air',
'ext': 'mp4',
'title': r're:TVW Cable Channel Live Stream',
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
'live_status': 'is_live',
},
}, {
'url': 'https://tvw.org/tvchannels/tvw2/',
'info_dict': {
'id': 'tvw2',
'ext': 'mp4',
'title': r're:TVW-2 Broadcast Channel',
'thumbnail': r're:https?://.+/.+\.(?:jpe?g|png)$',
'live_status': 'is_live',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
m3u8_url = traverse_obj(webpage, (
{find_element(id='invintus-persistent-stream-frame', html=True)}, {extract_attributes},
'src', {parse_qs}, 'encoder', 0, {json.loads}, 'live247URI', {url_or_none}, {require('stream url')}))
return {
'id': video_id,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', live=True),
'title': remove_end(self._og_search_title(webpage, default=None), ' - TVW'),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'is_live': True,
}

View File

@ -524,10 +524,16 @@ def _entries(self, tab, item_id, ytcfg, delegated_session_id, visitor_data):
response = self._extract_response( response = self._extract_response(
item_id=f'{item_id} page {page_num}', item_id=f'{item_id} page {page_num}',
query=continuation, headers=headers, ytcfg=ytcfg, query=continuation, headers=headers, ytcfg=ytcfg,
check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')) check_get_keys=(
'continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints',
# Playlist recommendations may return with no data - ignore
('responseContext', 'serviceTrackingParams', ..., 'params', ..., lambda k, v: k == 'key' and v == 'GetRecommendedMusicPlaylists_rid'),
))
if not response: if not response:
break break
continuation = None
# Extracting updated visitor data is required to prevent an infinite extraction loop in some cases # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
# See: https://github.com/ytdl-org/youtube-dl/issues/28702 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
visitor_data = self._extract_visitor_data(response) or visitor_data visitor_data = self._extract_visitor_data(response) or visitor_data
@ -564,7 +570,13 @@ def _entries(self, tab, item_id, ytcfg, delegated_session_id, visitor_data):
yield from func(video_items_renderer) yield from func(video_items_renderer)
continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
if not video_items_renderer: # In the case only a continuation is returned, try to follow it.
# We extract this after trying to extract non-continuation items as otherwise this
# may be prioritized over other continuations.
# see: https://github.com/yt-dlp/yt-dlp/issues/12933
continuation = continuation or self._extract_continuation({'contents': [continuation_item]})
if not continuation and not video_items_renderer:
break break
@staticmethod @staticmethod
@ -999,14 +1011,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner Ph.D. - Playlists', 'title': 'Igor Kleiner - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner Ph.D.', 'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'channel': 'Igor Kleiner Ph.D.', 'channel': 'Igor Kleiner ',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'tags': 'count:23',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int, 'channel_follower_count': int,
}, },
@ -1016,18 +1028,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 94, 'playlist_mincount': 94,
'info_dict': { 'info_dict': {
'id': 'UCqj7Cz7revf5maW9g5pgNcg', 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
'title': 'Igor Kleiner Ph.D. - Playlists', 'title': 'Igor Kleiner - Playlists',
'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
'uploader': 'Igor Kleiner Ph.D.', 'uploader': 'Igor Kleiner ',
'uploader_id': '@IgorDataScience', 'uploader_id': '@IgorDataScience',
'uploader_url': 'https://www.youtube.com/@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'tags': 'count:23',
'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
'channel': 'Igor Kleiner Ph.D.', 'channel': 'Igor Kleiner ',
'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
'channel_follower_count': int, 'channel_follower_count': int,
}, },
}, { }, {
# TODO: fix channel_is_verified extraction
'note': 'playlists, series', 'note': 'playlists, series',
'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
'playlist_mincount': 5, 'playlist_mincount': 5,
@ -1066,22 +1079,23 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'basic, single video playlist', 'note': 'basic, single video playlist',
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'info_dict': { 'info_dict': {
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', 'id': 'PLt5yu3-wZAlSLRHmI1qNm0wjyVNWw1pCU',
'title': 'youtube-dl public playlist', 'title': 'single video playlist',
'description': '', 'description': '',
'tags': [], 'tags': [],
'view_count': int, 'view_count': int,
'modified_date': '20201130', 'modified_date': '20250417',
'channel': 'Sergey M.', 'channel': 'cole-dlp-test-acc',
'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'availability': 'public', 'availability': 'public',
'uploader': 'Sergey M.', 'uploader': 'cole-dlp-test-acc',
'uploader_url': 'https://www.youtube.com/@sergeym.6173', 'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@sergeym.6173', 'uploader_id': '@coletdjnz',
}, },
'playlist_count': 1, 'playlist_count': 1,
}, { }, {
@ -1171,11 +1185,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 17, 'playlist_mincount': 17,
}, { }, {
'note': 'Community tab', 'note': 'Posts tab',
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
'info_dict': { 'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Community', 'title': 'lex will - Posts',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will', 'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
@ -1188,30 +1202,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 18, 'playlist_mincount': 18,
}, { }, {
'note': 'Channels tab', # TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
'info_dict': {
'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'title': 'lex will - Channels',
'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
'channel': 'lex will',
'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
'tags': ['bible', 'history', 'prophesy'],
'channel_follower_count': int,
'uploader_url': 'https://www.youtube.com/@lexwill718',
'uploader_id': '@lexwill718',
'uploader': 'lex will',
},
'playlist_mincount': 12,
}, {
'note': 'Search tab', 'note': 'Search tab',
'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
'playlist_mincount': 40, 'playlist_mincount': 40,
'info_dict': { 'info_dict': {
'id': 'UCYO_jab_esuFRV4b17AJtAw', 'id': 'UCYO_jab_esuFRV4b17AJtAw',
'title': '3Blue1Brown - Search - linear algebra', 'title': '3Blue1Brown - Search - linear algebra',
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'tags': ['Mathematics'], 'tags': ['Mathematics'],
'channel': '3Blue1Brown', 'channel': '3Blue1Brown',
@ -1232,6 +1230,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': { 'info_dict': {
@ -1294,24 +1293,25 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 21, 'playlist_mincount': 21,
}, { }, {
# TODO: fix availability extraction
'note': 'Playlist with "show unavailable videos" button', 'note': 'Playlist with "show unavailable videos" button',
'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', 'url': 'https://www.youtube.com/playlist?list=PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
'info_dict': { 'info_dict': {
'title': 'Uploads from Phim Siêu Nhân Nhật Bản', 'title': 'The Memes Of 2010s.....',
'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', 'id': 'PLYwq8WOe86_xGmR7FrcJq8Sb7VW8K3Tt2',
'view_count': int, 'view_count': int,
'channel': 'Phim Siêu Nhân Nhật Bản', 'channel': "I'm Not JiNxEd",
'tags': [], 'tags': [],
'description': '', 'description': 'md5:44dc3b315ba69394feaafa2f40e7b2a1',
'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q', 'channel_url': 'https://www.youtube.com/channel/UC5H5H85D1QE5-fuWWQ1hdNg',
'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', 'channel_id': 'UC5H5H85D1QE5-fuWWQ1hdNg',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'availability': 'public', 'availability': 'public',
'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban', 'uploader_url': 'https://www.youtube.com/@imnotjinxed1998',
'uploader_id': '@phimsieunhannhatban', 'uploader_id': '@imnotjinxed1998',
'uploader': 'Phim Siêu Nhân Nhật Bản', 'uploader': "I'm Not JiNxEd",
}, },
'playlist_mincount': 200, 'playlist_mincount': 150,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
'note': 'Playlist with unavailable videos in page 7', 'note': 'Playlist with unavailable videos in page 7',
@ -1334,6 +1334,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1000, 'playlist_mincount': 1000,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
# TODO: fix availability extraction
'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844', 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
'info_dict': { 'info_dict': {
@ -1384,7 +1385,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
'info_dict': { 'info_dict': {
'id': 'hGkQjiJLjWQ', # This will keep changing 'id': 'YDvsBbKfLPA', # This will keep changing
'ext': 'mp4', 'ext': 'mp4',
'title': str, 'title': str,
'upload_date': r're:\d{8}', 'upload_date': r're:\d{8}',
@ -1409,6 +1410,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_id': '@SkyNews', 'uploader_id': '@SkyNews',
'uploader': 'Sky News', 'uploader': 'Sky News',
'channel_is_verified': True, 'channel_is_verified': True,
'media_type': 'livestream',
'timestamp': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -1496,6 +1499,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix availability extraction
'note': 'VLPL, should redirect to playlist?list=PL...', 'note': 'VLPL, should redirect to playlist?list=PL...',
'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
'info_dict': { 'info_dict': {
@ -1537,6 +1541,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg) # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
# Treat as a general feed # Treat as a general feed
# TODO: fix extraction
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
'info_dict': { 'info_dict': {
'id': 'UCtFRv9O2AHqOZjjynzrv-xg', 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
@ -1560,21 +1565,21 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['YouTube Music is not directly supported'], 'expected_warnings': ['YouTube Music is not directly supported'],
}, { }, {
'note': 'unlisted single video playlist', 'note': 'unlisted single video playlist',
'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'info_dict': { 'info_dict': {
'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', 'id': 'PLt5yu3-wZAlQLfIN0MMgp0wVV6MP3bM4_',
'title': 'yt-dlp unlisted playlist test', 'title': 'unlisted playlist',
'availability': 'unlisted', 'availability': 'unlisted',
'tags': [], 'tags': [],
'modified_date': '20220418', 'modified_date': '20250417',
'channel': 'colethedj', 'channel': 'cole-dlp-test-acc',
'view_count': int, 'view_count': int,
'description': '', 'description': '',
'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q', 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
'uploader_url': 'https://www.youtube.com/@colethedj1894', 'uploader_url': 'https://www.youtube.com/@coletdjnz',
'uploader_id': '@colethedj1894', 'uploader_id': '@coletdjnz',
'uploader': 'colethedj', 'uploader': 'cole-dlp-test-acc',
}, },
'playlist': [{ 'playlist': [{
'info_dict': { 'info_dict': {
@ -1596,6 +1601,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 1, 'playlist_count': 1,
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
}, { }, {
# By default, recommended is always empty.
'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData', 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
'url': 'https://www.youtube.com/feed/recommended', 'url': 'https://www.youtube.com/feed/recommended',
'info_dict': { 'info_dict': {
@ -1603,7 +1609,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'title': 'recommended', 'title': 'recommended',
'tags': [], 'tags': [],
}, },
'playlist_mincount': 50, 'playlist_count': 0,
'params': { 'params': {
'skip_download': True, 'skip_download': True,
'extractor_args': {'youtubetab': {'skip': ['webpage']}}, 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
@ -1628,6 +1634,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'skip': 'Query for sorting no longer works', 'skip': 'Query for sorting no longer works',
}, { }, {
# TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
'info_dict': { 'info_dict': {
@ -1654,11 +1661,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True, 'only_matching': True,
}, { }, {
# TODO: fix metadata extraction
'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")', 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'info_dict': { 'info_dict': {
'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
'modified_date': '20220407', 'modified_date': '20250115',
'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
'tags': [], 'tags': [],
'availability': 'unlisted', 'availability': 'unlisted',
@ -1692,6 +1700,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'expected_warnings': ['Preferring "ja"'], 'expected_warnings': ['Preferring "ja"'],
}, { }, {
# XXX: this should really check flat playlist entries, but the test suite doesn't support that # XXX: this should really check flat playlist entries, but the test suite doesn't support that
# TODO: fix availability extraction
'note': 'preferred lang set with playlist with translated video titles', 'note': 'preferred lang set with playlist with translated video titles',
'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0', 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
'info_dict': { 'info_dict': {
@ -1714,6 +1723,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# shorts audio pivot for 2GtVksBMYFM. # shorts audio pivot for 2GtVksBMYFM.
'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==', 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
# TODO: fix extraction
'info_dict': { 'info_dict': {
'id': 'sfv_audio_pivot', 'id': 'sfv_audio_pivot',
'title': 'sfv_audio_pivot', 'title': 'sfv_audio_pivot',
@ -1751,6 +1761,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 8, 'playlist_mincount': 8,
}, { }, {
# Should get three playlists for videos, shorts and streams tabs # Should get three playlists for videos, shorts and streams tabs
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'info_dict': { 'info_dict': {
'id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
@ -1758,7 +1769,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_follower_count': int, 'channel_follower_count': int,
'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2', 'description': 'md5:01e53f350ab8ad6fcf7c4fedb3c1b99f',
'channel': 'Polka Ch. 尾丸ポルカ', 'channel': 'Polka Ch. 尾丸ポルカ',
'tags': 'count:35', 'tags': 'count:35',
'uploader_url': 'https://www.youtube.com/@OmaruPolka', 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
@ -1769,14 +1780,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 3, 'playlist_count': 3,
}, { }, {
# Shorts tab with channel with handle # Shorts tab with channel with handle
# TODO: fix channel description # TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@NotJustBikes/shorts', 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
'info_dict': { 'info_dict': {
'id': 'UC0intLFzLaudFG-xAvUEO-A', 'id': 'UC0intLFzLaudFG-xAvUEO-A',
'title': 'Not Just Bikes - Shorts', 'title': 'Not Just Bikes - Shorts',
'tags': 'count:10', 'tags': 'count:10',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A', 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'description': 'md5:5e82545b3a041345927a92d0585df247', 'description': 'md5:1d9fc1bad7f13a487299d1fe1712e031',
'channel_follower_count': int, 'channel_follower_count': int,
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A', 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes', 'channel': 'Not Just Bikes',
@ -1797,7 +1808,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig', 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
'channel': '中村悠一', 'channel': '中村悠一',
'channel_follower_count': int, 'channel_follower_count': int,
'description': 'md5:e744f6c93dafa7a03c0c6deecb157300', 'description': 'md5:e8fd705073a594f27d6d6d020da560dc',
'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura', 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
'uploader_id': '@Yuichi-Nakamura', 'uploader_id': '@Yuichi-Nakamura',
'uploader': '中村悠一', 'uploader': '中村悠一',
@ -1815,6 +1826,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}, { }, {
# No videos tab but has a shorts tab # No videos tab but has a shorts tab
# TODO: fix metadata extraction
'url': 'https://www.youtube.com/c/TKFShorts', 'url': 'https://www.youtube.com/c/TKFShorts',
'info_dict': { 'info_dict': {
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg', 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
@ -1851,6 +1863,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, { }, {
# Shorts url result in shorts tab # Shorts url result in shorts tab
# TODO: Fix channel id extraction # TODO: Fix channel id extraction
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
'info_dict': { 'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA', 'id': 'UCiu-3thuViMebBjw_5nWYrA',
@ -1879,6 +1892,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
}, { }, {
# Live video status should be extracted # Live video status should be extracted
# TODO: fix test suite, 208163447408c78673b08c172beafe5c310fb167 broke this test
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live', 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
'info_dict': { 'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg', 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
@ -1907,6 +1921,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 1, 'playlist_mincount': 1,
}, { }, {
# Channel renderer metadata. Contains number of videos on the channel # Channel renderer metadata. Contains number of videos on the channel
# TODO: channels tab removed, change this test to use another page with channel renderer
'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
'info_dict': { 'info_dict': {
'id': 'UCiu-3thuViMebBjw_5nWYrA', 'id': 'UCiu-3thuViMebBjw_5nWYrA',
@ -1940,7 +1955,9 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
}], }],
'params': {'extract_flat': True}, 'params': {'extract_flat': True},
'skip': 'channels tab removed',
}, { }, {
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@3blue1brown/about', 'url': 'https://www.youtube.com/@3blue1brown/about',
'info_dict': { 'info_dict': {
'id': '@3blue1brown', 'id': '@3blue1brown',
@ -1950,7 +1967,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
'channel': '3Blue1Brown', 'channel': '3Blue1Brown',
'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'description': 'md5:602e3789e6a0cb7d9d352186b720e395',
'uploader_url': 'https://www.youtube.com/@3blue1brown', 'uploader_url': 'https://www.youtube.com/@3blue1brown',
'uploader_id': '@3blue1brown', 'uploader_id': '@3blue1brown',
'uploader': '3Blue1Brown', 'uploader': '3Blue1Brown',
@ -1976,6 +1993,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_count': 5, 'playlist_count': 5,
}, { }, {
# Releases tab, with rich entry playlistRenderers (same as Podcasts tab) # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
# TODO: fix channel_is_verified extraction
'url': 'https://www.youtube.com/@AHimitsu/releases', 'url': 'https://www.youtube.com/@AHimitsu/releases',
'info_dict': { 'info_dict': {
'id': 'UCgFwu-j5-xNJml2FtTrrB3A', 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
@ -2015,6 +2033,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'playlist_mincount': 100, 'playlist_mincount': 100,
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, { }, {
# TODO: fix channel_is_verified extraction
'note': 'Tags containing spaces', 'note': 'Tags containing spaces',
'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ', 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
'playlist_count': 3, 'playlist_count': 3,
@ -2035,6 +2054,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'challenges', 'sketches', 'scary games', 'funny games', 'rage games', 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
'mark fischbach'], 'mark fischbach'],
}, },
}, {
# https://github.com/yt-dlp/yt-dlp/issues/12933
'note': 'streams tab, some scheduled streams. Empty intermediate response with only continuation - must follow',
'url': 'https://www.youtube.com/@sbcitygov/streams',
'playlist_mincount': 150,
'info_dict': {
'id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'channel': 'sbcitygov',
'channel_id': 'UCH6-qfQwlUgz9SAf05jvc_w',
'title': 'sbcitygov - Live',
'channel_follower_count': int,
'description': 'md5:ca1a92059835c071e33b3db52f4a6d67',
'uploader_id': '@sbcitygov',
'uploader_url': 'https://www.youtube.com/@sbcitygov',
'uploader': 'sbcitygov',
'channel_url': 'https://www.youtube.com/channel/UCH6-qfQwlUgz9SAf05jvc_w',
'tags': [],
},
}] }]
@classmethod @classmethod

View File

@ -3646,6 +3646,8 @@ def feed_entry(name):
if 'sign in' in reason.lower(): if 'sign in' in reason.lower():
reason = remove_end(reason, 'This helps protect our community. Learn more') reason = remove_end(reason, 'This helps protect our community. Learn more')
reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}' reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
elif get_first(playability_statuses, ('errorScreen', 'playerCaptchaViewModel', {dict})):
reason += '. YouTube is requiring a captcha challenge before playback'
self.raise_no_formats(reason, expected=True) self.raise_no_formats(reason, expected=True)
keywords = get_first(video_details, 'keywords', expected_type=list) or [] keywords = get_first(video_details, 'keywords', expected_type=list) or []
@ -3874,7 +3876,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
if not traverse_obj(initial_data, 'contents'): if not traverse_obj(initial_data, 'contents'):
self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.') self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
initial_data = None initial_data = None
if not initial_data: if not initial_data and 'initial_data' not in self._configuration_arg('player_skip'):
query = {'videoId': video_id} query = {'videoId': video_id}
query.update(self._get_checkok_params()) query.update(self._get_checkok_params())
initial_data = self._extract_response( initial_data = self._extract_response(

View File

@ -3,6 +3,7 @@
from .common import ( from .common import (
HEADRequest, HEADRequest,
PATCHRequest,
PUTRequest, PUTRequest,
Request, Request,
RequestDirector, RequestDirector,

View File

@ -505,6 +505,7 @@ def copy(self):
HEADRequest = functools.partial(Request, method='HEAD') HEADRequest = functools.partial(Request, method='HEAD')
PATCHRequest = functools.partial(Request, method='PATCH')
PUTRequest = functools.partial(Request, method='PUT') PUTRequest = functools.partial(Request, method='PUT')